diff --git a/.claude/skills/generate-llms-catalog/SKILL.md b/.claude/skills/generate-llms-catalog/SKILL.md new file mode 100644 index 00000000..83e5758a --- /dev/null +++ b/.claude/skills/generate-llms-catalog/SKILL.md @@ -0,0 +1,77 @@ +--- +name: generate-llms-catalog +description: >- + Generate llms.txt and catalog.json files for LLM consumption from the + Bactopia docs site content. Run this skill whenever documentation content + is added, removed, or updated, when the user asks to regenerate the LLM + catalog, refresh llms.txt, update the LLM index, or sync the static LLM + files with the current documentation. +--- + +# Generate LLMs Catalog + +Regenerate `static/llms.txt` and `static/catalog.json` from all documentation +content files. These files provide machine-readable indexes of the Bactopia +documentation for LLM consumption. + +- `llms.txt` follows the [llms.txt standard](https://llmstxt.org/) +- `catalog.json` provides structured metadata (title, description, tags, URL) per page + +## Steps + +### 1. Run the generator + +```bash +python bin/generate-llms-catalog.py +``` + +Or via Make: + +```bash +make llms-catalog +``` + +The script walks all 6 content areas (docs, bactopia-tools, bactopia-pipelines, +developers, impact, blog), parses YAML frontmatter from each `.md`/`.mdx` file, +and generates both output files. + +Expected output: + +``` +Collecting Bactopia... + 7 pages +Collecting Bactopia Tools... + 68 pages +Collecting Bactopia Pipelines... + 4 pages +Collecting Developers... + 221 pages +Collecting Impact & Outreach... + 5 pages +Collecting Blog... + 2 pages + +Wrote static/llms.txt +Wrote static/catalog.json + +Total: 307 pages across 6 sections +``` + +### 2. 
Verify output + +Check that both files were generated correctly: + +- `static/llms.txt` -- should have `# Bactopia` heading, blockquote tagline, + section headings matching the navbar, and one `- [Title](URL): description` + entry per page +- `static/catalog.json` -- should be valid JSON with `site`, `generated`, + `total_pages`, and `sections` array. Each section should have `pages` with + `title`, `description`, `url`, `path`, `tags`, and `source_file` + +### 3. Build check + +Run `npm run build` to confirm the site builds with the new static files. +Both should be served at the site root: + +- `https://bactopia.github.io/llms.txt` +- `https://bactopia.github.io/catalog.json` diff --git a/.claude/skills/generate-skills-docs/SKILL.md b/.claude/skills/generate-skills-docs/SKILL.md new file mode 100644 index 00000000..c8ab46d9 --- /dev/null +++ b/.claude/skills/generate-skills-docs/SKILL.md @@ -0,0 +1,184 @@ +--- +name: generate-skills-docs +description: >- + Generate the AI Skills reference page for the Bactopia docs site. + Runs a Python parser to extract structured metadata from the bactopia repo's + SKILL.md files, then generates developers/ai-skills/index.mdx with an overview + table and per-skill entries. Use this skill whenever skills are added, + removed, or updated in the bactopia repo, when the user asks to regenerate + the skills docs page, refresh the skills reference, update the AI skills + documentation, or sync the docs site with the current set of bactopia skills. +--- + +# Generate Skills Docs + +Regenerate the AI Skills reference page (`developers/ai-skills/index.mdx`) from the +SKILL.md files in the bactopia repo. This skill combines a deterministic Python +parser for structured extraction with Claude's ability to synthesize +natural-language content (when-to-use bullets, examples, related skills). + +## Steps + +### 1. 
Run the parser + +Run `bin/generate-skills.py` to extract structured metadata from all SKILL.md files: + +```bash +python bin/generate-skills.py /home/rpetit3/repos/bactopia/bactopia/.claude/skills --json +``` + +If the user provides a different bactopia repo path, substitute it. The script outputs +JSON with this shape per skill: + +```json +{ + "name": "skill-name", + "description": "Full description from frontmatter", + "first_sentence": "First sentence of description.", + "summary": "First paragraph of the markdown body.", + "category": "Scaffolding|Maintenance|Review & Quality|Testing|Project", + "cli_command": "bactopia-*", + "cli_page": "/developers/cli/bactopia-*" +} +``` + +### 2. Generate the MDX page + +Using the JSON output, write `developers/ai-skills/index.mdx` with this structure: + +#### Frontmatter + +```yaml +--- +title: AI Skills +description: Reference for AI skills that automate Bactopia development tasks +--- +``` + +#### Page body + +Follow this outline exactly: + +```markdown +# AI Skills + +[intro paragraph -- see below] + +## Overview + +[summary table of all skills] + +## Scaffolding + +[skills with add-* prefix] + +## Maintenance + +[skills with update-*/merge-* prefix] + +## Review & Quality + +[skills with review-* prefix] + +## Testing + +[skills with run-* prefix] + +## Project + +[skills with project-* prefix] +``` + +#### Intro paragraph + +Write 2-3 sentences explaining: +- These skills automate common Bactopia development tasks through AI-assisted coding tools +- Each skill wraps one or more bactopia-py CLI commands with interactive guidance +- Skills live in the bactopia repo at `.claude/skills/` and are invoked with `/skill-name` + +Include a link: `[View skills on GitHub](https://github.com/bactopia/bactopia/tree/main/.claude/skills)` + +#### Overview table + +```markdown +| Skill | Category | Description | +|-------|----------|-------------| +| [`/add-bactopia-tool`](#add-bactopia-tool) | Scaffolding | First sentence from 
description... | +``` + +Use the `first_sentence` field from the JSON. Link each skill name to its heading +anchor on the same page. + +#### Per-skill entries + +For each skill, write an entry under its category heading: + +```markdown +### `/skill-name` + +[summary -- first paragraph from the SKILL.md body] + +**Wraps:** [`bactopia-command`](/developers/cli/bactopia-command) + +**When to use:** +- [2-3 bullet points extracted from the description's trigger phrases] +- [Focus on the "Use when asked to..." patterns in the description] + +**Examples:** +\`\`\` +/skill-name argument1 +/skill-name argument2 +\`\`\` + +**Related skills:** [`/sibling`](#sibling), [`/other`](#other) +``` + +Guidelines for each field: + +- **Summary**: Use the `summary` field from JSON (first paragraph of body). + Keep it to 1-2 sentences. If the summary is too long, trim to the essential point. +- **Wraps**: Link to the CLI Reference page using the `cli_page` field. +- **When to use**: Extract from the `description` field. Look for phrases like + "Use when asked to...", "Use this skill whenever...", or the comma-separated + list of trigger contexts. Convert to 2-3 concise bullet points. +- **Examples**: Create 2-3 realistic invocation examples. For skills that take + a component name (like `/run-tests` or `/update-module`), show examples with + different component names. For skills with no arguments (like `/project-status`), + show just the bare invocation. +- **Related skills**: Cross-reference skills that are commonly used together. 
+ Use these relationships: + - `add-bactopia-tool` <-> `add-module`, `add-subworkflow`, `run-tests`, `update-catalog` + - `add-module` <-> `add-bactopia-tool`, `add-subworkflow` + - `add-subworkflow` <-> `add-bactopia-tool`, `add-module` + - `update-module` <-> `merge-schemas`, `project-status` + - `update-catalog` <-> `project-status`, `merge-schemas` + - `merge-schemas` <-> `update-module`, `update-catalog` + - `review-groovydoc` <-> `review-citations`, `review-docs` + - `review-citations` <-> `review-groovydoc`, `review-docs` + - `review-docs` <-> `review-groovydoc`, `review-citations`, `project-status` + - `review-tests` <-> `run-tests` + - `run-tests` <-> `review-tests` + - `project-status` <-> `update-catalog`, `run-tests` + +### 3. Verify sidebar and index + +Check that `sidebars-developers.ts` includes `'ai-skills/index'` after +`'nf-bactopia/index'`. If not, add it. + +Check that `developers/index.mdx` has an "AI Skills" section between +"nf-bactopia Plugin" and "Subworkflows". If not, add: + +```markdown +## AI Skills + +Automation skills that orchestrate Bactopia CLI commands through AI-assisted +coding tools for scaffolding, maintenance, review, and testing tasks. + +[Browse AI Skills](/developers/ai-skills) +``` + +### 4. Build check + +Run `npm run build` to confirm the page builds without errors. If there are +MDX parsing issues, fix them (common culprits: unescaped `<`, `>`, `{`, `}` +in descriptions). 
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..14339146 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,23 @@ +## Summary + + + +## Type of Change + +- [ ] Content update (new or modified documentation) +- [ ] Correction (typo, broken link, inaccurate information) +- [ ] Site infrastructure (config, styling, components, CI/CD) +- [ ] Auto-generation (templates, scripts, data files) + +## Version Impact + +- [ ] This change affects the current live version only (no snapshot needed) +- [ ] A version snapshot should be created before merging (new Bactopia release) +- [ ] A snapshot rebuild is needed after merging (fix to snapshotted content) + +## Checklist + +- [ ] Site builds without errors (`npm run build`) +- [ ] Changes verified in the dev server (`npm start`) +- [ ] `snapshots.json` updated (if adding/removing a version) +- [ ] LLM catalog regenerated (`make llms-catalog`) if pages were added/removed/renamed diff --git a/.github/workflows/create-snapshot.yml b/.github/workflows/create-snapshot.yml new file mode 100644 index 00000000..faa07d76 --- /dev/null +++ b/.github/workflows/create-snapshot.yml @@ -0,0 +1,117 @@ +# Builds the docs for one release, stores the static output on the orphan branch +# snapshot/<version>, records it in snapshots.json, and then starts a deploy. +name: Create Version Snapshot + +on: + workflow_dispatch: + inputs: + version: + description: 'Version tag (e.g., v4.0.0)' + required: true + bactopia_ref: + description: 'bactopia/bactopia ref to checkout (branch, tag, or SHA)' + required: false + default: 'main' + +jobs: + create-snapshot: + runs-on: ubuntu-latest + permissions: + contents: write + # Needed by the final step to dispatch deploy.yml + actions: write + steps: + - name: Checkout docs repo + uses: actions/checkout@v4 + + - name: Checkout bactopia source repo + uses: actions/checkout@v4 + with: + repository: bactopia/bactopia + path: bactopia-source + ref: ${{ inputs.bactopia_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install Python dependencies + run: pip install -r requirements.txt + + - name: Set up 
Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + cache: 'npm' + + - name: Install Node dependencies + run: npm ci + + - name: Generate docs from bactopia source + run: make generate BACTOPIA_REPO=bactopia-source + env: + BACTOPIA_DEV_PYTHON: python + + - name: Build snapshot with version banner + run: npm run build + env: + DOCS_VERSION: ${{ inputs.version }} + + - name: Report snapshot file count + run: | + count=$(find build -type f | wc -l) + echo "Snapshot file count: $count" + echo "SNAPSHOT_FILES=$count" >> "$GITHUB_ENV" + + - name: Push snapshot to orphan branch + env: + # Hand the input to the script as data instead of splicing it into the shell text + VERSION: ${{ inputs.version }} + run: | + cd build + git init + git checkout --orphan "snapshot/$VERSION" + git add -A + git -c user.name="github-actions" -c user.email="github-actions@github.com" \ + commit -m "Snapshot $VERSION ($SNAPSHOT_FILES files)" + git remote add origin "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git" + git push origin "snapshot/$VERSION" --force + echo "Pushed snapshot/$VERSION ($SNAPSHOT_FILES files)" + + - name: Update snapshots.json on master + env: + VERSION: ${{ inputs.version }} + run: | + python3 -c " + import json, os + f = 'snapshots.json' + d = json.load(open(f)) + v = os.environ['VERSION'] + n = int(os.environ['SNAPSHOT_FILES']) + # Update existing entry or add new one + existing = [s for s in d['snapshots'] if s['version'] == v] + if existing: + existing[0]['files'] = n + existing[0]['active'] = True + print(f'Updated {v} (files={n})') + else: + d['snapshots'].insert(0, {'version': v, 'branch': f'snapshot/{v}', 'files': n, 'active': True}) + print(f'Added {v} (files={n})') + json.dump(d, open(f, 'w'), indent=2) + " + git add snapshots.json + # A rebuild with an unchanged file count stages nothing; skip the commit instead of failing + if git diff --cached --quiet; then + echo "snapshots.json unchanged; nothing to commit" + else + git -c user.name="github-actions" -c user.email="github-actions@github.com" \ + commit -m "add $VERSION to snapshots.json ($SNAPSHOT_FILES files)" + git push + fi + + # Pushes authenticated with GITHUB_TOKEN do not trigger on-push workflows, so start the deploy explicitly + - name: Trigger deploy workflow + env: + GH_TOKEN: ${{ github.token }} + run: gh workflow run deploy.yml --repo "$GITHUB_REPOSITORY" --ref master diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new 
file mode 100644 index 00000000..9620b0c4 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,84 @@ +# Builds the current docs, unpacks every active snapshot branch into build/<version>, +# and publishes the combined output to Cloudflare Pages. +name: Deploy to Cloudflare Pages + +on: + push: + branches: + - master + workflow_dispatch: + inputs: + bactopia_ref: + description: 'bactopia/bactopia ref to checkout (branch, tag, or SHA)' + required: false + default: 'main' + +jobs: + build-and-deploy: + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout docs repo + uses: actions/checkout@v4 + + - name: Checkout bactopia source repo + uses: actions/checkout@v4 + with: + repository: bactopia/bactopia + path: bactopia-source + ref: ${{ github.event.inputs.bactopia_ref || 'main' }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install Python dependencies + run: pip install -r requirements.txt + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + cache: 'npm' + + - name: Install Node dependencies + run: npm ci + + - name: Generate docs from bactopia source + run: make generate BACTOPIA_REPO=bactopia-source + env: + BACTOPIA_DEV_PYTHON: python + + - name: Build Docusaurus site + run: npm run build + + - name: Assemble version snapshots from branches + run: | + # Without pipefail the pipeline below reports only tar's exit status, so a failed git archive could go unnoticed + set -o pipefail + for snap in $(python3 -c "import json; [print(s['version']) for s in json.load(open('snapshots.json'))['snapshots'] if s['active']]"); do + branch="snapshot/$snap" + target="build/$snap" + echo "Fetching $branch -> $target" + git fetch origin "$branch" --depth=1 || { echo "WARNING: branch $branch not found, skipping"; continue; } + mkdir -p "$target" + # FETCH_HEAD names the commit just fetched and does not depend on a remote-tracking ref being created + git archive FETCH_HEAD | tar -x -C "$target" + done + + - name: Report deployment file count + run: | + count=$(find build -type f | wc -l) + echo "Total deployment files: $count / 20000" + if [ "$count" -gt 19000 ]; then + echo "::warning::Approaching Cloudflare Pages 20k file limit: $count files" + fi + + - name: Deploy to Cloudflare Pages + uses: cloudflare/wrangler-action@v3 
+ with: + apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} + accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} + command: pages deploy build/ --project-name=bactopia-docs --branch=main diff --git a/.gitignore b/.gitignore index ab7815dc..5801125a 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ npm-debug.log* .DS_Store Thumbs.db .idea/ +data/bactopia.json +data/cli.json +TODO.md diff --git a/.vscode/settings.json b/.vscode/settings.json index d5cf2132..065cd2d6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,10 +1,103 @@ { "cSpell.words": [ + "abricate", + "abritamr", + "aeruginosa", + "agrvate", + "amrfinderplus", + "Ariba", + "aspera", + "authorships", "Bactopia", + "bakta", + "basepair", + "basepairs", + "bbmap", "bioconda", + "bioinformatic", + "bioinformatics", + "bioproject", + "Biospecimen", + "blastdb", + "blastn", + "blastp", + "blastx", + "Blin", "cairosvg", + "CFBR", + "contig", + "contigs", + "cpus", + "Csvtk", + "datahub", + "Dragonflye", + "easyops", + "emmtyper", + "endfor", + "EOSS", + "fasp", + "FASTA", + "fastani", + "fastp", + "fastq", + "fastqc", + "fastqs", + "flye", + "FOFN", + "Frontmatter", + "ftype", + "gaeip", + "genbank", + "groovydoc", + "Gubbins", + "Kleborate", + "Kmer", + "kmers", + "llms", + "metagenomic", "MKDOCS", + "mlst", + "MRSA", + "Nanoplot", + "Nanopore", + "NATA", + "NCBI", "nextflow", - "pyyaml" + "nfconfig", + "noopener", + "OPENALEX", + "panaroo", + "pbptyper", + "phix", + "PHRED", + "Pigz", + "porechop", + "PRJNA", + "Prokka", + "Pseudomonas", + "pysam", + "pyyaml", + "qtrim", + "Rasusa", + "rpetit", + "runtype", + "SAMN", + "samplesheet", + "sccmec", + "Seroba", + "serogrouping", + "Shovill", + "silico", + "slurm", + "spatyper", + "Staphopia", + "subsampling", + "subworkflow", + "subworkflows", + "taxid", + "trimq", + "typer", + "Unicycler", + "wyphd" ] } diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..4c94af74 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,171 @@ +# Bactopia 
Documentation Site + +Docusaurus 3.10 site for [Bactopia](https://github.com/bactopia/bactopia), a bacterial genomics workflow. + +- Site: https://bactopia.io +- Repo: bactopia/bactopia.github.io (deployed to Cloudflare Pages) + +## Architecture + +Five content sections with separate sidebar files, each registered as a Docusaurus docs plugin: + +| Directory | Route Base | Sidebar File | +|----------------------|----------------------|--------------------------------| +| `docs/` | `/` | `sidebars.ts` | +| `bactopia-tools/` | `/bactopia-tools` | `sidebars-bactopia-tools.ts` | +| `bactopia-pipelines/`| `/bactopia-pipelines`| `sidebars-bactopia-pipelines.ts`| +| `developers/` | `/developers` | `sidebars-developers.ts` | +| `impact/` | `/impact-and-outreach`| `sidebars-impact.ts` | + +Plus blog at `/blog` (configured in preset-classic). + +Versioned snapshots of older releases are stored as orphan git branches (`snapshot/vX.Y.Z`). +See the [Version Snapshots](#version-snapshots) section for details. + +## Auto-generation Pipeline + +Most content under `bactopia-tools/`, `bactopia-pipelines/`, `developers/`, and `impact/` is +auto-generated from the [bactopia](https://github.com/bactopia/bactopia) source repo. + +Pipeline: Python scripts in `bin/` read data sources, render pages via Jinja2 templates in `templates/`, +orchestrated by `Makefile`. 
+ +Scripts in `bin/`: + +| Script | Input | Output | +|--------|-------|--------| +| `parse-bactopia.py` | bactopia repo | `data/bactopia.json` | +| `parse-cli.py` | bactopia-py (Click introspection) | `data/cli.json` | +| `generate-workflows.py` | `data/bactopia.json` | `bactopia-tools/*.mdx`, `bactopia-pipelines/*.mdx` | +| `generate-subworkflows.py` | `data/bactopia.json` | `developers/subworkflows/*.mdx` | +| `generate-modules.py` | `data/bactopia.json` | `developers/modules/*.mdx` | +| `generate-cli.py` | `data/cli.json` | `developers/cli/*.mdx` | +| `generate-citations.py` | `data/citations.yml` | `impact/citations.md` | +| `generate-acknowledgements.py` | `data/bactopia.json` | `impact/acknowledgements.md` | +| `generate-enhancements.py` | `data/contributions.yml` | `impact/enhancements.md` | +| `generate-skills.py` | bactopia repo SKILL.md files | JSON to stdout (used by skills docs skill) | +| `generate-llms-catalog.py` | all content directories | `static/llms.txt`, `static/catalog.json` | +| `update-citations.py` | external sources | `data/citations.yml` | +| `generator_utils.py` | -- | shared helpers (escape_mdx, create_jinja_env, etc.) 
| + +Data files in `data/`: +- `bactopia.json` -- parsed bactopia repo metadata (generated by `make parse`) +- `cli.json` -- parsed CLI metadata (generated by `make parse-cli`) +- `citations.yml` -- citation database (committed) +- `contributions.yml` -- contribution records (committed) + +## Environment + +- Node.js >= 18 for Docusaurus +- Python via bactopia-dev conda env for generation scripts +- `BACTOPIA_DEV_PYTHON` defaults to `/home/rpetit3/.conda/envs/bactopia-dev/bin/python` + +## Commands + +| Command | Purpose | +|---------|---------| +| `npm start` | Dev server (port 8000, host 0.0.0.0) | +| `npm run build` | Production build to `build/` | +| `npm run serve` | Serve production build locally | +| `npm run typecheck` | TypeScript type checking | +| `npm run clear` | Clear Docusaurus cache | +| `make generate BACTOPIA_REPO=../bactopia` | Generate all docs from bactopia source | +| `make llms-catalog` | Generate `static/llms.txt` and `static/catalog.json` | +| `make snapshot-list` | Show all version snapshots and file budget | +| `make snapshot-add VERSION=vX.Y.Z FILES=N` | Register a new active snapshot | +| `make snapshot-deactivate VERSION=vX.Y.Z` | Remove a version from the active deploy | +| `make snapshot-activate VERSION=vX.Y.Z` | Restore a version to the active deploy | + +## LLM Index Files + +`static/llms.txt` and `static/catalog.json` are machine-readable indexes of the documentation, +generated by `bin/generate-llms-catalog.py` (or `make llms-catalog`). + +- `llms.txt` -- follows the [llms.txt standard](https://llmstxt.org/). Markdown file with H1 site + title, blockquote tagline, H2 per section, and `- [Title](URL): description` entries per page. + Served at `/llms.txt`. +- `catalog.json` -- structured JSON with site metadata, section hierarchy, and per-page metadata + (title, description, tags, URL path, source file). Served at `/catalog.json`. + +Both are committed and should be regenerated when content pages are added, removed, or renamed. 
+ +Frontmatter fields: +- `title` -- required +- `description` -- required (quoted string or block scalar) +- `tags` -- optional (list of lowercase-hyphenated strings) +- `sidebar_position` -- optional (integer) +- `slug` -- optional (only on index pages) + +MDX files use React components from `src/components/` (CardGrid, Card, Icon, etc.). +Characters `<`, `>`, `{`, `}` must be escaped in MDX body text (use `escape_mdx` from `generator_utils`). + +## Generator Script Pattern + +Each generator script in `bin/` follows this structure: +- `argparse` for inputs and output paths +- Load data from JSON/YAML +- Use `create_jinja_env()` with templates, or direct string generation +- Write output with `Path.write_text()` +- Print summary to stdout + +## Version Snapshots + +Each release gets a static snapshot so users can access older docs. Snapshots are stored as +orphan git branches (`snapshot/vX.Y.Z`) and assembled into the deploy output at build time. + +- `/` always serves the current version (label set in `docusaurus.config.ts` under `versions.current.label`) +- `/vX.Y.Z/` serves active snapshots (built with `baseUrl: '/vX.Y.Z/'` and an announcement banner) +- `snapshots.json` is the registry of all versions; the `active` flag controls deploy inclusion +- Cloudflare Pages has a 20,000 file limit; use `make snapshot-list` to check remaining budget + +### Creating a snapshot (new Bactopia release) + +1. Go to GitHub Actions and run the **Create Version Snapshot** workflow + - `version`: the version tag (e.g., `v4.0.0`) + - `bactopia_ref`: the bactopia repo ref to build from (e.g., `v4.0.0`) +2. The workflow builds the site with the version banner, pushes an orphan branch + `snapshot/vX.Y.Z`, and updates `snapshots.json` on master +3. 
The deploy workflow runs automatically, assembling the snapshot into the build output + +### Updating the current version label + +After creating a snapshot for the outgoing version, update the label in `docusaurus.config.ts`: + +```typescript +versions: { + current: { + label: 'v4.1.0', // update to new version + ... + }, +}, +``` + +### Rebuilding a snapshot + +Re-run the **Create Version Snapshot** workflow with the same version. The orphan branch +is force-pushed with the new build output. + +### Dropping old versions (approaching 20k file limit) + +```bash +make snapshot-list # check budget +make snapshot-deactivate VERSION=v2.1.0 # remove from active deploy +# commit and push snapshots.json +``` + +The version moves to "Archived Versions" in the dropdown (links to the GitHub branch). +The branch is preserved and can be re-activated with `make snapshot-activate`. + +### Key files + +| File | Purpose | +|------|---------| +| `snapshots.json` | Version registry (all versions, active flag, file counts) | +| `docusaurus.config.ts` | `DOCS_VERSION` env var controls banner and `baseUrl` | +| `.github/workflows/create-snapshot.yml` | Builds snapshot and pushes orphan branch | +| `.github/workflows/deploy.yml` | Assembles active snapshots at deploy time | + +## Skills + +Claude Code skills live in `.claude/skills/<skill-name>/SKILL.md` with YAML frontmatter +(`name`, `description`) followed by markdown instructions. 
diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..a4fea2ed --- /dev/null +++ b/Makefile @@ -0,0 +1,118 @@ +# Docs generation and snapshot registry targets for the Bactopia docs site. + +# Path to a bactopia checkout; the parse and copy-changelog targets refuse to run without it. +BACTOPIA_REPO ?= + +# Python interpreter used by parse-cli; override on the command line or through the environment. +BACTOPIA_DEV_PYTHON ?= /home/rpetit3/.conda/envs/bactopia-dev/bin/python + +.PHONY: generate parse copy-changelog generate-workflows generate-subworkflows generate-modules generate-citations generate-acknowledgements generate-enhancements parse-cli generate-cli generate-tools-index update-citations generate-llms-catalog llms-catalog clean-generated snapshot-list snapshot-add snapshot-deactivate snapshot-activate + +# generate relies on its prerequisites running in the listed order (parse must finish before the +# generators read data/bactopia.json), so keep make serial even when -j is passed. +.NOTPARALLEL: + +generate: parse copy-changelog generate-workflows generate-subworkflows generate-modules generate-citations generate-acknowledgements generate-enhancements parse-cli generate-cli generate-tools-index + +parse: + @test -n "$(BACTOPIA_REPO)" || (echo "Error: BACTOPIA_REPO is not set. Pass it explicitly, for example: make generate BACTOPIA_REPO=../bactopia" >&2; exit 1) + python bin/parse-bactopia.py $(BACTOPIA_REPO) --output data/bactopia.json + +copy-changelog: + @test -n "$(BACTOPIA_REPO)" || (echo "Error: BACTOPIA_REPO is not set." 
>&2; exit 1) + cp $(BACTOPIA_REPO)/CHANGELOG.md docs/changelog.md + +generate-workflows: + python bin/generate-workflows.py data/bactopia.json --tools-dir bactopia-tools/ --pipelines-dir bactopia-pipelines/ --docs-dir docs/ + +generate-subworkflows: + python bin/generate-subworkflows.py data/bactopia.json --output-dir developers/subworkflows/ + +generate-modules: + python bin/generate-modules.py data/bactopia.json --output-dir developers/modules/ + +generate-citations: + python bin/generate-citations.py data/citations.yml --output impact/citations.md + +generate-acknowledgements: + python bin/generate-acknowledgements.py data/bactopia.json --output impact/acknowledgements.md + +generate-enhancements: + python bin/generate-enhancements.py data/contributions.yml --output impact/enhancements.md + +parse-cli: + $(BACTOPIA_DEV_PYTHON) bin/parse-cli.py --output data/cli.json + +generate-cli: + python bin/generate-cli.py data/cli.json --output-dir developers/cli/ + +generate-tools-index: + python bin/generate-tools-index.py data/tool-categories.yml --tools-dir bactopia-tools/ --output bactopia-tools/index.mdx + +update-citations: + python bin/update-citations.py --output data/citations.yml + +generate-llms-catalog: + python bin/generate-llms-catalog.py + +llms-catalog: generate-llms-catalog + +clean-generated: + rm -rf data/bactopia.json data/cli.json bactopia-tools/*.mdx bactopia-pipelines/*.mdx developers/subworkflows/*.mdx developers/modules/*.mdx developers/cli/*.mdx impact/citations.md impact/acknowledgements.md impact/enhancements.md static/llms.txt static/catalog.json docs/changelog.md + +# --- Version Snapshot Management --- + +# Show active and archived versions plus the estimated deploy size (the main site is assumed to be ~2000 files). +snapshot-list: + @python3 -c "\ + import json; \ + d = json.load(open('snapshots.json')); \ + active = [s for s in d['snapshots'] if s['active']]; \ + archived = [s for s in d['snapshots'] if not s['active']]; \ + active_files = sum(s['files'] for s in active); \ + main = 2000; \ + print('Active versions:'); \ + [print(f' 
{s[\"version\"]:>10} {s[\"files\"]:>5} files') for s in active]; \ + print(f'\nArchived versions:'); \ + ([print(f' {s[\"version\"]:>10} {s[\"files\"]:>5} files') for s in archived] if archived else [print(' (none)')]); \ + print(f'\nEstimated deploy: ~{main} (main) + {active_files} (snapshots) = {main + active_files} / 20000'); \ + print(f'Remaining budget: ~{20000 - main - active_files} files (~{(20000 - main - active_files) // main} more snapshots)')" + +# Register VERSION as a new active snapshot; FILES defaults to 0 when omitted. Fails if VERSION already exists. +snapshot-add: + @test -n "$(VERSION)" || (echo "Error: VERSION is required. Usage: make snapshot-add VERSION=v4.0.0 FILES=1839" >&2; exit 1) + @python3 -c "\ + import json, sys; \ + f = 'snapshots.json'; d = json.load(open(f)); \ + v = '$(VERSION)'; n = int('$(FILES)' or '0'); \ + exists = [s for s in d['snapshots'] if s['version'] == v]; \ + (print(f'Error: {v} already exists', file=sys.stderr) or sys.exit(1)) if exists else None; \ + d['snapshots'].insert(0, {'version': v, 'branch': f'snapshot/{v}', 'files': n, 'active': True}); \ + json.dump(d, open(f, 'w'), indent=2); \ + print(f'Added {v} (files={n}, active=true)')" + +# Mark VERSION inactive so it is left out of the deploy; the entry stays in snapshots.json. +snapshot-deactivate: + @test -n "$(VERSION)" || (echo "Error: VERSION is required. Usage: make snapshot-deactivate VERSION=v2.1.0" >&2; exit 1) + @python3 -c "\ + import json, sys; \ + f = 'snapshots.json'; d = json.load(open(f)); \ + v = '$(VERSION)'; \ + matched = [s for s in d['snapshots'] if s['version'] == v]; \ + (print(f'Error: {v} not found', file=sys.stderr) or sys.exit(1)) if not matched else None; \ + [s.update({'active': False}) for s in d['snapshots'] if s['version'] == v]; \ + json.dump(d, open(f, 'w'), indent=2); \ + print(f'Deactivated {v} (will not be included in deploy)')" + +# Mark VERSION active again so it is included in the deploy. +snapshot-activate: + @test -n "$(VERSION)" || (echo "Error: VERSION is required. 
Usage: make snapshot-activate VERSION=v2.1.0" >&2; exit 1) + @python3 -c "\ + import json, sys; \ + f = 'snapshots.json'; d = json.load(open(f)); \ + v = '$(VERSION)'; \ + matched = [s for s in d['snapshots'] if s['version'] == v]; \ + (print(f'Error: {v} not found', file=sys.stderr) or sys.exit(1)) if not matched else None; \ + [s.update({'active': True}) for s in d['snapshots'] if s['version'] == v]; \ + json.dump(d, open(f, 'w'), indent=2); \ + print(f'Activated {v} (will be included in deploy)')" diff --git a/README.md b/README.md index ec2517d3..b7734e6e 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,135 @@ # bactopia.github.io -Bactopia Documentation -## Local Install +Documentation for [Bactopia](https://github.com/bactopia/bactopia), built with [Docusaurus 3](https://docusaurus.io/). -```{bash} -# Create a MkDocs environment -mamba create -n bactopia-docs -c conda-forge -c bioconda mkdocs pip setuptools -conda activate bactopia-docs +## Prerequisites -# Install mkdocs-material -pip install mkdocs-material -# Or, insiders edition -pip install git+https://${GH_TOKEN}@github.com/squidfunk/mkdocs-material-insiders.git +- **Node.js** >= 18 +- **Conda** environment `bactopia-dev` from the [bactopia repo](https://github.com/bactopia/bactopia) (provides Node.js, Python, and all shared dependencies) -# Clone the repo +```bash +# Create the environment (from the bactopia repo) +cd /path/to/bactopia +conda env create -f environment.yml -n bactopia-dev +conda activate bactopia-dev +``` + +## Development Setup + +```bash git clone git@github.com:bactopia/bactopia.github.io.git cd bactopia.github.io +npm install +npm start +``` + +The dev server runs at `http://0.0.0.0:8000` with hot reload. 
+ +## Available Scripts + +| Script | Description | +| ------------------- | ------------------------------------ | +| `npm start` | Start the dev server with hot reload | +| `npm run build` | Production build to `build/` | +| `npm run serve` | Serve the production build locally | +| `npm run clear` | Clear the Docusaurus cache | +| `npm run typecheck` | Run TypeScript type checking | + +## Generating Docs from Bactopia Source + +Most documentation under `bactopia-tools/`, `bactopia-pipelines/`, `developers/`, and `impact/` is auto-generated from the [bactopia](https://github.com/bactopia/bactopia) source repo using Python scripts in `bin/`. + +```bash +# Generate all docs (requires bactopia repo path) +make generate BACTOPIA_REPO=/path/to/bactopia +``` + +Individual targets are also available: + +| Target | Description | +| ---------------------------- | ------------------------------------------------- | +| `parse` | Parse the bactopia repo into `data/bactopia.json` | +| `generate-workflows` | Generate tool and pipeline docs | +| `generate-subworkflows` | Generate subworkflow docs | +| `generate-modules` | Generate module docs | +| `generate-citations` | Generate citations page | +| `generate-acknowledgements` | Generate acknowledgements page | +| `generate-enhancements` | Generate enhancements page | +| `parse-cli` | Parse bactopia-py CLI into `data/cli.json` | +| `generate-cli` | Generate CLI reference docs | +| `update-citations` | Refresh `data/citations.yml` from source | +| `clean-generated` | Remove all generated files | + +By default, `parse-cli` uses the Python from `bactopia-dev`. 
Override with: + +```bash +make parse-cli BACTOPIA_DEV_PYTHON=/path/to/python +``` + +## Project Structure -# Serve the docs -mkdocs serve -a 0.0.0.0:8000 +```text +docs/ Main Bactopia documentation +bactopia-tools/ Auto-generated tool docs +bactopia-pipelines/ Auto-generated pipeline docs +developers/ Developer guides, subworkflows, modules, CLI reference +impact/ Impact & outreach content +blog/ Blog posts +data/ JSON/YAML data files (citations, CLI, bactopia metadata) +src/ React components, custom CSS, pages +bin/ Python doc-generation scripts +templates/ Jinja2 templates for doc generation +static/ Static assets (images, logos) ``` + +## Version Snapshots + +The site uses 5 Docusaurus docs plugins, so native versioning (which only supports the default plugin) is not used. Instead, each release is preserved as a static build snapshot stored in an orphan git branch (`snapshot/vX.Y.Z`). The deploy workflow assembles active snapshots into the build output at deploy time. + +- `/` -- always serves the current version +- `/vX.Y.Z/` -- serves active version snapshots (with an announcement banner linking to latest) +- `snapshots.json` -- registry of all versions; the `active` flag controls which are included in the deploy +- Cloudflare Pages free plan has a 20,000 file limit; each snapshot uses ~2,000 files + +### Creating a snapshot (new Bactopia release) + +1. Go to **Actions > Create Version Snapshot** and run the workflow + - `version`: the version tag (e.g., `v4.0.0`) + - `bactopia_ref`: the bactopia repo ref to build from (e.g., `v4.0.0`) +2. The workflow builds the site with a version banner, pushes an orphan branch `snapshot/vX.Y.Z`, and updates `snapshots.json` on master +3. The deploy workflow runs automatically, including the new snapshot + +After creating the snapshot, update the version label in `docusaurus.config.ts` to the new version: + +```typescript +versions: { + current: { + label: 'v4.1.0', // update to new version + ... 
+ }, +}, +``` + +### Rebuilding a snapshot + +Re-run the **Create Version Snapshot** workflow with the same version. The orphan branch is force-pushed with the new build. + +### Managing versions + +```bash +make snapshot-list # show all versions and file budget +make snapshot-add VERSION=vX.Y.Z FILES=N # manually register a snapshot +make snapshot-deactivate VERSION=v2.1.0 # drop from active deploy +make snapshot-activate VERSION=v2.1.0 # restore to active deploy +``` + +Deactivated versions appear under "Archived Versions" in the dropdown, linking to the GitHub branch. Re-activate anytime with `make snapshot-activate`. + +## Deployment + +The site is deployed to [Cloudflare Pages](https://pages.cloudflare.com/) via GitHub Actions. Pushes to `master` trigger the deploy workflow (`.github/workflows/deploy.yml`), which: + +1. Generates docs from the bactopia source repo +2. Builds the Docusaurus site +3. Fetches active version snapshots from their orphan branches +4. Deploys the assembled output to Cloudflare Pages diff --git a/bactopia-pipelines/cleanyerreads.mdx b/bactopia-pipelines/cleanyerreads.mdx new file mode 100644 index 00000000..ae142409 --- /dev/null +++ b/bactopia-pipelines/cleanyerreads.mdx @@ -0,0 +1,279 @@ +--- +title: cleanyerreads +description: "Quality control and optional host read removal from raw sequencing reads." +tags: + - reads + - quality-control + - trimming + - filtering + - host-removal + - preprocessing + - named-workflow +--- + +# cleanyerreads + +**Tags:** reads quality-control trimming filtering host-removal preprocessing named-workflow + +Quality control and optional host read removal from raw sequencing reads. + +This workflow performs comprehensive read quality control including trimming, +adapter removal, quality filtering, and optionally removes host contamination +using [nohuman](https://github.com/mbhall88/nohuman) or [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber). 
+It processes raw sequencing reads to produce high-quality clean reads ready +for downstream analysis. + +## Usage + +clean-yer-reads CLI: + +```bash +clean-yer-reads \ + --input samples.csv \ + --outdir results/ +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/cleanyerreads/main.nf \ + --input samples.csv \ + --outdir results/ +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ ├── main +│ │ ├── gather +│ │ │ ├── -meta.tsv +│ │ │ └── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ └── qc +│ │ ├── _SE.fastq.gz +│ │ ├── logs +│ │ │ ├── -fastp.log +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ └── supplemental +│ │ ├── .fastp.html +│ │ ├── .fastp.json +│ │ ├── _SE-final.json +│ │ ├── _SE-final_fastqc.html +│ │ ├── _SE-final_fastqc.zip +│ │ ├── _SE-original.json +│ │ ├── _SE-original_fastqc.html +│ │ └── _SE-original_fastqc.zip +│ └── tools +│ └── srahumanscrubber +│ ├── .scrub.report.tsv +│ ├── .scrubbed.fastq.gz +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── cleanyerreads- + ├── merged-results + │ ├── logs + │ │ ├── meta-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ └── scrubber-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ ├── meta.tsv + │ └── scrubber.tsv + └── nf-reports + ├── cleanyerreads-dag.dot + ├── cleanyerreads-report.html + └── cleanyerreads-timeline.html +``` + +### Quality Control Reports + +| File | Description | +|------|-------------| +| `supplemental/*_fastqc.*` | FastQC reports for raw and cleaned reads | +| `supplemental/*-NanoPlot.*` | NanoPlot reports for Nanopore reads | +| `supplemental/*.fastp.*` | Fastp quality reports (when applicable) | +| `supplemental/*_original.json` | Quality metrics for original reads | +| `supplemental/*_final.json` | Quality metrics for final reads | + +### 
Cleaned Reads + +| File | Description | +|------|-------------| +| `*.fastq.gz` | Quality controlled and trimmed reads | +| `*.unclassified.fastq.gz` | Host-decontaminated reads (if scrubber enabled) | + +### Host Decontamination + +:::note +Only created when host removal is enabled +::: + +| File | Description | +|------|-------------| +| `*.kraken2.report.txt` | Kraken2 classification report | +| `*.scrub.report.tsv` | Human contamination report | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. 
+ +| Filename | Description | +|----------|-------------| +| cleanyerreads-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| cleanyerreads-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| cleanyerreads-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| cleanyerreads-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +The following parameters are how you will provide either local or remote samples to be processed by Bactopia. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--samples` | string | | A FOFN (via bactopia prepare) with sample names and paths to FASTQ/FASTAs to process + | +| `--r1` | string | | First set of compressed (gzip) Illumina paired-end FASTQ reads (requires --r2 and --sample) | +| `--r2` | string | | Second set of compressed (gzip) Illumina paired-end FASTQ reads (requires --r1 and --sample) | +| `--se` | string | | Compressed (gzip) Illumina single-end FASTQ reads (requires --sample) | +| `--ont` | string | | Compressed (gzip) Oxford Nanopore FASTQ reads (requires --sample) | +| `--hybrid` | boolean | `false` | Create hybrid assembly using Unicycler. (requires --r1, --r2, --ont and --sample) | +| `--short_polish` | boolean | `false` | Create hybrid assembly from long-read assembly and short read polishing. (requires --r1, --r2, --ont and --sample) | +| `--sample` | string | | Sample name to use for the input sequences + | +| `--accessions` | string | | A file containing ENA/SRA Experiment accessions or NCBI Assembly accessions to be processed | +| `--accession` | string | | Sample name to use for the input sequences + | +| `--assembly` | string | | An assembled genome in compressed FASTA format. 
(requires --sample) | +| `--check_samples` | boolean | `false` | Validate the input FOFN provided by --samples | + +
+Dataset Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--species` | string | | Name of species for species-specific dataset to use | +| `--ask_merlin` | boolean | | Ask Merlin to execute species specific Bactopia tools based on Mash distances | +| `--coverage` | integer | `100` | Reduce samples to a given coverage, requires a genome size | +| `--genome_size` | integer | `0` | Expected genome size (bp) for all samples, required for read error correction and read subsampling | +| `--use_bakta` | boolean | | Use Bakta for annotation, instead of Prokka | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [bactopia_gather](/developers/subworkflows/bactopia_gather) - Search, validate, gather, and standardize input samples. +- [bactopia_qc](/developers/subworkflows/bactopia_qc) - Perform comprehensive quality control on sequencing reads. +- [scrubber](/developers/subworkflows/scrubber) - Remove contaminant sequences from metagenomic data. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BBTools](https://jgi.doe.gov/data-and-tools/bbtools/) + Bushnell B [BBMap short read aligner, and other bioinformatic tools.](http://sourceforge.net/projects/bbmap/) (Link) + +- [fastp](https://github.com/OpenGene/fastp) + Chen S, Zhou Y, Chen Y, and Gu J [fastp: an ultra-fast all-in-one FASTQ preprocessor.](https://doi.org/10.1093/bioinformatics/bty560) _Bioinformatics_, 34(17), i884-i890. (2018) + +- [FastQC](https://github.com/s-andrews/FastQC) + Andrews S [FastQC: a quality control tool for high throughput sequence data.](http://www.bioinformatics.babraham.ac.uk/projects/fastqc) (WebLink) + +- [fastq-scan](https://github.com/rpetit3/fastq-scan) + Petit III RA [fastq-scan: generate summary statistics of input FASTQ sequences.](https://github.com/rpetit3/fastq-scan) (GitHub) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) + +- [Lighter](https://github.com/mourisl/Lighter) + Song L, Florea L, Langmead B [Lighter: Fast and Memory-efficient Sequencing Error Correction without Counting](https://doi.org/10.1186/s13059-014-0509-9). 
_Genome Biol._ 15(11):509 (2014) + +- [NanoPlot](https://github.com/wdecoster/NanoPlot) + De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C [NanoPack: visualizing and processing long-read sequencing data](https://doi.org/10.1093/bioinformatics/bty149) _Bioinformatics_ Volume 34, Issue 15 (2018) + +- [Nanoq](https://github.com/esteinig/nanoq) + Steinig E [Nanoq: Minimal but speedy quality control for nanopore reads in Rust](https://github.com/esteinig/nanoq) (GitHub) + +- [Porechop](https://github.com/rrwick/Porechop) + Wick RR, Judd LM, Gorrie CL, Holt KE. [Completing bacterial genome assemblies with multiplex MinION sequencing.](https://doi.org/10.1099/mgen.0.000132) _Microb Genom._ 3(10):e000132 (2017) + +- [Rasusa](https://github.com/mbhall88/rasusa) + Hall MB [Rasusa: Randomly subsample sequencing reads to a specified coverage.](https://doi.org/10.5281/zenodo.3731394) (2019). + +- [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) + Katz KS, Shutov O, Lapoint R, Kimelman M, Brister JR, and O'Sullivan C [STAT: a fast, scalable, MinHash-based k-mer tool to assess Sequence Read Archive next-generation sequence submissions.](https://doi.org/10.1186/s13059-021-02490-0) _Genome Biology_, 22(1), 270 (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/cleanyerreads) diff --git a/bactopia-pipelines/index.mdx b/bactopia-pipelines/index.mdx new file mode 100644 index 00000000..ac7e2e44 --- /dev/null +++ b/bactopia-pipelines/index.mdx @@ -0,0 +1,15 @@ +--- +title: Bactopia Pipelines +description: Bactopia analysis pipelines +sidebar_position: 1 +--- + +# Bactopia Pipelines + +Bactopia Pipelines are complete analysis pipelines built from Bactopia's subworkflows and modules. There are 3 pipelines available. + +| Pipeline | Description | +|----------|-------------| +| [cleanyerreads](/bactopia-pipelines/cleanyerreads) | Quality control and optional host read removal from raw sequencing reads. 
| +| [staphopia](/bactopia-pipelines/staphopia) | Comprehensive analysis pipeline for Staphylococcus aureus isolates. | +| [teton](/bactopia-pipelines/teton) | Taxonomic classification and abundance profiling of metagenomic reads. | diff --git a/bactopia-pipelines/staphopia.mdx b/bactopia-pipelines/staphopia.mdx new file mode 100644 index 00000000..578b44a2 --- /dev/null +++ b/bactopia-pipelines/staphopia.mdx @@ -0,0 +1,391 @@ +--- +title: staphopia +description: "Comprehensive analysis pipeline for Staphylococcus aureus isolates." +tags: + - staphylococcus-aureus + - assembly + - annotation + - amr + - mlst + - spa-typing + - agr-typing + - sccmec + - named-workflow +--- + +# staphopia + +**Tags:** staphylococcus-aureus assembly annotation amr mlst spa-typing agr-typing sccmec named-workflow + +Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +This workflow performs complete bacterial analysis including quality control, +assembly, annotation, antimicrobial resistance detection, MLST typing, +and Staphylococcus-specific analysis using [Spatyper](https://github.com/HCGB-IGTP/spaTyper), +[AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE), and [SCCmecFinder](https://github.com/rpetit3/sccmec). +It processes raw sequencing reads and produces a comprehensive genomic characterization for S. aureus isolates. 
+ +## Usage + +staphopia CLI: + +```bash +staphopia \ + --input samples.csv \ + --outdir results/ +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/staphopia/main.nf \ + --input samples.csv \ + --outdir results/ +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ ├── main +│ │ ├── annotator +│ │ │ └── prokka +│ │ │ ├── -blastdb.tar.gz +│ │ │ ├── .faa.gz +│ │ │ ├── .ffn.gz +│ │ │ ├── .fna.gz +│ │ │ ├── .fsa.gz +│ │ │ ├── .gbk.gz +│ │ │ ├── .gff.gz +│ │ │ ├── .sqn.gz +│ │ │ ├── .tbl.gz +│ │ │ ├── .tsv +│ │ │ ├── .txt +│ │ │ └── logs +│ │ │ ├── .err +│ │ │ ├── .log +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ ├── assembler +│ │ │ ├── .fna.gz +│ │ │ ├── .tsv +│ │ │ ├── logs +│ │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ │ ├── shovill.log +│ │ │ │ └── versions.yml +│ │ │ └── supplemental +│ │ │ ├── flash.hist +│ │ │ ├── flash.histogram +│ │ │ └── shovill.corrections +│ │ ├── gather +│ │ │ ├── -meta.tsv +│ │ │ └── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ ├── qc +│ │ │ ├── _R1.fastq.gz +│ │ │ ├── _R2.fastq.gz +│ │ │ ├── logs +│ │ │ │ ├── -fastp.log +│ │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ │ └── versions.yml +│ │ │ └── supplemental +│ │ │ ├── .fastp.html +│ │ │ ├── .fastp.json +│ │ │ ├── _R1-final.json +│ │ │ ├── _R1-final_fastqc.html +│ │ │ ├── _R1-final_fastqc.zip +│ │ │ ├── _R1-original.json +│ │ │ ├── _R1-original_fastqc.html +│ │ │ ├── _R1-original_fastqc.zip +│ │ │ ├── _R2-final.json +│ │ │ ├── _R2-final_fastqc.html +│ │ │ ├── _R2-final_fastqc.zip +│ │ │ ├── _R2-original.json +│ │ │ ├── _R2-original_fastqc.html +│ │ │ └── _R2-original_fastqc.zip +│ │ └── sketcher +│ │ ├── -k21.msh +│ │ ├── -k31.msh +│ │ ├── -mash-refseq88-k21.txt +│ │ ├── -sourmash-gtdb-rs207-k31.txt +│ │ ├── .sig +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── tools +│ ├── agrvate +│ │ ├── .tsv 
+│ │ ├── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ └── supplemental +│ │ ├── -agr_gp.tab +│ │ ├── -blastn_log.txt +│ │ ├── -hmm-log.txt +│ │ ├── -hmm.tab +│ │ └── .fna-error-report.tab +│ ├── amrfinderplus +│ │ ├── .tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── mlst +│ │ ├── .tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── sccmec +│ │ ├── .regions.blastn.tsv +│ │ ├── .regions.details.tsv +│ │ ├── .targets.blastn.tsv +│ │ ├── .targets.details.tsv +│ │ ├── .tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── spatyper +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── staphopia- + ├── merged-results + │ ├── agrvate.tsv + │ ├── amrfinderplus.tsv + │ ├── assembly-scan.tsv + │ ├── logs + │ │ ├── agrvate-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ ├── amrfinderplus-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ ├── assembly-scan-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ ├── meta-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ ├── mlst-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ ├── sccmec-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ └── spatyper-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ ├── meta.tsv + │ ├── mlst.tsv + │ ├── sccmec.tsv + │ └── spatyper.tsv + └── nf-reports + ├── staphopia-dag.dot + ├── staphopia-report.html + └── staphopia-timeline.html +``` + +### Quality Control + +| File | Description | +|------|-------------| +| `supplemental/*_fastqc.*` | FastQC quality 
control reports for raw and cleaned reads | +| `supplemental/*-NanoPlot.*` | NanoPlot reports for Nanopore reads | +| `supplemental/*.fastp.*` | Fastp quality reports (when applicable) | + +### Assembly + +| File | Description | +|------|-------------| +| `*.fna` | Assembled genome sequences in FASTA format | +| `assembly-stats.tsv` | Assembly quality metrics per sample | + +### Annotation + +:::note +Output format depends on chosen annotation tool (Bakta or Prokka) +::: + +| File | Description | +|------|-------------| +| `*.gff.gz` | Genome annotation in GFF3 format (compressed) | +| `*.gbk.gz` | Genome annotation in GenBank format (compressed) | +| `*.faa.gz` | Protein sequences (compressed) | +| `*.fna.gz` | Nucleotide sequences from annotation (compressed) | +| `annotation.tsv` | Annotation summary tables | + +### Typing + +| File | Description | +|------|-------------| +| `mlst.tsv` | MLST sequence type results | +| `agrvate-*` | Agr locus typing results | +| `spatyper-*` | spa typing results | +| `sccmec-*` | SCCmec typing results (targets, regions, details) | + +### Antimicrobial Resistance + +| File | Description | +|------|-------------| +| `amrfinderplus.tsv` | AMR gene detection results | +| `amrfinderplus.mutation.tsv` | AMR point mutation results | + +### Comparative Analysis + +| File | Description | +|------|-------------| +| `*-k21.msh` | Mash sketch files (k=21) | +| `*-k31.msh` | Mash sketch files (k=31) | +| `*-mash-refseq88-*.txt` | Mash screening results against RefSeq | +| `*.sig` | Sourmash signatures | +| `sourmash-*.txt` | Sourmash classification results | + +### Merged Results + +:::note +Run-level aggregated results from all samples +::: + +| File | Description | +|------|-------------| +| `merged-assembly-stats.tsv` | Consolidated assembly statistics | +| `merged-mlst.tsv` | Consolidated MLST results | +| `staphtyper.tsv` | Consolidated Staphylococcus typing summary | + + +### Audit Trail + +Below are files that can assist you in 
understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| staphopia-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| staphopia-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| staphopia-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| staphopia-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +The following parameters are how you will provide either local or remote samples to be processed by Bactopia. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--samples` | string | | A FOFN (via bactopia prepare) with sample names and paths to FASTQ/FASTAs to process + | +| `--r1` | string | | First set of compressed (gzip) Illumina paired-end FASTQ reads (requires --r2 and --sample) | +| `--r2` | string | | Second set of compressed (gzip) Illumina paired-end FASTQ reads (requires --r1 and --sample) | +| `--se` | string | | Compressed (gzip) Illumina single-end FASTQ reads (requires --sample) | +| `--ont` | string | | Compressed (gzip) Oxford Nanopore FASTQ reads (requires --sample) | +| `--hybrid` | boolean | `false` | Create hybrid assembly using Unicycler. (requires --r1, --r2, --ont and --sample) | +| `--short_polish` | boolean | `false` | Create hybrid assembly from long-read assembly and short read polishing. (requires --r1, --r2, --ont and --sample) | +| `--sample` | string | | Sample name to use for the input sequences + | +| `--accessions` | string | | A file containing ENA/SRA Experiment accessions or NCBI Assembly accessions to be processed | +| `--accession` | string | | Sample name to use for the input sequences + | +| `--assembly` | string | | An assembled genome in compressed FASTA format. (requires --sample) | +| `--check_samples` | boolean | `false` | Validate the input FOFN provided by --samples | + +
+Dataset Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--species` | string | | Name of species for species-specific dataset to use | +| `--ask_merlin` | boolean | | Ask Merlin to execute species specific Bactopia tools based on Mash distances | +| `--coverage` | integer | `100` | Reduce samples to a given coverage, requires a genome size | +| `--genome_size` | integer | `0` | Expected genome size (bp) for all samples, required for read error correction and read subsampling | +| `--use_bakta` | boolean | | Use Bakta for annotation, instead of Prokka | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [amrfinderplus](/developers/subworkflows/amrfinderplus) - Find antimicrobial resistance genes and point mutations. +- [bactopia_assembler](/developers/subworkflows/bactopia_assembler) - Assemble bacterial genomes using automated assembler selection. +- [bactopia_datasets](/developers/subworkflows/bactopia_datasets) - Download and provide pre-compiled datasets required by Bactopia. +- [bactopia_gather](/developers/subworkflows/bactopia_gather) - Search, validate, gather, and standardize input samples. +- [bactopia_qc](/developers/subworkflows/bactopia_qc) - Perform comprehensive quality control on sequencing reads. +- [bactopia_sketcher](/developers/subworkflows/bactopia_sketcher) - Create genomic sketches and perform rapid taxonomic classification. +- [bakta](/developers/subworkflows/bakta) - Rapid bacterial genome annotation. +- [mlst](/developers/subworkflows/mlst) - Determine multilocus sequence types (MLST) from bacterial assemblies. +- [prokka](/developers/subworkflows/prokka) - Annotate bacterial genomes with functional information. +- [staphtyper](/developers/subworkflows/staphtyper) - Determine the agr, spa and SCCmec types for _Staphylococcus aureus_ genomes. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Staphopia](https://staphopia.emory.edu) + Petit III RA, Read TD [_Staphylococcus aureus_ viewed from the perspective of 40,000+ genomes.](http://dx.doi.org/10.7717/peerj.5261) _PeerJ_ 6, e5261 (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/staphopia) diff --git a/bactopia-pipelines/teton.mdx b/bactopia-pipelines/teton.mdx new file mode 100644 index 00000000..950e634a --- /dev/null +++ b/bactopia-pipelines/teton.mdx @@ -0,0 +1,279 @@ +--- +title: teton +description: "Taxonomic classification and abundance profiling of metagenomic reads." +tags: + - metagenomics + - classification + - kraken2 + - bracken + - abundance + - profiling + - named-workflow +--- + +# teton + +**Tags:** metagenomics classification kraken2 bracken abundance profiling named-workflow + +Taxonomic classification and abundance profiling of metagenomic reads. + +This workflow performs metagenomic classification using [Kraken2](https://github.com/DerrickWood/kraken2) +and [Bracken](https://github.com/jenniferlu717/Bracken), with optional host read removal +using SRA Scrubber. It processes metagenomic sequencing reads to estimate bacterial +genome sizes and separate bacterial from non-bacterial organisms. 
+ +## Usage + +teton CLI: + +```bash +teton \ + --input samples.csv \ + --outdir results/ +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/teton/main.nf \ + --input samples.csv \ + --outdir results/ +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ ├── teton- +│ │ ├── main +│ │ │ └── gather +│ │ │ ├── -meta.tsv +│ │ │ └── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ ├── teton-prepare +│ │ │ ├── -sizemeup.txt +│ │ │ ├── .bacteria.tsv +│ │ │ ├── .nonbacteria.tsv +│ │ │ └── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ └── tools +│ │ └── bracken +│ │ ├── .bracken.abundances.txt +│ │ ├── .bracken.adjusted.abundances.txt +│ │ ├── .bracken.classification.txt +│ │ ├── .bracken.krona.html +│ │ ├── .bracken.report.txt +│ │ ├── .bracken.tsv +│ │ ├── .kraken2.krona.html +│ │ ├── .kraken2.report.txt +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── tools +│ └── nohuman +│ ├── .scrub.report.tsv +│ ├── _R1.scrubbed.fastq.gz +│ ├── _R2.scrubbed.fastq.gz +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── bactopia-runs +│ └── teton- +│ ├── merged-results +│ │ ├── bracken-adjusted.tsv +│ │ ├── bracken-species-abundance.tsv +│ │ ├── logs +│ │ │ ├── bracken-adjusted-concat +│ │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ │ └── versions.yml +│ │ │ ├── bracken-species-abundance-concat +│ │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ │ └── versions.yml +│ │ │ ├── meta-concat +│ │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ │ └── versions.yml +│ │ │ ├── scrubber-concat +│ │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ │ └── versions.yml +│ │ │ ├── sizemeup-concat +│ │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ │ └── versions.yml +│ │ │ ├── teton-concat +│ │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ 
│ │ │ └── versions.yml +│ │ │ ├── teton-prepare-concat +│ │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ │ └── versions.yml +│ │ │ └── teton-prepare-nonbacteria-concat +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ ├── meta.tsv +│ │ ├── scrubber.tsv +│ │ ├── sizemeup.tsv +│ │ ├── teton-prepare-nonbacteria.tsv +│ │ ├── teton-prepare.tsv +│ │ └── teton.tsv +│ └── nf-reports +│ ├── teton-dag.dot +│ ├── teton-report.html +│ └── teton-timeline.html +└── merged-results + ├── .tsv + └── logs + └── -join + ├── nf.command.{begin,err,log,out,run,sh,trace} + └── versions.yml +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `bacteria.tsv` | Per-sample TSV files containing bacterial organisms and their properties | +| `nonbacteria.tsv` | Per-sample TSV files containing non-bacterial organisms | +| `sizemeup.tsv` | Per-sample TSV files with genome size estimates | + +### Merged Results + +| File | Description | +|------|-------------| +| `merged-bacteria.tsv` | Consolidated TSV file of all bacterial organisms across samples | +| `merged-nonbacteria.tsv` | Consolidated TSV file of all non-bacterial organisms across samples | +| `merged-sizemeup.tsv` | Consolidated TSV file of genome size estimates across samples | +| `report.tsv` | Joined TSV file combining scrubber and classification results | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| teton-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| teton-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| teton-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| teton-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +The following parameters are how you will provide either local or remote samples to be processed by Bactopia. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--samples` | string | | A FOFN (via bactopia prepare) with sample names and paths to FASTQ/FASTAs to process | +| `--r1` | string | | First set of compressed (gzip) Illumina paired-end FASTQ reads (requires --r2 and --sample) | +| `--r2` | string | | Second set of compressed (gzip) Illumina paired-end FASTQ reads (requires --r1 and --sample) | +| `--se` | string | | Compressed (gzip) Illumina single-end FASTQ reads (requires --sample) | +| `--ont` | string | | Compressed (gzip) Oxford Nanopore FASTQ reads (requires --sample) | +| `--hybrid` | boolean | `false` | Create hybrid assembly using Unicycler. (requires --r1, --r2, --ont and --sample) | +| `--short_polish` | boolean | `false` | Create hybrid assembly from long-read assembly and short read polishing. (requires --r1, --r2, --ont and --sample) | +| `--sample` | string | | Sample name to use for the input sequences | +| `--accessions` | string | | A file containing ENA/SRA Experiment accessions or NCBI Assembly accessions to be processed | +| `--accession` | string | | A single ENA/SRA Experiment accession or NCBI Assembly accession to be processed | +| `--assembly` | string | | An assembled genome in compressed FASTA format. (requires --sample) | +| `--check_samples` | boolean | `false` | Validate the input FOFN provided by --samples | + +
+Dataset Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--species` | string | | Name of species for species-specific dataset to use | +| `--ask_merlin` | boolean | | Ask Merlin to execute species specific Bactopia tools based on Mash distances | +| `--coverage` | integer | `100` | Reduce samples to a given coverage, requires a genome size | +| `--genome_size` | integer | `0` | Expected genome size (bp) for all samples, required for read error correction and read subsampling | +| `--use_bakta` | boolean | | Use Bakta for annotation, instead of Prokka | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [bactopia_gather](/developers/subworkflows/bactopia_gather) - Search, validate, gather, and standardize input samples. +- [teton](/developers/subworkflows/teton) - Perform taxonomic classification and estimate bacterial genome sizes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Bracken](https://github.com/jenniferlu717/Bracken) + Lu J, Breitwieser FP, Thielen P, and Salzberg SL [Bracken: estimating species abundance in metagenomics data.](https://doi.org/10.7717/peerj-cs.104) _PeerJ Computer Science_, 3, e104. (2017) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) + +- [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) + Katz KS, Shutov O, Lapoint R, Kimelman M, Brister JR, and O'Sullivan C [STAT: a fast, scalable, MinHash-based k-mer tool to assess Sequence Read Archive next-generation sequence submissions.](https://doi.org/10.1186/s13059-021-02490-0) _Genome Biology_, 22(1), 270 (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/teton) diff --git a/bactopia-tools/abricate.mdx b/bactopia-tools/abricate.mdx new file mode 100644 index 00000000..c476d09a --- /dev/null +++ b/bactopia-tools/abricate.mdx @@ -0,0 +1,231 @@ +--- +title: abricate +description: "Mass screening of contigs for antimicrobial resistance and virulence genes." 
+tags: + - bacteria + - antimicrobial-resistance + - virulence + - screening + - bactopia-tool +--- + +# abricate + +**Tags:** bacteria antimicrobial-resistance virulence screening bactopia-tool + +Mass screening of contigs for antimicrobial resistance and virulence genes. + +This Bactopia Tool uses [Abricate](https://github.com/tseemann/abricate) to screen +assemblies against multiple resistance and virulence gene databases, including +NCBI, CARD, RESFINDER, ARG-ANNOT, VFDB, PLASMIDFINDER, ECOLI_VF, and MEGARES. +It processes a Bactopia analysis directory, runs Abricate on each sample, and +creates a merged summary report. + +## Usage + +Run with the CARD database for antimicrobial resistance gene detection: + +Bactopia CLI: + +```bash +bactopia --wf abricate \ + --bactopia /path/to/your/bactopia/results \ + --abricate_db card +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/abricate/main.nf \ + --bactopia /path/to/your/bactopia/results \ + --abricate_db card +``` + +:::tip +You can run this workflow multiple times with different databases to screen +against all of them. Each run will produce separate results that can be +compared. 
+::: + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── abricate +│ └── ncbi +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── abricate-ncbi- + ├── merged-results + │ ├── abricate-ncbi.tsv + │ └── logs + │ └── abricate-concat + │ └── ncbi + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── abricate-dag.dot + ├── abricate-report.html + └── abricate-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Tab-delimited report of Abricate screening results for each sample | + +### Merged Results + +| File | Description | +|------|-------------| +| `abricate.tsv` | Merged TSV report containing Abricate results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. 
+ +| Filename | Description | +|----------|-------------| +| abricate-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| abricate-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| abricate-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| abricate-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Abricate Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--abricate_db` | string | `ncbi` | Database to use | +| `--abricate_minid` | integer | `80` | Minimum DNA percent identity | +| `--abricate_mincov` | integer | `80` | Minimum DNA percent coverage | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [abricate](/developers/subworkflows/abricate) - Mass screening of contigs for antimicrobial and virulence genes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Abricate](https://github.com/tseemann/abricate) + Seemann T [Abricate: mass screening of contigs for antimicrobial and virulence genes](https://github.com/tseemann/abricate) (GitHub) + +- [ARG-ANNOT](http://en.mediterranee-infection.com/article.php?laref=283%26titre=arg-annot) + Gupta SK, Padmanabhan BR, Diene SM, Lopez-Rojas R, Kempf M, Landraud L, Rolain J-M [ARG-ANNOT, a new bioinformatic tool to discover antibiotic resistance genes in bacterial genomes.](https://doi.org/10.1128/aac.01310-13) _Antimicrob. Agents Chemother_ 58, 212-220 (2014) + +- [CARD](https://card.mcmaster.ca/) + Alcock BP, Raphenya AR, Lau TTY, Tsang KK, Bouchard M, Edalatmand A, Huynh W, Nguyen A-L V, Cheng AA, Liu S, Min SY, Miroshnichenko A, Tran H-K, Werfalli RE, Nasir JA, Oloni M, Speicher DJ, Florescu A, Singh B, Faltyn M, Hernandez-Koutoucheva A, Sharma AN, Bordeleau E, Pawlowski AC, Zubyk HL, Dooley D, Griffiths E, Maguire F, Winsor GL, Beiko RG, Brinkman FSL, Hsiao WWL, Domselaar GV, McArthur AG [CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database.](https://doi.org/10.1093/nar/gkz935) _Nucleic acids research_ 48.D1, D517-D525 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [EcOH](https://dx.doi.org/10.1099%2Fmgen.0.000064) + Ingle DJ, Valcanis M, Kuzevski A, Tauschek M, Inouye M, Stinear T, Levine MM, Robins-Browne RM, 
Holt KE [In silico serotyping of E. coli from short read data identifies limited novel O-loci but extensive diversity of O:H serotype combinations within and between pathogenic lineages.](https://doi.org/10.1099/mgen.0.000064) _Microbial Genomics_, 2(7), e000064. (2016) + +- [MEGARes 2.0](https://megares.meglab.org/) + Doster E, Lakin SM, Dean CJ, Wolfe C, Young JG, Boucher C, Belk KE, Noyes NR, Morley PS [MEGARes 2.0: a database for classification of antimicrobial drug, biocide and metal resistance determinants in metagenomic sequence data.](https://doi.org/10.1093/nar/gkz1010) _Nucleic Acids Research_, 48(D1), D561-D569. (2020) + +- [NCBI Reference Gene Catalog](https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA313047) + Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. Agents Chemother._ (2019) + +- [PlasmidFinder](https://bitbucket.org/genomicepidemiology/plasmidfinder) + Carattoli A, Zankari E, García-Fernández A, Voldby Larsen M, Lund O, Villa L, Møller Aarestrup F, Hasman H [In silico detection and typing of plasmids using PlasmidFinder and plasmid multilocus sequence typing.](https://doi.org/10.1128/AAC.02412-14) _Antimicrobial Agents and Chemotherapy_ 58(7), 3895-3903. (2014) + +- [ResFinder](https://cge.food.dtu.dk//services/ResFinder/) + Zankari E, Hasman H, Cosentino S, Vestergaard M, Rasmussen S, Lund O, Aarestrup FM, Larsen MV [Identification of acquired antimicrobial resistance genes.](https://doi.org/10.1093/jac/dks261) _J. Antimicrob. 
Chemother._ 67, 2640-2644 (2012) + +- [VFDB](http://www.mgc.ac.cn/VFs/) + Chen L, Zheng D, Liu B, Yang J, Jin Q [VFDB 2016: hierarchical and refined dataset for big data analysis--10 years on.](https://doi.org/10.1093/nar/gkv1239) _Nucleic Acids Res._ 44, D694-7 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/abricate) diff --git a/bactopia-tools/abritamr.mdx b/bactopia-tools/abritamr.mdx new file mode 100644 index 00000000..de85a243 --- /dev/null +++ b/bactopia-tools/abritamr.mdx @@ -0,0 +1,208 @@ +--- +title: abritamr +description: "A NATA accredited tool for reporting the presence of antimicrobial resistance genes." +tags: + - bacteria + - antimicrobial-resistance + - virulence + - amr + - nata + - bactopia-tool +--- + +# abritamr + +**Tags:** bacteria antimicrobial-resistance virulence amr nata bactopia-tool + +A NATA accredited tool for reporting the presence of antimicrobial resistance genes. + +This Bactopia Tool uses [abriTAMR](https://github.com/MDU-PHL/abritamr) to identify +antimicrobial resistance genes in bacterial genomes. It runs AMRFinderPlus on each +sample and collates the results into functional classes, producing detailed reports +on resistance genes, partial matches, and virulence factors. It is accredited by NATA +for use in reporting presence of reportable AMR genes in Victoria, Australia. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf abritamr \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/abritamr/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── abritamr- +│ ├── .abritamr.tsv +│ ├── .amrfinder.out +│ ├── .summary_matches.tsv +│ ├── .summary_partials.tsv +│ ├── .summary_virulence.tsv +│ └── logs +│ ├── abritamr.log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ ├── update_abritamr_db.log +│ └── versions.yml +└── bactopia-runs + └── abritamr- + ├── merged-results + │ ├── abritamr.tsv + │ └── logs + │ └── abritamr-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── abritamr-dag.dot + ├── abritamr-report.html + └── abritamr-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.abritamr.tsv` | Tab-delimited file combining non-empty summary files from abriTAMR | +| `*.amrfinder.out` | Raw output from AMRFinderPlus (per sequence) | +| `*.summary_matches.tsv` | Tab-delimited file with AMR gene matches per sequence | +| `*.summary_partials.tsv` | Tab-delimited file with partial AMR gene matches | +| `*.summary_virulence.tsv` | Tab-delimited file with virulence gene classifications | + +### Merged Results + +| File | Description | +|------|-------------| +| `abritamr.tsv` | Merged TSV file containing AMR summaries from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| abritamr-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| abritamr-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| abritamr-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| abritamr-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### abriTAMR Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--abritamr_species` | string | | Use species specific point mutations, must provide a valid species | +| `--abritamr_identity` | integer | | Minimum identity of matches with amrfinder (0 - 1.0), defaults to amrfinder preset | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [abritamr](/developers/subworkflows/abritamr) - Identify antimicrobial resistance genes using AMRFinderPlus. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [abriTAMR](https://github.com/MDU-PHL/abritamr) + Sherry NL, Horan KA, Ballard SA, Gonҫalves da Silva A, Gorrie CL, Schultz MB, Stevens K, Valcanis M, Sait ML, Stinear TP, Howden BP, and Seemann T [An ISO-certified genomics workflow for identification and surveillance of antimicrobial resistance.](https://doi.org/10.1038/s41467-022-35713-4) _Nature Communications_, 14(1), 60. (2023) + +- [AMRFinderPlus](https://github.com/ncbi/amr) + Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. Agents Chemother._ (2019) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/abritamr) diff --git a/bactopia-tools/agrvate.mdx b/bactopia-tools/agrvate.mdx new file mode 100644 index 00000000..2b19f9ec --- /dev/null +++ b/bactopia-tools/agrvate.mdx @@ -0,0 +1,193 @@ +--- +title: agrvate +description: "Rapid identification of Staphylococcus aureus agr locus type and agr operon variants." 
+tags: + - staphylococcus-aureus + - agr + - typing + - virulence + - bactopia-tool +--- + +# agrvate + +**Tags:** staphylococcus-aureus agr typing virulence bactopia-tool + +Rapid identification of Staphylococcus aureus agr locus type and agr operon variants. + +This Bactopia Tool uses [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) to rapidly +identify the _agr_ locus type and detect agr operon variants in _Staphylococcus aureus_ assemblies. +The agr system is a key quorum-sensing regulator of virulence in S. aureus. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf agrvate \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/agrvate/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── agrvate- +│ ├── .tsv +│ ├── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ ├── -agr_gp.tab +│ ├── -blastn_log.txt +│ └── .fna-error-report.tab +└── bactopia-runs + └── agrvate- + ├── merged-results + │ ├── agrvate.tsv + │ └── logs + │ └── agrvate-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── agrvate-dag.dot + ├── agrvate-report.html + └── agrvate-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Agr locus typing results in TSV format | +| `supplemental/*` | Supplemental output files including detailed agr analysis | + +### Merged Results + +| File | Description | +|------|-------------| +| `agrvate.tsv` | Combined agr typing results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| agrvate-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| agrvate-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| agrvate-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| agrvate-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### AgrVATE Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--agrvate_typing_only` | boolean | `false` | agr typing only. Skips agr operon extraction and frameshift detection | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [agrvate](/developers/subworkflows/agrvate) - Identify Staphylococcus aureus agr locus type and operon variants. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) + Raghuram V. [AgrVATE: Rapid identification of Staphylococcus aureus agr locus type and agr operon variants.](https://github.com/VishnuRaghuram94/AgrVATE) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/agrvate) diff --git a/bactopia-tools/amrfinderplus.mdx b/bactopia-tools/amrfinderplus.mdx new file mode 100644 index 00000000..ef8ecfa6 --- /dev/null +++ b/bactopia-tools/amrfinderplus.mdx @@ -0,0 +1,194 @@ +--- +title: amrfinderplus +description: "Bactopia Tool: Amrfinderplus." +tags: + - bacteria + - fasta + - antimicrobial-resistance + - virulence + - genes + - proteins + - mutations + - bactopia-tool +--- + +# amrfinderplus + +**Tags:** bacteria fasta antimicrobial-resistance virulence genes proteins mutations bactopia-tool + +Bactopia Tool: Amrfinderplus. + +Identify antimicrobial resistance genes and point mutations in bacterial genomes. +This Bactopia Tool uses [AMRFinder+](https://github.com/ncbi/amr) to screen assemblies and proteins +for antimicrobial resistance genes, virulence genes, and resistance-associated point mutations. +It identifies acquired AMR genes and some point mutations in protein or assembled nucleotide sequences. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf amrfinderplus \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/amrfinderplus/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── amrfinderplus- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── amrfinderplus- + ├── merged-results + │ ├── amrfinderplus.tsv + │ └── logs + │ └── amrfinderplus-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── amrfinderplus-dag.dot + ├── amrfinderplus-report.html + └── amrfinderplus-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | AMR gene detection results in TSV format | +| `*-mutations.tsv` | Point mutations associated with antimicrobial resistance | + +### Merged Results + +| File | Description | +|------|-------------| +| `amrfinderplus.tsv` | Combined AMR detection results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| amrfinderplus-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| amrfinderplus-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| amrfinderplus-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| amrfinderplus-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### AMRFinder+ Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--amrfinderplus_opts` | string | | Extra AMRFinder+ options in quotes. | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [amrfinderplus](/developers/subworkflows/amrfinderplus) - Find antimicrobial resistance genes and point mutations. +- [bactopia_datasets](/developers/subworkflows/bactopia_datasets) - Download and provide pre-compiled datasets required by Bactopia. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [AMRFinderPlus](https://github.com/ncbi/amr) + Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. Agents Chemother._ (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/amrfinderplus) diff --git a/bactopia-tools/ariba.mdx b/bactopia-tools/ariba.mdx new file mode 100644 index 00000000..5cb571d2 --- /dev/null +++ b/bactopia-tools/ariba.mdx @@ -0,0 +1,214 @@ +--- +title: ariba +description: "Gene identification through local assemblies." +tags: + - fastq + - assembly + - resistance + - virulence + - gene-detection + - bactopia-tool +--- + +# ariba + +**Tags:** fastq assembly resistance virulence gene-detection bactopia-tool + +Gene identification through local assemblies. + +This Bactopia Tool uses [ARIBA](https://github.com/sanger-pathogens/ariba) to rapidly +identify genes in a database by creating local assemblies from short-read data. +ARIBA performs reference-based assembly and variant calling for gene detection. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf ariba \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/ariba/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── ariba +│ └── card +│ ├── -report.tsv +│ ├── -summary.csv +│ ├── logs +│ │ └── card +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ ├── assembled_genes.fa.gz +│ ├── assembled_seqs.fa.gz +│ ├── assemblies.fa.gz +│ ├── debug.report.tsv +│ ├── log.clusters.gz +│ └── version_info.txt +└── bactopia-runs + └── ariba-card- + ├── merged-results + │ ├── card-report.tsv + │ ├── card-summary.csv + │ └── logs + │ ├── card-report-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── card-summary-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── ariba-dag.dot + ├── ariba-report.html + └── ariba-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*-report.tsv` | Gene detection report for each sample | +| `*-summary.csv` | Summary of gene detection results | +| `assembled_genes.fa.gz` | Assembled genes in compressed FASTA format | +| `assembled_seqs.fa.gz` | Assembled sequences matching references | +| `assemblies.fa.gz` | Raw local assemblies | +| `debug.report.tsv` | Detailed report including synonymous mutations | +| `log.clusters.gz` | Analysis log file | +| `version_info.txt` | Version information for ARIBA and dependencies | + +### Merged Results + +| File | Description | +|------|-------------| +| `ariba-report.tsv` | Merged gene detection reports from all samples | +| `ariba-summary.csv` | Merged summary reports from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. 
+ +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| ariba-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| ariba-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| ariba-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| ariba-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Ariba Run Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--ariba_db` | string | | A database to query, if unavailable it will be downloaded to the path given by --datasets_cache (choices: `argannot`, `card`, `ncbi`, `megares`, `plasmidfinder`, `resfinder`, `srst2_argannot`, `vfdb_core`, `vfdb_full`, `virulencefinder`) | +| `--ariba_nucmer_min_id` | integer | `90` | Minimum alignment identity (delta-filter -i) | +| `--ariba_nucmer_min_len` | integer | `20` | Minimum alignment length (delta-filter -l) | +| `--ariba_assembly_cov` | integer | `50` | Target read coverage when sampling reads for assembly | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [ariba](/developers/subworkflows/ariba) - Rapidly identify genes by creating local assemblies from paired-end reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Ariba](https://github.com/sanger-pathogens/ariba) + Hunt M, Mather AE, Sánchez-Busó L, Page AJ, Parkhill J, Keane JA, Harris SR [ARIBA: rapid antimicrobial resistance genotyping directly from sequencing reads](http://dx.doi.org/10.1099/mgen.0.000131). _Microb Genom_ 3, e000131 (2017) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/ariba) diff --git a/bactopia-tools/bakta.mdx b/bactopia-tools/bakta.mdx new file mode 100644 index 00000000..d1dcc105 --- /dev/null +++ b/bactopia-tools/bakta.mdx @@ -0,0 +1,212 @@ +--- +title: bakta +description: "Rapid annotation of bacterial genomes and plasmids." +tags: + - bacteria + - fasta + - annotation + - genbank + - gff + - proteins + - bactopia-tool +--- + +# bakta + +**Tags:** bacteria fasta annotation genbank gff proteins bactopia-tool + +Rapid annotation of bacterial genomes and plasmids. + +This Bactopia Tool uses [Bakta](https://github.com/oschwengers/bakta) to rapidly annotate bacterial +genomes and plasmids in a standardized fashion. Bakta makes use of a large database ([40+ GB](https://doi.org/10.5281/zenodo.4247252)) +to provide extensive annotations including: tRNA, tmRNA, rRNA, ncRNA, CRISPR, CDS, and sORFs. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf bakta \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/bakta/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── main +│ └── annotator +│ └── bakta- +│ ├── -blastdb.tar.gz +│ ├── .embl.gz +│ ├── .faa.gz +│ ├── .ffn.gz +│ ├── .fna.gz +│ ├── .gbff.gz +│ ├── .gff3.gz +│ ├── .hypotheticals.faa.gz +│ ├── .hypotheticals.tsv +│ ├── .inference.tsv +│ ├── .json.gz +│ ├── .png +│ ├── .svg.gz +│ ├── .tsv +│ ├── .txt +│ └── logs +│ ├── .log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── bakta- + └── nf-reports + ├── bakta-dag.dot + ├── bakta-report.html + └── bakta-timeline.html +``` + +### Annotation + +| File | Description | +|------|-------------| +| `*.gff3` | Genome annotation in GFF3 format | +| `*.gbff` | Genome annotation in GenBank format | +| `*.faa` | Protein sequences in FASTA format | +| `*.ffn` | Feature nucleotide sequences | +| `*.fna` | Nucleotide sequences of all features | +| `*.hypotheticals.tsv` | List of hypothetical proteins | +| `*.tsv` | Annotation summary in TSV format | +| `*.txt` | Detailed annotation report | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| bakta-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| bakta-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| bakta-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| bakta-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Bakta Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bakta_db` | string | | Tarball or path to the Bakta database | +| `--bakta_db_type` | string | `full` | Which Bakta DB to download 'full' (~30GB) or 'light' (~2GB) (choices: `full`, `light`) | +| `--bakta_save_as_tarball` | boolean | `false` | Save the Bakta database as a tarball | +| `--download_bakta` | boolean | `false` | Download the Bakta database to the path given by --bakta_db | + +### Bakta Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bakta_proteins` | string | | FASTA file of trusted proteins to first annotate from | +| `--bakta_prodigal_tf` | string | | Training file to use for Prodigal | +| `--bakta_replicons` | string | | Replicon information table (tsv/csv) | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [bakta](/developers/subworkflows/bakta) - Rapid bacterial genome annotation. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Bakta](https://github.com/oschwengers/bakta) + Schwengers O, Jelonek L, Dieckmann MA, Beyvers S, Blom J, Goesmann A [Bakta - rapid and standardized annotation of bacterial genomes via alignment-free sequence identification.](https://doi.org/10.1099/mgen.0.000685) _Microbial Genomics_ 7(11) (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/bakta) diff --git a/bactopia-tools/blastn.mdx b/bactopia-tools/blastn.mdx new file mode 100644 index 00000000..d5303c05 --- /dev/null +++ b/bactopia-tools/blastn.mdx @@ -0,0 +1,195 @@ +--- +title: blastn +description: "Search against nucleotide BLAST databases using nucleotide queries." +tags: + - fasta + - blast + - alignment + - nucleotide + - similarity + - bactopia-tool +--- + +# blastn + +**Tags:** fasta blast alignment nucleotide similarity bactopia-tool + +Search against nucleotide BLAST databases using nucleotide queries. + +This Bactopia Tool uses [BLASTN](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs) +to query nucleotide sequences against nucleotide databases for sequence similarity search. +BLASTN finds regions of local similarity between nucleotide sequences. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf blastn \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/blastn/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── blastn- +│ ├── .blastn.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── blastn- + ├── merged-results + │ ├── blastn.tsv + │ └── logs + │ └── blastn-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── blastn-dag.dot + ├── blastn-report.html + └── blastn-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.blastn.tsv` | BLASTN alignment results in tabular format | +| `*.blastn.html` | Interactive HTML report of BLASTN results | + +### Merged Results + +| File | Description | +|------|-------------| +| `blastn.tsv` | Merged BLASTN results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| blastn-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| blastn-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| blastn-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| blastn-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### BLASTN Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--blastn_query` | string | | A fasta file containing the query sequences to BLAST against the database | +| `--blastn_outfmt` | string | `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | The columns to include with -outfmt 6 | +| `--blastn_opts` | string | | Additional options to pass to BLASTN | +| `--blastn_perc_identity` | integer | `50` | Percent identity | +| `--blastn_qcov_hsp_perc` | integer | `50` | Percent query coverage per hsp | +| `--blastn_max_target_seqs` | integer | `2000` | Maximum number of aligned sequences to keep | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [blastn](/developers/subworkflows/blastn) - Search a nucleotide database using nucleotide query sequences. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/blastn) diff --git a/bactopia-tools/blastp.mdx b/bactopia-tools/blastp.mdx new file mode 100644 index 00000000..42eb7091 --- /dev/null +++ b/bactopia-tools/blastp.mdx @@ -0,0 +1,194 @@ +--- +title: blastp +description: "Search against protein BLAST databases using protein queries." +tags: + - fasta + - blast + - alignment + - protein + - similarity + - bactopia-tool +--- + +# blastp + +**Tags:** fasta blast alignment protein similarity bactopia-tool + +Search against protein BLAST databases using protein queries. + +This Bactopia Tool uses [BLASTP](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs) +to query protein sequences against protein databases for sequence similarity search. +BLASTP compares a protein query to a protein database to find similar sequences. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf blastp \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/blastp/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── blastp- +│ ├── .blastp.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── blastp- + ├── merged-results + │ ├── blastp.tsv + │ └── logs + │ └── blastp-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── blastp-dag.dot + ├── blastp-report.html + └── blastp-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.blastp.tsv` | BLASTP alignment results in tabular format | +| `*.blastp.html` | Interactive HTML report of BLASTP results | + +### Merged Results + +| File | Description | +|------|-------------| +| `blastp.tsv` | Merged BLASTP results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| blastp-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| blastp-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| blastp-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| blastp-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### BLASTP Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--blastp_query` | string | | A fasta file containing the query sequences to BLAST against the database | +| `--blastp_outfmt` | string | `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | The columns to include with -outfmt 6 | +| `--blastp_opts` | string | | Additional options to pass to BLASTP | +| `--blastp_qcov_hsp_perc` | integer | `50` | Percent query coverage per hsp | +| `--blastp_max_target_seqs` | integer | `2000` | Maximum number of aligned sequences to keep | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [blastp](/developers/subworkflows/blastp) - Search protein sequences against protein database. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/blastp) diff --git a/bactopia-tools/blastx.mdx b/bactopia-tools/blastx.mdx new file mode 100644 index 00000000..79214d7d --- /dev/null +++ b/bactopia-tools/blastx.mdx @@ -0,0 +1,193 @@ +--- +title: blastx +description: "Search against protein BLAST databases using translated nucleotide queries." +tags: + - fasta + - blast + - alignment + - protein + - translation + - bactopia-tool +--- + +# blastx + +**Tags:** fasta blast alignment protein translation bactopia-tool + +Search against protein BLAST databases using translated nucleotide queries. + +This Bactopia Tool uses [BLASTX](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs) +to query translated nucleotide sequences against protein databases for protein homology search. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf blastx \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/blastx/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── blastx- +│ ├── .blastx.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── blastx- + ├── merged-results + │ ├── blastx.tsv + │ └── logs + │ └── blastx-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── blastx-dag.dot + ├── blastx-report.html + └── blastx-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.blastx.tsv` | BLASTX alignment results in tabular format | +| `*.blastx.html` | Interactive HTML report of BLASTX results | + +### Merged Results + +| File | Description | +|------|-------------| +| `blastx.tsv` | Merged BLASTX results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| blastx-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| blastx-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| blastx-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| blastx-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### BLASTX Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--blastx_query` | string | | A fasta file containing the query sequences to BLAST against the database | +| `--blastx_outfmt` | string | `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | The columns to include with -outfmt 6 | +| `--blastx_opts` | string | | Additional options to pass to BLASTX | +| `--blastx_qcov_hsp_perc` | integer | `50` | Percent query coverage per hsp | +| `--blastx_max_target_seqs` | integer | `2000` | Maximum number of aligned sequences to keep | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [blastx](/developers/subworkflows/blastx) - Translate nucleotide sequences and search protein database. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/blastx) diff --git a/bactopia-tools/bracken.mdx b/bactopia-tools/bracken.mdx new file mode 100644 index 00000000..41f18df8 --- /dev/null +++ b/bactopia-tools/bracken.mdx @@ -0,0 +1,213 @@ +--- +title: bracken +description: "Estimate taxonomic abundance of metagenomic samples." +tags: + - metagenomics + - classification + - abundance + - kraken2 + - bracken + - krona + - bactopia-tool +--- + +# bracken + +**Tags:** metagenomics classification abundance kraken2 bracken krona bactopia-tool + +Estimate taxonomic abundance of metagenomic samples. + +This Bactopia Tool uses [Bracken](https://github.com/jenniferlu717/Bracken) to estimate +taxonomic abundance from Kraken2 results. It also runs [Kraken2](https://ccb.jhu.edu/software/kraken2/) +for taxonomic classification and generates [Krona](https://github.com/marbl/Krona) interactive charts. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf bracken \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/bracken/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── bracken- +│ ├── .bracken.abundances.txt +│ ├── .bracken.adjusted.abundances.txt +│ ├── .bracken.classification.txt +│ ├── .bracken.krona.html +│ ├── .bracken.report.txt +│ ├── .bracken.tsv +│ ├── .kraken2.krona.html +│ ├── .kraken2.report.txt +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── bracken- + ├── merged-results + │ ├── bracken-adjusted.tsv + │ ├── bracken-species-abundance.tsv + │ └── logs + │ ├── bracken-adjusted-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── bracken-species-abundance-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── bracken-dag.dot + ├── bracken-report.html + └── bracken-timeline.html +``` + +### Classification Results + +| File | Description | +|------|-------------| +| `*.kraken2.report.txt` | Kraken2 classification report | +| `*.bracken.report.txt` | Bracken abundance estimates | +| `*.krona.html` | Krona interactive visualization | + +### Summary Reports + +| File | Description | +|------|-------------| +| `bracken-summary.tsv` | Summary of classification results across all samples | +| `bracken-matrix.tsv` | Abundance matrix for downstream analysis | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| bracken-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| bracken-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| bracken-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| bracken-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Kraken2 and Bracken Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--kraken2_db` | string | | A single tarball or path to a Kraken2 formatted database | +| `--kraken2_confidence` | number | `0.0` | Confidence score threshold between 0 and 1 | +| `--kraken2_use_mpa_style` | boolean | `false` | Format report output like Kraken 1's kraken-mpa-report | +| `--kraken2_report_zero_counts` | boolean | `false` | Report counts for ALL taxa, even if counts are zero | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [bracken](/developers/subworkflows/bracken) - Estimate species abundance from metagenomic reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Bracken](https://github.com/jenniferlu717/Bracken) + Lu J, Breitwieser FP, Thielen P, and Salzberg SL [Bracken: estimating species abundance in metagenomics data.](https://doi.org/10.7717/peerj-cs.104) _PeerJ Computer Science_, 3, e104. (2017) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) + +- [Krona](https://github.com/marbl/Krona) + Ondov BD, Bergman NH, and Phillippy AM [Interactive metagenomic visualization in a Web browser.](https://doi.org/10.1186/1471-2105-12-385) _BMC Bioinformatics_, 12, 385. (2011) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/bracken) diff --git a/bactopia-tools/btyper3.mdx b/bactopia-tools/btyper3.mdx new file mode 100644 index 00000000..6c7ac86f --- /dev/null +++ b/bactopia-tools/btyper3.mdx @@ -0,0 +1,207 @@ +--- +title: btyper3 +description: "Taxonomic classification of Bacillus cereus group isolates." +tags: + - bacillus-cereus + - taxonomy + - classification + - fasta + - bactopia-tool +--- + +# btyper3 + +**Tags:** bacillus-cereus taxonomy classification fasta bactopia-tool + +Taxonomic classification of Bacillus cereus group isolates. + +This Bactopia Tool uses [BTyper3](https://github.com/lmc297/BTyper3) to classify +Bacillus cereus group isolates from genome assemblies. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf btyper3 \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/btyper3/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── btyper3- +│ ├── .tsv +│ ├── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ ├── bt +│ │ └── _bt.txt +│ ├── logs +│ │ └── .log +│ ├── mlst +│ │ └── _mlst.txt +│ ├── panC +│ │ └── _panC.txt +│ ├── species +│ │ └── _species_fastani.txt +│ ├── subspecies +│ │ └── _subspecies_fastani.txt +│ ├── typestrains +│ │ └── _typestrains_fastani.txt +│ └── virulence +│ └── _virulence.txt +└── bactopia-runs + └── btyper3- + ├── merged-results + │ ├── btyper3.tsv + │ └── logs + │ └── btyper3-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── btyper3-dag.dot + ├── btyper3-report.html + └── btyper3-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*_final_results.txt` | Final tab-delimited file of BTyper3 results | +| `results/*` | Directory of detailed analysis results | + +### Merged Results + +| File | Description | +|------|-------------| +| `btyper3.tsv` | Merged TSV file containing BTyper3 results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| btyper3-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| btyper3-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| btyper3-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| btyper3-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### BTyper3 Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--btyper3_virulence_identity` | integer | `70` | Minimum percent amino acid/nucleotide identity threshold for a virulence gene to be considered present | +| `--btyper3_identity` | integer | `50` | Minimum percent amino acid identity threshold for a Bt toxin gene to be considered present | +| `--btyper3_opts` | string | | Additional options to pass to BTyper3 | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [btyper3](/developers/subworkflows/btyper3) - In silico taxonomic classification of Bacillus cereus group genomes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BTyper3](https://github.com/lmc297/BTyper3) + Carroll LM, Cheng RA, Kovac J [No Assembly Required: Using BTyper3 to Assess the Congruency of a Proposed Taxonomic Framework for the Bacillus cereus Group With Historical Typing Methods.](https://doi.org/10.3389/fmicb.2020.580691) _Frontiers in Microbiology_, 11, 580691. (2020) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/btyper3) diff --git a/bactopia-tools/busco.mdx b/bactopia-tools/busco.mdx new file mode 100644 index 00000000..8e3615c4 --- /dev/null +++ b/bactopia-tools/busco.mdx @@ -0,0 +1,443 @@ +--- +title: busco +description: "Assessment of genome assembly completeness using evolutionarily informed expectations." +tags: + - assembly + - completeness + - assessment + - orthologs + - quality-control + - bactopia-tool +--- + +# busco + +**Tags:** assembly completeness assessment orthologs quality-control bactopia-tool + +Assessment of genome assembly completeness using evolutionarily informed expectations. + +This Bactopia Tool uses [BUSCO](https://gitlab.com/ezlab/busco) (Benchmarking Universal Single-Copy Orthologs) +to assess the completeness of genome assemblies by searching for single-copy orthologs. The workflow +processes each assembly against a specified lineage dataset and provides comprehensive completeness metrics. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf busco \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/busco/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── busco +│ └── bacteria_odb10 +│ ├── -summary.txt +│ ├── logs +│ │ ├── bbtools_err.log +│ │ ├── bbtools_out.log +│ │ ├── busco.log +│ │ ├── hmmsearch_err.log +│ │ ├── hmmsearch_out.log +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ ├── prodigal_err.log +│ │ ├── prodigal_mode_single_code_11_err.log +│ │ ├── prodigal_mode_single_code_11_out.log +│ │ ├── prodigal_mode_single_code_4_err.log +│ │ ├── prodigal_mode_single_code_4_out.log +│ │ ├── prodigal_out.log +│ │ └── versions.yml +│ └── supplemental +│ ├── prodigal_output +│ │ └── predicted_genes +│ │ ├── predicted.faa.gz +│ │ ├── predicted.fna.gz +│ │ └── tmp +│ │ ├── prodigal_mode_single_code_11.faa.gz +│ │ ├── prodigal_mode_single_code_11.fna.gz +│ │ ├── prodigal_mode_single_code_4.faa.gz +│ │ └── prodigal_mode_single_code_4.fna.gz +│ ├── run_bacteria_odb10 +│ │ ├── .bbtools_output +│ │ ├── busco_sequences +│ │ │ ├── fragmented_busco_sequences +│ │ │ │ ├── 1540940at2.faa.gz +│ │ │ │ ├── 1540940at2.fna.gz +│ │ │ │ ├── 1827334at2.faa.gz +│ │ │ │ ├── 1827334at2.fna.gz +│ │ │ │ ├── 1830156at2.faa.gz +│ │ │ │ ├── 1830156at2.fna.gz +│ │ │ │ ├── 1874945at2.faa.gz +│ │ │ │ ├── 1874945at2.fna.gz +│ │ │ │ ├── 1937072at2.faa.gz +│ │ │ │ ├── 1937072at2.fna.gz +│ │ │ │ ├── 1971380at2.faa.gz +│ │ │ │ ├── 1971380at2.fna.gz +│ │ │ │ ├── 226836at2.faa.gz +│ │ │ │ ├── 226836at2.fna.gz +│ │ │ │ ├── 4421at2.faa.gz +│ │ │ │ ├── 4421at2.fna.gz +│ │ │ │ ├── 469058at2.faa.gz +│ │ │ │ ├── 469058at2.fna.gz +│ │ │ │ ├── 837522at2.faa.gz +│ │ │ │ ├── 837522at2.fna.gz +│ │ │ │ ├── 9601at2.faa.gz +│ │ │ │ ├── 9601at2.fna.gz +│ │ │ │ ├── 981870at2.faa.gz +│ │ │ │ └── 981870at2.fna.gz +│ │ │ ├── 
multi_copy_busco_sequences +│ │ │ └── single_copy_busco_sequences +│ │ │ ├── 1132353at2.faa.gz +│ │ │ ├── 1132353at2.fna.gz +│ │ │ ├── 1211060at2.faa.gz +│ │ │ ├── 1211060at2.fna.gz +│ │ │ ├── 1456375at2.faa.gz +│ │ │ ├── 1456375at2.fna.gz +│ │ │ ├── 1505038at2.faa.gz +│ │ │ ├── 1505038at2.fna.gz +│ │ │ ├── 1567535at2.faa.gz +│ │ │ ├── 1567535at2.fna.gz +│ │ │ ├── 1666043at2.faa.gz +│ │ │ ├── 1666043at2.fna.gz +│ │ │ ├── 1692188at2.faa.gz +│ │ │ ├── 1692188at2.fna.gz +│ │ │ ├── 1698718at2.faa.gz +│ │ │ ├── 1698718at2.fna.gz +│ │ │ ├── 1707228at2.faa.gz +│ │ │ ├── 1707228at2.fna.gz +│ │ │ ├── 1713391at2.faa.gz +│ │ │ ├── 1713391at2.fna.gz +│ │ │ ├── 1772647at2.faa.gz +│ │ │ ├── 1772647at2.fna.gz +│ │ │ ├── 1786618at2.faa.gz +│ │ │ ├── 1786618at2.fna.gz +│ │ │ ├── 1799923at2.faa.gz +│ │ │ ├── 1799923at2.fna.gz +│ │ │ ├── 1838961at2.faa.gz +│ │ │ ├── 1838961at2.fna.gz +│ │ │ ├── 1893906at2.faa.gz +│ │ │ ├── 1893906at2.fna.gz +│ │ │ ├── 1904463at2.faa.gz +│ │ │ ├── 1904463at2.fna.gz +│ │ │ ├── 1963491at2.faa.gz +│ │ │ ├── 1963491at2.fna.gz +│ │ │ ├── 1978865at2.faa.gz +│ │ │ ├── 1978865at2.fna.gz +│ │ │ ├── 2005443at2.faa.gz +│ │ │ ├── 2005443at2.fna.gz +│ │ │ ├── 2012682at2.faa.gz +│ │ │ ├── 2012682at2.fna.gz +│ │ │ ├── 2035880at2.faa.gz +│ │ │ ├── 2035880at2.fna.gz +│ │ │ ├── 2040741at2.faa.gz +│ │ │ ├── 2040741at2.fna.gz +│ │ │ ├── 2063644at2.faa.gz +│ │ │ ├── 2063644at2.fna.gz +│ │ │ ├── 353391at2.faa.gz +│ │ │ ├── 353391at2.fna.gz +│ │ │ ├── 430176at2.faa.gz +│ │ │ ├── 430176at2.fna.gz +│ │ │ ├── 662686at2.faa.gz +│ │ │ ├── 662686at2.fna.gz +│ │ │ ├── 665824at2.faa.gz +│ │ │ ├── 665824at2.fna.gz +│ │ │ ├── 761140at2.faa.gz +│ │ │ ├── 761140at2.fna.gz +│ │ │ ├── 776861at2.faa.gz +│ │ │ ├── 776861at2.fna.gz +│ │ │ ├── 961486at2.faa.gz +│ │ │ └── 961486at2.fna.gz +│ │ ├── full_table.tsv +│ │ ├── hmmer_output +│ │ │ ├── 1009041at2.out.gz +│ │ │ ├── 1024388at2.out.gz +│ │ │ ├── 1036075at2.out.gz +│ │ │ ├── 1043239at2.out.gz +│ │ │ ├── 1049662at2.out.gz +│ │ │ ├── 
1054741at2.out.gz +│ │ │ ├── 1069591at2.out.gz +│ │ │ ├── 1074831at2.out.gz +│ │ │ ├── 1080436at2.out.gz +│ │ │ ├── 1093223at2.out.gz +│ │ │ ├── 1132353at2.out.gz +│ │ │ ├── 1151822at2.out.gz +│ │ │ ├── 1166299at2.out.gz +│ │ │ ├── 1211060at2.out.gz +│ │ │ ├── 1257362at2.out.gz +│ │ │ ├── 1266295at2.out.gz +│ │ │ ├── 1270636at2.out.gz +│ │ │ ├── 1272633at2.out.gz +│ │ │ ├── 1346419at2.out.gz +│ │ │ ├── 1395197at2.out.gz +│ │ │ ├── 1398618at2.out.gz +│ │ │ ├── 1419877at2.out.gz +│ │ │ ├── 143460at2.out.gz +│ │ │ ├── 1456375at2.out.gz +│ │ │ ├── 1470978at2.out.gz +│ │ │ ├── 1490892at2.out.gz +│ │ │ ├── 1491686at2.out.gz +│ │ │ ├── 1497415at2.out.gz +│ │ │ ├── 1502854at2.out.gz +│ │ │ ├── 1504821at2.out.gz +│ │ │ ├── 1505038at2.out.gz +│ │ │ ├── 1540940at2.out.gz +│ │ │ ├── 1567535at2.out.gz +│ │ │ ├── 1572673at2.out.gz +│ │ │ ├── 1574817at2.out.gz +│ │ │ ├── 1590629at2.out.gz +│ │ │ ├── 1592033at2.out.gz +│ │ │ ├── 1595498at2.out.gz +│ │ │ ├── 1623045at2.out.gz +│ │ │ ├── 1661836at2.out.gz +│ │ │ ├── 1666043at2.out.gz +│ │ │ ├── 1671455at2.out.gz +│ │ │ ├── 1674344at2.out.gz +│ │ │ ├── 1676462at2.out.gz +│ │ │ ├── 1692188at2.out.gz +│ │ │ ├── 1698718at2.out.gz +│ │ │ ├── 1701531at2.out.gz +│ │ │ ├── 1702697at2.out.gz +│ │ │ ├── 1707228at2.out.gz +│ │ │ ├── 1713391at2.out.gz +│ │ │ ├── 1720952at2.out.gz +│ │ │ ├── 1758685at2.out.gz +│ │ │ ├── 1760144at2.out.gz +│ │ │ ├── 1766414at2.out.gz +│ │ │ ├── 1772647at2.out.gz +│ │ │ ├── 1776954at2.out.gz +│ │ │ ├── 1786618at2.out.gz +│ │ │ ├── 1799923at2.out.gz +│ │ │ ├── 182107at2.out.gz +│ │ │ ├── 1822215at2.out.gz +│ │ │ ├── 1822695at2.out.gz +│ │ │ ├── 1827295at2.out.gz +│ │ │ ├── 1827334at2.out.gz +│ │ │ ├── 1830156at2.out.gz +│ │ │ ├── 1838961at2.out.gz +│ │ │ ├── 1842956at2.out.gz +│ │ │ ├── 1844275at2.out.gz +│ │ │ ├── 1846503at2.out.gz +│ │ │ ├── 1874945at2.out.gz +│ │ │ ├── 1890943at2.out.gz +│ │ │ ├── 1893906at2.out.gz +│ │ │ ├── 1904463at2.out.gz +│ │ │ ├── 1906715at2.out.gz +│ │ │ ├── 1932144at2.out.gz +│ │ │ ├── 
1937072at2.out.gz +│ │ │ ├── 1937493at2.out.gz +│ │ │ ├── 1940575at2.out.gz +│ │ │ ├── 1949059at2.out.gz +│ │ │ ├── 1959318at2.out.gz +│ │ │ ├── 1963491at2.out.gz +│ │ │ ├── 1971380at2.out.gz +│ │ │ ├── 1978865at2.out.gz +│ │ │ ├── 1990141at2.out.gz +│ │ │ ├── 1990650at2.out.gz +│ │ │ ├── 2005443at2.out.gz +│ │ │ ├── 2012682at2.out.gz +│ │ │ ├── 2035880at2.out.gz +│ │ │ ├── 2040741at2.out.gz +│ │ │ ├── 2046660at2.out.gz +│ │ │ ├── 2063644at2.out.gz +│ │ │ ├── 2066663at2.out.gz +│ │ │ ├── 2075502at2.out.gz +│ │ │ ├── 219876at2.out.gz +│ │ │ ├── 223233at2.out.gz +│ │ │ ├── 226836at2.out.gz +│ │ │ ├── 232152at2.out.gz +│ │ │ ├── 26038at2.out.gz +│ │ │ ├── 267682at2.out.gz +│ │ │ ├── 353391at2.out.gz +│ │ │ ├── 384865at2.out.gz +│ │ │ ├── 402899at2.out.gz +│ │ │ ├── 430176at2.out.gz +│ │ │ ├── 4421at2.out.gz +│ │ │ ├── 462069at2.out.gz +│ │ │ ├── 469058at2.out.gz +│ │ │ ├── 504464at2.out.gz +│ │ │ ├── 505485at2.out.gz +│ │ │ ├── 533698at2.out.gz +│ │ │ ├── 662686at2.out.gz +│ │ │ ├── 665824at2.out.gz +│ │ │ ├── 761140at2.out.gz +│ │ │ ├── 776861at2.out.gz +│ │ │ ├── 837522at2.out.gz +│ │ │ ├── 841869at2.out.gz +│ │ │ ├── 874197at2.out.gz +│ │ │ ├── 91428at2.out.gz +│ │ │ ├── 923547at2.out.gz +│ │ │ ├── 932854at2.out.gz +│ │ │ ├── 932993at2.out.gz +│ │ │ ├── 95696at2.out.gz +│ │ │ ├── 9601at2.out.gz +│ │ │ ├── 961486at2.out.gz +│ │ │ ├── 981870at2.out.gz +│ │ │ └── 984717at2.out.gz +│ │ ├── missing_busco_list.tsv +│ │ ├── short_summary.json +│ │ └── short_summary.txt +│ ├── short_summary.specific.bacteria_odb10..fna.json +│ └── short_summary.specific.bacteria_odb10..fna.txt +└── bactopia-runs + └── busco-bacteria_odb10- + ├── merged-results + │ ├── busco-bacteria_odb10.tsv + │ └── logs + │ └── busco-bacteria_odb10-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── busco-dag.dot + ├── busco-report.html + └── busco-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `run_` | 
BUSCO analysis output directory for each lineage | +| `run_/full_table.tsv` | Complete results with scores and lengths of BUSCO matches | +| `run_/missing_busco_list.tsv` | List of missing BUSCO genes | +| `run_/short_summary.txt` | Summary of BUSCO assessment results | +| `run_/short_summary.json` | Summary of BUSCO assessment in JSON format | +| `*-summary.txt` | Per-sample BUSCO summary file | +| `*-summary.json` | Per-sample BUSCO summary in JSON format | + +### Merged Results + +| File | Description | +|------|-------------| +| `busco.tsv` | Merged TSV file containing BUSCO summaries from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. 
+ +| Filename | Description | +|----------|-------------| +| busco-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| busco-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| busco-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| busco-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### BUSCO Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--busco_lineage` | string | `bacteria_odb10` | Specify the name of the BUSCO lineage to be used | +| `--busco_evalue` | string | `1e-03` | E-value cutoff for BLAST searches. Allowed formats, 0.001 or 1e-03 | +| `--busco_limit` | integer | `3` | Total candidate regions to consider per BUSCO | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [busco](/developers/subworkflows/busco) - Assess genome assembly completeness using BUSCO. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BUSCO](https://gitlab.com/ezlab/busco) + Manni M, Berkeley MR, Seppey M, Simão FA, Zdobnov EM [BUSCO Update: Novel and Streamlined Workflows along with Broader and Deeper Phylogenetic Coverage for Scoring of Eukaryotic, Prokaryotic, and Viral Genomes.](https://doi.org/10.1093/molbev/msab199) _Molecular Biology and Evolution_ 38(10), 4647-4654. (2021) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/busco) diff --git a/bactopia-tools/checkm.mdx b/bactopia-tools/checkm.mdx new file mode 100644 index 00000000..7713a400 --- /dev/null +++ b/bactopia-tools/checkm.mdx @@ -0,0 +1,244 @@ +--- +title: checkm +description: "Assessment of microbial genome assembly quality." +tags: + - assembly-quality + - microbial-genomes + - completeness + - contamination + - bactopia-tool +--- + +# checkm + +**Tags:** assembly-quality microbial-genomes completeness contamination bactopia-tool + +Assessment of microbial genome assembly quality. + +This Bactopia Tool uses [CheckM](https://github.com/Ecogenomics/CheckM) to assess the quality +of microbial genomes recovered from isolates, single cells, and metagenomes using +a set of lineage-specific marker genes. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf checkm \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/checkm/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── checkm- +│ ├── .tsv +│ ├── logs +│ │ ├── checkm.log +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ ├── -genes.aln +│ ├── bins +│ │ └── +│ │ ├── genes.faa.gz +│ │ ├── genes.gff +│ │ ├── hmmer.analyze.txt.gz +│ │ └── hmmer.tree.txt +│ ├── lineage.ms +│ └── storage +│ ├── aai_qa +│ ├── bin_stats.analyze.tsv +│ ├── bin_stats.tree.tsv +│ ├── bin_stats_ext.tsv +│ ├── checkm_hmm_info.pkl.gz +│ ├── marker_gene_stats.tsv +│ ├── phylo_hmm_info.pkl.gz +│ └── tree +│ ├── PF00164.20.masked.faa.gz +│ ├── PF00177.16.masked.faa.gz +│ ├── PF00181.18.masked.faa.gz +│ ├── PF00189.15.masked.faa.gz +│ ├── PF00203.16.masked.faa.gz +│ ├── PF00237.14.masked.faa.gz +│ ├── PF00238.14.masked.faa.gz +│ ├── PF00252.13.masked.faa.gz +│ ├── PF00276.15.masked.faa.gz +│ ├── PF00281.14.masked.faa.gz +│ ├── PF00333.15.masked.faa.gz +│ ├── PF00366.15.masked.faa.gz +│ ├── PF00410.14.masked.faa.gz +│ ├── PF00411.14.masked.faa.gz +│ ├── PF00562.23.masked.faa.gz +│ ├── PF00623.15.masked.faa.gz +│ ├── PF00673.16.masked.faa.gz +│ ├── PF00831.18.masked.faa.gz +│ ├── PF00861.17.masked.faa.gz +│ ├── PF03719.10.masked.faa.gz +│ ├── PF03947.13.masked.faa.gz +│ ├── PF04560.15.masked.faa.gz +│ ├── PF04561.9.masked.faa.gz +│ ├── PF04565.11.masked.faa.gz +│ ├── PF04997.7.masked.faa.gz +│ ├── PF11987.3.masked.faa.gz +│ ├── concatenated.fasta.gz +│ ├── concatenated.pplacer.json +│ ├── concatenated.tre +│ └── pplacer.out +└── bactopia-runs + └── checkm- + ├── merged-results + │ ├── checkm.tsv + │ └── logs + │ └── checkm-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── checkm-dag.dot + 
├── checkm-report.html + └── checkm-timeline.html +``` + +### Quality Assessment + +| File | Description | +|------|-------------| +| `*.genes.aln` | Alignment of multi-copy genes and their AAI identity | +| `*.results.txt` | Final results of CheckM's lineage_wf | +| `lineage.ms` | Output file describing marker set for each bin | +| `bins/**` | Directory with inputs for processing by CheckM | +| `storage/**` | Directory with intermediate results from CheckM processing | + +### Merged Results + +| File | Description | +|------|-------------| +| `checkm.tsv` | Merged TSV file with CheckM results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. 
+ +| Filename | Description | +|----------|-------------| +| checkm-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| checkm-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| checkm-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| checkm-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### CheckM Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--checkm_unique` | integer | `10` | Minimum number of unique phylogenetic markers required to use lineage-specific marker set. | +| `--checkm_multi` | integer | `10` | Maximum number of multi-copy phylogenetic markers before defaulting to domain-level marker set. | +| `--checkm_aai_strain` | number | `0.9` | AAI threshold used to identify strain heterogeneity | +| `--checkm_length` | number | `0.7` | Percent overlap between target and query | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [checkm](/developers/subworkflows/checkm) - Assess metagenome bin completeness using CheckM. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [CheckM](https://github.com/Ecogenomics/CheckM) + Parks DH, Imelfort M, Skennerton CT, Hugenholtz P, Tyson GW [CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes.](http://dx.doi.org/10.1101/gr.186072.114) _Genome Res_ 25, 1043-1055 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/checkm) diff --git a/bactopia-tools/checkm2.mdx b/bactopia-tools/checkm2.mdx new file mode 100644 index 00000000..7995c8fd --- /dev/null +++ b/bactopia-tools/checkm2.mdx @@ -0,0 +1,208 @@ +--- +title: checkm2 +description: "Machine learning-based assessment of microbial genome assembly quality." +tags: + - assembly-quality + - microbial-genomes + - machine-learning + - completeness + - bactopia-tool +--- + +# checkm2 + +**Tags:** assembly-quality microbial-genomes machine-learning completeness bactopia-tool + +Machine learning-based assessment of microbial genome assembly quality. + +This Bactopia Tool uses [CheckM2](https://github.com/chklovski/CheckM2) to assess the quality +of microbial genomes recovered from isolates, single cells, and metagenomes using +advanced machine learning approaches. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf checkm2 \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/checkm2/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── checkm2- +│ ├── .tsv +│ ├── logs +│ │ ├── checkm2.log +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ ├── diamond_output +│ │ └── DIAMOND_RESULTS.tsv +│ └── protein_files +│ └── .faa.gz +└── bactopia-runs + └── checkm2- + ├── merged-results + │ ├── checkm2.tsv + │ └── logs + │ └── checkm2-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── checkm2-dag.dot + ├── checkm2-report.html + └── checkm2-timeline.html +``` + +### Quality Assessment + +| File | Description | +|------|-------------| +| `diamond_output/**` | Directory with intermediate results from CheckM2 processing | +| `protein_files/**` | Directory containing protein files used for analysis | +| `quality_report.tsv` | Output file with completeness statistics | + +### Merged Results + +| File | Description | +|------|-------------| +| `checkm2.tsv` | Merged TSV file with CheckM2 results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| checkm2-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| checkm2-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| checkm2-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| checkm2-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### CheckM2 Database Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--checkm2_db` | string | | Path to a folder containing CheckM2 database (or where it should be downloaded to). 
| +| `--download_checkm2` | boolean | `false` | Download the CheckM2 database to the path given by --checkm2_db | + +### CheckM2 Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--checkm2_lowmem` | boolean | | Low memory mode. Reduces DIAMOND blocksize to significantly reduce RAM usage at the expense of longer runtime | +| `--checkm2_general` | boolean | | Force the use of the general quality prediction model (gradient boost) | +| `--checkm2_specific` | boolean | | Force the use of the specific quality prediction model (neural network) | +| `--checkm2_allmodels` | boolean | | Output quality prediction for both models for each genome. | +| `--checkm2_genes` | boolean | | Treat input files as protein files. [Default: False] | +| `--checkm2_opts` | string | | Additional options to pass to CheckM2 | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [checkm2](/developers/subworkflows/checkm2) - Assess metagenome bin completeness using CheckM2. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [CheckM2](https://github.com/chklovski/CheckM2) + Chklovski A [Rapid assessment of genome bin quality using machine learning](https://github.com/chklovski/CheckM2) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/checkm2) diff --git a/bactopia-tools/clermontyping.mdx b/bactopia-tools/clermontyping.mdx new file mode 100644 index 00000000..d4d891ad --- /dev/null +++ b/bactopia-tools/clermontyping.mdx @@ -0,0 +1,196 @@ +--- +title: clermontyping +description: "In silico phylotyping of Escherichia genus." +tags: + - e-coli + - phylotyping + - phylogroups + - clermontyping + - bactopia-tool +--- + +# clermontyping + +**Tags:** e-coli phylotyping phylogroups clermontyping bactopia-tool + +In silico phylotyping of Escherichia genus. + +This Bactopia Tool uses [ClermonTyping](https://github.com/happykhan/ClermonTyping) +to conduct _in silico_ prediction of phylotype for _Escherichia_ genomes. It uses +genome assemblies to assign them to _E. albertii_, _E. fergusonii_, _Escherichia_ +clades I–V, _E. coli sensu stricto_ as well as to the main _E. coli_ phylogroups. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf clermontyping \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/clermontyping/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── clermontyping- +│ ├── .tsv +│ ├── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ ├── .blast.xml +│ ├── .html +│ └── .mash.tsv +└── bactopia-runs + └── clermontyping- + ├── merged-results + │ ├── clermontyping.tsv + │ └── logs + │ └── clermontyping-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── clermontyping-dag.dot + ├── clermontyping-report.html + └── clermontyping-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.blast.xml` | BLAST XML file with ClermonTyping analysis results | +| `*.html` | HTML file with ClermonTyping analysis results | +| `*.mash.tsv` | TSV file with Mash distances | +| `*.phylogroups.txt` | TSV file with final phylogroup assignments | + +### Merged Results + +| File | Description | +|------|-------------| +| `clermontyping.tsv` | Merged TSV file with ClermonTyping results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| clermontyping-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| clermontyping-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| clermontyping-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| clermontyping-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### ClermonTyping Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--clermontyping_threshold` | integer | `0` | Do not use contigs under this size | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [clermontyping](/developers/subworkflows/clermontyping) - Predict phylogroups of Escherichia coli from genome assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ClermonTyping](https://github.com/happykhan/ClermonTyping) + Beghain J, Bridier-Nahmias A, Le Nagard H, Denamur E, Clermont O. [ClermonTyping: an easy-to-use and accurate in silico method for Escherichia genus strain phylotyping.](https://doi.org/10.1099/mgen.0.000192) Microbial Genomics, 4(7), e000192. (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/clermontyping) diff --git a/bactopia-tools/defensefinder.mdx b/bactopia-tools/defensefinder.mdx new file mode 100644 index 00000000..91fc884b --- /dev/null +++ b/bactopia-tools/defensefinder.mdx @@ -0,0 +1,206 @@ +--- +title: defensefinder +description: "Systematic identification of anti-phage defense systems." +tags: + - anti-phage + - defense-systems + - hmm + - protein-domains + - bactopia-tool +--- + +# defensefinder + +**Tags:** anti-phage defense-systems hmm protein-domains bactopia-tool + +Systematic identification of anti-phage defense systems. + +This Bactopia Tool uses [DefenseFinder](https://github.com/mdmparis/defense-finder) +to systematically search for and identify all known anti-phage defense systems +in bacterial genomes using HMM-based protein domain detection. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf defensefinder \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/defensefinder/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── defensefinder- +│ ├── _defense_finder_genes.tsv +│ ├── _defense_finder_hmmer.tsv +│ ├── _defense_finder_systems.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── defensefinder- + ├── merged-results + │ ├── defensefinder-genes.tsv + │ ├── defensefinder-hmmer.tsv + │ ├── defensefinder-systems.tsv + │ └── logs + │ ├── defensefinder-genes-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ ├── defensefinder-hmmer-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── defensefinder-systems-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── defensefinder-dag.dot + ├── defensefinder-report.html + └── defensefinder-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.prt` | FASTA file containing all proteins found in defense systems | +| `*.prt.idx` | Index file for the proteins file | +| `*defense_finder_genes.tsv` | TSV file with each gene found in defense systems | +| `*defense_finder_hmmer.tsv` | TSV file with each HMM hit | +| `*defense_finder_systems.tsv` | TSV file with information about each system found | +| `*.macsydata.tar.gz` | Raw MACSyFinder output file (requires --defensefinder_preserveraw) | + +### Merged Results + +| File | Description | +|------|-------------| +| `defensefinder-genes.tsv` | Merged TSV of all genes found in defense systems | +| `defensefinder-hmmer.tsv` | Merged TSV of all HMM hits | +| `defensefinder-systems.tsv` | Merged TSV of all information about systems found 
| + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| defensefinder-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| defensefinder-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| defensefinder-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| defensefinder-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### defense-finder Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--defensefinder_coverage` | number | `0.4` | Minimal percentage of coverage for each profile | +| `--defensefinder_dbtype` | string | `ordered_replicon` | The macsyfinder --db-type option (choices: `ordered_replicon`, `gembase`, `unordered`) | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [defensefinder](/developers/subworkflows/defensefinder) - Systematically search for anti-phage defense systems. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [DefenseFinder](https://github.com/mdmparis/defense-finder) + Tesson F, Hervé A, Mordret E, Touchon M, d'Humières C, Cury J, Bernheim A [Systematic and quantitative view of the antiviral arsenal of prokaryotes.](https://doi.org/10.1038/s41467-022-30269-9) Nature Communications, 13(1), 2561. (2022) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/defensefinder) diff --git a/bactopia-tools/ectyper.mdx b/bactopia-tools/ectyper.mdx new file mode 100644 index 00000000..a6f66325 --- /dev/null +++ b/bactopia-tools/ectyper.mdx @@ -0,0 +1,193 @@ +--- +title: ectyper +description: "In silico prediction of Escherichia coli serotype." +tags: + - escherichia-coli + - serotyping + - fasta + - bactopia-tool +--- + +# ectyper + +**Tags:** escherichia-coli serotyping fasta bactopia-tool + +In silico prediction of Escherichia coli serotype. + +This Bactopia Tool uses [ECTyper](https://github.com/phac-nml/ecoli_serotyping) to conduct +_in silico_ prediction of serotype for _Escherichia coli_ genomes. It uses the genome assemblies +to provide basic species identification and the predicted _E. coli_ serotype (e.g. O174:H21). 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf ectyper \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/ectyper/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── ectyper- +│ ├── .blast_alleles.txt +│ ├── .tsv +│ └── logs +│ ├── ectyper.log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── ectyper- + ├── merged-results + │ ├── ectyper.tsv + │ └── logs + │ └── ectyper-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── ectyper-dag.dot + ├── ectyper-report.html + └── ectyper-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Tab-delimited file with ECTyper result | +| `blast_output_alleles.txt` | Allele report generated from BLAST results | + +### Merged Results + +| File | Description | +|------|-------------| +| `ectyper.tsv` | Merged TSV file containing ECTyper results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| ectyper-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| ectyper-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| ectyper-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| ectyper-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### ECTyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--ectyper_opid` | integer | `90` | Percent identity required for an O antigen allele match | +| `--ectyper_opcov` | integer | `90` | Minimum percent coverage required for an O antigen allele match | +| `--ectyper_hpid` | integer | `95` | Percent identity required for an H antigen allele match | +| `--ectyper_hpcov` | integer | `50` | Minimum percent coverage required for an H antigen allele match | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [ectyper](/developers/subworkflows/ectyper) - In silico prediction of Escherichia coli serotype. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ECTyper](https://github.com/phac-nml/ecoli_serotyping) + Laing C, Bessonov K, Sung S, La Rose C [ECTyper - In silico prediction of _Escherichia coli_ serotype](https://github.com/phac-nml/ecoli_serotyping) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/ectyper) diff --git a/bactopia-tools/eggnog.mdx b/bactopia-tools/eggnog.mdx new file mode 100644 index 00000000..f202a118 --- /dev/null +++ b/bactopia-tools/eggnog.mdx @@ -0,0 +1,203 @@ +--- +title: eggnog +description: "Functional annotation of proteins using orthologous groups and phylogenies." +tags: + - functional-annotation + - orthology + - proteins + - eggnog + - bactopia-tool +--- + +# eggnog + +**Tags:** functional-annotation orthology proteins eggnog bactopia-tool + +Functional annotation of proteins using orthologous groups and phylogenies. + +This Bactopia Tool uses [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) to assign +functional annotation to protein sequences. eggNOG-mapper uses orthologous groups and phylogenies +from the eggNOG database to more precisely functionally annotate than traditional homology methods. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf eggnog \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/eggnog/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── eggnog- +│ ├── .emapper.annotations +│ ├── .emapper.hits +│ ├── .emapper.seed_orthologs +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── eggnog- + └── nf-reports + ├── eggnog-dag.dot + ├── eggnog-report.html + └── eggnog-timeline.html +``` + +### Annotation + +| File | Description | +|------|-------------| +| `*.emapper.annotations` | Results from the annotation phase | +| `*.emapper.hits` | Results from the search phase (HMMER, Diamond or MMseqs2) | +| `*.emapper.seed_orthologs` | Results from parsing the hits | +| `*.emapper.annotations.xlsx` | Annotations in Excel format | +| `*.emapper.orthologs` | List of orthologs found for each query | +| `*.emapper.genepred.fasta` | Sequences of predicted CDS | +| `*.emapper.gff` | GFF of predicted CDS | +| `*.emapper.no_annotations.fasta` | Sequences without annotation | +| `*.emapper.pfam` | Positions of PFAM domains identified | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| eggnog-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| eggnog-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| eggnog-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| eggnog-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### eggNOG Downloader Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--eggnog_db` | string | | Tarball or path to eggNOG databases | +| `--download_eggnog` | boolean | `false` | Required if downloading latest eggNOG database, will overwrite existing databases. 
| +| `--eggnog_save_as_tarball` | string | | Save the eggNOG database as a single tarball | +| `--eggnog_skip_diamond` | boolean | `false` | Do not install the diamond database | +| `--eggnog_install_mmseq` | boolean | `false` | Install the MMseqs2 database | +| `--eggnog_install_pfam` | boolean | `false` | Install the Pfam database, required for de novo annotation or realignment | +| `--eggnog_install_hmm` | boolean | `false` | Install the HMMER database specified with `--eggnog_hmmer_taxid` | +| `--eggnog_hmmer_taxid` | integer | `2` | Tax ID of eggNOG HMM database to download | + +### eggNOG Mapper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--eggnog_genepred` | string | `search` | Method to use for gene prediction (choices: `search`, `prodigal`) | +| `--eggnog_mode` | string | `diamond` | Method to search against eggNOG sequences (choices: `diamond`, `hmmer`, `mmseqs`, `cache`, `no_search`) | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [eggnog](/developers/subworkflows/eggnog) - Functional annotation through orthology assignment. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) + Huerta-Cepas J, Forslund K, Coelho LP, Szklarczyk D, Jensen LJ, von Mering C, Bork P [Fast Genome-Wide Functional Annotation through Orthology Assignment by eggNOG-Mapper.](http://dx.doi.org/10.1093/molbev/msx148) _Mol. Biol. Evol._ 34, 2115-2122 (2017) + +- [eggNOG 5.0 Database](http://eggnog.embl.de/) + Huerta-Cepas J, Szklarczyk D, Heller D, Hernández-Plaza A, Forslund SK, Cook H, Mende DR, Letunic I, Rattei T, Jensen LJ, von Mering C, Bork P [eggNOG 5.0: a hierarchical, functionally and phylogenetically annotated orthology resource based on 5090 organisms and 2502 viruses.](https://doi.org/10.1093/nar/gky1085) _Nucleic Acids Res._ 47, D309-D314 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/eggnog) diff --git a/bactopia-tools/emmtyper.mdx b/bactopia-tools/emmtyper.mdx new file mode 100644 index 00000000..5511b789 --- /dev/null +++ b/bactopia-tools/emmtyper.mdx @@ -0,0 +1,189 @@ +--- +title: emmtyper +description: "emm-typing of Streptococcus pyogenes assemblies." +tags: + - streptococcus-pyogenes + - emm-typing + - fasta + - bactopia-tool +--- + +# emmtyper + +**Tags:** streptococcus-pyogenes emm-typing fasta bactopia-tool + +emm-typing of Streptococcus pyogenes assemblies. + +This Bactopia Tool uses [emmtyper](https://github.com/MDU-PHL/emmtyper) for +emm-typing of _Streptococcus pyogenes_ using a de novo or complete assembly. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf emmtyper \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/emmtyper/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── emmtyper- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── emmtyper- + ├── merged-results + │ ├── emmtyper.tsv + │ └── logs + │ └── emmtyper-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── emmtyper-dag.dot + ├── emmtyper-report.html + └── emmtyper-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Tab-delimited file with emmtyper result | + +### Merged Results + +| File | Description | +|------|-------------| +| `emmtyper.tsv` | Merged TSV file containing emmtyper results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. 
These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| emmtyper-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| emmtyper-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| emmtyper-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| emmtyper-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### emmtyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--emmtyper_wf` | string | `blast` | Workflow for emmtyper to use. (choices: `blast`, `pcr`) | +| `--emmtyper_blastdb` | string | | Path to custom EMM BLAST DB. | +| `--emmtyper_cluster_distance` | integer | `500` | Distance between cluster of matches to consider as different clusters | +| `--emmtyper_percid` | integer | `95` | Minimal percent identity of sequence | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [emmtyper](/developers/subworkflows/emmtyper) - Predict emm types of Streptococcus pyogenes from genome assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [emmtyper](https://github.com/MDU-PHL/emmtyper) + Tan A, Seemann T, Lacey D, Davies M, Mcintyre L, Frost H, Williamson D, Gonçalves da Silva A [emmtyper - emm Automatic Isolate Labeller](https://github.com/MDU-PHL/emmtyper) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/emmtyper) diff --git a/bactopia-tools/fastani.mdx b/bactopia-tools/fastani.mdx new file mode 100644 index 00000000..abe10877 --- /dev/null +++ b/bactopia-tools/fastani.mdx @@ -0,0 +1,202 @@ +--- +title: fastani +description: "Fast alignment-free computation of whole-genome Average Nucleotide Identity." +tags: + - ani + - average-nucleotide-identity + - similarity + - comparative-genomics + - bactopia-tool +--- + +# fastani + +**Tags:** ani average-nucleotide-identity similarity comparative-genomics bactopia-tool + +Fast alignment-free computation of whole-genome Average Nucleotide Identity. + +This Bactopia Tool uses [FastANI](https://github.com/ParBLiSS/FastANI) to calculate the average +nucleotide identity (ANI) between samples. It can also calculate ANI against reference genomes +by downloading RefSeq assemblies using NCBI genome download. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf fastani \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/fastani/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +└── + └── fastani- + ├── GCF_020736045.1_ASM2073604v1_genomic + │ ├── GCF_020736045.1_ASM2073604v1_genomic.tsv + │ └── logs + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + ├── merged-results + │ ├── fastani.tsv + │ └── logs + │ └── fastani-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── fastani-dag.dot + ├── fastani-report.html + └── fastani-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | FastANI results of samples against reference | + +### Merged Results + +| File | Description | +|------|-------------| +| `fastani.tsv` | Merged TSV file containing ANI results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| fastani-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| fastani-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| fastani-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| fastani-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### fastANI Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--fastani_reference` | string | | Path to reference genome in FASTA format | +| `--fastani_kmer` | integer | `16` | kmer size (<= 16) for ANI calculation | +| `--fastani_min_fraction` | number | `0.2` | Minimum fraction of genome that must be shared for trusting ANI. 
| +| `--fastani_frag_len` | integer | `3000` | fragment length | +| `--fastani_skip_pairwise` | boolean | `false` | Only use RefSeq or local assemblies for ANI calculations | + +### NCBI Genome Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--species` | string | | Name of the species to download assemblies | +| `--accession` | string | | An NCBI Assembly accession to be downloaded | +| `--accessions` | string | | A file of NCBI Assembly accessions (one per line) to be downloaded | +| `--format` | string | `fasta` | Comma separated list of formats to download | +| `--limit` | string | | Limit the number of assemblies to download | +| `--keep_downloads` | boolean | `false` | Save downloaded files into the bactopia-runs folder | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [fastani](/developers/subworkflows/fastani) - Calculate Average Nucleotide Identity (ANI) between genomes. +- [ncbigenomedownload](/developers/subworkflows/ncbigenomedownload) - Download bacterial genomes from NCBI's RefSeq database. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [FastANI](https://github.com/ParBLiSS/FastANI) + Jain C, Rodriguez-R LM, Phillippy AM, Konstantinidis KT, Aluru S [High throughput ANI analysis of 90K prokaryotic genomes reveals clear species boundaries.](http://dx.doi.org/10.1038/s41467-018-07641-9) _Nat. Commun._ 9, 5114 (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/fastani) diff --git a/bactopia-tools/gamma.mdx b/bactopia-tools/gamma.mdx new file mode 100644 index 00000000..7d5844f8 --- /dev/null +++ b/bactopia-tools/gamma.mdx @@ -0,0 +1,190 @@ +--- +title: gamma +description: "Identification, classification, and annotation of translated gene matches." +tags: + - gene-annotation + - protein-classification + - translation + - gamma + - bactopia-tool +--- + +# gamma + +**Tags:** gene-annotation protein-classification translation gamma bactopia-tool + +Identification, classification, and annotation of translated gene matches. + +This Bactopia Tool uses [GAMMA](https://github.com/rastanton/GAMMA) to identify, classify, and annotate +translated gene matches from assemblies using a comprehensive protein database. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf gamma \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/gamma/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── gamma- +│ └── gamma- +│ ├── .gamma +│ ├── .psl +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── gamma- + ├── merged-results + │ ├── gamma.tsv + │ └── logs + │ └── gamma-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── gamma-dag.dot + ├── gamma-report.html + └── gamma-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Tab-delimited file with gene classification results | + +### Merged Results + +| File | Description | +|------|-------------| +| `gamma.tsv` | Merged TSV file containing GAMMA results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. 
These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| gamma-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| gamma-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| gamma-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| gamma-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### GAMMA Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--gamma_db` | string | | A gene database (FASTA) for GAMMA | +| `--gamma_percent_identity` | integer | `90` | The minimum nucleotide sequence identity % used by the Blat search | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [gamma](/developers/subworkflows/gamma) - Gene Allele Mutation Microbial Assessment. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [GAMMA](https://github.com/rastanton/GAMMA) + Stanton RA, Vlachos N, Halpin AL [GAMMA: a tool for the rapid identification, classification, and annotation of translated gene matches from sequencing data.](https://doi.org/10.1093/bioinformatics/btab607) _Bioinformatics_ (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/gamma) diff --git a/bactopia-tools/genotyphi.mdx b/bactopia-tools/genotyphi.mdx new file mode 100644 index 00000000..26d80347 --- /dev/null +++ b/bactopia-tools/genotyphi.mdx @@ -0,0 +1,208 @@ +--- +title: genotyphi +description: "Salmonella Typhi genotyping with lineage assignment." +tags: + - salmonella-typhi + - genotyping + - lineage + - amr + - mykrobe + - bactopia-tool +--- + +# genotyphi + +**Tags:** salmonella-typhi genotyping lineage amr mykrobe bactopia-tool + +Salmonella Typhi genotyping with lineage assignment. + +This Bactopia Tool uses [GenoTyphi](https://github.com/typhoidgenomics/genotyphi) to +call Typhi lineages, AMR determinants, and plasmid markers in Salmonella Typhi samples. +Samples are first processed by [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) using `mykrobe predict` +with `typhi` specified as the species. Then the Mykrobe results are processed by the +GenoTyphi parse_typhi_mykrobe.py script. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf genotyphi \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/genotyphi/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── genotyphi- +│ ├── .csv +│ ├── .json +│ ├── .tsv +│ └── logs +│ ├── genotyphi- +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── mykrobe +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── genotyphi- + ├── merged-results + │ ├── genotyphi.tsv + │ └── logs + │ └── genotyphi-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── genotyphi-dag.dot + ├── genotyphi-report.html + └── genotyphi-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.json` | Mykrobe prediction results | +| `*.tsv` | GenoTyphi parsed results | + +### Merged Results + +| File | Description | +|------|-------------| +| `genotyphi.tsv` | Merged TSV file containing GenoTyphi results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| genotyphi-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| genotyphi-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| genotyphi-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| genotyphi-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Mykrobe Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mykrobe_species` | string | | Species panel to use (choices: `sonnei`, `staph`, `tb`, `typhi`) | +| `--mykrobe_opts` | string | | Extra Mykrobe options in quotes | + +### GenoTyphi Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--genotyphi_mykrobe_opts` | string | | Extra Mykrobe options in quotes | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [genotyphi](/developers/subworkflows/genotyphi) - Assign genotypes to Salmonella Typhi genomes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [GenoTyphi](https://github.com/katholt/genotyphi) + Wong VK, Baker S, Connor TR, Pickard D, Page AJ, Dave J, Murphy N, Holliman R, Sefton A, Millar M, Dyson ZA, Dougan G, Holt KE, & International Typhoid Consortium. [An extended genotyping framework for Salmonella enterica serovar Typhi, the cause of human typhoid](https://doi.org/10.1038/ncomms12827) _Nature Communications_ 7, 12827. (2016) + +- [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) + Hunt M, Bradley P, Lapierre SG, Heys S, Thomsit M, Hall MB, Malone KM, Wintringer P, Walker TM, Cirillo DM, Comas I, Farhat MR, Fowler P, Gardy J, Ismail N, Kohl TA, Mathys V, Merker M, Niemann S, Omar SV, Sintchenko V, Smith G, Supply P, Tahseen S, Wilcox M, Arandjelovic I, Peto TEA, Crook DW, Iqbal Z [Antibiotic resistance prediction for Mycobacterium tuberculosis from genome sequence data with Mykrobe](https://doi.org/10.12688/wellcomeopenres.15603.1) _Wellcome Open Research_ 4, 191. 
(2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/genotyphi) diff --git a/bactopia-tools/gigatyper.mdx b/bactopia-tools/gigatyper.mdx new file mode 100644 index 00000000..2732aff2 --- /dev/null +++ b/bactopia-tools/gigatyper.mdx @@ -0,0 +1,188 @@ +--- +title: gigatyper +description: "Run all available MLST schemes for a species against an assembly" +tags: + - mlst + - typing + - multi-scheme + - bactopia-tool +--- + +# gigatyper + +**Tags:** mlst typing multi-scheme bactopia-tool + +Run all available MLST schemes for a species against an assembly + +This Bactopia Tool uses [GigaTyper](https://github.com/rpetit3/gigatyper) to run all available mlst schemes for a species against an assembly. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf gigatyper \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/gigatyper/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── gigatyper- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── gigatyper- + ├── merged-results + │ ├── gigatyper.tsv + │ └── logs + │ └── gigatyper-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── gigatyper-dag.dot + ├── gigatyper-report.html + └── gigatyper-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | MLST results across all schemes | + +### Merged Results + +| File | Description | +|------|-------------| +| `gigatyper.tsv` | Merged TSV file containing gigatyper results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. 
In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| gigatyper-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| gigatyper-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| gigatyper-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| gigatyper-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### GigaTyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--gigatyper_species` | string | | Force a specific species for scheme selection | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [gigatyper](/developers/subworkflows/gigatyper) - Run all available MLST schemes for a species against an assembly + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [GigaTyper](https://github.com/rpetit3/gigatyper) + Petit III RA, Fearing T, Groves E [GigaTyper: Why choose one scheme when you can flex them all?](https://github.com/rpetit3/gigatyper) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/gigatyper) diff --git a/bactopia-tools/gtdb.mdx b/bactopia-tools/gtdb.mdx new file mode 100644 index 00000000..bb95d795 --- /dev/null +++ b/bactopia-tools/gtdb.mdx @@ -0,0 +1,207 @@ +--- +title: gtdb +description: "Identify marker genes and assign taxonomic classifications using GTDB." +tags: + - taxonomy + - classification + - marker-genes + - phylogeny + - gtdb + - bactopia-tool +--- + +# gtdb + +**Tags:** taxonomy classification marker-genes phylogeny gtdb bactopia-tool + +Identify marker genes and assign taxonomic classifications using GTDB. + +This Bactopia Tool uses [GTDB-Tk's](https://github.com/Ecogenomics/GTDBTk) classify +workflow to assign taxonomic classifications to samples using the +[Genome Taxonomy Database](https://gtdb.ecogenomic.org/). 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf gtdb \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/gtdb/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── gtdb- +│ ├── .bac120.summary.tsv +│ ├── logs +│ │ ├── gtdbtk.log +│ │ ├── gtdbtk.warnings.log +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ ├── classify +│ │ └── ani_screen +│ │ └── .bac120.ani_summary.tsv +│ └── gtdbtk.json +└── bactopia-runs + └── gtdb- + ├── merged-results + │ ├── gtdb.tsv + │ └── logs + │ └── gtdb-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── gtdb-dag.dot + ├── gtdb-report.html + └── gtdb-timeline.html +``` + +### Taxonomic Classification + +| File | Description | +|------|-------------| +| `*.summary.tsv` | Taxonomic classification summary | +| `*.gtdbtk.tsv` | Detailed GTDB-Tk results | + +### Merged Results + +| File | Description | +|------|-------------| +| `gtdb.tsv` | Merged TSV file containing GTDB results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| gtdb-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| gtdb-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| gtdb-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| gtdb-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### GTDB-Tk Setup Database Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--gtdb` | string | | Tarball or path of a GTDB database. 
If a database is not found, you must use '--download_gtdb' | +| `--download_gtdb` | boolean | `false` | Download the latest GTDB database, even if it exists | +| `--gtdb_save_as_tarball` | boolean | `false` | Download the latest GTDB database, and save it in a single tarball | + +### GTDB Classify Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--gtdb_min_af` | number | `0.65` | Minimum alignment fraction to consider closest genome | +| `--gtdb_min_perc_aa` | integer | `10` | Filter genomes with an insufficient percentage of AA in the MSA | +| `--force_gtdb` | boolean | `false` | Continue processing if an error occurs on a single genome | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [gtdb](/developers/subworkflows/gtdb) - Taxonomic classification with the Genome Taxonomy Database. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Genome Taxonomy Database](https://gtdb.ecogenomic.org/) + Parks DH, Chuvochina M, Rinke C, Mussig AJ, Chaumeil P-A, Hugenholtz P [GTDB: an ongoing census of bacterial and archaeal diversity through a phylogenetically consistent, rank normalized and complete genome-based taxonomy](https://doi.org/10.1093/nar/gkab776) _Nucleic Acids Research_ gkab776 (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/gtdb) diff --git a/bactopia-tools/hicap.mdx b/bactopia-tools/hicap.mdx new file mode 100644 index 00000000..cd6deef3 --- /dev/null +++ b/bactopia-tools/hicap.mdx @@ -0,0 +1,199 @@ +--- +title: hicap +description: "Identify cap locus serotype and structure in Haemophilus influenzae assemblies." +tags: + - haemophilus-influenzae + - serotyping + - capsular-locus + - bactopia-tool +--- + +# hicap + +**Tags:** haemophilus-influenzae serotyping capsular-locus bactopia-tool + +Identify cap locus serotype and structure in Haemophilus influenzae assemblies. + +This Bactopia Tool uses [hicap](https://github.com/scwatts/hicap) with assemblies for +_in silico_ typing of the _Haemophilus influenzae_ capsular locus. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf hicap \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/hicap/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── hicap- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── GCF_900478275 +│ └── tools +│ └── hicap- +│ ├── GCF_900478275.gbk +│ ├── GCF_900478275.svg +│ ├── GCF_900478275.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── hicap- + ├── merged-results + │ ├── hicap.tsv + │ └── logs + │ └── hicap-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── hicap-dag.dot + ├── hicap-report.html + └── hicap-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.summary` | Summary of serotype prediction | +| `*.gff` | Annotated capsular locus in GFF format | + +### Merged Results + +| File | Description | +|------|-------------| +| `hicap.tsv` | Merged TSV file containing hicap results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| hicap-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| hicap-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| hicap-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| hicap-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### hicap Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--hicap_gene_coverage` | number | `0.8` | Minimum percentage coverage to consider a single gene complete | +| `--hicap_gene_identity` | number | `0.7` | Minimum percentage identity to consider a single gene complete | +| `--hicap_broken_gene_length` | integer | `60` | Minimum length to consider a broken gene | +| `--hicap_broken_gene_identity` | number | `0.8` | Minimum percentage identity to consider a broken gene | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [hicap](/developers/subworkflows/hicap) - In silico serotyping of the Haemophilus influenzae capsule locus. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [hicap](https://github.com/scwatts/hicap) + Watts SC, Holt KE [hicap: in silico serotyping of the Haemophilus influenzae capsule locus.](https://doi.org/10.1128/JCM.00190-19) _Journal of Clinical Microbiology_ JCM.00190-19 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/hicap) diff --git a/bactopia-tools/hpsuissero.mdx b/bactopia-tools/hpsuissero.mdx new file mode 100644 index 00000000..2b6c604c --- /dev/null +++ b/bactopia-tools/hpsuissero.mdx @@ -0,0 +1,180 @@ +--- +title: hpsuissero +description: "Serotype prediction of Haemophilus parasuis assemblies." +tags: + - haemophilus-parasuis + - serotyping + - fasta + - bactopia-tool +--- + +# hpsuissero + +**Tags:** haemophilus-parasuis serotyping fasta bactopia-tool + +Serotype prediction of Haemophilus parasuis assemblies. + +This Bactopia Tool uses [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero) to predict +the serotype of _Haemophilus parasuis_ assemblies. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf hpsuissero \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/hpsuissero/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── hpsuissero- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── hpsuissero- + ├── merged-results + │ ├── hpsuissero.tsv + │ └── logs + │ └── hpsuissero-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── hpsuissero-dag.dot + ├── hpsuissero-report.html + └── hpsuissero-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.summary` | Summary of serotype prediction | + +### Merged Results + +| File | Description | +|------|-------------| +| `hpsuissero.tsv` | Merged TSV file containing HpsuisSero results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. 
These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| hpsuissero-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| hpsuissero-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| hpsuissero-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| hpsuissero-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [hpsuissero](/developers/subworkflows/hpsuissero) - Rapid Haemophilus parasuis serotyping. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero) + Liu J [HpsuisSero: Rapid _Haemophilus parasuis_ serotyping](https://github.com/jimmyliu1326/HpsuisSero) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/hpsuissero) diff --git a/bactopia-tools/index.mdx b/bactopia-tools/index.mdx new file mode 100644 index 00000000..8174f714 --- /dev/null +++ b/bactopia-tools/index.mdx @@ -0,0 +1,79 @@ +--- +title: Bactopia Tools +description: All available Bactopia Tool workflows +sidebar_position: 2 +--- + +# Bactopia Tools + +Bactopia Tools are additional analysis workflows that run specific tools on existing Bactopia results. There are 67 Bactopia Tools available. You can also [browse by tag](/bactopia-tools/tags). + +| Workflow | Description | +|----------|-------------| +| [abricate](/bactopia-tools/abricate) | Mass screening of contigs for antimicrobial resistance and virulence genes. | +| [abritamr](/bactopia-tools/abritamr) | A NATA accredited tool for reporting the presence of antimicrobial resistance genes. | +| [agrvate](/bactopia-tools/agrvate) | Rapid identification of Staphylococcus aureus agr locus type and agr operon variants. | +| [amrfinderplus](/bactopia-tools/amrfinderplus) | Bactopia Tool: Amrfinderplus. | +| [ariba](/bactopia-tools/ariba) | Gene identification through local assemblies. | +| [bakta](/bactopia-tools/bakta) | Rapid annotation of bacterial genomes and plasmids. 
| +| [blastn](/bactopia-tools/blastn) | Search against nucleotide BLAST databases using nucleotide queries. | +| [blastp](/bactopia-tools/blastp) | Search against protein BLAST databases using protein queries. | +| [blastx](/bactopia-tools/blastx) | Search against protein BLAST databases using translated nucleotide queries. | +| [bracken](/bactopia-tools/bracken) | Estimate taxonomic abundance of metagenomic samples. | +| [btyper3](/bactopia-tools/btyper3) | Taxonomic classification of Bacillus cereus group isolates. | +| [busco](/bactopia-tools/busco) | Assessment of genome assembly completeness using evolutionarily informed expectations. | +| [checkm](/bactopia-tools/checkm) | Assessment of microbial genome assembly quality. | +| [checkm2](/bactopia-tools/checkm2) | Machine learning-based assessment of microbial genome assembly quality. | +| [clermontyping](/bactopia-tools/clermontyping) | In silico phylotyping of Escherichia genus. | +| [defensefinder](/bactopia-tools/defensefinder) | Systematic identification of anti-phage defense systems. | +| [ectyper](/bactopia-tools/ectyper) | In silico prediction of Escherichia coli serotype. | +| [eggnog](/bactopia-tools/eggnog) | Functional annotation of proteins using orthologous groups and phylogenies. | +| [emmtyper](/bactopia-tools/emmtyper) | emm-typing of Streptococcus pyogenes assemblies. | +| [fastani](/bactopia-tools/fastani) | Fast alignment-free computation of whole-genome Average Nucleotide Identity. | +| [gamma](/bactopia-tools/gamma) | Identification, classification, and annotation of translated gene matches. | +| [genotyphi](/bactopia-tools/genotyphi) | Salmonella Typhi genotyping with lineage assignment. | +| [gigatyper](/bactopia-tools/gigatyper) | Run all available MLST schemes for a species against an assembly. | +| [gtdb](/bactopia-tools/gtdb) | Identify marker genes and assign taxonomic classifications using GTDB. 
| +| [hicap](/bactopia-tools/hicap) | Identify cap locus serotype and structure in Haemophilus influenzae assemblies. | +| [hpsuissero](/bactopia-tools/hpsuissero) | Serotype prediction of Haemophilus parasuis assemblies. | +| [ismapper](/bactopia-tools/ismapper) | Identify insertion sequence positions in bacterial genomes. | +| [kleborate](/bactopia-tools/kleborate) | Comprehensive screening of Klebsiella genomes for virulence and resistance determinants. | +| [kraken2](/bactopia-tools/kraken2) | Taxonomic classification of metagenomic sequence reads. | +| [legsta](/bactopia-tools/legsta) | Sequence Based Typing (SBT) of Legionella pneumophila. | +| [lissero](/bactopia-tools/lissero) | Serogroup typing prediction for Listeria monocytogenes. | +| [mashdist](/bactopia-tools/mashdist) | Calculate Mash distances between sequences and reference genomes. | +| [mashtree](/bactopia-tools/mashtree) | Rapid phylogenetic tree construction using Mash distances. | +| [mcroni](/bactopia-tools/mcroni) | Sequence variation analysis of mcr-1 genes (mobilized colistin resistance). | +| [meningotype](/bactopia-tools/meningotype) | Comprehensive typing of Neisseria meningitidis. | +| [merlin](/bactopia-tools/merlin) | MinMER-assisted species-specific tool selection and execution. | +| [midas](/bactopia-tools/midas) | Estimate species abundances from metagenomic samples. | +| [mlst](/bactopia-tools/mlst) | Automatic Multi-Locus Sequence Type (MLST) calling from assembled contigs. | +| [mobsuite](/bactopia-tools/mobsuite) | Reconstruction and annotation of plasmids from bacterial genome assemblies. | +| [mykrobe](/bactopia-tools/mykrobe) | Antimicrobial resistance detection for specific bacterial species. | +| [ngmaster](/bactopia-tools/ngmaster) | Multi-antigen sequence typing of Neisseria gonorrhoeae. | +| [pangenome](/bactopia-tools/pangenome) | Pangenome analysis with optional core-genome phylogeny. 
| +| [pasty](/bactopia-tools/pasty) | In silico serogrouping of Pseudomonas aeruginosa isolates. | +| [pbptyper](/bactopia-tools/pbptyper) | Penicillin Binding Protein (PBP) typing for Streptococcus pneumoniae. | +| [phispy](/bactopia-tools/phispy) | Prediction of prophages in bacterial and archaeal genomes. | +| [plasmidfinder](/bactopia-tools/plasmidfinder) | Bactopia Tool: Plasmidfinder. | +| [pneumocat](/bactopia-tools/pneumocat) | Capsular type assignment to Streptococcus pneumoniae from sequence reads. | +| [prokka](/bactopia-tools/prokka) | Rapid whole genome annotation of bacterial, archaeal, and viral genomes. | +| [quast](/bactopia-tools/quast) | Quality assessment of assembled contigs using QUAST. | +| [rgi](/bactopia-tools/rgi) | Prediction of antibiotic resistance genes using RGI. | +| [sccmec](/bactopia-tools/sccmec) | Typing of SCCmec cassettes in Staphylococcus aureus assemblies. | +| [scrubber](/bactopia-tools/scrubber) | Removal of human and contaminant sequences from metagenomic reads. | +| [seqsero2](/bactopia-tools/seqsero2) | Salmonella serotype prediction from sequencing reads or assemblies. | +| [seroba](/bactopia-tools/seroba) | Serotyping of Streptococcus pneumoniae from Illumina paired-end reads. | +| [shigapass](/bactopia-tools/shigapass) | Prediction of Shigella serotypes and differentiation from EIEC. | +| [shigatyper](/bactopia-tools/shigatyper) | Rapid determination of Shigella serotypes from sequencing reads. | +| [shigeifinder](/bactopia-tools/shigeifinder) | In silico serotype prediction for Shigella and Enteroinvasive E. coli (EIEC). | +| [sistr](/bactopia-tools/sistr) | Serovar prediction of Salmonella enterica from assemblies. | +| [snippy](/bactopia-tools/snippy) | Rapid haplotype variant calling and core genome alignment. | +| [spatyper](/bactopia-tools/spatyper) | spa typing of Staphylococcus aureus assemblies. | +| [ssuissero](/bactopia-tools/ssuissero) | Serotype prediction of Streptococcus suis assemblies. 
| +| [staphtyper](/bactopia-tools/staphtyper) | Comprehensive typing of Staphylococcus aureus genomes. | +| [stecfinder](/bactopia-tools/stecfinder) | Serotype identification of Shiga toxin-producing E. coli. | +| [sylph](/bactopia-tools/sylph) | Taxonomic profiling by abundance-corrected MinHash. | +| [tblastn](/bactopia-tools/tblastn) | Search against translated nucleotide databases using protein queries. | +| [tblastx](/bactopia-tools/tblastx) | Search against translated nucleotide databases using translated nucleotide queries. | +| [tbprofiler](/bactopia-tools/tbprofiler) | Detection of antimicrobial resistance and lineage typing of Mycobacterium tuberculosis. | diff --git a/bactopia-tools/ismapper.mdx b/bactopia-tools/ismapper.mdx new file mode 100644 index 00000000..ae7affaa --- /dev/null +++ b/bactopia-tools/ismapper.mdx @@ -0,0 +1,1223 @@ +--- +title: ismapper +description: "Identify insertion sequence positions in bacterial genomes." +tags: + - insertion-sequences + - transposons + - comparative-genomics + - bactopia-tool +--- + +# ismapper + +**Tags:** insertion-sequences transposons comparative-genomics bactopia-tool + +Identify insertion sequence positions in bacterial genomes. + +This Bactopia Tool uses [ISMapper](https://github.com/jhawkey/IS_mapper) to identify +transposase insertion sites in bacterial genomes from short read sequence data. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf ismapper \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/ismapper/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── ismapper- +│ └── is1016.fasta +│ ├── logs +│ │ ├── .log +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ └── IS1016V1_IS1595_IS1016 +│ ├── _IS1016V1_IS1595_IS1016_left_final.fastq.gz +│ ├── _IS1016V1_IS1595_IS1016_right_final.fastq.gz +│ ├── ___10_closest.bed.gz +│ ├── ___10_intersect.bed.gz +│ ├── ___10_table.txt +│ ├── ___11_closest.bed.gz +│ ├── ___11_intersect.bed.gz +│ ├── ___11_table.txt +│ ├── ___12_closest.bed.gz +│ ├── ___12_intersect.bed.gz +│ ├── ___12_table.txt +│ ├── ___13_closest.bed.gz +│ ├── ___13_intersect.bed.gz +│ ├── ___13_table.txt +│ ├── ___14_closest.bed.gz +│ ├── ___14_intersect.bed.gz +│ ├── ___14_table.txt +│ ├── ___15_closest.bed.gz +│ ├── ___15_intersect.bed.gz +│ ├── ___15_table.txt +│ ├── ___16_closest.bed.gz +│ ├── ___16_intersect.bed.gz +│ ├── ___16_table.txt +│ ├── ___17_closest.bed.gz +│ ├── ___17_intersect.bed.gz +│ ├── ___17_table.txt +│ ├── ___18_closest.bed.gz +│ ├── ___18_intersect.bed.gz +│ ├── ___18_table.txt +│ ├── ___19_closest.bed.gz +│ ├── ___19_intersect.bed.gz +│ ├── ___19_table.txt +│ ├── ___1_closest.bed.gz +│ ├── ___1_intersect.bed.gz +│ ├── ___1_table.txt +│ ├── ___20_closest.bed.gz +│ ├── ___20_intersect.bed.gz +│ ├── ___20_table.txt +│ ├── ___21_closest.bed.gz +│ ├── ___21_intersect.bed.gz +│ ├── ___21_table.txt +│ ├── ___22_closest.bed.gz +│ ├── ___22_intersect.bed.gz +│ ├── ___22_table.txt +│ ├── ___23_closest.bed.gz +│ ├── ___23_intersect.bed.gz +│ ├── ___23_table.txt +│ ├── ___24_closest.bed.gz +│ ├── ___24_intersect.bed.gz +│ ├── ___24_table.txt +│ ├── ___25_closest.bed.gz +│ ├── ___25_intersect.bed.gz +│ ├── ___25_table.txt 
+│ ├── ___26_closest.bed.gz +│ ├── ___26_intersect.bed.gz +│ ├── ___26_table.txt +│ ├── ___27_closest.bed.gz +│ ├── ___27_intersect.bed.gz +│ ├── ___27_table.txt +│ ├── ___28_closest.bed.gz +│ ├── ___28_intersect.bed.gz +│ ├── ___28_table.txt +│ ├── ___29_closest.bed.gz +│ ├── ___29_intersect.bed.gz +│ ├── ___29_table.txt +│ ├── ___2_closest.bed.gz +│ ├── ___2_intersect.bed.gz +│ ├── ___2_table.txt +│ ├── ___30_closest.bed.gz +│ ├── ___30_intersect.bed.gz +│ ├── ___30_table.txt +│ ├── ___31_closest.bed.gz +│ ├── ___31_intersect.bed.gz +│ ├── ___31_table.txt +│ ├── ___32_closest.bed.gz +│ ├── ___32_intersect.bed.gz +│ ├── ___32_table.txt +│ ├── ___33_closest.bed.gz +│ ├── ___33_intersect.bed.gz +│ ├── ___33_table.txt +│ ├── ___34_closest.bed.gz +│ ├── ___34_intersect.bed.gz +│ ├── ___34_table.txt +│ ├── ___35_closest.bed.gz +│ ├── ___35_intersect.bed.gz +│ ├── ___35_table.txt +│ ├── ___36_closest.bed.gz +│ ├── ___36_intersect.bed.gz +│ ├── ___36_table.txt +│ ├── ___37_closest.bed.gz +│ ├── ___37_intersect.bed.gz +│ ├── ___37_table.txt +│ ├── ___38_closest.bed.gz +│ ├── ___38_intersect.bed.gz +│ ├── ___38_table.txt +│ ├── ___39_closest.bed.gz +│ ├── ___39_intersect.bed.gz +│ ├── ___39_table.txt +│ ├── ___3_closest.bed.gz +│ ├── ___3_intersect.bed.gz +│ ├── ___3_table.txt +│ ├── ___40_closest.bed.gz +│ ├── ___40_intersect.bed.gz +│ ├── ___40_table.txt +│ ├── ___41_closest.bed.gz +│ ├── ___41_intersect.bed.gz +│ ├── ___41_table.txt +│ ├── ___42_closest.bed.gz +│ ├── ___42_intersect.bed.gz +│ ├── ___42_table.txt +│ ├── ___43_closest.bed.gz +│ ├── ___43_intersect.bed.gz +│ ├── ___43_table.txt +│ ├── ___44_closest.bed.gz +│ ├── ___44_intersect.bed.gz +│ ├── ___44_table.txt +│ ├── ___45_closest.bed.gz +│ ├── ___45_intersect.bed.gz +│ ├── ___45_table.txt +│ ├── ___46_closest.bed.gz +│ ├── ___46_intersect.bed.gz +│ ├── ___46_table.txt +│ ├── ___47_closest.bed.gz +│ ├── ___47_intersect.bed.gz +│ ├── ___47_table.txt +│ ├── ___48_closest.bed.gz +│ ├── ___48_intersect.bed.gz +│ 
├── ___48_table.txt +│ ├── ___49_closest.bed.gz +│ ├── ___49_intersect.bed.gz +│ ├── ___49_table.txt +│ ├── ___4_closest.bed.gz +│ ├── ___4_intersect.bed.gz +│ ├── ___4_table.txt +│ ├── ___50_closest.bed.gz +│ ├── ___50_intersect.bed.gz +│ ├── ___50_table.txt +│ ├── ___51_closest.bed.gz +│ ├── ___51_intersect.bed.gz +│ ├── ___51_table.txt +│ ├── ___52_closest.bed.gz +│ ├── ___52_intersect.bed.gz +│ ├── ___52_table.txt +│ ├── ___53_closest.bed.gz +│ ├── ___53_intersect.bed.gz +│ ├── ___53_table.txt +│ ├── ___54_closest.bed.gz +│ ├── ___54_intersect.bed.gz +│ ├── ___54_table.txt +│ ├── ___55_closest.bed.gz +│ ├── ___55_intersect.bed.gz +│ ├── ___55_table.txt +│ ├── ___56_closest.bed.gz +│ ├── ___56_intersect.bed.gz +│ ├── ___56_table.txt +│ ├── ___57_closest.bed.gz +│ ├── ___57_intersect.bed.gz +│ ├── ___57_table.txt +│ ├── ___58_closest.bed.gz +│ ├── ___58_intersect.bed.gz +│ ├── ___58_table.txt +│ ├── ___59_closest.bed.gz +│ ├── ___59_intersect.bed.gz +│ ├── ___59_table.txt +│ ├── ___5_closest.bed.gz +│ ├── ___5_intersect.bed.gz +│ ├── ___5_table.txt +│ ├── ___60_closest.bed.gz +│ ├── ___60_intersect.bed.gz +│ ├── ___60_table.txt +│ ├── ___61_closest.bed.gz +│ ├── ___61_intersect.bed.gz +│ ├── ___61_table.txt +│ ├── ___62_closest.bed.gz +│ ├── ___62_intersect.bed.gz +│ ├── ___62_table.txt +│ ├── ___63_closest.bed.gz +│ ├── ___63_intersect.bed.gz +│ ├── ___63_table.txt +│ ├── ___64_closest.bed.gz +│ ├── ___64_intersect.bed.gz +│ ├── ___64_table.txt +│ ├── ___65_closest.bed.gz +│ ├── ___65_intersect.bed.gz +│ ├── ___65_table.txt +│ ├── ___66_closest.bed.gz +│ ├── ___66_intersect.bed.gz +│ ├── ___66_table.txt +│ ├── ___67_closest.bed.gz +│ ├── ___67_intersect.bed.gz +│ ├── ___67_table.txt +│ ├── ___68_closest.bed.gz +│ ├── ___68_intersect.bed.gz +│ ├── ___68_table.txt +│ ├── ___69_closest.bed.gz +│ ├── ___69_intersect.bed.gz +│ ├── ___69_table.txt +│ ├── ___6_closest.bed.gz +│ ├── ___6_intersect.bed.gz +│ ├── ___6_table.txt +│ ├── ___70_closest.bed.gz +│ ├── 
___70_intersect.bed.gz +│ ├── ___70_table.txt +│ ├── ___71_closest.bed.gz +│ ├── ___71_intersect.bed.gz +│ ├── ___71_table.txt +│ ├── ___72_closest.bed.gz +│ ├── ___72_intersect.bed.gz +│ ├── ___72_table.txt +│ ├── ___73_closest.bed.gz +│ ├── ___73_intersect.bed.gz +│ ├── ___73_table.txt +│ ├── ___74_closest.bed.gz +│ ├── ___74_intersect.bed.gz +│ ├── ___74_table.txt +│ ├── ___75_closest.bed.gz +│ ├── ___75_intersect.bed.gz +│ ├── ___75_table.txt +│ ├── ___76_closest.bed.gz +│ ├── ___76_intersect.bed.gz +│ ├── ___76_table.txt +│ ├── ___77_closest.bed.gz +│ ├── ___77_intersect.bed.gz +│ ├── ___77_table.txt +│ ├── ___78_closest.bed.gz +│ ├── ___78_intersect.bed.gz +│ ├── ___78_table.txt +│ ├── ___79_closest.bed.gz +│ ├── ___79_intersect.bed.gz +│ ├── ___79_table.txt +│ ├── ___7_closest.bed.gz +│ ├── ___7_intersect.bed.gz +│ ├── ___7_table.txt +│ ├── ___80_closest.bed.gz +│ ├── ___80_intersect.bed.gz +│ ├── ___80_table.txt +│ ├── ___8_closest.bed.gz +│ ├── ___8_intersect.bed.gz +│ ├── ___8_table.txt +│ ├── ___9_closest.bed.gz +│ ├── ___9_intersect.bed.gz +│ ├── ___9_table.txt +│ ├── _left__1.sorted.bam +│ ├── _left__1.sorted.bam.bai +│ ├── _left__10.sorted.bam +│ ├── _left__10.sorted.bam.bai +│ ├── _left__10_finalcov.bed.gz +│ ├── _left__10_merged.sorted.bed.gz +│ ├── _left__10_unpaired.bed.gz +│ ├── _left__11.sorted.bam +│ ├── _left__11.sorted.bam.bai +│ ├── _left__11_finalcov.bed.gz +│ ├── _left__11_merged.sorted.bed.gz +│ ├── _left__11_unpaired.bed.gz +│ ├── _left__12.sorted.bam +│ ├── _left__12.sorted.bam.bai +│ ├── _left__12_finalcov.bed.gz +│ ├── _left__12_merged.sorted.bed.gz +│ ├── _left__12_unpaired.bed.gz +│ ├── _left__13.sorted.bam +│ ├── _left__13.sorted.bam.bai +│ ├── _left__13_finalcov.bed.gz +│ ├── _left__13_merged.sorted.bed.gz +│ ├── _left__13_unpaired.bed.gz +│ ├── _left__14.sorted.bam +│ ├── _left__14.sorted.bam.bai +│ ├── _left__14_finalcov.bed.gz +│ ├── _left__14_merged.sorted.bed.gz +│ ├── _left__14_unpaired.bed.gz +│ ├── _left__15.sorted.bam +│ 
├── _left__15.sorted.bam.bai +│ ├── _left__15_finalcov.bed.gz +│ ├── _left__15_merged.sorted.bed.gz +│ ├── _left__15_unpaired.bed.gz +│ ├── _left__16.sorted.bam +│ ├── _left__16.sorted.bam.bai +│ ├── _left__16_finalcov.bed.gz +│ ├── _left__16_merged.sorted.bed.gz +│ ├── _left__16_unpaired.bed.gz +│ ├── _left__17.sorted.bam +│ ├── _left__17.sorted.bam.bai +│ ├── _left__17_finalcov.bed.gz +│ ├── _left__17_merged.sorted.bed.gz +│ ├── _left__17_unpaired.bed.gz +│ ├── _left__18.sorted.bam +│ ├── _left__18.sorted.bam.bai +│ ├── _left__18_finalcov.bed.gz +│ ├── _left__18_merged.sorted.bed.gz +│ ├── _left__18_unpaired.bed.gz +│ ├── _left__19.sorted.bam +│ ├── _left__19.sorted.bam.bai +│ ├── _left__19_finalcov.bed.gz +│ ├── _left__19_merged.sorted.bed.gz +│ ├── _left__19_unpaired.bed.gz +│ ├── _left__1_finalcov.bed.gz +│ ├── _left__1_merged.sorted.bed.gz +│ ├── _left__1_unpaired.bed.gz +│ ├── _left__2.sorted.bam +│ ├── _left__2.sorted.bam.bai +│ ├── _left__20.sorted.bam +│ ├── _left__20.sorted.bam.bai +│ ├── _left__20_finalcov.bed.gz +│ ├── _left__20_merged.sorted.bed.gz +│ ├── _left__20_unpaired.bed.gz +│ ├── _left__21.sorted.bam +│ ├── _left__21.sorted.bam.bai +│ ├── _left__21_finalcov.bed.gz +│ ├── _left__21_merged.sorted.bed.gz +│ ├── _left__21_unpaired.bed.gz +│ ├── _left__22.sorted.bam +│ ├── _left__22.sorted.bam.bai +│ ├── _left__22_finalcov.bed.gz +│ ├── _left__22_merged.sorted.bed.gz +│ ├── _left__22_unpaired.bed.gz +│ ├── _left__23.sorted.bam +│ ├── _left__23.sorted.bam.bai +│ ├── _left__23_finalcov.bed.gz +│ ├── _left__23_merged.sorted.bed.gz +│ ├── _left__23_unpaired.bed.gz +│ ├── _left__24.sorted.bam +│ ├── _left__24.sorted.bam.bai +│ ├── _left__24_finalcov.bed.gz +│ ├── _left__24_merged.sorted.bed.gz +│ ├── _left__24_unpaired.bed.gz +│ ├── _left__25.sorted.bam +│ ├── _left__25.sorted.bam.bai +│ ├── _left__25_finalcov.bed.gz +│ ├── _left__25_merged.sorted.bed.gz +│ ├── _left__25_unpaired.bed.gz +│ ├── _left__26.sorted.bam +│ ├── _left__26.sorted.bam.bai +│ ├── 
_left__26_finalcov.bed.gz +│ ├── _left__26_merged.sorted.bed.gz +│ ├── _left__26_unpaired.bed.gz +│ ├── _left__27.sorted.bam +│ ├── _left__27.sorted.bam.bai +│ ├── _left__27_finalcov.bed.gz +│ ├── _left__27_merged.sorted.bed.gz +│ ├── _left__27_unpaired.bed.gz +│ ├── _left__28.sorted.bam +│ ├── _left__28.sorted.bam.bai +│ ├── _left__28_finalcov.bed.gz +│ ├── _left__28_merged.sorted.bed.gz +│ ├── _left__28_unpaired.bed.gz +│ ├── _left__29.sorted.bam +│ ├── _left__29.sorted.bam.bai +│ ├── _left__29_finalcov.bed.gz +│ ├── _left__29_merged.sorted.bed.gz +│ ├── _left__29_unpaired.bed.gz +│ ├── _left__2_finalcov.bed.gz +│ ├── _left__2_merged.sorted.bed.gz +│ ├── _left__2_unpaired.bed.gz +│ ├── _left__3.sorted.bam +│ ├── _left__3.sorted.bam.bai +│ ├── _left__30.sorted.bam +│ ├── _left__30.sorted.bam.bai +│ ├── _left__30_finalcov.bed.gz +│ ├── _left__30_merged.sorted.bed.gz +│ ├── _left__30_unpaired.bed.gz +│ ├── _left__31.sorted.bam +│ ├── _left__31.sorted.bam.bai +│ ├── _left__31_finalcov.bed.gz +│ ├── _left__31_merged.sorted.bed.gz +│ ├── _left__31_unpaired.bed.gz +│ ├── _left__32.sorted.bam +│ ├── _left__32.sorted.bam.bai +│ ├── _left__32_finalcov.bed.gz +│ ├── _left__32_merged.sorted.bed.gz +│ ├── _left__32_unpaired.bed.gz +│ ├── _left__33.sorted.bam +│ ├── _left__33.sorted.bam.bai +│ ├── _left__33_finalcov.bed.gz +│ ├── _left__33_merged.sorted.bed.gz +│ ├── _left__33_unpaired.bed.gz +│ ├── _left__34.sorted.bam +│ ├── _left__34.sorted.bam.bai +│ ├── _left__34_finalcov.bed.gz +│ ├── _left__34_merged.sorted.bed.gz +│ ├── _left__34_unpaired.bed.gz +│ ├── _left__35.sorted.bam +│ ├── _left__35.sorted.bam.bai +│ ├── _left__35_finalcov.bed.gz +│ ├── _left__35_merged.sorted.bed.gz +│ ├── _left__35_unpaired.bed.gz +│ ├── _left__36.sorted.bam +│ ├── _left__36.sorted.bam.bai +│ ├── _left__36_finalcov.bed.gz +│ ├── _left__36_merged.sorted.bed.gz +│ ├── _left__36_unpaired.bed.gz +│ ├── _left__37.sorted.bam +│ ├── _left__37.sorted.bam.bai +│ ├── _left__37_finalcov.bed.gz +│ ├── 
_left__37_merged.sorted.bed.gz +│ ├── _left__37_unpaired.bed.gz +│ ├── _left__38.sorted.bam +│ ├── _left__38.sorted.bam.bai +│ ├── _left__38_finalcov.bed.gz +│ ├── _left__38_merged.sorted.bed.gz +│ ├── _left__38_unpaired.bed.gz +│ ├── _left__39.sorted.bam +│ ├── _left__39.sorted.bam.bai +│ ├── _left__39_finalcov.bed.gz +│ ├── _left__39_merged.sorted.bed.gz +│ ├── _left__39_unpaired.bed.gz +│ ├── _left__3_finalcov.bed.gz +│ ├── _left__3_merged.sorted.bed.gz +│ ├── _left__3_unpaired.bed.gz +│ ├── _left__4.sorted.bam +│ ├── _left__4.sorted.bam.bai +│ ├── _left__40.sorted.bam +│ ├── _left__40.sorted.bam.bai +│ ├── _left__40_finalcov.bed.gz +│ ├── _left__40_merged.sorted.bed.gz +│ ├── _left__40_unpaired.bed.gz +│ ├── _left__41.sorted.bam +│ ├── _left__41.sorted.bam.bai +│ ├── _left__41_finalcov.bed.gz +│ ├── _left__41_merged.sorted.bed.gz +│ ├── _left__41_unpaired.bed.gz +│ ├── _left__42.sorted.bam +│ ├── _left__42.sorted.bam.bai +│ ├── _left__42_finalcov.bed.gz +│ ├── _left__42_merged.sorted.bed.gz +│ ├── _left__42_unpaired.bed.gz +│ ├── _left__43.sorted.bam +│ ├── _left__43.sorted.bam.bai +│ ├── _left__43_finalcov.bed.gz +│ ├── _left__43_merged.sorted.bed.gz +│ ├── _left__43_unpaired.bed.gz +│ ├── _left__44.sorted.bam +│ ├── _left__44.sorted.bam.bai +│ ├── _left__44_finalcov.bed.gz +│ ├── _left__44_merged.sorted.bed.gz +│ ├── _left__44_unpaired.bed.gz +│ ├── _left__45.sorted.bam +│ ├── _left__45.sorted.bam.bai +│ ├── _left__45_finalcov.bed.gz +│ ├── _left__45_merged.sorted.bed.gz +│ ├── _left__45_unpaired.bed.gz +│ ├── _left__46.sorted.bam +│ ├── _left__46.sorted.bam.bai +│ ├── _left__46_finalcov.bed.gz +│ ├── _left__46_merged.sorted.bed.gz +│ ├── _left__46_unpaired.bed.gz +│ ├── _left__47.sorted.bam +│ ├── _left__47.sorted.bam.bai +│ ├── _left__47_finalcov.bed.gz +│ ├── _left__47_merged.sorted.bed.gz +│ ├── _left__47_unpaired.bed.gz +│ ├── _left__48.sorted.bam +│ ├── _left__48.sorted.bam.bai +│ ├── _left__48_finalcov.bed.gz +│ ├── _left__48_merged.sorted.bed.gz +│ 
├── _left__48_unpaired.bed.gz +│ ├── _left__49.sorted.bam +│ ├── _left__49.sorted.bam.bai +│ ├── _left__49_finalcov.bed.gz +│ ├── _left__49_merged.sorted.bed.gz +│ ├── _left__49_unpaired.bed.gz +│ ├── _left__4_finalcov.bed.gz +│ ├── _left__4_merged.sorted.bed.gz +│ ├── _left__4_unpaired.bed.gz +│ ├── _left__5.sorted.bam +│ ├── _left__5.sorted.bam.bai +│ ├── _left__50.sorted.bam +│ ├── _left__50.sorted.bam.bai +│ ├── _left__50_finalcov.bed.gz +│ ├── _left__50_merged.sorted.bed.gz +│ ├── _left__50_unpaired.bed.gz +│ ├── _left__51.sorted.bam +│ ├── _left__51.sorted.bam.bai +│ ├── _left__51_finalcov.bed.gz +│ ├── _left__51_merged.sorted.bed.gz +│ ├── _left__51_unpaired.bed.gz +│ ├── _left__52.sorted.bam +│ ├── _left__52.sorted.bam.bai +│ ├── _left__52_finalcov.bed.gz +│ ├── _left__52_merged.sorted.bed.gz +│ ├── _left__52_unpaired.bed.gz +│ ├── _left__53.sorted.bam +│ ├── _left__53.sorted.bam.bai +│ ├── _left__53_finalcov.bed.gz +│ ├── _left__53_merged.sorted.bed.gz +│ ├── _left__53_unpaired.bed.gz +│ ├── _left__54.sorted.bam +│ ├── _left__54.sorted.bam.bai +│ ├── _left__54_finalcov.bed.gz +│ ├── _left__54_merged.sorted.bed.gz +│ ├── _left__54_unpaired.bed.gz +│ ├── _left__55.sorted.bam +│ ├── _left__55.sorted.bam.bai +│ ├── _left__55_finalcov.bed.gz +│ ├── _left__55_merged.sorted.bed.gz +│ ├── _left__55_unpaired.bed.gz +│ ├── _left__56.sorted.bam +│ ├── _left__56.sorted.bam.bai +│ ├── _left__56_finalcov.bed.gz +│ ├── _left__56_merged.sorted.bed.gz +│ ├── _left__56_unpaired.bed.gz +│ ├── _left__57.sorted.bam +│ ├── _left__57.sorted.bam.bai +│ ├── _left__57_finalcov.bed.gz +│ ├── _left__57_merged.sorted.bed.gz +│ ├── _left__57_unpaired.bed.gz +│ ├── _left__58.sorted.bam +│ ├── _left__58.sorted.bam.bai +│ ├── _left__58_finalcov.bed.gz +│ ├── _left__58_merged.sorted.bed.gz +│ ├── _left__58_unpaired.bed.gz +│ ├── _left__59.sorted.bam +│ ├── _left__59.sorted.bam.bai +│ ├── _left__59_finalcov.bed.gz +│ ├── _left__59_merged.sorted.bed.gz +│ ├── _left__59_unpaired.bed.gz +│ ├── 
_left__5_finalcov.bed.gz +│ ├── _left__5_merged.sorted.bed.gz +│ ├── _left__5_unpaired.bed.gz +│ ├── _left__6.sorted.bam +│ ├── _left__6.sorted.bam.bai +│ ├── _left__60.sorted.bam +│ ├── _left__60.sorted.bam.bai +│ ├── _left__60_finalcov.bed.gz +│ ├── _left__60_merged.sorted.bed.gz +│ ├── _left__60_unpaired.bed.gz +│ ├── _left__61.sorted.bam +│ ├── _left__61.sorted.bam.bai +│ ├── _left__61_finalcov.bed.gz +│ ├── _left__61_merged.sorted.bed.gz +│ ├── _left__61_unpaired.bed.gz +│ ├── _left__62.sorted.bam +│ ├── _left__62.sorted.bam.bai +│ ├── _left__62_finalcov.bed.gz +│ ├── _left__62_merged.sorted.bed.gz +│ ├── _left__62_unpaired.bed.gz +│ ├── _left__63.sorted.bam +│ ├── _left__63.sorted.bam.bai +│ ├── _left__63_finalcov.bed.gz +│ ├── _left__63_merged.sorted.bed.gz +│ ├── _left__63_unpaired.bed.gz +│ ├── _left__64.sorted.bam +│ ├── _left__64.sorted.bam.bai +│ ├── _left__64_finalcov.bed.gz +│ ├── _left__64_merged.sorted.bed.gz +│ ├── _left__64_unpaired.bed.gz +│ ├── _left__65.sorted.bam +│ ├── _left__65.sorted.bam.bai +│ ├── _left__65_finalcov.bed.gz +│ ├── _left__65_merged.sorted.bed.gz +│ ├── _left__65_unpaired.bed.gz +│ ├── _left__66.sorted.bam +│ ├── _left__66.sorted.bam.bai +│ ├── _left__66_finalcov.bed.gz +│ ├── _left__66_merged.sorted.bed.gz +│ ├── _left__66_unpaired.bed.gz +│ ├── _left__67.sorted.bam +│ ├── _left__67.sorted.bam.bai +│ ├── _left__67_finalcov.bed.gz +│ ├── _left__67_merged.sorted.bed.gz +│ ├── _left__67_unpaired.bed.gz +│ ├── _left__68.sorted.bam +│ ├── _left__68.sorted.bam.bai +│ ├── _left__68_finalcov.bed.gz +│ ├── _left__68_merged.sorted.bed.gz +│ ├── _left__68_unpaired.bed.gz +│ ├── _left__69.sorted.bam +│ ├── _left__69.sorted.bam.bai +│ ├── _left__69_finalcov.bed.gz +│ ├── _left__69_merged.sorted.bed.gz +│ ├── _left__69_unpaired.bed.gz +│ ├── _left__6_finalcov.bed.gz +│ ├── _left__6_merged.sorted.bed.gz +│ ├── _left__6_unpaired.bed.gz +│ ├── _left__7.sorted.bam +│ ├── _left__7.sorted.bam.bai +│ ├── _left__70.sorted.bam +│ ├── 
_left__70.sorted.bam.bai +│ ├── _left__70_finalcov.bed.gz +│ ├── _left__70_merged.sorted.bed.gz +│ ├── _left__70_unpaired.bed.gz +│ ├── _left__71.sorted.bam +│ ├── _left__71.sorted.bam.bai +│ ├── _left__71_finalcov.bed.gz +│ ├── _left__71_merged.sorted.bed.gz +│ ├── _left__71_unpaired.bed.gz +│ ├── _left__72.sorted.bam +│ ├── _left__72.sorted.bam.bai +│ ├── _left__72_finalcov.bed.gz +│ ├── _left__72_merged.sorted.bed.gz +│ ├── _left__72_unpaired.bed.gz +│ ├── _left__73.sorted.bam +│ ├── _left__73.sorted.bam.bai +│ ├── _left__73_finalcov.bed.gz +│ ├── _left__73_merged.sorted.bed.gz +│ ├── _left__73_unpaired.bed.gz +│ ├── _left__74.sorted.bam +│ ├── _left__74.sorted.bam.bai +│ ├── _left__74_finalcov.bed.gz +│ ├── _left__74_merged.sorted.bed.gz +│ ├── _left__74_unpaired.bed.gz +│ ├── _left__75.sorted.bam +│ ├── _left__75.sorted.bam.bai +│ ├── _left__75_finalcov.bed.gz +│ ├── _left__75_merged.sorted.bed.gz +│ ├── _left__75_unpaired.bed.gz +│ ├── _left__76.sorted.bam +│ ├── _left__76.sorted.bam.bai +│ ├── _left__76_finalcov.bed.gz +│ ├── _left__76_merged.sorted.bed.gz +│ ├── _left__76_unpaired.bed.gz +│ ├── _left__77.sorted.bam +│ ├── _left__77.sorted.bam.bai +│ ├── _left__77_finalcov.bed.gz +│ ├── _left__77_merged.sorted.bed.gz +│ ├── _left__77_unpaired.bed.gz +│ ├── _left__78.sorted.bam +│ ├── _left__78.sorted.bam.bai +│ ├── _left__78_finalcov.bed.gz +│ ├── _left__78_merged.sorted.bed.gz +│ ├── _left__78_unpaired.bed.gz +│ ├── _left__79.sorted.bam +│ ├── _left__79.sorted.bam.bai +│ ├── _left__79_finalcov.bed.gz +│ ├── _left__79_merged.sorted.bed.gz +│ ├── _left__79_unpaired.bed.gz +│ ├── _left__7_finalcov.bed.gz +│ ├── _left__7_merged.sorted.bed.gz +│ ├── _left__7_unpaired.bed.gz +│ ├── _left__8.sorted.bam +│ ├── _left__8.sorted.bam.bai +│ ├── _left__80_finalcov.bed.gz +│ ├── _left__80_merged.sorted.bed.gz +│ ├── _left__80_unpaired.bed.gz +│ ├── _left__8_finalcov.bed.gz +│ ├── _left__8_merged.sorted.bed.gz +│ ├── _left__8_unpaired.bed.gz +│ ├── _left__9.sorted.bam +│ 
├── _left__9.sorted.bam.bai +│ ├── _left__9_finalcov.bed.gz +│ ├── _left__9_merged.sorted.bed.gz +│ ├── _left__9_unpaired.bed.gz +│ ├── _right__1.sorted.bam +│ ├── _right__1.sorted.bam.bai +│ ├── _right__10.sorted.bam +│ ├── _right__10.sorted.bam.bai +│ ├── _right__10_finalcov.bed.gz +│ ├── _right__10_merged.sorted.bed.gz +│ ├── _right__10_unpaired.bed.gz +│ ├── _right__11.sorted.bam +│ ├── _right__11.sorted.bam.bai +│ ├── _right__11_finalcov.bed.gz +│ ├── _right__11_merged.sorted.bed.gz +│ ├── _right__11_unpaired.bed.gz +│ ├── _right__12.sorted.bam +│ ├── _right__12.sorted.bam.bai +│ ├── _right__12_finalcov.bed.gz +│ ├── _right__12_merged.sorted.bed.gz +│ ├── _right__12_unpaired.bed.gz +│ ├── _right__13.sorted.bam +│ ├── _right__13.sorted.bam.bai +│ ├── _right__13_finalcov.bed.gz +│ ├── _right__13_merged.sorted.bed.gz +│ ├── _right__13_unpaired.bed.gz +│ ├── _right__14.sorted.bam +│ ├── _right__14.sorted.bam.bai +│ ├── _right__14_finalcov.bed.gz +│ ├── _right__14_merged.sorted.bed.gz +│ ├── _right__14_unpaired.bed.gz +│ ├── _right__15.sorted.bam +│ ├── _right__15.sorted.bam.bai +│ ├── _right__15_finalcov.bed.gz +│ ├── _right__15_merged.sorted.bed.gz +│ ├── _right__15_unpaired.bed.gz +│ ├── _right__16.sorted.bam +│ ├── _right__16.sorted.bam.bai +│ ├── _right__16_finalcov.bed.gz +│ ├── _right__16_merged.sorted.bed.gz +│ ├── _right__16_unpaired.bed.gz +│ ├── _right__17.sorted.bam +│ ├── _right__17.sorted.bam.bai +│ ├── _right__17_finalcov.bed.gz +│ ├── _right__17_merged.sorted.bed.gz +│ ├── _right__17_unpaired.bed.gz +│ ├── _right__18.sorted.bam +│ ├── _right__18.sorted.bam.bai +│ ├── _right__18_finalcov.bed.gz +│ ├── _right__18_merged.sorted.bed.gz +│ ├── _right__18_unpaired.bed.gz +│ ├── _right__19.sorted.bam +│ ├── _right__19.sorted.bam.bai +│ ├── _right__19_finalcov.bed.gz +│ ├── _right__19_merged.sorted.bed.gz +│ ├── _right__19_unpaired.bed.gz +│ ├── _right__1_finalcov.bed.gz +│ ├── _right__1_merged.sorted.bed.gz +│ ├── _right__1_unpaired.bed.gz +│ ├── 
_right__2.sorted.bam +│ ├── _right__2.sorted.bam.bai +│ ├── _right__20.sorted.bam +│ ├── _right__20.sorted.bam.bai +│ ├── _right__20_finalcov.bed.gz +│ ├── _right__20_merged.sorted.bed.gz +│ ├── _right__20_unpaired.bed.gz +│ ├── _right__21.sorted.bam +│ ├── _right__21.sorted.bam.bai +│ ├── _right__21_finalcov.bed.gz +│ ├── _right__21_merged.sorted.bed.gz +│ ├── _right__21_unpaired.bed.gz +│ ├── _right__22.sorted.bam +│ ├── _right__22.sorted.bam.bai +│ ├── _right__22_finalcov.bed.gz +│ ├── _right__22_merged.sorted.bed.gz +│ ├── _right__22_unpaired.bed.gz +│ ├── _right__23.sorted.bam +│ ├── _right__23.sorted.bam.bai +│ ├── _right__23_finalcov.bed.gz +│ ├── _right__23_merged.sorted.bed.gz +│ ├── _right__23_unpaired.bed.gz +│ ├── _right__24.sorted.bam +│ ├── _right__24.sorted.bam.bai +│ ├── _right__24_finalcov.bed.gz +│ ├── _right__24_merged.sorted.bed.gz +│ ├── _right__24_unpaired.bed.gz +│ ├── _right__25.sorted.bam +│ ├── _right__25.sorted.bam.bai +│ ├── _right__25_finalcov.bed.gz +│ ├── _right__25_merged.sorted.bed.gz +│ ├── _right__25_unpaired.bed.gz +│ ├── _right__26.sorted.bam +│ ├── _right__26.sorted.bam.bai +│ ├── _right__26_finalcov.bed.gz +│ ├── _right__26_merged.sorted.bed.gz +│ ├── _right__26_unpaired.bed.gz +│ ├── _right__27.sorted.bam +│ ├── _right__27.sorted.bam.bai +│ ├── _right__27_finalcov.bed.gz +│ ├── _right__27_merged.sorted.bed.gz +│ ├── _right__27_unpaired.bed.gz +│ ├── _right__28.sorted.bam +│ ├── _right__28.sorted.bam.bai +│ ├── _right__28_finalcov.bed.gz +│ ├── _right__28_merged.sorted.bed.gz +│ ├── _right__28_unpaired.bed.gz +│ ├── _right__29.sorted.bam +│ ├── _right__29.sorted.bam.bai +│ ├── _right__29_finalcov.bed.gz +│ ├── _right__29_merged.sorted.bed.gz +│ ├── _right__29_unpaired.bed.gz +│ ├── _right__2_finalcov.bed.gz +│ ├── _right__2_merged.sorted.bed.gz +│ ├── _right__2_unpaired.bed.gz +│ ├── _right__3.sorted.bam +│ ├── _right__3.sorted.bam.bai +│ ├── _right__30.sorted.bam +│ ├── _right__30.sorted.bam.bai +│ ├── 
_right__30_finalcov.bed.gz +│ ├── _right__30_merged.sorted.bed.gz +│ ├── _right__30_unpaired.bed.gz +│ ├── _right__31.sorted.bam +│ ├── _right__31.sorted.bam.bai +│ ├── _right__31_finalcov.bed.gz +│ ├── _right__31_merged.sorted.bed.gz +│ ├── _right__31_unpaired.bed.gz +│ ├── _right__32.sorted.bam +│ ├── _right__32.sorted.bam.bai +│ ├── _right__32_finalcov.bed.gz +│ ├── _right__32_merged.sorted.bed.gz +│ ├── _right__32_unpaired.bed.gz +│ ├── _right__33.sorted.bam +│ ├── _right__33.sorted.bam.bai +│ ├── _right__33_finalcov.bed.gz +│ ├── _right__33_merged.sorted.bed.gz +│ ├── _right__33_unpaired.bed.gz +│ ├── _right__34.sorted.bam +│ ├── _right__34.sorted.bam.bai +│ ├── _right__34_finalcov.bed.gz +│ ├── _right__34_merged.sorted.bed.gz +│ ├── _right__34_unpaired.bed.gz +│ ├── _right__35.sorted.bam +│ ├── _right__35.sorted.bam.bai +│ ├── _right__35_finalcov.bed.gz +│ ├── _right__35_merged.sorted.bed.gz +│ ├── _right__35_unpaired.bed.gz +│ ├── _right__36.sorted.bam +│ ├── _right__36.sorted.bam.bai +│ ├── _right__36_finalcov.bed.gz +│ ├── _right__36_merged.sorted.bed.gz +│ ├── _right__36_unpaired.bed.gz +│ ├── _right__37.sorted.bam +│ ├── _right__37.sorted.bam.bai +│ ├── _right__37_finalcov.bed.gz +│ ├── _right__37_merged.sorted.bed.gz +│ ├── _right__37_unpaired.bed.gz +│ ├── _right__38.sorted.bam +│ ├── _right__38.sorted.bam.bai +│ ├── _right__38_finalcov.bed.gz +│ ├── _right__38_merged.sorted.bed.gz +│ ├── _right__38_unpaired.bed.gz +│ ├── _right__39.sorted.bam +│ ├── _right__39.sorted.bam.bai +│ ├── _right__39_finalcov.bed.gz +│ ├── _right__39_merged.sorted.bed.gz +│ ├── _right__39_unpaired.bed.gz +│ ├── _right__3_finalcov.bed.gz +│ ├── _right__3_merged.sorted.bed.gz +│ ├── _right__3_unpaired.bed.gz +│ ├── _right__4.sorted.bam +│ ├── _right__4.sorted.bam.bai +│ ├── _right__40.sorted.bam +│ ├── _right__40.sorted.bam.bai +│ ├── _right__40_finalcov.bed.gz +│ ├── _right__40_merged.sorted.bed.gz +│ ├── _right__40_unpaired.bed.gz +│ ├── _right__41.sorted.bam +│ ├── 
_right__41.sorted.bam.bai +│ ├── _right__41_finalcov.bed.gz +│ ├── _right__41_merged.sorted.bed.gz +│ ├── _right__41_unpaired.bed.gz +│ ├── _right__42.sorted.bam +│ ├── _right__42.sorted.bam.bai +│ ├── _right__42_finalcov.bed.gz +│ ├── _right__42_merged.sorted.bed.gz +│ ├── _right__42_unpaired.bed.gz +│ ├── _right__43.sorted.bam +│ ├── _right__43.sorted.bam.bai +│ ├── _right__43_finalcov.bed.gz +│ ├── _right__43_merged.sorted.bed.gz +│ ├── _right__43_unpaired.bed.gz +│ ├── _right__44.sorted.bam +│ ├── _right__44.sorted.bam.bai +│ ├── _right__44_finalcov.bed.gz +│ ├── _right__44_merged.sorted.bed.gz +│ ├── _right__44_unpaired.bed.gz +│ ├── _right__45.sorted.bam +│ ├── _right__45.sorted.bam.bai +│ ├── _right__45_finalcov.bed.gz +│ ├── _right__45_merged.sorted.bed.gz +│ ├── _right__45_unpaired.bed.gz +│ ├── _right__46.sorted.bam +│ ├── _right__46.sorted.bam.bai +│ ├── _right__46_finalcov.bed.gz +│ ├── _right__46_merged.sorted.bed.gz +│ ├── _right__46_unpaired.bed.gz +│ ├── _right__47.sorted.bam +│ ├── _right__47.sorted.bam.bai +│ ├── _right__47_finalcov.bed.gz +│ ├── _right__47_merged.sorted.bed.gz +│ ├── _right__47_unpaired.bed.gz +│ ├── _right__48.sorted.bam +│ ├── _right__48.sorted.bam.bai +│ ├── _right__48_finalcov.bed.gz +│ ├── _right__48_merged.sorted.bed.gz +│ ├── _right__48_unpaired.bed.gz +│ ├── _right__49.sorted.bam +│ ├── _right__49.sorted.bam.bai +│ ├── _right__49_finalcov.bed.gz +│ ├── _right__49_merged.sorted.bed.gz +│ ├── _right__49_unpaired.bed.gz +│ ├── _right__4_finalcov.bed.gz +│ ├── _right__4_merged.sorted.bed.gz +│ ├── _right__4_unpaired.bed.gz +│ ├── _right__5.sorted.bam +│ ├── _right__5.sorted.bam.bai +│ ├── _right__50.sorted.bam +│ ├── _right__50.sorted.bam.bai +│ ├── _right__50_finalcov.bed.gz +│ ├── _right__50_merged.sorted.bed.gz +│ ├── _right__50_unpaired.bed.gz +│ ├── _right__51.sorted.bam +│ ├── _right__51.sorted.bam.bai +│ ├── _right__51_finalcov.bed.gz +│ ├── _right__51_merged.sorted.bed.gz +│ ├── _right__51_unpaired.bed.gz +│ ├── 
_right__52.sorted.bam +│ ├── _right__52.sorted.bam.bai +│ ├── _right__52_finalcov.bed.gz +│ ├── _right__52_merged.sorted.bed.gz +│ ├── _right__52_unpaired.bed.gz +│ ├── _right__53.sorted.bam +│ ├── _right__53.sorted.bam.bai +│ ├── _right__53_finalcov.bed.gz +│ ├── _right__53_merged.sorted.bed.gz +│ ├── _right__53_unpaired.bed.gz +│ ├── _right__54.sorted.bam +│ ├── _right__54.sorted.bam.bai +│ ├── _right__54_finalcov.bed.gz +│ ├── _right__54_merged.sorted.bed.gz +│ ├── _right__54_unpaired.bed.gz +│ ├── _right__55.sorted.bam +│ ├── _right__55.sorted.bam.bai +│ ├── _right__55_finalcov.bed.gz +│ ├── _right__55_merged.sorted.bed.gz +│ ├── _right__55_unpaired.bed.gz +│ ├── _right__56.sorted.bam +│ ├── _right__56.sorted.bam.bai +│ ├── _right__56_finalcov.bed.gz +│ ├── _right__56_merged.sorted.bed.gz +│ ├── _right__56_unpaired.bed.gz +│ ├── _right__57.sorted.bam +│ ├── _right__57.sorted.bam.bai +│ ├── _right__57_finalcov.bed.gz +│ ├── _right__57_merged.sorted.bed.gz +│ ├── _right__57_unpaired.bed.gz +│ ├── _right__58.sorted.bam +│ ├── _right__58.sorted.bam.bai +│ ├── _right__58_finalcov.bed.gz +│ ├── _right__58_merged.sorted.bed.gz +│ ├── _right__58_unpaired.bed.gz +│ ├── _right__59.sorted.bam +│ ├── _right__59.sorted.bam.bai +│ ├── _right__59_finalcov.bed.gz +│ ├── _right__59_merged.sorted.bed.gz +│ ├── _right__59_unpaired.bed.gz +│ ├── _right__5_finalcov.bed.gz +│ ├── _right__5_merged.sorted.bed.gz +│ ├── _right__5_unpaired.bed.gz +│ ├── _right__6.sorted.bam +│ ├── _right__6.sorted.bam.bai +│ ├── _right__60.sorted.bam +│ ├── _right__60.sorted.bam.bai +│ ├── _right__60_finalcov.bed.gz +│ ├── _right__60_merged.sorted.bed.gz +│ ├── _right__60_unpaired.bed.gz +│ ├── _right__61.sorted.bam +│ ├── _right__61.sorted.bam.bai +│ ├── _right__61_finalcov.bed.gz +│ ├── _right__61_merged.sorted.bed.gz +│ ├── _right__61_unpaired.bed.gz +│ ├── _right__62.sorted.bam +│ ├── _right__62.sorted.bam.bai +│ ├── _right__62_finalcov.bed.gz +│ ├── _right__62_merged.sorted.bed.gz +│ ├── 
_right__62_unpaired.bed.gz +│ ├── _right__63.sorted.bam +│ ├── _right__63.sorted.bam.bai +│ ├── _right__63_finalcov.bed.gz +│ ├── _right__63_merged.sorted.bed.gz +│ ├── _right__63_unpaired.bed.gz +│ ├── _right__64.sorted.bam +│ ├── _right__64.sorted.bam.bai +│ ├── _right__64_finalcov.bed.gz +│ ├── _right__64_merged.sorted.bed.gz +│ ├── _right__64_unpaired.bed.gz +│ ├── _right__65.sorted.bam +│ ├── _right__65.sorted.bam.bai +│ ├── _right__65_finalcov.bed.gz +│ ├── _right__65_merged.sorted.bed.gz +│ ├── _right__65_unpaired.bed.gz +│ ├── _right__66.sorted.bam +│ ├── _right__66.sorted.bam.bai +│ ├── _right__66_finalcov.bed.gz +│ ├── _right__66_merged.sorted.bed.gz +│ ├── _right__66_unpaired.bed.gz +│ ├── _right__67.sorted.bam +│ ├── _right__67.sorted.bam.bai +│ ├── _right__67_finalcov.bed.gz +│ ├── _right__67_merged.sorted.bed.gz +│ ├── _right__67_unpaired.bed.gz +│ ├── _right__68.sorted.bam +│ ├── _right__68.sorted.bam.bai +│ ├── _right__68_finalcov.bed.gz +│ ├── _right__68_merged.sorted.bed.gz +│ ├── _right__68_unpaired.bed.gz +│ ├── _right__69.sorted.bam +│ ├── _right__69.sorted.bam.bai +│ ├── _right__69_finalcov.bed.gz +│ ├── _right__69_merged.sorted.bed.gz +│ ├── _right__69_unpaired.bed.gz +│ ├── _right__6_finalcov.bed.gz +│ ├── _right__6_merged.sorted.bed.gz +│ ├── _right__6_unpaired.bed.gz +│ ├── _right__7.sorted.bam +│ ├── _right__7.sorted.bam.bai +│ ├── _right__70.sorted.bam +│ ├── _right__70.sorted.bam.bai +│ ├── _right__70_finalcov.bed.gz +│ ├── _right__70_merged.sorted.bed.gz +│ ├── _right__70_unpaired.bed.gz +│ ├── _right__71.sorted.bam +│ ├── _right__71.sorted.bam.bai +│ ├── _right__71_finalcov.bed.gz +│ ├── _right__71_merged.sorted.bed.gz +│ ├── _right__71_unpaired.bed.gz +│ ├── _right__72.sorted.bam +│ ├── _right__72.sorted.bam.bai +│ ├── _right__72_finalcov.bed.gz +│ ├── _right__72_merged.sorted.bed.gz +│ ├── _right__72_unpaired.bed.gz +│ ├── _right__73.sorted.bam +│ ├── _right__73.sorted.bam.bai +│ ├── _right__73_finalcov.bed.gz +│ ├── 
_right__73_merged.sorted.bed.gz +│ ├── _right__73_unpaired.bed.gz +│ ├── _right__74.sorted.bam +│ ├── _right__74.sorted.bam.bai +│ ├── _right__74_finalcov.bed.gz +│ ├── _right__74_merged.sorted.bed.gz +│ ├── _right__74_unpaired.bed.gz +│ ├── _right__75.sorted.bam +│ ├── _right__75.sorted.bam.bai +│ ├── _right__75_finalcov.bed.gz +│ ├── _right__75_merged.sorted.bed.gz +│ ├── _right__75_unpaired.bed.gz +│ ├── _right__76.sorted.bam +│ ├── _right__76.sorted.bam.bai +│ ├── _right__76_finalcov.bed.gz +│ ├── _right__76_merged.sorted.bed.gz +│ ├── _right__76_unpaired.bed.gz +│ ├── _right__77.sorted.bam +│ ├── _right__77.sorted.bam.bai +│ ├── _right__77_finalcov.bed.gz +│ ├── _right__77_merged.sorted.bed.gz +│ ├── _right__77_unpaired.bed.gz +│ ├── _right__78.sorted.bam +│ ├── _right__78.sorted.bam.bai +│ ├── _right__78_finalcov.bed.gz +│ ├── _right__78_merged.sorted.bed.gz +│ ├── _right__78_unpaired.bed.gz +│ ├── _right__79.sorted.bam +│ ├── _right__79.sorted.bam.bai +│ ├── _right__79_finalcov.bed.gz +│ ├── _right__79_merged.sorted.bed.gz +│ ├── _right__79_unpaired.bed.gz +│ ├── _right__7_finalcov.bed.gz +│ ├── _right__7_merged.sorted.bed.gz +│ ├── _right__7_unpaired.bed.gz +│ ├── _right__8.sorted.bam +│ ├── _right__8.sorted.bam.bai +│ ├── _right__80_finalcov.bed.gz +│ ├── _right__80_merged.sorted.bed.gz +│ ├── _right__80_unpaired.bed.gz +│ ├── _right__8_finalcov.bed.gz +│ ├── _right__8_merged.sorted.bed.gz +│ ├── _right__8_unpaired.bed.gz +│ ├── _right__9.sorted.bam +│ ├── _right__9.sorted.bam.bai +│ ├── _right__9_finalcov.bed.gz +│ ├── _right__9_merged.sorted.bed.gz +│ └── _right__9_unpaired.bed.gz +└── bactopia-runs + └── ismapper- + └── nf-reports + ├── ismapper-dag.dot + ├── ismapper-report.html + └── ismapper-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.vcf` | VCF file with insertion site calls | +| `*.txt` | Summary of insertion site findings | + +### Merged Results + +| File | Description | 
+|------|-------------| +| `ismapper.tsv` | Merged TSV file containing ISMapper results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| ismapper-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| ismapper-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| ismapper-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| ismapper-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data.
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### ISMapper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--reference` | string | | Reference genome for typing against in GenBank format | +| `--insertions` | string | | Multifasta file with insertion sequence(s) to be mapped to | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [ismapper](/developers/subworkflows/ismapper) - Identify transposase insertion sites in bacterial genomes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ISMapper](https://github.com/jhawkey/IS_mapper) + Hawkey J, Hamidian M, Wick RR, Edwards DJ, Billman-Jacobe H, Hall RM, Holt KE [ISMapper: identifying transposase insertion sites in bacterial genomes from short read sequence data](http://dx.doi.org/10.1186/s12864-015-1860-2). _BMC Genomics_ 16, 667 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/ismapper) diff --git a/bactopia-tools/kleborate.mdx b/bactopia-tools/kleborate.mdx new file mode 100644 index 00000000..7b6634e7 --- /dev/null +++ b/bactopia-tools/kleborate.mdx @@ -0,0 +1,193 @@ +--- +title: kleborate +description: "Comprehensive screening of Klebsiella genomes for virulence and resistance determinants." +tags: + - klebsiella + - mlst + - virulence + - amr + - serotyping + - bactopia-tool +--- + +# kleborate + +**Tags:** klebsiella mlst virulence amr serotyping bactopia-tool + +Comprehensive screening of Klebsiella genomes for virulence and resistance determinants. + +This Bactopia Tool uses [Kleborate](https://github.com/katholt/Kleborate) to screen genome assemblies of +_Klebsiella pneumoniae_ and the _Klebsiella pneumoniae_ species complex (KpSC). Kleborate predicts: +MLST, species, ICEKp associated virulence loci, virulence plasmid associated loci, +antimicrobial resistance determinants, and K (capsule) and O antigen (LPS) serotype. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf kleborate \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/kleborate/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── kleborate- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── kleborate- + ├── merged-results + │ ├── kleborate.tsv + │ └── logs + │ └── kleborate-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── kleborate-dag.dot + ├── kleborate-report.html + └── kleborate-timeline.html +``` + +### Comprehensive Analysis + +| File | Description | +|------|-------------| +| `*.kleborate.tsv` | Comprehensive Kleborate report | +| `*-resistance.tsv` | AMR determinant summary | +| `*-virulence.tsv` | Virulence gene summary | + +### Merged Results + +| File | Description | +|------|-------------| +| `kleborate.tsv` | Merged TSV file containing Kleborate results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| kleborate-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| kleborate-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| kleborate-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| kleborate-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Kleborate Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--kleborate_preset` | string | `kpsc` | Preset module to use for Kleborate (choices: `kpsc`, `kosc`, `escherichia`) | +| `--kleborate_opts` | string | | Extra options in quotes for Kleborate | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [kleborate](/developers/subworkflows/kleborate) - Genotyping tool for Klebsiella pneumoniae and its related species complex. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kleborate](https://github.com/katholt/Kleborate) + Lam MMC, Wick RR, Watts SC, Cerdeira LT, Wyres KL, Holt KE [A genomic surveillance framework and genotyping tool for Klebsiella pneumoniae and its related species complex.](https://doi.org/10.1038/s41467-021-24448-3) _Nat Commun_ 12, 4188 (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/kleborate) diff --git a/bactopia-tools/kraken2.mdx b/bactopia-tools/kraken2.mdx new file mode 100644 index 00000000..c34439c8 --- /dev/null +++ b/bactopia-tools/kraken2.mdx @@ -0,0 +1,188 @@ +--- +title: kraken2 +description: "Taxonomic classification of metagenomic sequence reads." +tags: + - taxonomy + - metagenomics + - classification + - fastq + - kraken2 + - bactopia-tool +--- + +# kraken2 + +**Tags:** taxonomy metagenomics classification fastq kraken2 bactopia-tool + +Taxonomic classification of metagenomic sequence reads. + +This Bactopia Tool uses [Kraken2](https://github.com/DerrickWood/kraken2) to assign taxonomic +classifications to metagenomic sequence reads. It creates reports of taxonomic assignments +and generates interactive Krona plots for visualization of classification results.
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf kraken2 \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/kraken2/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── kraken2- +│ ├── .kraken2.report.txt +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── kraken2- + └── nf-reports + ├── kraken2-dag.dot + ├── kraken2-report.html + └── kraken2-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.report` | Kraken2 taxonomic classification report | +| `*.kraken2` | Kraken2 classification results | +| `*.html` | Krona interactive HTML report | + +### Merged Results + +| File | Description | +|------|-------------| +| `merged-report` | Merged Kraken2 reports from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run.
These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| kraken2-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| kraken2-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| kraken2-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| kraken2-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Kraken2 Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--kraken2_db` | string | | A single tarball or path to a Kraken2 formatted database | +| `--kraken2_confidence` | number | `0.0` | Confidence score threshold between 0 and 1 | +| `--kraken2_use_mpa_style` | boolean | `false` | Format report output like Kraken 1's kraken-mpa-report | +| `--kraken2_report_zero_counts` | boolean | `false` | Report counts for ALL taxa, even if counts are zero | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [kraken2](/developers/subworkflows/kraken2) - Classify metagenomic reads using Kraken2. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/kraken2) diff --git a/bactopia-tools/legsta.mdx b/bactopia-tools/legsta.mdx new file mode 100644 index 00000000..22214784 --- /dev/null +++ b/bactopia-tools/legsta.mdx @@ -0,0 +1,187 @@ +--- +title: legsta +description: "Sequence Based Typing (SBT) of Legionella pneumophila." +tags: + - legionella-pneumophila + - sbt + - typing + - fasta + - bactopia-tool +--- + +# legsta + +**Tags:** legionella-pneumophila sbt typing fasta bactopia-tool + +Sequence Based Typing (SBT) of Legionella pneumophila. + +This Bactopia Tool uses [legsta](https://github.com/tseemann/legsta) for +_in silico_ _Legionella pneumophila_ Sequence Based Typing (SBT). 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf legsta \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/legsta/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── legsta- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── legsta- + ├── merged-results + │ ├── legsta.tsv + │ └── logs + │ └── legsta-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── legsta-dag.dot + ├── legsta-report.html + └── legsta-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | SBT typing results | + +### Merged Results + +| File | Description | +|------|-------------| +| `legsta.tsv` | Merged TSV file containing legsta results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run.
These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| legsta-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| legsta-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| legsta-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| legsta-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### legsta Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--legsta_noheader` | boolean | `false` | Don't print header row | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [legsta](/developers/subworkflows/legsta) - In silico Legionella pneumophila Sequence Based Typing. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [legsta](https://github.com/tseemann/legsta) + Seemann T [legsta: In silico Legionella pneumophila Sequence Based Typing](https://github.com/tseemann/legsta) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/legsta) diff --git a/bactopia-tools/lissero.mdx b/bactopia-tools/lissero.mdx new file mode 100644 index 00000000..5159c30e --- /dev/null +++ b/bactopia-tools/lissero.mdx @@ -0,0 +1,187 @@ +--- +title: lissero +description: "Serogroup typing prediction for Listeria monocytogenes." +tags: + - listeria-monocytogenes + - serotyping + - fasta + - bactopia-tool +--- + +# lissero + +**Tags:** listeria-monocytogenes serotyping fasta bactopia-tool + +Serogroup typing prediction for Listeria monocytogenes. + +This Bactopia Tool uses [LisSero](https://github.com/MDU-PHL/LisSero) for +_in silico_ serotype prediction for _Listeria monocytogenes_ assemblies. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf lissero \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/lissero/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── lissero- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── lissero- + ├── merged-results + │ ├── lissero.tsv + │ └── logs + │ └── lissero-concat + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── nf-reports + ├── lissero-dag.dot + ├── lissero-report.html + └── lissero-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Serotype prediction results | + +### Merged Results + +| File | Description | +|------|-------------| +| `lissero.tsv` | Merged TSV file containing LisSero results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide great a great summary of your run. 
These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| lissero-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| lissero-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| lissero-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| lissero-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### LisSero Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--lissero_min_id` | number | `95.0` | Minimum percent identity to accept a match | +| `--lissero_min_cov` | number | `95.0` | Minimum coverage of the gene to accept a match | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [lissero](/developers/subworkflows/lissero) - In silico serotype prediction for Listeria monocytogenes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [LisSero](https://github.com/MDU-PHL/LisSero) + Kwong J, Zhang J, Seemann T, Horan K, Gonçalves da Silva A [LisSero - _In silico_ serotype prediction for _Listeria monocytogenes_](https://github.com/MDU-PHL/LisSero) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/lissero) diff --git a/bactopia-tools/mashdist.mdx b/bactopia-tools/mashdist.mdx new file mode 100644 index 00000000..85f48444 --- /dev/null +++ b/bactopia-tools/mashdist.mdx @@ -0,0 +1,189 @@ +--- +title: mashdist +description: "Calculate Mash distances between sequences and reference genomes." +tags: + - mash + - distance + - similarity + - comparative-genomics + - bactopia-tool +--- + +# mashdist + +**Tags:** mash distance similarity comparative-genomics bactopia-tool + +Calculate Mash distances between sequences and reference genomes. + +This Bactopia Tool uses [Mash](https://github.com/marbl/Mash) to determine the Mash +distance from samples to reference genome sketches for rapid genomic comparison. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf mashdist \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/mashdist/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── mashdist- +│ └── mashdist- +│ ├── -dist.txt +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── mashdist- + ├── merged-results + │ ├── logs + │ │ └── mashdist-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── mashdist.tsv + └── nf-reports + ├── mashdist-dag.dot + ├── mashdist-report.html + └── mashdist-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | Mash distance results for each sample | + +### Merged Results + +| File | Description | +|------|-------------| +| `mashdist.tsv` | Merged TSV file containing Mash distances from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| mashdist-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| mashdist-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| mashdist-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| mashdist-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### mashdist Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mash_sketch` | string | | The reference sequence as a Mash Sketch (.msh file) | +| `--full_merlin` | boolean | `false` | Go full Merlin and run all species-specific tools, no matter the Mash distance | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [mashdist](/developers/subworkflows/mashdist) - Calculate Mash distances between sequences and a reference. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mash](https://github.com/marbl/Mash) + Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). _Genome Biol_ 17, 132 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/mashdist) diff --git a/bactopia-tools/mashtree.mdx b/bactopia-tools/mashtree.mdx new file mode 100644 index 00000000..154879a3 --- /dev/null +++ b/bactopia-tools/mashtree.mdx @@ -0,0 +1,197 @@ +--- +title: mashtree +description: "Rapid phylogenetic tree construction using Mash distances." +tags: + - phylogeny + - tree + - mash + - distance + - comparative-genomics + - bactopia-tool +--- + +# mashtree + +**Tags:** phylogeny tree mash distance comparative-genomics bactopia-tool + +Rapid phylogenetic tree construction using Mash distances. + +This Bactopia Tool uses [Mashtree](https://github.com/lskatz/mashtree) to create a phylogenetic tree +of samples using [Mash](https://github.com/marbl/Mash) distances. It can include reference +genomes from RefSeq by downloading them with NCBI genome download. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf mashtree \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/mashtree/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +└── + └── mashtree- + ├── logs + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + ├── mashtree.dnd + ├── mashtree.tsv + └── nf-reports + ├── mashtree-dag.dot + ├── mashtree-report.html + └── mashtree-timeline.html +``` + +### Phylogenetic Analysis + +| File | Description | +|------|-------------| +| `mashtree.dnd` | Newick format tree file | +| `mashtree.tsv` | Tab-delimited distance matrix | + +### Merged Results + +| File | Description | +|------|-------------| +| `mashtree-summary.tsv` | Merged summary of all Mashtree results | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide great a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. 
+ +| Filename | Description | +|----------|-------------| +| mashtree-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| mashtree-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| mashtree-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| mashtree-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Mashtree Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mashtree_sortorder` | string | `ABC` | For neighbor-joining, the sort order can make a difference. (choices: `ABC`, `random`, `input-order`) | +| `--mashtree_genomesize` | integer | `5000000` | Genome size of the input samples | +| `--mashtree_mindepth` | integer | `5` | If mindepth is zero, then it will be chosen in a smart but slower method, to discard lower-abundance kmers. 
| +| `--mashtree_kmerlength` | integer | `21` | Hashes will be based on strings of this many nucleotides | + +### NCBI Genome Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--species` | string | | Name of the species to download assemblies for | +| `--accession` | string | | An NCBI Assembly accession to be downloaded | +| `--accessions` | string | | A file of NCBI Assembly accessions (one per line) to be downloaded | +| `--format` | string | `fasta` | Comma separated list of formats to download | +| `--limit` | string | | Limit the number of assemblies to download | +| `--keep_downloads` | boolean | `false` | Save downloaded files into the bactopia-runs folder | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [mashtree](/developers/subworkflows/mashtree) - Create phylogenetic trees using Mash distances. +- [ncbigenomedownload](/developers/subworkflows/ncbigenomedownload) - Download bacterial genomes from NCBI's RefSeq database. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mashtree](https://github.com/lskatz/mashtree) + Katz LS, Griswold T, Morrison S, Caravas J, Zhang S, den Bakker HC, Deng X, Carleton HA [Mashtree: a rapid comparison of whole genome sequence files.](https://doi.org/10.21105/joss.01762) _Journal of Open Source Software_, 4(44), 1762 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/mashtree) diff --git a/bactopia-tools/mcroni.mdx b/bactopia-tools/mcroni.mdx new file mode 100644 index 00000000..b2e28f51 --- /dev/null +++ b/bactopia-tools/mcroni.mdx @@ -0,0 +1,184 @@ +--- +title: mcroni +description: "Sequence variation analysis of mcr-1 genes (mobilized colistin resistance)." +tags: + - mcr-1 + - colistin-resistance + - antimicrobial-resistance + - fasta + - bactopia-tool +--- + +# mcroni + +**Tags:** mcr-1 colistin-resistance antimicrobial-resistance fasta bactopia-tool + +Sequence variation analysis of mcr-1 genes (mobilized colistin resistance). + +This Bactopia Tool uses [mcroni](https://github.com/liampshaw/mcroni) to identify _mcr-1_ genes in +assemblies and report sequence variations. If _mcr-1_ is found, the variations will be reported +and available in an output FASTA file. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf mcroni \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/mcroni/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── mcroni- +│ ├── .fasta +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── mcroni- + ├── merged-results + │ ├── logs + │ │ └── mcroni-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── mcroni.tsv + └── nf-reports + ├── mcroni-dag.dot + ├── mcroni-report.html + └── mcroni-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | Summary of mcr-1 variants found | +| `*.fasta` | FASTA file of mcr-1 variants | + +### Merged Results + +| File | Description | +|------|-------------| +| `mcroni.tsv` | Merged TSV file containing mcroni results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| mcroni-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| mcroni-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| mcroni-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| mcroni-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [mcroni](/developers/subworkflows/mcroni) - Scripts for finding and processing promoter variants upstream of mcr-1. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [mcroni](https://github.com/liampshaw/mcroni) + Shaw L [mcroni: Scripts for finding and processing promoter variants upstream of mcr-1](https://github.com/liampshaw/mcroni) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/mcroni) diff --git a/bactopia-tools/meningotype.mdx b/bactopia-tools/meningotype.mdx new file mode 100644 index 00000000..6ee8b2fe --- /dev/null +++ b/bactopia-tools/meningotype.mdx @@ -0,0 +1,198 @@ +--- +title: meningotype +description: "Comprehensive typing of Neisseria meningitidis." +tags: + - neisseria-meningitidis + - serotyping + - mlst + - finetyping + - fasta + - bactopia-tool +--- + +# meningotype + +**Tags:** neisseria-meningitidis serotyping mlst finetyping fasta bactopia-tool + +Comprehensive typing of Neisseria meningitidis. + +This Bactopia Tool uses [meningotype](https://github.com/MDU-PHL/meningotype) +for _in silico_ typing of _Neisseria meningitidis_ genomes. It uses assembly contigs +to determine the serotype, MLST, finetyping (_porA_, _fetA_, _porB_), and +Bexsero antigen sequence typing (BAST) (_fHbp_, _NHBA_, _NadA_, _PorA_). 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf meningotype \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/meningotype/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── meningotype- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── meningotype- + ├── merged-results + │ ├── logs + │ │ └── meningotype-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── meningotype.tsv + └── nf-reports + ├── meningotype-dag.dot + ├── meningotype-report.html + └── meningotype-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | Comprehensive typing report | +| `*-allele.tsv` | Allele typing results | +| `*-mlst.tsv` | MLST typing results | + +### Merged Results + +| File | Description | +|------|-------------| +| `meningotype.tsv` | Merged TSV file containing meningotype results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| meningotype-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| meningotype-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| meningotype-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| meningotype-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### meningotype Parameters + +You can use these parameters to fine-tune your meningotype analysis + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--meningotype_finetype` | boolean | `false` | perform porA and fetA fine typing | +| `--meningotype_porB` | boolean | `false` | perform porB sequence typing (NEIS2020) | +| `--meningotype_bast` | boolean | `false` | perform Bexsero antigen sequence typing (BAST) | +| `--meningotype_mlst` | boolean | `false` | perform MLST | +| `--meningotype_all` | boolean | `false` | perform MLST, porA, fetA, porB, BAST typing | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [meningotype](/developers/subworkflows/meningotype) - Predict serotypes of Neisseria meningitidis from genome assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [meningotype](https://github.com/MDU-PHL/meningotype) + Kwong JC, Gonçalves da Silva A, Stinear TP, Howden BP, & Seemann T [meningotype: in silico typing for _Neisseria meningitidis_.](https://github.com/MDU-PHL/meningotype) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/meningotype) diff --git a/bactopia-tools/merlin.mdx b/bactopia-tools/merlin.mdx new file mode 100644 index 00000000..75fbc806 --- /dev/null +++ b/bactopia-tools/merlin.mdx @@ -0,0 +1,521 @@ +--- +title: merlin +description: "MinMER-assisted species-specific tool selection and execution." +tags: + - species-specific + - automated + - mash + - minmer + - typing + - bactopia-tool +--- + +# merlin + +**Tags:** species-specific automated mash minmer typing bactopia-tool + +MinMER-assisted species-specific tool selection and execution. + +This Bactopia Tool, Merlin, uses MinMER distances based on the RefSeq sketch to automatically +run species-specific analysis tools. Merlin identifies the closest reference genomes +and executes appropriate typing and analysis tools for each detected species. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf merlin \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/merlin/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ ├── clermontyping +│ │ ├── .tsv +│ │ ├── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ └── supplemental +│ │ ├── .blast.xml +│ │ ├── .html +│ │ └── .mash.tsv +│ ├── ectyper +│ │ ├── .blast_alleles.txt +│ │ ├── .tsv +│ │ └── logs +│ │ ├── ectyper.log +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── kleborate +│ │ ├── .tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── merlindist +│ │ └── merlin- +│ │ ├── -dist.txt +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── shigapass +│ │ ├── .tsv +│ │ ├── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ └── supplemental +│ │ └── ShigaPass_summary.csv +│ ├── shigatyper +│ │ ├── -hits.tsv +│ │ ├── .tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── shigeifinder +│ │ ├── .tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── stecfinder +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── SE +│ └── tools +│ ├── clermontyping +│ │ ├── SE.tsv +│ │ ├── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ └── supplemental +│ │ ├── SE.blast.xml +│ │ ├── SE.html +│ │ └── SE.mash.tsv +│ ├── ectyper +│ │ ├── SE.blast_alleles.txt +│ │ ├── SE.tsv +│ │ └── logs +│ │ ├── ectyper.log +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── kleborate +│ │ ├── SE.tsv +│ │ └── logs +│ │ ├── 
nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── merlindist +│ │ └── merlin- +│ │ ├── SE-dist.txt +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── shigapass +│ │ ├── SE.tsv +│ │ ├── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ └── supplemental +│ │ └── ShigaPass_summary.csv +│ ├── shigatyper +│ │ ├── SE-hits.tsv +│ │ ├── SE.tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── shigeifinder +│ │ ├── SE.tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── stecfinder +│ ├── SE.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── SRR13039589 +│ └── tools +│ ├── clermontyping +│ │ ├── SRR13039589.tsv +│ │ ├── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ └── supplemental +│ │ ├── SRR13039589.blast.xml +│ │ ├── SRR13039589.html +│ │ └── SRR13039589.mash.tsv +│ ├── ectyper +│ │ ├── SRR13039589.blast_alleles.txt +│ │ ├── SRR13039589.tsv +│ │ └── logs +│ │ ├── ectyper.log +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── kleborate +│ │ ├── SRR13039589.tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── merlindist +│ │ └── merlin- +│ │ ├── SRR13039589-dist.txt +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── shigapass +│ │ ├── SRR13039589.tsv +│ │ ├── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ └── supplemental +│ │ └── ShigaPass_summary.csv +│ ├── shigatyper +│ │ ├── SRR13039589-hits.tsv +│ │ ├── SRR13039589.tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ ├── shigeifinder +│ │ ├── SRR13039589.tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── 
versions.yml +│ └── stecfinder +│ ├── SRR13039589.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── merlin- + ├── merged-results + │ ├── clermontyping.tsv + │ ├── ectyper.tsv + │ ├── kleborate.tsv + │ ├── logs + │ │ ├── clermontyping-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ ├── ectyper-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ ├── kleborate-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ ├── shigapass-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ ├── shigatyper-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ ├── shigeifinder-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ └── stecfinder-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ ├── shigapass.tsv + │ ├── shigatyper.tsv + │ ├── shigeifinder.tsv + │ └── stecfinder.tsv + └── nf-reports + ├── merlin-dag.dot + ├── merlin-report.html + └── merlin-timeline.html +``` + +### Species-Specific Analysis + +:::note +Tools executed depend on detected species +::: + +| File | Description | +|------|-------------| +| `Analysis` | results from all executed species-specific tools | + +### Merged Results + +| File | Description | +|------|-------------| +| `merlin.tsv` | Merged summary of all species-specific analyses | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| merlin-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| merlin-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| merlin-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| merlin-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### mashdist Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mash_sketch` | string | | The reference sequence as a Mash Sketch (.msh file) | +| `--full_merlin` | boolean | `false` | Go full Merlin and run all species-specific tools, no matter the Mash distance | + +### ClermonTyping Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--clermontyping_threshold` | integer | `0` | Do not use contigs under this size | + +### ECTyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--ectyper_opid` | integer | `90` | Percent identity required for an O antigen allele match | +| `--ectyper_opcov` | integer | `90` | Minimum percent coverage required for an O antigen allele match | +| `--ectyper_hpid` | integer | `95` | Percent identity required for an H antigen allele match | +| `--ectyper_hpcov` | integer | `50` | Minimum percent coverage required for an H antigen allele match | + +### emmtyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--emmtyper_wf` | string | `blast` | Workflow for emmtyper to use. (choices: `blast`, `pcr`) | +| `--emmtyper_blastdb` | string | | Path to custom EMM BLAST DB. 
| +| `--emmtyper_cluster_distance` | integer | `500` | Distance between cluster of matches to consider as different clusters | +| `--emmtyper_percid` | integer | `95` | Minimal percent identity of sequence | + +### hicap Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--hicap_gene_coverage` | number | `0.8` | Minimum percentage coverage to consider a single gene complete | +| `--hicap_gene_identity` | number | `0.7` | Minimum percentage identity to consider a single gene complete | +| `--hicap_broken_gene_length` | integer | `60` | Minimum length to consider a broken gene | +| `--hicap_broken_gene_identity` | number | `0.8` | Minimum percentage identity to consider a broken gene | + +### Mykrobe Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mykrobe_species` | string | | Species panel to use (choices: `sonnei`, `staph`, `tb`, `typhi`) | +| `--mykrobe_opts` | string | | Extra Mykrobe options in quotes | + +### GenoTyphi Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--genotyphi_mykrobe_opts` | string | | Extra Mykrobe options in quotes | + +### Kleborate Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--kleborate_preset` | string | `kpsc` | Preset module to use for Kleborate (choices: `kpsc`, `kosc`, `escherichia`) | +| `--kleborate_opts` | string | | Extra options in quotes for Kleborate | + +### legsta Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--legsta_noheader` | boolean | `false` | Don't print header row | + +### LisSero Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--lissero_min_id` | number | `95.0` | Minimum percent identity to accept a match | +| `--lissero_min_cov` | number | `95.0` | 
Minimum coverage of the gene to accept a match | + +### ngmaster Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--ngmaster_csv` | boolean | `false` | output comma-separated format (CSV) rather than tab-separated | + +### pasty Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--pasty_min_pident` | integer | `95` | Minimum percent identity to count a hit | +| `--pasty_min_coverage` | integer | `95` | Minimum percent coverage to count a hit | + +### pbptyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--pbptyper_min_pident` | integer | `95` | Minimum percent identity to count a hit | +| `--pbptyper_min_coverage` | integer | `95` | Minimum percent coverage to count a hit | + +### SeqSero2 Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--seqsero2_run_mode` | string | `k` | Workflow to run. 'a' allele mode, or 'k' k-mer mode (choices: `a`, `k`) | +| `--seqsero2_input_type` | string | `assembly` | Input format to analyze. 'assembly' or 'fastq' (choices: `assembly`, `fastq`) | + +### SeroBA Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--seroba_coverage` | integer | `20` | Threshold for k-mer coverage of the reference sequence | + +### SISTR Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--sistr_full_cgmlst` | boolean | `false` | Use the full set of cgMLST alleles which can include highly similar alleles | + +### AgrVATE Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--agrvate_typing_only` | boolean | `false` | agr typing only. 
Skips agr operon extraction and frameshift detection | + +### spaTyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--spatyper_do_enrich` | boolean | `false` | Do PCR product enrichment | + +### sccmec Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--sccmec_min_targets_pident` | integer | `90` | Minimum percent identity to count a target hit | +| `--sccmec_min_targets_coverage` | integer | `80` | Minimum percent coverage to count a target hit | +| `--sccmec_min_regions_pident` | integer | `85` | Minimum percent identity to count a region hit | +| `--sccmec_min_regions_coverage` | integer | `93` | Minimum percent coverage to count a region hit | + +### STECFinder Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--stecfinder_use_reads` | boolean | `false` | Paired-end Illumina reads will be used instead of assemblies | +| `--stecfinder_hits` | boolean | `false` | Show detailed gene search results | +| `--stecfinder_cutoff` | number | `10.0` | Minimum read coverage for gene to be called | +| `--stecfinder_length` | number | `50.0` | Percentage of gene length needed for positive call | + +### TB-Profiler Profile Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--tbprofiler_call_whole_genome` | boolean | `false` | Call whole genome | +| `--tbprofiler_mapper` | string | `bwa` | Mapping tool to use. 
If you are using nanopore data it will default to minimap2 (choices: `bwa`, `minimap2`, `bowtie2`, `bwa-mem2`) | +| `--tbprofiler_caller` | string | `freebayes` | Variant calling tool to use (choices: `bcftools`, `gatk`, `freebayes`) | +| `--tbprofiler_opts` | string | | Extra options in quotes for TBProfiler | + +### TB-Profiler Collate Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--tbprofiler_itol` | boolean | `false` | Generate itol config files | +| `--tbprofiler_full` | boolean | `false` | Output mutations in main result file | +| `--tbprofiler_all_variants` | boolean | `false` | Output all variants in variant matrix | +| `--tbprofiler_mark_missing` | boolean | `false` | An asterisk will be used to mark predictions which are affected by missing data at a drug resistance position | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [bactopia_datasets](/developers/subworkflows/bactopia_datasets) - Download and provide pre-compiled datasets required by Bactopia. +- [merlin](/developers/subworkflows/merlin) - MinmER assisted species-specific bactopia tool seLectIoN. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mash](https://github.com/marbl/Mash) + Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). _Genome Biol_ 17, 132 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/merlin) diff --git a/bactopia-tools/midas.mdx b/bactopia-tools/midas.mdx new file mode 100644 index 00000000..8749ac50 --- /dev/null +++ b/bactopia-tools/midas.mdx @@ -0,0 +1,203 @@ +--- +title: midas +description: "Estimate species abundances from metagenomic samples." +tags: + - metagenomics + - species-abundance + - profiling + - midas + - bactopia-tool +--- + +# midas + +**Tags:** metagenomics species-abundance profiling midas bactopia-tool + +Estimate species abundances from metagenomic samples. + +This Bactopia Tool uses [MIDAS](https://github.com/snayfach/MIDAS) to estimate +bacterial species abundances in metagenomic samples. MIDAS uses a database +with more than 30,000 reference genomes for accurate species profiling. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf midas \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/midas/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── midas- +│ ├── .midas.abundances.txt +│ ├── .midas.adjusted.abundances.txt +│ ├── .midas.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── midas- + ├── merged-results + │ ├── logs + │ │ └── midas-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── midas.tsv + └── nf-reports + ├── midas-dag.dot + ├── midas-report.html + └── midas-timeline.html +``` + +### Species Abundance + +| File | Description | +|------|-------------| +| `*.tsv` | Species abundance profiles | +| `*-species.tsv` | Species-level abundance | +| `*-genes.tsv` | Gene-level abundance | + +### Merged Results + +| File | Description | +|------|-------------| +| `midas.tsv` | Merged TSV file containing MIDAS results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| midas-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| midas-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| midas-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| midas-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### MIDAS Database Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--midas_db` | string | | A single tarball or path to a MIDAS formatted database | +| `--midas_save_as_tarball` | boolean | `false` | Save the MIDAS database as a tarball | +| `--download_midas` | boolean | `false` | Download the MIDAS database to the path given by --midas_db | + +### MIDAS Species Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--midas_word_size` | integer | `28` | Word size for BLAST search | +| `--midas_aln_cov` | number | `0.75` | Discard reads with alignment coverage < ALN_COV | +| `--midas_opts` | string | | Extra MIDAS options | +| `--midas_debug` | boolean | `false` | Keep all temporary files created by MIDAS | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [midas](/developers/subworkflows/midas) - Species-level profiling from metagenomic data. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [MIDAS](https://github.com/snayfach/MIDAS) + Nayfach S, Rodriguez-Mueller B, Garud N, and Pollard KS [An integrated metagenomics pipeline for strain profiling reveals novel patterns of bacterial transmission and biogeography.](https://doi.org/10.1101/gr.201863.115) _Genome Research_, 26(11), 1612-1625. (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/midas) diff --git a/bactopia-tools/mlst.mdx b/bactopia-tools/mlst.mdx new file mode 100644 index 00000000..cc345cae --- /dev/null +++ b/bactopia-tools/mlst.mdx @@ -0,0 +1,201 @@ +--- +title: mlst +description: "Automatic Multi-Locus Sequence Type (MLST) calling from assembled contigs." +tags: + - typing + - mlst + - sequence-type + - alleles + - pubmlst + - bactopia-tool +--- + +# mlst + +**Tags:** typing mlst sequence-type alleles pubmlst bactopia-tool + +Automatic Multi-Locus Sequence Type (MLST) calling from assembled contigs. + +This Bactopia Tool uses [mlst](https://github.com/tseemann/mlst) to scan genome assemblies +and determine the sequence type based on [PubMLST](https://pubmlst.org/) schemes. The workflow +automatically detects the appropriate MLST scheme for each organism and provides +standardized sequence type assignments. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf mlst \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/mlst/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── mlst- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── mlst- + ├── merged-results + │ ├── logs + │ │ └── mlst-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── mlst.tsv + └── nf-reports + ├── mlst-dag.dot + ├── mlst-report.html + └── mlst-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Tab-delimited file with MLST results including scheme, ST, and allele profiles | + +### Merged Results + +| File | Description | +|------|-------------| +| `mlst.tsv` | Merged TSV file containing MLST results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. 
These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| mlst-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| mlst-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| mlst-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| mlst-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### MLST Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mlst_scheme` | string | | Don't autodetect, force this scheme on all inputs | +| `--mlst_minid` | integer | `95` | Minimum DNA percent identity of full allele to consider 'similar' | +| `--mlst_mincov` | integer | `10` | Minimum DNA percent coverage to report partial allele at all | +| `--mlst_minscore` | integer | `50` | Minimum score out of 100 to match a scheme | +| `--mlst_nopath` | boolean | `false` | Strip filename paths from FILE column | +| `--mlst_db` | string | | A custom MLST database to use, either a tarball or a directory | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [mlst](/developers/subworkflows/mlst) - Determine multilocus sequence types (MLST) from bacterial assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [mlst](https://github.com/tseemann/mlst) + Seemann T [mlst: scan contig files against PubMLST typing schemes](https://github.com/tseemann/mlst) (GitHub) + +- [PubMLST.org](https://pubmlst.org/) + Jolley KA, Bray JE, Maiden MCJ [Open-access bacterial population genomics: BIGSdb software, the PubMLST.org website and their applications.](http://dx.doi.org/10.12688/wellcomeopenres.14826.1) _Wellcome Open Res_ 3, 124 (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/mlst) diff --git a/bactopia-tools/mobsuite.mdx b/bactopia-tools/mobsuite.mdx new file mode 100644 index 00000000..5b58eb3e --- /dev/null +++ b/bactopia-tools/mobsuite.mdx @@ -0,0 +1,204 @@ +--- +title: mobsuite +description: "Reconstruction and annotation of plasmids from bacterial genome assemblies." +tags: + - plasmid + - reconstruction + - annotation + - mobile-genetic-elements + - typing + - bactopia-tool +--- + +# mobsuite + +**Tags:** plasmid reconstruction annotation mobile-genetic-elements typing bactopia-tool + +Reconstruction and annotation of plasmids from bacterial genome assemblies. + +This Bactopia Tool uses [MOB-suite](https://github.com/phac-nml/mob-suite) to identify, reconstruct, +and annotate plasmid sequences from draft genome assemblies. 
It separates plasmid from chromosomal +contigs, groups plasmid sequences into clusters, and provides comprehensive plasmid typing +and mobility information. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf mobsuite \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/mobsuite/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── mobsuite- +│ ├── -chromosome.fasta.gz +│ ├── -contig_report.txt +│ ├── -mobtyper.txt +│ ├── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── plasmid_AA840.fasta.gz +└── bactopia-runs + └── mobsuite- + ├── merged-results + │ ├── logs + │ │ └── mobsuite-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── mobsuite.tsv + └── nf-reports + ├── mobsuite-dag.dot + ├── mobsuite-report.html + └── mobsuite-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `chromosome.fasta` | FASTA file containing all contigs identified as chromosomal | +| `contig_report.txt` | Report assigning each contig to chromosome or plasmid group | +| `plasmid_*.fasta` | Individual FASTA files for each reconstructed plasmid | +| `*-mobtyper.txt` | MOB-typer report with plasmid typing and mobility information | + +### Merged Results + +| File | Description | +|------|-------------| +| `mobsuite.tsv` | Merged TSV file containing MOB-suite results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| mobsuite-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| mobsuite-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| mobsuite-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| mobsuite-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data.
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### MOB-suite Recon Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mobsuite_max_contig_size` | integer | `310000` | Maximum size of a contig to be considered a plasmid | +| `--mobsuite_min_contig_size` | integer | `1000` | Minimum length of contigs to classify | +| `--mobsuite_max_plasmid_size` | integer | `350000` | Maximum size of a reconstructed plasmid | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [mobsuite](/developers/subworkflows/mobsuite) - Reconstruct and type plasmids from bacterial genome assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [MOB-suite](https://github.com/phac-nml/mob-suite) + Robertson J, Nash JHE [MOB-suite: software tools for clustering, reconstruction and typing of plasmids from draft assemblies.](https://doi.org/10.1099/mgen.0.000206) _Microbial Genomics_ 4(8). (2018) + +- [MOB-suite Database](https://github.com/phac-nml/mob-suite) + Robertson J, Bessonov K, Schonfeld J, Nash JHE. [Universal whole-sequence-based plasmid typing and its utility to prediction of host range and epidemiological surveillance.](https://doi.org/10.1099/mgen.0.000435) _Microbial Genomics_, 6(10)(2020) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/mobsuite) diff --git a/bactopia-tools/mykrobe.mdx b/bactopia-tools/mykrobe.mdx new file mode 100644 index 00000000..b2a52e5a --- /dev/null +++ b/bactopia-tools/mykrobe.mdx @@ -0,0 +1,191 @@ +--- +title: mykrobe +description: "Antimicrobial resistance detection for specific bacterial species." +tags: + - fastq + - antimicrobial-resistance + - species-specific + - mykrobe + - bactopia-tool +--- + +# mykrobe + +**Tags:** fastq antimicrobial-resistance species-specific mykrobe bactopia-tool + +Antimicrobial resistance detection for specific bacterial species. 
+ +This Bactopia Tool uses [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) to predict +antimicrobial resistance for _Mycobacterium tuberculosis_, _Staphylococcus aureus_, +_Shigella sonnei_, and _Salmonella typhi_ from sequencing data. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf mykrobe \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/mykrobe/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── mykrobe- +│ ├── .csv +│ ├── .json +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── mykrobe- + ├── merged-results + │ ├── logs + │ │ └── mykrobe-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── mykrobe.csv + └── nf-reports + ├── mykrobe-dag.dot + ├── mykrobe-report.html + └── mykrobe-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.json` | Mykrobe analysis results in JSON format | +| `*.txt` | Tab-delimited report of resistance predictions | + +### Merged Results + +| File | Description | +|------|-------------| +| `mykrobe.tsv` | Merged TSV file containing results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| mykrobe-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| mykrobe-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| mykrobe-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| mykrobe-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Mykrobe Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mykrobe_species` | string | | Species panel to use (choices: `sonnei`, `staph`, `tb`, `typhi`) | +| `--mykrobe_opts` | string | | Extra Mykrobe options in quotes | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [mykrobe](/developers/subworkflows/mykrobe) - Predict antibiotic resistance from sequence reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) + Hunt M, Bradley P, Lapierre SG, Heys S, Thomsit M, Hall MB, Malone KM, Wintringer P, Walker TM, Cirillo DM, Comas I, Farhat MR, Fowler P, Gardy J, Ismail N, Kohl TA, Mathys V, Merker M, Niemann S, Omar SV, Sintchenko V, Smith G, Supply P, Tahseen S, Wilcox M, Arandjelovic I, Peto TEA, Crook, DW, Iqbal Z [Antibiotic resistance prediction for Mycobacterium tuberculosis from genome sequence data with Mykrobe](https://doi.org/10.12688/wellcomeopenres.15603.1) _Wellcome Open Research_ 4, 191. (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/mykrobe) diff --git a/bactopia-tools/ngmaster.mdx b/bactopia-tools/ngmaster.mdx new file mode 100644 index 00000000..befe2bfb --- /dev/null +++ b/bactopia-tools/ngmaster.mdx @@ -0,0 +1,186 @@ +--- +title: ngmaster +description: "Multi-antigen sequence typing of Neisseria gonorrhoeae." +tags: + - neisseria-gonorrhoeae + - ng-mast + - typing + - bactopia-tool +--- + +# ngmaster + +**Tags:** neisseria-gonorrhoeae ng-mast typing bactopia-tool + +Multi-antigen sequence typing of Neisseria gonorrhoeae. + +This Bactopia Tool uses [ngmaster](https://github.com/MDU-PHL/ngmaster) for +_in silico_ multi-antigen sequence typing (NG-MAST) of _Neisseria gonorrhoeae_. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf ngmaster \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/ngmaster/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── ngmaster- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── ngmaster- + ├── merged-results + │ ├── logs + │ │ └── ngmaster-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── ngmaster.tsv + └── nf-reports + ├── ngmaster-dag.dot + ├── ngmaster-report.html + └── ngmaster-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | NG-MAST typing results | + +### Merged Results + +| File | Description | +|------|-------------| +| `ngmaster.tsv` | Merged TSV file containing NG-MASTER results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide great a great summary of your run. 
These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| ngmaster-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| ngmaster-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| ngmaster-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| ngmaster-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### ngmaster Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--ngmaster_csv` | boolean | `false` | output comma-separated format (CSV) rather than tab-separated | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [ngmaster](/developers/subworkflows/ngmaster) - Perform multi-antigen sequence typing of Neisseria gonorrhoeae from genome assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ngmaster](https://github.com/MDU-PHL/ngmaster) + Kwong J, Gonçalves da Silva A, Schultz M, Seemann T [ngmaster - _In silico_ multi-antigen sequence typing for _Neisseria gonorrhoeae_ (NG-MAST)](https://github.com/MDU-PHL/ngmaster) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/ngmaster) diff --git a/bactopia-tools/pangenome.mdx b/bactopia-tools/pangenome.mdx new file mode 100644 index 00000000..4dac52f1 --- /dev/null +++ b/bactopia-tools/pangenome.mdx @@ -0,0 +1,419 @@ +--- +title: pangenome +description: "Pangenome analysis with optional core-genome phylogeny." +tags: + - alignment + - core-genome + - pan-genome + - phylogeny + - comparative-genomics + - bactopia-tool +--- + +# pangenome + +**Tags:** alignment core-genome pan-genome phylogeny comparative-genomics bactopia-tool + +Pangenome analysis with optional core-genome phylogeny. + +This Bactopia Tool creates a pangenome from GFF3 annotation files using one of three +tools: [Panaroo](https://github.com/gtonkinhill/panaroo) (default), +[PIRATE](https://github.com/SionBayliss/PIRATE), or +[Roary](https://github.com/sanger-pathogens/roary). It generates core-genome alignments +and gene presence/absence matrices, followed by SNP distance calculations. +You can supplement your pangenome with completed genomes using the --species or +--accessions parameters, which download genomes from RefSeq and annotate them with +Prokka.
A phylogeny based on the core-genome alignment is created by IQ-Tree, with +optional recombination masking using ClonalFrameML. Finally, pan-genome wide +association studies can be conducted using Scoary. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf pangenome \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/pangenome/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +└── + └── pangenome- + ├── clonalframeml + │ ├── core-genome.ML_sequence.fasta.gz + │ ├── core-genome.em.txt + │ ├── core-genome.emsim.txt + │ ├── core-genome.importation_status.txt + │ ├── core-genome.labelled_tree.newick + │ ├── core-genome.position_cross_reference.txt.gz + │ └── logs + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + ├── core-genome.distance.tsv + ├── core-genome.masked.aln.gz + ├── core-genome.masked.distance.tsv + ├── core-genome.treefile + ├── iqtree + │ ├── core-genome.alninfo.gz + │ ├── core-genome.bionj + │ ├── core-genome.ckp.gz + │ ├── core-genome.contree + │ ├── core-genome.iqtree + │ ├── core-genome.log + │ ├── core-genome.mldist + │ ├── core-genome.splits.nex + │ ├── core-genome.ufboot + │ └── logs + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + ├── iqtree-fast + │ ├── logs + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ ├── roary.bionj + │ ├── roary.ckp.gz + │ ├── roary.iqtree + │ ├── roary.log + │ ├── roary.mldist + │ ├── roary.model.gz + │ └── roary.treefile + ├── nf-reports + │ ├── pangenome-dag.dot + │ ├── pangenome-report.html + │ └── pangenome-timeline.html + ├── roary + │ ├── accessory.header.embl + │ ├── accessory.tab + │ ├── accessory_binary_genes.fa.gz + │ ├── accessory_binary_genes.fa.newick + │ ├── accessory_graph.dot + │ ├── blast_identity_frequency.Rtab + │ ├── clustered_proteins + │ ├── core_accessory.header.embl + │ ├── 
core_accessory.tab + │ ├── core_accessory_graph.dot + │ ├── core_alignment_header.embl + │ ├── core_gene_alignment.aln.gz + │ ├── gene_presence_absence.Rtab + │ ├── gene_presence_absence.csv + │ ├── logs + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ ├── number_of_conserved_genes.Rtab + │ ├── number_of_genes_in_pan_genome.Rtab + │ ├── number_of_new_genes.Rtab + │ ├── number_of_unique_genes.Rtab + │ ├── pan_genome_reference.fa.gz + │ └── summary_statistics.txt + ├── roary.aln.gz + ├── scoary + │ ├── Bogus_trait.results.csv + │ ├── Tetracycline_resistance.results.csv + │ └── logs + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ ├── scoary.log + │ └── versions.yml + ├── snpdists + │ └── logs + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── snpdists-masked + └── logs + ├── nf.command.{begin,err,log,out,run,sh,trace} + └── versions.yml +``` + +### Pangenome Results + +| File | Description | +|------|-------------| +| `*.aln` | Core-genome alignment file containing genes present across all input genomes | +| `*.csv` | Gene presence/absence matrix showing which genes are present in each genome | +| `*.tsv` | SNP distance matrix between all samples | + +### Phylogeny Results + +:::note +Only created if --skip_phylogeny is not enabled +::: + +| File | Description | +|------|-------------| +| `*.treefile` | Maximum likelihood phylogenetic tree in Newick format | +| `*.iqtree` | IQ-Tree analysis report with model selection and support values | +| `*.log` | IQ-Tree execution log | + +### Recombination Analysis + +:::note +Only created if --skip_recombination is not enabled +::: + +| File | Description | +|------|-------------| +| `*.masked.aln` | Core-genome alignment with recombination regions masked | + +### Association Analysis + +:::note +Only created if --scoary_traits is specified +::: + +| File | Description | +|------|-------------| +| `scoary/*` | Scoary association analysis results and plots | + 
+### Panaroo Results + +:::note +Only created when Panaroo is selected as the pangenome tool +::: + +| File | Description | +|------|-------------| +| `panaroo/*` | Panaroo-specific output files including graph and statistics | + +### PIRATE Results + +:::note +Only created when PIRATE is selected as the pangenome tool +::: + +| File | Description | +|------|-------------| +| `pirate/*` | PIRATE-specific output files including gene families and clusters | + +### Roary Results + +:::note +Only created when Roary is selected as the pangenome tool +::: + +| File | Description | +|------|-------------| +| `roary/*` | Roary-specific output files including gene presence/absence matrices | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide great a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. 
+ +| Filename | Description | +|----------|-------------| +| pangenome-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| pangenome-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| pangenome-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| pangenome-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### NCBI Genome Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--species` | string | | Name of the species to download assemblies | +| `--accession` | string | | An NCBI Assembly accession to be downloaded | +| `--accessions` | string | | An file of NCBI Assembly accessions (one per line) to be downloaded | +| `--format` | string | `fasta` | Comma separated list of formats to download | +| `--limit` | string | | Limit the number of assemblies to download | +| `--keep_downloads` | boolean | `false` | Save downloaded files into the bactopia-runs folder | + +### Prokka Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--prokka_proteins` | string | `${projectDir}/data/proteins.faa` | FASTA file of trusted proteins to first annotate from | +| `--prokka_prodigal_tf` | string | | Training file to use for Prodigal | +| `--prokka_coverage` | integer | `80` | Minimum coverage on query protein | + +### PIRATE Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--use_pirate` | boolean | `false` 
| Use PIRATE instead of panaroo in the 'pangenome' subworkflow | +| `--pirate_steps` | string | `50,60,70,80,90,95,98` | Percent identity thresholds to use for pangenome construction | +| `--pirate_features` | string | `CDS` | Comma-delimited features to use for pangenome construction | +| `--pirate_para_off` | boolean | `false` | Switch off paralog identification | + +### Roary Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--use_roary` | boolean | `false` | Use Roary instead of PIRATE in the 'pangenome' subworkflow | +| `--roary_cd` | integer | `99` | Percentage of isolates a gene must be in to be core | +| `--roary_s` | boolean | `false` | Do not split paralogs | +| `--roary_ap` | boolean | `false` | Allow paralogs in core alignment | + +### Panaroo Run Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--panaroo_merge_paralogs` | boolean | `false` | Do not split paralogs | +| `--panaroo_opts` | string | | Additional options to pass to panaroo | + +### SNP-Dists Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--snpdists_a` | boolean | `false` | Count all differences not just [AGTC] | + +### ClonalFrameML Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--clonalframeml_emsim` | integer | `100` | Number of simulations to estimate uncertainty in the EM results | +| `--skip_recombination` | boolean | `false` | Skip ClonalFrameML execution in subworkflows | + +### IQ-TREE Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--iqtree_model` | string | `HKY` | Substitution model name | +| `--iqtree_bb` | integer | `1000` | Ultrafast bootstrap replicates | +| `--iqtree_alrt` | integer | `1000` | SH-like approximate likelihood ratio test replicates | +| `--iqtree_asr` | boolean | 
`false` | Ancestral state reconstruction by empirical Bayes | +| `--skip_phylogeny` | boolean | `false` | Skip IQ-TREE execution in subworkflows | + +### Scoary Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--scoary_traits` | string | | Input trait table (CSV) to test for associations | +| `--scoary_permute` | integer | `0` | Perform N number of permutations of the significant results post-analysis | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [clonalframeml](/developers/subworkflows/clonalframeml) - Detect and mask recombination events in bacterial phylogenies. +- [iqtree](/developers/subworkflows/iqtree) - Construct maximum likelihood phylogenetic trees from alignments. +- [ncbigenomedownload](/developers/subworkflows/ncbigenomedownload) - Download bacterial genomes from NCBI's RefSeq database. +- [pangenome](/developers/subworkflows/pangenome) - Perform pangenome analysis with optional core-genome phylogeny. +- [prokka](/developers/subworkflows/prokka) - Annotate bacterial genomes with functional information. +- [scoary](/developers/subworkflows/scoary) - Pan-genome wide association studies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ClonalFrameML](https://github.com/xavierdidelot/ClonalFrameML) + Didelot X, Wilson DJ [ClonalFrameML: Efficient Inference of Recombination in Whole Bacterial Genomes.](https://doi.org/10.1371/journal.pcbi.1004041) _PLoS Comput Biol_ 11(2) e1004041 (2015) + +- [IQ-TREE](https://github.com/Cibiv/IQ-TREE) + Nguyen L-T, Schmidt HA, von Haeseler A, Minh BQ [IQ-TREE: A fast and effective stochastic algorithm for estimating maximum likelihood phylogenies.](https://doi.org/10.1093/molbev/msu300) _Mol. Biol. Evol._ 32:268-274 (2015) + +- [ModelFinder](https://github.com/Cibiv/IQ-TREE) + Kalyaanamoorthy S, Minh BQ, Wong TKF, von Haeseler A, Jermiin LS [ModelFinder - Fast model selection for accurate phylogenetic estimates.](https://doi.org/10.1038/nmeth.4285) _Nat. 
Methods_ 14:587-589 (2017) + +- [UFBoot2](https://github.com/Cibiv/IQ-TREE) + Hoang DT, Chernomor O, von Haeseler A, Minh BQ, Vinh LS [UFBoot2: Improving the ultrafast bootstrap approximation.](https://doi.org/10.1093/molbev/msx281) _Mol. Biol. Evol._ 35:518-522 (2018) + +- [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) + Blin K [ncbi-genome-download: Scripts to download genomes from the NCBI FTP servers](https://github.com/kblin/ncbi-genome-download) (GitHub) + +- [Panaroo](https://github.com/gtonkinhill/panaroo) + Tonkin-Hill G, MacAlasdair N, Ruis C, Weimann A, Horesh G, Lees JA, Gladstone RA, Lo S, Beaudoin C, Floto RA, Frost SDW, Corander J, Bentley SD, Parkhill J [Producing polished prokaryotic pangenomes with the Panaroo pipeline.](https://doi.org/10.1186/s13059-020-02090-4) _Genome Biology_ 21(1), 180. (2020) + +- [PIRATE](http://github.com/SionBayliss/PIRATE) + Bayliss SC, Thorpe HA, Coyle NM, Sheppard SK, Feil EJ [PIRATE: A fast and scalable pangenomics toolbox for clustering diverged orthologues in bacteria.](https://doi.org/10.1093/gigascience/giz119) _Gigascience_ 8 (2019) + +- [Prokka](https://github.com/tseemann/prokka) + Seemann T [Prokka: rapid prokaryotic genome annotation](http://dx.doi.org/10.1093/bioinformatics/btu153) _Bioinformatics_ 30, 2068-2069 (2014) + +- [Roary](https://github.com/sanger-pathogens/Roary) + Page AJ, Cummins CA, Hunt M, Wong VK, Reuter S, Holden MTG, Fookes M, Falush D, Keane JA, Parkhill J [Roary: rapid large-scale prokaryote pan genome analysis.](https://doi.org/10.1093/bioinformatics/btv421) _Bioinformatics_ 31, 3691-3693 (2015) + +- [Scoary](https://github.com/AdmiralenOla/Scoary) + Brynildsrud O, Bohlin J, Scheffer L, Eldholm V [Rapid scoring of genes in microbial pan-genome-wide association studies with Scoary.](https://doi.org/10.1186/s13059-016-1108-8) _Genome Biol._ 17:238 (2016) + +## Source + +[View source on 
GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/pangenome) diff --git a/bactopia-tools/pasty.mdx b/bactopia-tools/pasty.mdx new file mode 100644 index 00000000..20c8109d --- /dev/null +++ b/bactopia-tools/pasty.mdx @@ -0,0 +1,190 @@ +--- +title: pasty +description: "In silico serogrouping of Pseudomonas aeruginosa isolates." +tags: + - fasta + - serogrouping + - pseudomonas-aeruginosa + - typing + - bactopia-tool +--- + +# pasty + +**Tags:** fasta serogrouping pseudomonas-aeruginosa typing bactopia-tool + +In silico serogrouping of Pseudomonas aeruginosa isolates. + +This Bactopia Tool uses [pasty](https://github.com/rpetit3/pasty) for +serogrouping of _Pseudomonas aeruginosa_ isolates from genome assemblies. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf pasty \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/pasty/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── pasty- +│ ├── .blastn.tsv +│ ├── .details.tsv +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── pasty- + ├── merged-results + │ ├── logs + │ │ └── pasty-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── pasty.tsv + └── nf-reports + ├── pasty-dag.dot + ├── pasty-report.html + └── pasty-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | Serogrouping results for each sample | + +### Merged Results + +| File | Description | +|------|-------------| +| `pasty.tsv` | Merged TSV file containing Pasty results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. 
In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| pasty-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| pasty-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| pasty-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| pasty-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### pasty Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--pasty_min_pident` | integer | `95` | Minimum percent identity to count a hit | +| `--pasty_min_coverage` | integer | `95` | Minimum percent coverage to count a hit | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [pasty](/developers/subworkflows/pasty) - Predict serogroups of Pseudomonas aeruginosa from assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [pasty](https://github.com/rpetit3/pasty) + Petit III RA [pasty: in silico serogrouping of _Pseudomonas aeruginosa_ isolates](https://github.com/rpetit3/pasty) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/pasty) diff --git a/bactopia-tools/pbptyper.mdx b/bactopia-tools/pbptyper.mdx new file mode 100644 index 00000000..e1b0d168 --- /dev/null +++ b/bactopia-tools/pbptyper.mdx @@ -0,0 +1,190 @@ +--- +title: pbptyper +description: "Penicillin Binding Protein (PBP) typing for Streptococcus pneumoniae." +tags: + - streptococcus-pneumoniae + - pbp + - penicillin-resistance + - typing + - bactopia-tool +--- + +# pbptyper + +**Tags:** streptococcus-pneumoniae pbp penicillin-resistance typing bactopia-tool + +Penicillin Binding Protein (PBP) typing for Streptococcus pneumoniae. + +This Bactopia Tool uses [pbptyper](https://github.com/rpetit3/pbptyper) for typing +the Penicillin Binding Protein (PBP) of _Streptococcus pneumoniae_ assemblies +to predict penicillin susceptibility. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf pbptyper \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/pbptyper/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── pbptyper- +│ ├── .tblastn.tsv +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── pbptyper- + ├── merged-results + │ ├── logs + │ │ └── pbptyper-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── pbptyper.tsv + └── nf-reports + ├── pbptyper-dag.dot + ├── pbptyper-report.html + └── pbptyper-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | PBP typing results for each sample | + +### Merged Results + +| File | Description | +|------|-------------| +| `pbptyper.tsv` | Merged TSV file containing PBP typing results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| pbptyper-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| pbptyper-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| pbptyper-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| pbptyper-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### pbptyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--pbptyper_min_pident` | integer | `95` | Minimum percent identity to count a hit | +| `--pbptyper_min_coverage` | integer | `95` | Minimum percent coverage to count a hit | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [pbptyper](/developers/subworkflows/pbptyper) - Predict penicillin binding protein (PBP) types of Streptococcus pneumoniae from genome assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [pbptyper](https://github.com/rpetit3/pbptyper) + Petit III RA [pbptyper: In silico Penicillin Binding Protein (PBP) typer for _Streptococcus pneumoniae_ assemblies](https://github.com/rpetit3/pbptyper) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/pbptyper) diff --git a/bactopia-tools/phispy.mdx b/bactopia-tools/phispy.mdx new file mode 100644 index 00000000..738f4471 --- /dev/null +++ b/bactopia-tools/phispy.mdx @@ -0,0 +1,203 @@ +--- +title: phispy +description: "Prediction of prophages in bacterial and archaeal genomes." +tags: + - prophage + - phage + - annotation + - bacterial + - archaeal + - bactopia-tool +--- + +# phispy + +**Tags:** prophage phage annotation bacterial archaeal bactopia-tool + +Prediction of prophages in bacterial and archaeal genomes. + +This Bactopia Tool uses [PhiSpy](https://github.com/linsalrob/PhiSpy) to identify prophages +from bacterial and archaeal genomes using machine learning approaches. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf phispy \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/phispy/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── phispy- +│ ├── .tsv +│ ├── logs +│ │ ├── .log +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ └── _.gbk.gz +├── GCF_900478275 +│ └── tools +│ └── phispy- +│ ├── GCF_900478275.tsv +│ ├── logs +│ │ ├── GCF_900478275.log +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ └── GCF_900478275_GCF_900478275.gbk.gz +└── bactopia-runs + └── phispy- + ├── merged-results + │ ├── logs + │ │ └── phispy-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── phispy.tsv + └── nf-reports + ├── phispy-dag.dot + ├── phispy-report.html + └── phispy-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | Tab-delimited file containing prophage predictions | + +### Merged Results + +| File | Description | +|------|-------------| +| `phispy.tsv` | Merged TSV file containing prophage predictions from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| phispy-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| phispy-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| phispy-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| phispy-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### PhiSpy Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--phispy_number` | integer | `5` | Number of consecutive genes in a region of window size that must be prophage genes to be called | +| `--phispy_mincontigsize` | integer | `5000` | Minimum contig size (in bp) to be included in the analysis. Smaller contigs will be dropped. 
| +| `--phispy_opts` | string | | Extra options in quotes for PhiSpy | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [phispy](/developers/subworkflows/phispy) - Prediction of prophages from bacterial genomes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PhiSpy](https://github.com/linsalrob/PhiSpy) + Akhter S, Aziz RK, and Edwards RA [PhiSpy: a novel algorithm for finding prophages in bacterial genomes that combines similarity- and composition-based strategies.](https://doi.org/10.1093/nar/gks406) _Nucleic Acids Research_, 40(16), e126. (2012) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/phispy) diff --git a/bactopia-tools/plasmidfinder.mdx b/bactopia-tools/plasmidfinder.mdx new file mode 100644 index 00000000..d9259865 --- /dev/null +++ b/bactopia-tools/plasmidfinder.mdx @@ -0,0 +1,193 @@ +--- +title: plasmidfinder +description: "Bactopia Tool: Plasmidfinder." +tags: + - plasmid + - identification + - replicon + - typing + - assembly + - bactopia-tool +--- + +# plasmidfinder + +**Tags:** plasmid identification replicon typing assembly bactopia-tool + +Bactopia Tool: Plasmidfinder. + +Plasmid identification from assemblies +The `plasmidfinder` module identifies plasmids in total or partial sequenced isolates of bacteria. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf plasmidfinder \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/plasmidfinder/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── plasmidfinder- +│ ├── -hit_in_genome_seq.fsa.gz +│ ├── -plasmid_seqs.fsa.gz +│ ├── .json +│ ├── .tsv +│ ├── .txt +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── plasmidfinder- + ├── merged-results + │ ├── logs + │ │ └── plasmidfinder-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── plasmidfinder.tsv + └── nf-reports + ├── plasmidfinder-dag.dot + ├── plasmidfinder-report.html + └── plasmidfinder-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*` | Analysis results | + +### Merged Results + +| File | Description | +|------|-------------| +| `merged-*` | Aggregated results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| plasmidfinder-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| plasmidfinder-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| plasmidfinder-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| plasmidfinder-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### PlasmidFinder Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--plasmidfinder_mincov` | number | `0.6` | Minimum percent coverage to be considered a hit | +| `--plasmidfinder_threshold` | number | `0.9` | Minimum threshold for identity | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [plasmidfinder](/developers/subworkflows/plasmidfinder) - Identify plasmid replicons in bacterial genome assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PlasmidFinder](https://bitbucket.org/genomicepidemiology/plasmidfinder) + Carattoli A, Zankari E, García-Fernández A, Voldby Larsen M, Lund O, Villa L, Møller Aarestrup F, Hasman H [In silico detection and typing of plasmids using PlasmidFinder and plasmid multilocus sequence typing.](https://doi.org/10.1128/AAC.02412-14) _Antimicrobial Agents and Chemotherapy_ 58(7), 3895-3903. (2014) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/plasmidfinder) diff --git a/bactopia-tools/pneumocat.mdx b/bactopia-tools/pneumocat.mdx new file mode 100644 index 00000000..376fa37c --- /dev/null +++ b/bactopia-tools/pneumocat.mdx @@ -0,0 +1,171 @@ +--- +title: pneumocat +description: "Capsular type assignment to Streptococcus pneumoniae from sequence reads." +tags: + - streptococcus-pneumoniae + - capsular-typing + - pneumocat + - bactopia-tool +--- + +# pneumocat + +**Tags:** streptococcus-pneumoniae capsular-typing pneumocat bactopia-tool + +Capsular type assignment to Streptococcus pneumoniae from sequence reads. + +This Bactopia Tool uses [PneumoCaT](https://github.com/ukhsa-collaboration/PneumoCaT) to assign capsular +type to _Streptococcus pneumoniae_ from sequence reads for epidemiological typing. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf pneumocat \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/pneumocat/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── pneumocat- +│ ├── .coverage_summary.txt +│ ├── .xml +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ ├── pneumo_capsular_typing.stderr.log +│ ├── pneumo_capsular_typing.stdout.log +│ └── versions.yml +└── bactopia-runs + └── pneumocat- + └── nf-reports + ├── pneumocat-dag.dot + ├── pneumocat-report.html + └── pneumocat-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | Capsular type assignment results | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. 
+ +| Filename | Description | +|----------|-------------| +| pneumocat-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| pneumocat-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| pneumocat-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| pneumocat-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [pneumocat](/developers/subworkflows/pneumocat) - Perform capsular typing of Streptococcus pneumoniae from NGS data. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PneumoCaT](https://github.com/ukhsa-collaboration/PneumoCaT) + Kapatai G, Sheppard CL, Al-Shahib A, Litt DJ, Underwood AP, Harrison TG, and Fry NK [Whole genome sequencing of Streptococcus pneumoniae: development, evaluation and verification of targets for serogroup and serotype prediction using an automated pipeline.](https://doi.org/10.7717/peerj.2477) PeerJ, 4, e2477. (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/pneumocat) diff --git a/bactopia-tools/prokka.mdx b/bactopia-tools/prokka.mdx new file mode 100644 index 00000000..27ea1f31 --- /dev/null +++ b/bactopia-tools/prokka.mdx @@ -0,0 +1,208 @@ +--- +title: prokka +description: "Rapid whole genome annotation of bacterial, archaeal, and viral genomes." +tags: + - annotation + - genome + - prokaryote + - functional-annotation + - genes + - bactopia-tool +--- + +# prokka + +**Tags:** annotation genome prokaryote functional-annotation genes bactopia-tool + +Rapid whole genome annotation of bacterial, archaeal, and viral genomes. + +This Bactopia Tool uses [Prokka](https://github.com/tseemann/prokka) to rapidly annotate small genomes +in a standardized fashion. It identifies protein-coding genes, rRNA, tRNA, and other features, +then searches them against multiple reference databases to provide comprehensive functional annotation. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf prokka \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/prokka/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── main +│ └── annotator +│ └── prokka- +│ ├── -blastdb.tar.gz +│ ├── .faa.gz +│ ├── .ffn.gz +│ ├── .fna.gz +│ ├── .fsa.gz +│ ├── .gbk.gz +│ ├── .gff.gz +│ ├── .sqn.gz +│ ├── .tbl.gz +│ ├── .tsv +│ ├── .txt +│ └── logs +│ ├── .err +│ ├── .log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── prokka- + └── nf-reports + ├── prokka-dag.dot + ├── prokka-report.html + └── prokka-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.gff` | Genome annotation in GFF3 format containing sequences and annotations | +| `*.gbk` | Genome annotation in GenBank format containing sequences and annotations | +| `*.faa` | Protein FASTA file of translated CDS sequences | +| `*.fna` | Nucleotide FASTA file of input contig sequences | +| `*.ffn` | Nucleotide FASTA file of all predicted transcripts | +| `*.fsa` | Nucleotide FASTA file of predicted protein sequences | +| `*.sqn` | ASN1 format Sequin file for GenBank submission | +| `*.tbl` | Feature Table file for GenBank submission | +| `*.tsv` | Tab-separated file of all features with functional information | +| `*.txt` | Statistics report of annotated features | +| `*.blastdb.tar.gz` | BLAST+ database archive of contigs, genes, and proteins | + +### Merged Results + +| File | Description | +|------|-------------| +| `prokka.tsv` | Merged TSV file containing annotation summaries from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. 
In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| prokka-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| prokka-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| prokka-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| prokka-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Prokka Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--prokka_proteins` | string | `${projectDir}/data/proteins.faa` | FASTA file of trusted proteins to first annotate from | +| `--prokka_prodigal_tf` | string | | Training file to use for Prodigal | +| `--prokka_coverage` | integer | `80` | Minimum coverage on query protein | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [prokka](/developers/subworkflows/prokka) - Annotate bacterial genomes with functional information. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Prokka](https://github.com/tseemann/prokka) + Seemann T [Prokka: rapid prokaryotic genome annotation](http://dx.doi.org/10.1093/bioinformatics/btu153) _Bioinformatics_ 30, 2068-2069 (2014) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/prokka) diff --git a/bactopia-tools/quast.mdx b/bactopia-tools/quast.mdx new file mode 100644 index 00000000..294d0b82 --- /dev/null +++ b/bactopia-tools/quast.mdx @@ -0,0 +1,221 @@ +--- +title: quast +description: "Quality assessment of assembled contigs using QUAST." +tags: + - assembly + - quality + - assessment + - metrics + - bactopia-tool +--- + +# quast + +**Tags:** assembly quality assessment metrics bactopia-tool + +Quality assessment of assembled contigs using QUAST. + +This Bactopia Tool uses [QUAST](https://github.com/ablab/quast) to evaluate the quality +of assembled contigs. QUAST (Quality Assessment Tool for Genome Assemblies) generates +comprehensive reports including numerous plots and tables that help assess assembly +quality metrics such as N50, GC content, genome fraction, and misassembly rates. +It produces both per-sample assessments and merged summaries for comparative analysis +across multiple samples. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf quast \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/quast/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── quast- +│ ├── .tsv +│ ├── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ ├── quast.log +│ │ └── versions.yml +│ └── supplemental +│ ├── basic_stats +│ │ ├── _GC_content_plot.pdf +│ │ ├── GC_content_plot.pdf +│ │ ├── NGx_plot.pdf +│ │ ├── Nx_plot.pdf +│ │ └── cumulative_plot.pdf +│ ├── icarus.html +│ ├── icarus_viewers +│ │ └── contig_size_viewer.html +│ ├── predicted_genes +│ │ ├── _glimmer.stderr +│ │ └── _glimmer_genes.gff +│ ├── report.html +│ ├── report.pdf +│ ├── report.tex +│ ├── report.tsv +│ ├── report.txt +│ ├── transposed_report.tex +│ └── transposed_report.txt +└── bactopia-runs + └── quast- + ├── merged-results + │ ├── logs + │ │ └── quast-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── quast.tsv + └── nf-reports + ├── quast-dag.dot + ├── quast-report.html + └── quast-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Summary statistics of QUAST assessment for each sample | +| `basic_stats/` | Directory containing plots of assembly metrics (GC content, NGx, Nx) | +| `icarus.html` | Icarus main menu with links to interactive viewers | +| `icarus_viewers/` | Additional reports and viewers for Icarus | +| `predicted_genes/` | Directory containing predicted gene information | +| `report.*` | Assessment summary in various formats (html, pdf, tex, tsv, txt) | +| `transposed_report.*` | Transposed version of the assessment summary (tex, tsv, txt) | + +### Merged Results + +| File | Description | +|------|-------------| +| `quast.tsv` | Merged TSV file with QUAST summary statistics from all samples | + + +### Audit 
Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| quast-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| quast-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| quast-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| quast-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Quast Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--quast_contig_thresholds` | string | `0,1000,10000,100000,250000,1000000` | Comma-separated list of contig length thresholds | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [quast](/developers/subworkflows/quast) - Evaluate assembly quality using QUAST. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [QUAST](http://quast.sourceforge.net/) + Gurevich A, Saveliev V, Vyahhi N, Tesler G [QUAST: quality assessment tool for genome assemblies.](http://dx.doi.org/10.1093/bioinformatics/btt086) _Bioinformatics_ 29, 1072-1075 (2013) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/quast) diff --git a/bactopia-tools/rgi.mdx b/bactopia-tools/rgi.mdx new file mode 100644 index 00000000..2f2b962a --- /dev/null +++ b/bactopia-tools/rgi.mdx @@ -0,0 +1,214 @@ +--- +title: rgi +description: "Prediction of antibiotic resistance genes using RGI." +tags: + - bacteria + - antibiotic-resistance + - card + - resistance-genes + - bactopia-tool +--- + +# rgi + +**Tags:** bacteria antibiotic-resistance card resistance-genes bactopia-tool + +Prediction of antibiotic resistance genes using RGI. + +This Bactopia Tool uses [Resistance Gene Identifier (RGI)](https://github.com/arpcard/rgi) to identify +and characterize antibiotic resistance genes in bacterial assemblies. RGI integrates with the +Comprehensive Antibiotic Resistance Database (CARD) to provide high-confidence predictions +of resistance determinants, including perfect and strict hits to known resistance genes, +as well as loose hits for novel variants. 
The tool generates detailed reports in both +JSON and TSV formats, along with heatmap visualizations for comparative analysis +across multiple samples. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf rgi \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/rgi/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── rgi_main +│ ├── .json +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── GCF_900478275 +│ └── tools +│ └── rgi_main +│ ├── GCF_900478275.json +│ ├── GCF_900478275.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── rgi- + ├── merged-results + │ ├── logs + │ │ └── rgi-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── rgi.tsv + ├── nf-reports + │ ├── rgi-dag.dot + │ ├── rgi-report.html + │ └── rgi-timeline.html + └── rgi-heatmap + ├── logs + │ └── rgi-heatmap + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + ├── rgi-2.csv + ├── rgi-2.eps + └── rgi-2.png +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.json` | JSON report containing detailed RGI results for each sample | +| `*.tsv` | Tab-delimited report of RGI results for each sample | + +### Merged Results + +| File | Description | +|------|-------------| +| `rgi.tsv` | Merged TSV file containing RGI results from all samples | +| `rgi-2.{csv,png}` | Heatmap representations of resistance genes across all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| rgi-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| rgi-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| rgi-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| rgi-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### RGI Main Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--rgi_use_diamond` | boolean | `false` | Use DIAMOND for alignments instead of BLAST | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [rgi](/developers/subworkflows/rgi) - Predict antimicrobial resistance from protein or nucleotide data. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [Resistance Gene Identifier (RGI)](https://github.com/arpcard/rgi) + Alcock BP, Raphenya AR, Lau TTY, Tsang KK, Bouchard M, Edalatmand A, Huynh W, Nguyen A-L V, Cheng AA, Liu S, Min SY, Miroshnichenko A, Tran H-K, Werfalli RE, Nasir JA, Oloni M, Speicher DJ, Florescu A, Singh B, Faltyn M, Hernandez-Koutoucheva A, Sharma AN, Bordeleau E, Pawlowski AC, Zubyk HL, Dooley D, Griffiths E, Maguire F, Winsor GL, Beiko RG, Brinkman FSL, Hsiao WWL, Domselaar GV, McArthur AG [CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database.](https://doi.org/10.1093/nar/gkz935) _Nucleic acids research_ 48.D1, D517-D525 (2020) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/rgi) diff --git a/bactopia-tools/sccmec.mdx b/bactopia-tools/sccmec.mdx new file mode 100644 index 00000000..545e7002 --- /dev/null +++ b/bactopia-tools/sccmec.mdx @@ -0,0 +1,206 @@ +--- +title: sccmec +description: "Typing of SCCmec cassettes in Staphylococcus aureus assemblies." +tags: + - resistance + - staphylococcus-aureus + - mrsa + - sccmec + - typing + - bactopia-tool +--- + +# sccmec + +**Tags:** resistance staphylococcus-aureus mrsa sccmec typing bactopia-tool + +Typing of SCCmec cassettes in Staphylococcus aureus assemblies. 
+ +This Bactopia Tool uses [sccmec](https://github.com/rpetit3/sccmec) to identify and type +Staphylococcal Cassette Chromosome mec (SCCmec) elements in *Staphylococcus aureus* assemblies. +SCCmec cassettes are mobile genetic elements that carry the mecA gene and other methicillin +resistance determinants. The tool performs BLAST searches against target-specific sequences +and full cassette references to determine SCCmec types and subtypes, providing detailed +reports of BLAST hits and type predictions for epidemiological surveillance of MRSA. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf sccmec \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/sccmec/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── sccmec- +│ ├── .regions.blastn.tsv +│ ├── .regions.details.tsv +│ ├── .targets.blastn.tsv +│ ├── .targets.details.tsv +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── sccmec- + ├── merged-results + │ ├── logs + │ │ └── sccmec-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── sccmec.tsv + └── nf-reports + ├── sccmec-dag.dot + ├── sccmec-report.html + └── sccmec-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Summary report of predicted SCCmec type for each sample | +| `*.targets.blastn.tsv` | Tab-delimited file of all target-specific BLAST hits | +| `*.targets.details.tsv` | Detailed breakdown of type predictions based on target hits | +| `*.regions.blastn.tsv` | Tab-delimited file of all full cassette BLAST hits | +| `*.regions.details.tsv` | Detailed breakdown of type predictions based on full cassettes | + +### Merged Results + +| File | Description | +|------|-------------| +| `sccmec.tsv` | Merged TSV file containing 
SCCmec typing results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| sccmec-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| sccmec-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| sccmec-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| sccmec-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### sccmec Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--sccmec_min_targets_pident` | integer | `90` | Minimum percent identity to count a target hit | +| `--sccmec_min_targets_coverage` | integer | `80` | Minimum percent coverage to count a target hit | +| `--sccmec_min_regions_pident` | integer | `85` | Minimum percent identity to count a region hit | +| `--sccmec_min_regions_coverage` | integer | `93` | Minimum percent coverage to count a region hit | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [sccmec](/developers/subworkflows/sccmec) - Identify SCCmec elements in Staphylococcus aureus genomes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [sccmec](https://github.com/rpetit3/sccmec) + Petit III RA, Read TD [sccmec: A tool for typing SCCmec cassettes in assemblies](https://github.com/rpetit3/sccmec) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/sccmec) diff --git a/bactopia-tools/scrubber.mdx b/bactopia-tools/scrubber.mdx new file mode 100644 index 00000000..c227e0c3 --- /dev/null +++ b/bactopia-tools/scrubber.mdx @@ -0,0 +1,215 @@ +--- +title: scrubber +description: "Removal of human and contaminant sequences from metagenomic reads." +tags: + - metagenomics + - decontamination + - human-removal + - read-filtering + - bactopia-tool +--- + +# scrubber + +**Tags:** metagenomics decontamination human-removal read-filtering bactopia-tool + +Removal of human and contaminant sequences from metagenomic reads. + +This Bactopia Tool removes human and other contaminant sequences from metagenomic reads using +either [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) or +[nohuman](https://github.com/mbhall88/nohuman) with the HPRC human database. The tool provides flexible contamination removal +with detailed reporting of read classification and filtering statistics. 
It processes paired-end +or single-end reads, producing cleaned FASTQ files with human sequences removed and comprehensive +reports documenting the decontamination process. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf scrubber \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/scrubber/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── nohuman +│ ├── .scrub.report.tsv +│ ├── _R1.scrubbed.fastq.gz +│ ├── _R2.scrubbed.fastq.gz +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── scrubber- + ├── merged-results + │ ├── logs + │ │ └── scrubber-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── scrubber.tsv + └── nf-reports + ├── scrubber-dag.dot + ├── scrubber-report.html + └── scrubber-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.scrubbed.fastq.gz` | Cleaned reads after human sequence removal | +| `*.scrub.report.tsv` | Report of read classification and removal statistics | + +### Merged Results + +| File | Description | +|------|-------------| +| `scrubber.tsv` | Merged TSV file containing scrubber reports from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| scrubber-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| scrubber-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| scrubber-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| scrubber-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### SRA Human Scrubber Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--use_srascrubber` | boolean | `false` | Use SRAHumanScrubber for scrubbing human reads | + +### Nohuman Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--nohuman_db` | string | | Path to the nohuman database or directory to download it to | +| `--nohuman_db_version` | string | | Database version to download (default: latest HPRC release) | +| `--nohuman_save_as_tarball` | boolean | `false` | Save the nohuman database as a tarball | +| `--download_nohuman` | boolean | `false` | Download the nohuman database to the path given by --nohuman_db | + +### Nohuman Run Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--nohuman_db` | string | | Path to the nohuman database directory or tarball | +| `--nohuman_confidence` | number | `0.0` | Kraken2 minimum confidence score for classification (0.0-1.0) | +| `--nohuman_human` | boolean | `false` | Invert output to keep only human reads instead of removing them | +| `--nohuman_save_report` | boolean | `false` | Save the Kraken2 classification report | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [scrubber](/developers/subworkflows/scrubber) - Remove contaminant sequences from metagenomic data. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) + +- [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) + Katz KS, Shutov O, Lapoint R, Kimelman M, Brister JR, and O'Sullivan C [STAT: a fast, scalable, MinHash-based k-mer tool to assess Sequence Read Archive next-generation sequence submissions.](https://doi.org/10.1186/s13059-021-02490-0) _Genome Biology_, 22(1), 270 (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/scrubber) diff --git a/bactopia-tools/seqsero2.mdx b/bactopia-tools/seqsero2.mdx new file mode 100644 index 00000000..137c3e3f --- /dev/null +++ b/bactopia-tools/seqsero2.mdx @@ -0,0 +1,200 @@ +--- +title: seqsero2 +description: "Salmonella serotype prediction from sequencing reads or assemblies." +tags: + - salmonella + - serotyping + - epidemiology + - o-antigen + - h-antigen + - bactopia-tool +--- + +# seqsero2 + +**Tags:** salmonella serotyping epidemiology o-antigen h-antigen bactopia-tool + +Salmonella serotype prediction from sequencing reads or assemblies. + +This Bactopia Tool uses [SeqSero2](https://github.com/denglab/SeqSero2) to predict Salmonella +serotypes from both raw sequencing reads and assembled genomes. 
SeqSero2 is a novel pipeline +for determining Salmonella serotypes using raw sequencing reads or assemblies through +k-mer analysis and targeted identification of O and H antigen genes. The tool provides +accurate serotype predictions following the Kauffmann-White scheme, supporting traditional +and molecular serotyping methods for epidemiological surveillance and outbreak investigation. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf seqsero2 \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/seqsero2/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── seqsero2- +│ ├── .tsv +│ ├── .txt +│ └── logs +│ ├── .log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── seqsero2- + ├── merged-results + │ ├── logs + │ │ └── seqsero2-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── seqsero2.tsv + └── nf-reports + ├── seqsero2-dag.dot + ├── seqsero2-report.html + └── seqsero2-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*_result.tsv` | Tab-delimited file with detailed SeqSero2 results for each sample | +| `*_result.txt` | Text file with key-value pairs of SeqSero2 prediction results | +| `*_log.txt` | Detailed log file from SeqSero2 analysis | + +### Merged Results + +| File | Description | +|------|-------------| +| `seqsero2.tsv` | Merged TSV file containing SeqSero2 results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| seqsero2-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| seqsero2-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| seqsero2-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| seqsero2-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### SeqSero2 Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--seqsero2_run_mode` | string | `k` | Workflow to run. 'a' allele mode, or 'k' k-mer mode (choices: `a`, `k`) | +| `--seqsero2_input_type` | string | `assembly` | Input format to analyze. 'assembly' or 'fastq' (choices: `assembly`, `fastq`) | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [seqsero2](/developers/subworkflows/seqsero2) - Predict Salmonella serotypes from genome assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [SeqSero2](https://github.com/denglab/SeqSero2) + Zhang S, Den-Bakker HC, Li S, Dinsmore BA, Lane C, Lauer AC, Fields PI, Deng X. [SeqSero2: rapid and improved Salmonella serotype determination using whole genome sequencing data.](https://doi.org/10.1128/AEM.01746-19) _Appl Environ Microbiology_ 85(23):e01746-19 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/seqsero2) diff --git a/bactopia-tools/seroba.mdx b/bactopia-tools/seroba.mdx new file mode 100644 index 00000000..b03070e1 --- /dev/null +++ b/bactopia-tools/seroba.mdx @@ -0,0 +1,197 @@ +--- +title: seroba +description: "Serotyping of Streptococcus pneumoniae from Illumina paired-end reads." +tags: + - serotyping + - streptococcus-pneumoniae + - capsule + - cps-locus + - vaccine + - bactopia-tool +--- + +# seroba + +**Tags:** serotyping streptococcus-pneumoniae capsule cps-locus vaccine bactopia-tool + +Serotyping of Streptococcus pneumoniae from Illumina paired-end reads. + +This Bactopia Tool uses [Seroba](https://github.com/sanger-pathogens/seroba) to predict the +serotype of *Streptococcus pneumoniae* samples from raw sequencing reads. 
Seroba employs +a k-mer based approach to identify and type pneumococcal capsules, determining both the +serotype and serogroup based on the presence of specific capsular polysaccharide synthesis +(cps) locus sequences. The tool is specifically designed for Illumina paired-end reads and +provides accurate serotype predictions essential for epidemiological surveillance and vaccine +development studies. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf seroba \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/seroba/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── seroba- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── seroba- + ├── merged-results + │ ├── logs + │ │ └── seroba-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── seroba.tsv + └── nf-reports + ├── seroba-dag.dot + ├── seroba-report.html + └── seroba-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Tab-delimited file containing predicted serotype for each sample | +| `*detailed_serogroup_info.txt` | Detailed information about serotype prediction and coverage | + +### Merged Results + +| File | Description | +|------|-------------| +| `seroba.tsv` | Merged TSV file containing serotype predictions from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| seroba-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| seroba-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| seroba-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| seroba-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### SeroBA Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--seroba_coverage` | integer | `20` | Threshold for k-mer coverage of the reference sequence | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [seroba](/developers/subworkflows/seroba) - k-mer based pipeline to identify the serotype of Streptococcus pneumoniae. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [Seroba](https://github.com/sanger-pathogens/seroba) + Epping L, van Tonder AJ, Gladstone RA, The Global Pneumococcal Sequencing Consortium, Bentley SD, Page AJ, Keane JA [SeroBA: rapid high-throughput serotyping of Streptococcus pneumoniae from whole genome sequence data.](https://doi.org/10.1099/mgen.0.000186) _Microbial Genomics_, 4(7) (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/seroba) diff --git a/bactopia-tools/shigapass.mdx b/bactopia-tools/shigapass.mdx new file mode 100644 index 00000000..fda1491b --- /dev/null +++ b/bactopia-tools/shigapass.mdx @@ -0,0 +1,193 @@ +--- +title: shigapass +description: "Prediction of Shigella serotypes and differentiation from EIEC." +tags: + - shigella + - eiec + - serotyping + - o-antigen + - ipa-genes + - epidemiology + - bactopia-tool +--- + +# shigapass + +**Tags:** shigella eiec serotyping o-antigen ipa-genes epidemiology bactopia-tool + +Prediction of Shigella serotypes and differentiation from EIEC. + +This Bactopia Tool uses [ShigaPass](https://github.com/imanyass/ShigaPass) for in silico +prediction of serotypes in *Shigella* assemblies and differentiation between *Shigella*, +EIEC (Enteroinvasive *E. coli*) and non-*Shigella*/EIEC strains. 
ShigaPass analyzes +key antigenic determinants including O-antigen processing genes and invasion plasmid +antigens (ipa genes) to provide accurate serotype predictions following the +White-Kauffmann-Le Minor scheme. This enables rapid serological characterization +essential for epidemiological investigations and outbreak tracking. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf shigapass \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/shigapass/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── shigapass- +│ ├── .tsv +│ ├── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ └── ShigaPass_summary.csv +└── bactopia-runs + └── shigapass- + ├── merged-results + │ ├── logs + │ │ └── shigapass-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── shigapass.tsv + └── nf-reports + ├── shigapass-dag.dot + ├── shigapass-report.html + └── shigapass-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.csv` | CSV file containing predicted Shigella or EIEC serotype for each sample | + +### Merged Results + +| File | Description | +|------|-------------| +| `shigapass.csv` | Merged CSV file containing ShigaPass results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| shigapass-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| shigapass-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| shigapass-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| shigapass-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [shigapass](/developers/subworkflows/shigapass) - Predict serotypes of Shigella from assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [shigapass](https://github.com/imanyass/ShigaPass) + Yassine I, Hansen EE, Lefèvre S, Ruckly C, Carle I, Lejay-Collin M, Fabre L, Rafei R, Pardos de la Gandara M, Daboussi F, Shahin A, Weill FX [ShigaPass: an in silico tool predicting Shigella serotypes from whole-genome sequencing assemblies.](https://doi.org/10.1099%2Fmgen.0.000961) _Microb Genomics_ 9(3) (2023) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/shigapass) diff --git a/bactopia-tools/shigatyper.mdx b/bactopia-tools/shigatyper.mdx new file mode 100644 index 00000000..46bfed52 --- /dev/null +++ b/bactopia-tools/shigatyper.mdx @@ -0,0 +1,209 @@ +--- +title: shigatyper +description: "Rapid determination of Shigella serotypes from sequencing reads." +tags: + - shigella + - serotyping + - k-mer + - illumina + - nanopore + - epidemiology + - bactopia-tool +--- + +# shigatyper + +**Tags:** shigella serotyping k-mer illumina nanopore epidemiology bactopia-tool + +Rapid determination of Shigella serotypes from sequencing reads. + +This Bactopia Tool uses [ShigaTyper](https://github.com/CFSAN-Biostatistics/shigatyper) to rapidly +determine *Shigella* serotypes from both Illumina (single or paired-end) and Oxford Nanopore +reads. 
ShigaTyper performs k-mer based analysis targeting specific antigenic determinants +and marker genes to predict serotypes according to the White-Kauffmann-Le Minor classification +scheme. The tool supports multiple sequencing platforms and provides detailed hit statistics +for each target gene, enabling rapid serotype identification for epidemiological investigations +and outbreak response. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf shigatyper \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/shigatyper/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── shigatyper- +│ ├── -hits.tsv +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── SE +│ └── tools +│ └── shigatyper- +│ ├── SE-hits.tsv +│ ├── SE.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── SRR13039589 +│ └── tools +│ └── shigatyper- +│ ├── SRR13039589-hits.tsv +│ ├── SRR13039589.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── shigatyper- + ├── merged-results + │ ├── logs + │ │ └── shigatyper-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── shigatyper.tsv + └── nf-reports + ├── shigatyper-dag.dot + ├── shigatyper-report.html + └── shigatyper-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Tab-delimited file containing predicted Shigella serotype for each sample | +| `*-hits.tsv` | Detailed statistics about each individual gene hit used for serotype prediction | + +### Merged Results + +| File | Description | +|------|-------------| +| `shigatyper.tsv` | Merged TSV file containing ShigaTyper results from all samples | + + +### Audit Trail + +Below are files that can assist you in 
understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| shigatyper-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| shigatyper-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| shigatyper-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| shigatyper-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [shigatyper](/developers/subworkflows/shigatyper) - Predict serotypes of Shigella from reads or assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [ShigaTyper](https://github.com/CFSAN-Biostatistics/shigatyper) + Wu Y, Lau HK, Lee T, Lau DK, Payne J [In Silico Serotyping Based on Whole-Genome Sequencing Improves the Accuracy of Shigella Identification.](https://doi.org/10.1128/AEM.00165-19) *Applied and Environmental Microbiology*, 85(7). (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/shigatyper) diff --git a/bactopia-tools/shigeifinder.mdx b/bactopia-tools/shigeifinder.mdx new file mode 100644 index 00000000..ee452862 --- /dev/null +++ b/bactopia-tools/shigeifinder.mdx @@ -0,0 +1,187 @@ +--- +title: shigeifinder +description: "In silico serotype prediction for Shigella and Enteroinvasive E. coli (EIEC)." +tags: + - shigella + - eiec + - serotyping + - prediction + - epidemiology + - bactopia-tool +--- + +# shigeifinder + +**Tags:** shigella eiec serotyping prediction epidemiology bactopia-tool + +In silico serotype prediction for Shigella and Enteroinvasive E. coli (EIEC). + +This Bactopia Tool uses [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) to conduct rapid +serotype prediction from genome assemblies. 
It provides species identification and predicts +the traditional O and H antigens for Shigella and EIEC isolates, enabling epidemiological +tracking and surveillance without requiring traditional serological methods. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf shigeifinder \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/shigeifinder/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── shigeifinder- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── shigeifinder- + ├── merged-results + │ ├── logs + │ │ └── shigeifinder-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── shigeifinder.tsv + └── nf-reports + ├── shigeifinder-dag.dot + ├── shigeifinder-report.html + └── shigeifinder-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tsv` | Tab-delimited file containing predicted Shigella or EIEC serotype | + +### Merged Results + +| File | Description | +|------|-------------| +| `shigeifinder.tsv` | Merged TSV file containing serotype predictions from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| shigeifinder-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| shigeifinder-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| shigeifinder-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| shigeifinder-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [shigeifinder](/developers/subworkflows/shigeifinder) - Predict serotypes of Shigella and EIEC from assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +- [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) + Zhang X, Payne M, Nguyen T, Kaur S, Lan R [Cluster-specific gene markers enhance Shigella and enteroinvasive Escherichia coli in silico serotyping.](https://doi.org/10.1099/mgen.0.000704) Microbial Genomics, 7(12). (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/shigeifinder) diff --git a/bactopia-tools/sistr.mdx b/bactopia-tools/sistr.mdx new file mode 100644 index 00000000..a3623a60 --- /dev/null +++ b/bactopia-tools/sistr.mdx @@ -0,0 +1,192 @@ +--- +title: sistr +description: "Serovar prediction of Salmonella enterica from assemblies." +tags: + - salmonella + - serovar + - cgmlst + - typing + - sistr + - bactopia-tool +--- + +# sistr + +**Tags:** salmonella serovar cgmlst typing sistr bactopia-tool + +Serovar prediction of Salmonella enterica from assemblies. + +This Bactopia Tool uses [Salmonella In Silico Typing Resource](https://github.com/phac-nml/sistr_cmd), +or SISTR, for serovar prediction of Salmonella enterica assemblies using cgMLST typing +and molecular serovar prediction. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf sistr \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/sistr/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── sistr- +│ ├── -allele.fasta.gz +│ ├── -allele.json.gz +│ ├── -cgmlst.csv +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── sistr- + ├── merged-results + │ ├── logs + │ │ └── sistr-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── sistr.tsv + └── nf-reports + ├── sistr-dag.dot + ├── sistr-report.html + └── sistr-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.csv` | SISTR analysis results in CSV format | + +### Merged Results + +| File | Description | +|------|-------------| +| `sistr.tsv` | Merged TSV file containing SISTR results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| sistr-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| sistr-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| sistr-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| sistr-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### SISTR Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--sistr_full_cgmlst` | boolean | `false` | Use the full set of cgMLST alleles which can include highly similar alleles | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [sistr](/developers/subworkflows/sistr) - Salmonella In Silico Typing Resource command-line tool. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SISTR](https://github.com/phac-nml/sistr_cmd) + Yoshida CE, Kruczkiewicz P, Laing CR, Lingohr EJ, Gannon VPJ, Nash JHE, Taboada EN [The Salmonella In Silico Typing Resource (SISTR): An Open Web-Accessible Tool for Rapidly Typing and Subtyping Draft Salmonella Genome Assemblies.](https://doi.org/10.1371/journal.pone.0147101) _PloS One_, 11(1), e0147101. (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/sistr) diff --git a/bactopia-tools/snippy.mdx b/bactopia-tools/snippy.mdx new file mode 100644 index 00000000..d2ca8092 --- /dev/null +++ b/bactopia-tools/snippy.mdx @@ -0,0 +1,484 @@ +--- +title: snippy +description: "Rapid haplotype variant calling and core genome alignment." +tags: + - snp + - variant-calling + - phylogeny + - core-genome + - snippy + - bactopia-tool +--- + +# snippy + +**Tags:** snp variant-calling phylogeny core-genome snippy bactopia-tool + +Rapid haplotype variant calling and core genome alignment. + +This Bactopia Tool uses [Snippy](https://github.com/tseemann/snippy) to find SNPs between a +reference genome and a set of reads, perform core genome alignment, and generate +phylogenetic trees. It includes optional recombination detection with Gubbins +and phylogenetic tree construction with IQ-Tree. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf snippy \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/snippy/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── snippy- +│ └── GCF_000292685 +│ ├── .aligned.fa.gz +│ ├── .annotated.vcf.gz +│ ├── .bam +│ ├── .bam.bai +│ ├── .bed.gz +│ ├── .consensus.fa.gz +│ ├── .consensus.subs.fa.gz +│ ├── .consensus.subs.masked.fa.gz +│ ├── .coverage.txt.gz +│ ├── .csv.gz +│ ├── .filt.vcf.gz +│ ├── .gff.gz +│ ├── .html +│ ├── .raw.vcf.gz +│ ├── .subs.vcf.gz +│ ├── .tab +│ ├── .txt +│ ├── .vcf.gz +│ └── logs +│ ├── .log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── ERR6005894 +│ └── tools +│ └── snippy- +│ └── GCF_000292685 +│ ├── ERR6005894.aligned.fa.gz +│ ├── ERR6005894.annotated.vcf.gz +│ ├── ERR6005894.bam +│ ├── ERR6005894.bam.bai +│ ├── ERR6005894.bed.gz +│ ├── ERR6005894.consensus.fa.gz +│ ├── ERR6005894.consensus.subs.fa.gz +│ ├── ERR6005894.consensus.subs.masked.fa.gz +│ ├── ERR6005894.coverage.txt.gz +│ ├── ERR6005894.csv.gz +│ ├── ERR6005894.filt.vcf.gz +│ ├── ERR6005894.gff.gz +│ ├── ERR6005894.html +│ ├── ERR6005894.raw.vcf.gz +│ ├── ERR6005894.subs.vcf.gz +│ ├── ERR6005894.tab +│ ├── ERR6005894.txt +│ ├── ERR6005894.vcf.gz +│ └── logs +│ ├── ERR6005894.log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── ERR6005894SE +│ └── tools +│ └── snippy- +│ └── GCF_000292685 +│ ├── ERR6005894SE.aligned.fa.gz +│ ├── ERR6005894SE.annotated.vcf.gz +│ ├── ERR6005894SE.bam +│ ├── ERR6005894SE.bam.bai +│ ├── ERR6005894SE.bed.gz +│ ├── ERR6005894SE.consensus.fa.gz +│ ├── ERR6005894SE.consensus.subs.fa.gz +│ ├── ERR6005894SE.consensus.subs.masked.fa.gz +│ ├── ERR6005894SE.coverage.txt.gz +│ ├── ERR6005894SE.csv.gz +│ ├── ERR6005894SE.filt.vcf.gz +│ ├── ERR6005894SE.gff.gz +│ ├── ERR6005894SE.html +│ ├── 
ERR6005894SE.raw.vcf.gz +│ ├── ERR6005894SE.subs.vcf.gz +│ ├── ERR6005894SE.tab +│ ├── ERR6005894SE.txt +│ ├── ERR6005894SE.vcf.gz +│ └── logs +│ ├── ERR6005894SE.log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── SRR2838702 +│ └── tools +│ └── snippy- +│ └── GCF_000292685 +│ ├── SRR2838702.aligned.fa.gz +│ ├── SRR2838702.annotated.vcf.gz +│ ├── SRR2838702.bam +│ ├── SRR2838702.bam.bai +│ ├── SRR2838702.bed.gz +│ ├── SRR2838702.consensus.fa.gz +│ ├── SRR2838702.consensus.subs.fa.gz +│ ├── SRR2838702.consensus.subs.masked.fa.gz +│ ├── SRR2838702.coverage.txt.gz +│ ├── SRR2838702.csv.gz +│ ├── SRR2838702.filt.vcf.gz +│ ├── SRR2838702.gff.gz +│ ├── SRR2838702.html +│ ├── SRR2838702.raw.vcf.gz +│ ├── SRR2838702.subs.vcf.gz +│ ├── SRR2838702.tab +│ ├── SRR2838702.txt +│ ├── SRR2838702.vcf.gz +│ └── logs +│ ├── SRR2838702.log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── SRR2838702SE +│ └── tools +│ └── snippy- +│ └── GCF_000292685 +│ ├── SRR2838702SE.aligned.fa.gz +│ ├── SRR2838702SE.annotated.vcf.gz +│ ├── SRR2838702SE.bam +│ ├── SRR2838702SE.bam.bai +│ ├── SRR2838702SE.bed.gz +│ ├── SRR2838702SE.consensus.fa.gz +│ ├── SRR2838702SE.consensus.subs.fa.gz +│ ├── SRR2838702SE.consensus.subs.masked.fa.gz +│ ├── SRR2838702SE.coverage.txt.gz +│ ├── SRR2838702SE.csv.gz +│ ├── SRR2838702SE.filt.vcf.gz +│ ├── SRR2838702SE.gff.gz +│ ├── SRR2838702SE.html +│ ├── SRR2838702SE.raw.vcf.gz +│ ├── SRR2838702SE.subs.vcf.gz +│ ├── SRR2838702SE.tab +│ ├── SRR2838702SE.txt +│ ├── SRR2838702SE.vcf.gz +│ └── logs +│ ├── SRR2838702SE.log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── SRR2838702SE_2 +│ └── tools +│ └── snippy- +│ └── GCF_000292685 +│ ├── SRR2838702SE_2.aligned.fa.gz +│ ├── SRR2838702SE_2.annotated.vcf.gz +│ ├── SRR2838702SE_2.bam +│ ├── SRR2838702SE_2.bam.bai +│ ├── SRR2838702SE_2.bed.gz +│ ├── SRR2838702SE_2.consensus.fa.gz +│ ├── SRR2838702SE_2.consensus.subs.fa.gz +│ ├── 
SRR2838702SE_2.consensus.subs.masked.fa.gz +│ ├── SRR2838702SE_2.coverage.txt.gz +│ ├── SRR2838702SE_2.csv.gz +│ ├── SRR2838702SE_2.filt.vcf.gz +│ ├── SRR2838702SE_2.gff.gz +│ ├── SRR2838702SE_2.html +│ ├── SRR2838702SE_2.raw.vcf.gz +│ ├── SRR2838702SE_2.subs.vcf.gz +│ ├── SRR2838702SE_2.tab +│ ├── SRR2838702SE_2.txt +│ ├── SRR2838702SE_2.vcf.gz +│ └── logs +│ ├── SRR2838702SE_2.log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── SRR2838702_2 +│ └── tools +│ └── snippy- +│ └── GCF_000292685 +│ ├── SRR2838702_2.aligned.fa.gz +│ ├── SRR2838702_2.annotated.vcf.gz +│ ├── SRR2838702_2.bam +│ ├── SRR2838702_2.bam.bai +│ ├── SRR2838702_2.bed.gz +│ ├── SRR2838702_2.consensus.fa.gz +│ ├── SRR2838702_2.consensus.subs.fa.gz +│ ├── SRR2838702_2.consensus.subs.masked.fa.gz +│ ├── SRR2838702_2.coverage.txt.gz +│ ├── SRR2838702_2.csv.gz +│ ├── SRR2838702_2.filt.vcf.gz +│ ├── SRR2838702_2.gff.gz +│ ├── SRR2838702_2.html +│ ├── SRR2838702_2.raw.vcf.gz +│ ├── SRR2838702_2.subs.vcf.gz +│ ├── SRR2838702_2.tab +│ ├── SRR2838702_2.txt +│ ├── SRR2838702_2.vcf.gz +│ └── logs +│ ├── SRR2838702_2.log +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── snippy- + ├── GCF_000292685.samples.txt + ├── core-snp-clean.full.aln.gz + ├── core-snp.distance.tsv + ├── core-snp.full.aln.gz + ├── core-snp.masked.aln.gz + ├── core-snp.masked.distance.tsv + ├── gubbins + │ ├── core-snp.branch_base_reconstruction.embl.gz + │ ├── core-snp.filtered_polymorphic_sites.fasta.gz + │ ├── core-snp.filtered_polymorphic_sites.phylip + │ ├── core-snp.final_tree.tre + │ ├── core-snp.node_labelled.final_tree.tre + │ ├── core-snp.per_branch_statistics.csv + │ ├── core-snp.recombination_predictions.embl.gz + │ ├── core-snp.recombination_predictions.gff.gz + │ ├── core-snp.summary_of_snp_distribution.vcf.gz + │ └── logs + │ ├── core-snp.log + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + ├── nf-reports + │ ├── snippy-dag.dot 
+ │ ├── snippy-report.html + │ └── snippy-timeline.html + ├── snippy-core + │ ├── core-snp.aln.gz + │ ├── core-snp.tab.gz + │ ├── core-snp.txt + │ ├── core-snp.vcf.gz + │ └── logs + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + ├── snpdists + │ └── logs + │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ └── versions.yml + └── snpdists-masked + └── logs + ├── nf.command.{begin,err,log,out,run,sh,trace} + └── versions.yml +``` + +### Variant Calling + +| File | Description | +|------|-------------| +| `*.vcf` | Variant calls in VCF format | +| `*.bam` | Alignment file | +| `*.txt` | Snippy summary report | + +### Core Genome Alignment + +| File | Description | +|------|-------------| +| `core.full.aln` | Full core genome alignment | +| `core.snps.aln` | Core SNP alignment | + +### Recombination Analysis + +:::note +Only created if recombination analysis is enabled +::: + +| File | Description | +|------|-------------| +| `*.filtered.aln` | Alignment with recombination regions removed | +| `*.gff` | Recombination predictions | + +### Phylogeny + +:::note +Only created if phylogeny analysis is enabled +::: + +| File | Description | +|------|-------------| +| `*.treefile` | Phylogenetic tree in Newick format | + +### Merged Results + +| File | Description | +|------|-------------| +| `snippy.tsv` | Merged summary of Snippy analyses | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| snippy-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| snippy-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| snippy-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| snippy-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### NCBI Genome Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--species` | string | | Name of the species to download assemblies | +| `--accession` | string | | An NCBI Assembly accession to be downloaded | +| `--accessions` | string | | A file of NCBI Assembly accessions (one per line) to be downloaded | +| `--format` | string | `fasta` | Comma separated list of formats to download | +| `--limit` | string | | Limit the number of assemblies to download | +| `--keep_downloads` | boolean | `false` | Save downloaded files into the bactopia-runs folder | + +### Snippy Run Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--reference` | string | | Reference genome in GenBank format | +| `--snippy_mapqual` | integer | `60` | Minimum read mapping quality to consider | +| `--snippy_basequal` | integer | `13` | Minimum base quality to consider | +| `--snippy_bwaopt` | string | | Extra BWA MEM options, eg. -x pacbio | +| `--snippy_fbopt` | string | | Extra Freebayes options, eg. 
--theta 1E-6 --read-snp-limit 2 | +| `--snippy_opts` | string | | Extra options in quotes for Snippy | + +### Snippy-Core Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--snippy_core_maxhap` | integer | `100` | Largest haplotype to decompose | +| `--snippy_core_mask` | string | | BED file of sites to mask | +| `--snippy_core_mask_char` | string | `X` | Masking character | +| `--snippy_core_opts` | string | | Extra options in quotes for snippy-core | + +### SNP-Dists Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--snpdists_a` | boolean | `false` | Count all differences not just [AGTC] | + +### Gubbins Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--gubbins_iterations` | integer | `5` | Maximum number of iterations | +| `--gubbins_opts` | string | | Extra Gubbins options in quotes | +| `--skip_recombination` | boolean | `false` | Skip Gubbins execution in subworkflows | + +### IQ-TREE Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--iqtree_model` | string | `HKY` | Substitution model name | +| `--iqtree_bb` | integer | `1000` | Ultrafast bootstrap replicates | +| `--iqtree_alrt` | integer | `1000` | SH-like approximate likelihood ratio test replicates | +| `--iqtree_asr` | boolean | `false` | Ancestral state reconstruction by empirical Bayes | +| `--skip_phylogeny` | boolean | `false` | Skip IQ-TREE execution in subworkflows | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [gubbins](/developers/subworkflows/gubbins) - Detect and filter recombination regions in bacterial alignments. +- [iqtree](/developers/subworkflows/iqtree) - Construct maximum likelihood phylogenetic trees from alignments. +- [ncbigenomedownload](/developers/subworkflows/ncbigenomedownload) - Download bacterial genomes from NCBI's RefSeq database. +- [snippy_core](/developers/subworkflows/snippy_core) - Generate core-genome SNP alignment from per-sample Snippy outputs. +- [snippy_run](/developers/subworkflows/snippy_run) - Call variants against a reference genome using Snippy. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Snippy](https://github.com/tseemann/snippy) + Seemann T [Snippy: fast bacterial variant calling from NGS reads](https://github.com/tseemann/snippy) (GitHub) + +- [Gubbins](https://github.com/nickjcroucher/gubbins) + Croucher NJ, Page AJ, Connor TR, Delaney AJ, Keane JA, Bentley SD, Parkhill J, Harris SR [Rapid phylogenetic analysis of large samples of recombinant bacterial whole genome sequences using Gubbins.](https://doi.org/10.1093/nar/gku1196) _Nucleic Acids Research_ 43(3), e15. (2015) + +- [IQ-TREE](https://github.com/Cibiv/IQ-TREE) + Nguyen L-T, Schmidt HA, von Haeseler A, Minh BQ [IQ-TREE: A fast and effective stochastic algorithm for estimating maximum likelihood phylogenies.](https://doi.org/10.1093/molbev/msu300) _Mol. Biol. 
Evol._ 32:268-274 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/snippy) diff --git a/bactopia-tools/spatyper.mdx b/bactopia-tools/spatyper.mdx new file mode 100644 index 00000000..95779125 --- /dev/null +++ b/bactopia-tools/spatyper.mdx @@ -0,0 +1,186 @@ +--- +title: spatyper +description: "spa typing of Staphylococcus aureus assemblies." +tags: + - staphylococcus-aureus + - spa-typing + - epidemiology + - bactopia-tool +--- + +# spatyper + +**Tags:** staphylococcus-aureus spa-typing epidemiology bactopia-tool + +spa typing of Staphylococcus aureus assemblies. + +This Bactopia Tool uses [spaTyper](https://github.com/HCGB-IGTP/spaTyper) to assign +spa types to _Staphylococcus aureus_ assemblies for epidemiological typing. + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf spatyper \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/spatyper/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── spatyper- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── spatyper- + ├── merged-results + │ ├── logs + │ │ └── spatyper-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── spatyper.tsv + └── nf-reports + ├── spatyper-dag.dot + ├── spatyper-report.html + └── spatyper-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | spa typing results for each sample | + +### Merged Results + +| File | Description | +|------|-------------| +| `spatyper.tsv` | Merged TSV file containing spaTyper results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. 
+ +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| spatyper-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| spatyper-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| spatyper-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| spatyper-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### spaTyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--spatyper_do_enrich` | boolean | `false` | Do PCR product enrichment | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [spatyper](/developers/subworkflows/spatyper) - Predict spa types of Staphylococcus aureus from genome assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [spaTyper](https://github.com/HCGB-IGTP/spaTyper) + Sanchez-Herrero JF, and Sullivan M [spaTyper: Staphylococcal protein A (spa) characterization pipeline](http://doi.org/10.5281/zenodo.4063625). Zenodo. (2020) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/spatyper) diff --git a/bactopia-tools/ssuissero.mdx b/bactopia-tools/ssuissero.mdx new file mode 100644 index 00000000..5aedd1b7 --- /dev/null +++ b/bactopia-tools/ssuissero.mdx @@ -0,0 +1,180 @@ +--- +title: ssuissero +description: "Serotype prediction of Streptococcus suis assemblies." +tags: + - streptococcus-suis + - serotyping + - fasta + - bactopia-tool +--- + +# ssuissero + +**Tags:** streptococcus-suis serotyping fasta bactopia-tool + +Serotype prediction of Streptococcus suis assemblies. + +This Bactopia Tool uses [SsuisSero](https://github.com/jimmyliu1326/SsuisSero) to predict +the serotype of _Streptococcus suis_ assemblies. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf ssuissero \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/ssuissero/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── ssuissero- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── ssuissero- + ├── merged-results + │ ├── logs + │ │ └── ssuissero-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── ssuissero.tsv + └── nf-reports + ├── ssuissero-dag.dot + ├── ssuissero-report.html + └── ssuissero-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | Serotype prediction results | + +### Merged Results + +| File | Description | +|------|-------------| +| `ssuissero.tsv` | Merged TSV file containing SsuisSero results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide great a great summary of your run. 
These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| ssuissero-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| ssuissero-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| ssuissero-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| ssuissero-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [ssuissero](/developers/subworkflows/ssuissero) - Predict serotypes of Streptococcus suis from genome assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SsuisSero](https://github.com/jimmyliu1326/SsuisSero) + Liu J [SsuisSero: Rapid _Streptococcus suis_ serotyping](https://github.com/jimmyliu1326/SsuisSero) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/ssuissero) diff --git a/bactopia-tools/staphtyper.mdx b/bactopia-tools/staphtyper.mdx new file mode 100644 index 00000000..e3bf6990 --- /dev/null +++ b/bactopia-tools/staphtyper.mdx @@ -0,0 +1,236 @@ +--- +title: staphtyper +description: "Comprehensive typing of Staphylococcus aureus genomes." +tags: + - staphylococcus-aureus + - agr + - spa + - sccmec + - typing + - bactopia-tool +--- + +# staphtyper + +**Tags:** staphylococcus-aureus agr spa sccmec typing bactopia-tool + +Comprehensive typing of Staphylococcus aureus genomes. + +This Bactopia Tool is a subworkflow that includes multiple tools specific for typing +_Staphylococcus aureus_ features. Currently `staphtyper` includes: +1. [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) - agr locus type and operon variants +2. [spaTyper](https://github.com/HCGB-IGTP/spaTyper) - spa type +3. 
[sccmec](https://github.com/rpetit3/sccmec) - SCCmec type + +## Usage + +Bactopia CLI: + +```bash +bactopia --wf staphtyper \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/staphtyper/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ ├── agrvate +│ │ ├── .tsv +│ │ ├── logs +│ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ │ └── versions.yml +│ │ └── supplemental +│ │ ├── -agr_gp.tab +│ │ ├── -blastn_log.txt +│ │ └── .fna-error-report.tab +│ ├── sccmec +│ │ ├── .regions.blastn.tsv +│ │ ├── .regions.details.tsv +│ │ ├── .targets.blastn.tsv +│ │ ├── .targets.details.tsv +│ │ ├── .tsv +│ │ └── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── spatyper +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── staphtyper- + ├── merged-results + │ ├── agrvate.tsv + │ ├── logs + │ │ ├── agrvate-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ ├── sccmec-concat + │ │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ │ └── versions.yml + │ │ └── spatyper-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ ├── sccmec.tsv + │ └── spatyper.tsv + └── nf-reports + ├── staphtyper-dag.dot + ├── staphtyper-report.html + └── staphtyper-timeline.html +``` + +### Comprehensive Typing + +:::note +Results from all included typing tools +::: + +| File | Description | +|------|-------------| +| `staphtyper.tsv` | Merged summary containing agr, spa, and SCCmec typing results | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. 
In this folder are helpful +files for you to review if the need ever arises. + +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| staphtyper-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| staphtyper-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| staphtyper-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| staphtyper-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### AgrVATE Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--agrvate_typing_only` | boolean | `false` | agr typing only. 
Skips agr operon extraction and frameshift detection | + +### spaTyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--spatyper_do_enrich` | boolean | `false` | Do PCR product enrichment | + +### sccmec Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--sccmec_min_targets_pident` | integer | `90` | Minimum percent identity to count a target hit | +| `--sccmec_min_targets_coverage` | integer | `80` | Minimum percent coverage to count a target hit | +| `--sccmec_min_regions_pident` | integer | `85` | Minimum percent identity to count a region hit | +| `--sccmec_min_regions_coverage` | integer | `93` | Minimum percent coverage to count a region hit | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [staphtyper](/developers/subworkflows/staphtyper) - Determine the agr, spa and SCCmec types for _Staphylococcus aureus_ genomes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) + Raghuram V. [AgrVATE: Rapid identification of Staphylococcus aureus agr locus type and agr operon variants.](https://github.com/VishnuRaghuram94/AgrVATE) (GitHub) + +- [spaTyper](https://github.com/HCGB-IGTP/spaTyper) + Sanchez-Herrero JF, and Sullivan M [spaTyper: Staphylococcal protein A (spa) characterization pipeline](http://doi.org/10.5281/zenodo.4063625). Zenodo. (2020) + +- [sccmec](https://github.com/rpetit3/sccmec) + Petit III RA, Read TD [sccmec: A tool for typing SCCmec cassettes in assemblies](https://github.com/rpetit3/sccmec) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/staphtyper) diff --git a/bactopia-tools/stecfinder.mdx b/bactopia-tools/stecfinder.mdx new file mode 100644 index 00000000..c6c4181c --- /dev/null +++ b/bactopia-tools/stecfinder.mdx @@ -0,0 +1,205 @@ +--- +title: stecfinder +description: "Serotype identification of Shiga toxin-producing E. coli." +tags: + - stec + - serotype + - e-coli + - shiga-toxin + - bactopia-tool +--- + +# stecfinder + +**Tags:** stec serotype e-coli shiga-toxin bactopia-tool + +Serotype identification of Shiga toxin-producing E. coli. + +This Bactopia Tool uses [STECFinder](https://github.com/LanLab/STECFinder) to identify +the serotype of Shiga toxin-producing _E. coli_ (STEC) from sequencing data. +STECFinder determines the serotype as well as the O-antigen and H-antigens. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf stecfinder \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/stecfinder/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── stecfinder- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── SE +│ └── tools +│ └── stecfinder- +│ ├── SE.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +├── SRR13039589 +│ └── tools +│ └── stecfinder- +│ ├── SRR13039589.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── stecfinder- + ├── merged-results + │ ├── logs + │ │ └── stecfinder-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── stecfinder.tsv + └── nf-reports + ├── stecfinder-dag.dot + ├── stecfinder-report.html + └── stecfinder-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.txt` | Serotype identification results | + +### Merged Results + +| File | Description | +|------|-------------| +| `stecfinder.tsv` | Merged TSV file containing STECFinder results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| stecfinder-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| stecfinder-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| stecfinder-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| stecfinder-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### STECFinder Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--stecfinder_use_reads` | boolean | `false` | Paired-end Illumina reads will be used instead of assemblies | +| `--stecfinder_hits` | boolean | `false` | Show detailed gene search results | +| `--stecfinder_cutoff` | number | `10.0` | Minimum read coverage for gene to be called | +| `--stecfinder_length` | number | `50.0` | Percentage of gene length needed for positive call | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [stecfinder](/developers/subworkflows/stecfinder) - Identify and serotype Shiga toxin-producing E. coli (STEC) from assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [STECFinder](https://github.com/LanLab/STECFinder) + Zhang X, Payne M, Kaur S, and Lan R [Improved Genomic Identification, Clustering, and Serotyping of Shiga Toxin-Producing Escherichia coli Using Cluster/Serotype-Specific Gene Markers.](https://doi.org/10.3389/fcimb.2021.772574) _Frontiers in Cellular and Infection Microbiology_, 11, 772574. (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/stecfinder) diff --git a/bactopia-tools/sylph.mdx b/bactopia-tools/sylph.mdx new file mode 100644 index 00000000..3d1a3591 --- /dev/null +++ b/bactopia-tools/sylph.mdx @@ -0,0 +1,192 @@ +--- +title: sylph +description: "Taxonomic profiling by abundance-corrected MinHash." +tags: + - taxonomic-profiling + - metagenomics + - minhash + - abundance + - sylph + - bactopia-tool +--- + +# sylph + +**Tags:** taxonomic-profiling metagenomics minhash abundance sylph bactopia-tool + +Taxonomic profiling by abundance-corrected MinHash. + +This Bactopia Tool uses [Sylph](https://github.com/bluenote-1577/Sylph) to perform +taxonomic profiling of metagenomic samples using abundance-corrected MinHash sketches +for accurate species-level quantification. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf sylph \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/sylph/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── sylph- +│ ├── .tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── sylph- + ├── merged-results + │ ├── logs + │ │ └── sylph-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── sylph.tsv + └── nf-reports + ├── sylph-dag.dot + ├── sylph-report.html + └── sylph-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.profile.txt` | Species abundance profile | +| `*.krona.html` | Interactive Krona plot visualization | + +### Merged Results + +| File | Description | +|------|-------------| +| `sylph.tsv` | Merged TSV file containing Sylph profiles from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| sylph-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| sylph-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| sylph-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| sylph-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### Sylph Profile Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--sylph_db` | string | | The path to a sylph formatted database | +| `--sylph_min_ani` | integer | `95` | Minimum adjusted ANI to consider. Smaller than 95 for profile will give inaccurate results. | +| `--sylph_opts` | string | | Extra options in quotes for Sylph | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [sylph](/developers/subworkflows/sylph) - Profile microbial composition using Sylph. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Sylph](https://github.com/bluenote-1577/sylph) + Shaw J, and Yu YW [Rapid species-level metagenome profiling and containment estimation with sylph.](https://doi.org/10.1038/s41587-024-02412-y) _Nature Biotechnology_ (2024) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/sylph) diff --git a/bactopia-tools/tblastn.mdx b/bactopia-tools/tblastn.mdx new file mode 100644 index 00000000..7d4345c1 --- /dev/null +++ b/bactopia-tools/tblastn.mdx @@ -0,0 +1,193 @@ +--- +title: tblastn +description: "Search against translated nucleotide databases using protein queries." +tags: + - fasta + - blast + - alignment + - protein + - nucleotide + - bactopia-tool +--- + +# tblastn + +**Tags:** fasta blast alignment protein nucleotide bactopia-tool + +Search against translated nucleotide databases using protein queries. + +This Bactopia Tool uses [TBLASTN](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs) +to query protein sequences against translated nucleotide databases (contigs) for homology search. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf tblastn \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/tblastn/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── tblastn- +│ ├── .tblastn.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── tblastn- + ├── merged-results + │ ├── logs + │ │ └── tblastn-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── tblastn.tsv + └── nf-reports + ├── tblastn-dag.dot + ├── tblastn-report.html + └── tblastn-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tblastn.tsv` | TBLASTN alignment results in tabular format | +| `*.tblastn.html` | Interactive HTML report of TBLASTN results | + +### Merged Results + +| File | Description | +|------|-------------| +| `tblastn.tsv` | Merged TBLASTN results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| tblastn-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| tblastn-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| tblastn-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| tblastn-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### TBLASTN Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--tblastn_query` | string | | A fasta file containing the query sequences to BLAST against the database | +| `--tblastn_outfmt` | string | `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | The columns to include with -outfmt 6 | +| `--tblastn_opts` | string | | Additional options to pass to TBLASTN | +| `--tblastn_qcov_hsp_perc` | integer | `50` | Percent query coverage per hsp | +| `--tblastn_max_target_seqs` | integer | `2000` | Maximum number of aligned sequences to keep | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [tblastn](/developers/subworkflows/tblastn) - Search protein query sequences against nucleotide database. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/tblastn) diff --git a/bactopia-tools/tblastx.mdx b/bactopia-tools/tblastx.mdx new file mode 100644 index 00000000..69965ed6 --- /dev/null +++ b/bactopia-tools/tblastx.mdx @@ -0,0 +1,194 @@ +--- +title: tblastx +description: "Search against translated nucleotide databases using translated nucleotide queries." +tags: + - fasta + - blast + - alignment + - protein + - translation + - bactopia-tool +--- + +# tblastx + +**Tags:** fasta blast alignment protein translation bactopia-tool + +Search against translated nucleotide databases using translated nucleotide queries. + +This Bactopia Tool uses [TBLASTX](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs) +to query translated nucleotide sequences against translated nucleotide databases for +comprehensive homology search at the protein level. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf tblastx \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/tblastx/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── tblastx- +│ ├── .tblastx.tsv +│ └── logs +│ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ └── versions.yml +└── bactopia-runs + └── tblastx- + ├── merged-results + │ ├── logs + │ │ └── tblastx-concat + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ └── tblastx.tsv + └── nf-reports + ├── tblastx-dag.dot + ├── tblastx-report.html + └── tblastx-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.tblastx.tsv` | TBLASTX alignment results in tabular format | +| `*.tblastx.html` | Interactive HTML report of TBLASTX results | + +### Merged Results + +| File | Description | +|------|-------------| +| `tblastx.tsv` | Merged TBLASTX results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| tblastx-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| tblastx-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| tblastx-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| tblastx-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### TBLASTX Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--tblastx_query` | string | | A fasta file containing the query sequences to BLAST against the database | +| `--tblastx_outfmt` | string | `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | The columns to include with -outfmt 6 | +| `--tblastx_opts` | string | | Additional options to pass to TBLASTX | +| `--tblastx_qcov_hsp_perc` | integer | `50` | Percent query coverage per hsp | +| `--tblastx_max_target_seqs` | integer | `2000` | Maximum number of aligned sequences to keep | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [tblastx](/developers/subworkflows/tblastx) - Translate nucleotide query sequences and search nucleotide database. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/tblastx) diff --git a/bactopia-tools/tbprofiler.mdx b/bactopia-tools/tbprofiler.mdx new file mode 100644 index 00000000..a2793781 --- /dev/null +++ b/bactopia-tools/tbprofiler.mdx @@ -0,0 +1,214 @@ +--- +title: tbprofiler +description: "Detection of antimicrobial resistance and lineage typing of Mycobacterium tuberculosis." +tags: + - mycobacterium-tuberculosis + - resistance + - lineage + - typing + - tb + - bactopia-tool +--- + +# tbprofiler + +**Tags:** mycobacterium-tuberculosis resistance lineage typing tb bactopia-tool + +Detection of antimicrobial resistance and lineage typing of Mycobacterium tuberculosis. + +This Bactopia Tool uses [TBProfiler](https://github.com/jodyphelan/TBProfiler) to profile +Mycobacterium tuberculosis genomes for resistance mutations and strain typing. The workflow +processes sequencing reads to identify resistance-conferring variants and determine +the lineage of each TB isolate. 
+ +## Usage + +Bactopia CLI: + +```bash +bactopia --wf tbprofiler \ + --bactopia /path/to/your/bactopia/results +``` + +Nextflow: + +```bash +nextflow run bactopia/bactopia/workflows/bactopia-tools/tbprofiler/main.nf \ + --bactopia /path/to/your/bactopia/results +``` + +## Outputs + +### Expected Output Files + +``` + +├── +│ └── tools +│ └── tbprofiler- +│ ├── .csv +│ ├── .results.json.gz +│ ├── .txt +│ ├── logs +│ │ ├── nf.command.{begin,err,log,out,run,sh,trace} +│ │ └── versions.yml +│ └── supplemental +│ ├── .bam +│ ├── .bam.bai +│ └── .targets.vcf.gz +└── bactopia-runs + └── tbprofiler- + ├── merged-results + │ ├── logs + │ │ └── tbprofiler-collate + │ │ ├── nf.command.{begin,err,log,out,run,sh,trace} + │ │ └── versions.yml + │ ├── tbprofiler.csv + │ ├── tbprofiler.variants.csv + │ └── tbprofiler.variants.txt + └── nf-reports + ├── tbprofiler-dag.dot + ├── tbprofiler-report.html + └── tbprofiler-timeline.html +``` + +### Per-Sample Results + +| File | Description | +|------|-------------| +| `*.results.txt` | Text file containing TBProfiler resistance and strain typing results | +| `*.results.json` | JSON file containing detailed TBProfiler analysis results | +| `*.results.csv` | CSV file containing TBProfiler results in tabular format | +| `bam/*.bam` | BAM file with read alignment details against reference genomes | +| `vcf/*.targets.csq.vcf.gz` | VCF file with variant annotations and functional consequences | + +### Merged Results + +| File | Description | +|------|-------------| +| `tbprofiler.tsv` | Merged TSV file containing TBProfiler results from all samples | + + +### Audit Trail + +Below are files that can assist you in understanding which parameters and program versions were used. + +#### Logs + +Each process that is executed will have a folder named `logs`. In this folder are helpful +files for you to review if the need ever arises. 
+ +| Extension | Description | +|--------------|-------------| +| .begin | An empty file used to designate the process started | +| .err | Contains STDERR outputs from the process | +| .log | Contains both STDERR and STDOUT outputs from the process | +| .out | Contains STDOUT outputs from the process | +| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | +| .sh | The script executed by bash for the process | +| .trace | The Nextflow trace report for the process | +| versions.yml | A YAML formatted file with program versions | + +#### Nextflow Reports + +These Nextflow reports provide a great summary of your run. These can be used to optimize +resource usage and estimate expected costs if using cloud platforms. + +| Filename | Description | +|----------|-------------| +| tbprofiler-dag.dot | The Nextflow [DAG visualization](https://docs.seqera.io/nextflow/reports#workflow-diagram) | +| tbprofiler-report.html | The Nextflow [Execution Report](https://docs.seqera.io/nextflow/reports#execution-report) | +| tbprofiler-timeline.html | The Nextflow [Timeline Report](https://docs.seqera.io/nextflow/reports#execution-timeline) | +| tbprofiler-trace.txt | The Nextflow [Trace](https://docs.seqera.io/nextflow/reports#trace-file) report | + +## Parameters + +### Required Parameters + +Define where the pipeline should find input data and save output data. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bactopia` | string | | The path to bactopia results to use as inputs | + +### TB-Profiler Profile Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--tbprofiler_call_whole_genome` | boolean | `false` | Call whole genome | +| `--tbprofiler_mapper` | string | `bwa` | Mapping tool to use. 
If you are using nanopore data it will default to minimap2 (choices: `bwa`, `minimap2`, `bowtie2`, `bwa-mem2`) | +| `--tbprofiler_caller` | string | `freebayes` | Variant calling tool to use (choices: `bcftools`, `gatk`, `freebayes`) | +| `--tbprofiler_opts` | string | | Extra options in quotes for TBProfiler | + +### TB-Profiler Collate Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--tbprofiler_itol` | boolean | `false` | Generate itol config files | +| `--tbprofiler_full` | boolean | `false` | Output mutations in main result file | +| `--tbprofiler_all_variants` | boolean | `false` | Output all variants in variant matrix | +| `--tbprofiler_mark_missing` | boolean | `false` | An asterisk will be used to mark predictions which are affected by missing data at a drug resistance position | + +
+Filtering Parameters + +Use these parameters to specify which samples to include or exclude. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--include` | string | | A text file containing sample names (one per line) to include in the analysis | +| `--exclude` | string | | A text file containing sample names (one per line) to exclude from the analysis | +
+ +
+Optional Parameters + +These optional parameters can be useful in certain settings. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--outdir` | string | `bactopia` | Base directory to write results to | +
+ +
+Nextflow Profile Parameters + +Parameters to fine-tune your Nextflow setup. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--datasets_cache` | string | `/.bactopia/datasets` | Directory where downloaded datasets should be stored. | +
+ +
+Helpful Parameters + +Uncommonly used parameters that might be useful. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--wf` | string | `bactopia` | Specify which workflow or Bactopia Tool to execute | +| `--list_wfs` | boolean | | List the available workflows and Bactopia Tools to use with '--wf' | +| `--help_all` | boolean | | An alias for --help --show_hidden_params | +| `--version` | boolean | | Display version text. | +
+ +## Composition + +This workflow uses the following subworkflows: + +- [tbprofiler](/developers/subworkflows/tbprofiler) - Profiling tool for Mycobacterium tuberculosis to detect resistance and strain type. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [TBProfiler](https://github.com/jodyphelan/TBProfiler) + Phelan JE, O'Sullivan DM, Machado D, Ramos J, Oppong YEA, Campino S, O'Grady J, McNerney R, Hibberd ML, Viveiros M, Huggett JF, Clark TG [Integrating informatics tools and portable sequencing technology for rapid detection of resistance to anti-tuberculous drugs.](https://doi.org/10.1186/s13073-019-0650-x) _Genome Med_ 11, 41 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/workflows/bactopia-tools/tbprofiler) diff --git a/bin/__pycache__/generator_utils.cpython-312.pyc b/bin/__pycache__/generator_utils.cpython-312.pyc new file mode 100644 index 00000000..5605252e Binary files /dev/null and b/bin/__pycache__/generator_utils.cpython-312.pyc differ diff --git a/bin/__pycache__/generator_utils.cpython-313.pyc b/bin/__pycache__/generator_utils.cpython-313.pyc new file mode 100644 index 00000000..cf788c08 Binary files /dev/null and b/bin/__pycache__/generator_utils.cpython-313.pyc differ diff --git a/bin/__pycache__/generator_utils.cpython-314.pyc b/bin/__pycache__/generator_utils.cpython-314.pyc new file mode 100644 index 00000000..59c184b5 Binary files /dev/null and b/bin/__pycache__/generator_utils.cpython-314.pyc differ diff --git a/bin/bactopia-to-md.py b/bin/bactopia-to-md.py deleted file mode 100755 index 39cc4695..00000000 --- a/bin/bactopia-to-md.py +++ /dev/null @@ -1,322 +0,0 @@ -#! 
/usr/bin/env python3 -""" -Convert the various `meta.yml` and `params.json` files into Markdown format -""" -import json -import yaml -from pathlib import Path -WORKFLOWS = { - 'bactopia': { - 'modules': ['gather', 'qc', 'assembler', 'prokka', 'bakta', 'sketcher', 'amrfinderplus', 'mlst', 'merlin'], - 'template': 'bactopia/bactopia-full.j2' - } -} -SUBWORKFLOWS = 'params.available_workflows.bactopiatools.subworkflows' -MODULES = 'params.available_workflows.bactopiatools.modules' -MODULES_RENAME = { - 'abricate_run': 'abricate', - 'amrfinderplus_run': 'amrfinderplus', - 'ariba_run': 'ariba', - 'bakta_run': 'bakta', - 'blast_blastn': 'blastn', - 'blast_tblastn': 'tblastn', - 'blast_blastp': 'blastp', - 'blast_blastx': 'blastx', - 'blast_tblastx': 'tblastx', - 'checkm_lineagewf': 'checkm', - 'genotyphi_parse': 'genotyphi', - 'midas_species': 'midas', - 'mobsuite_recon': 'mobsuite', - 'mykrobe_predict': 'mykrobe', - 'rgi_main': 'rgi', - 'snippy_run': 'snippy' -} -IGNORE_LIST = [ - # Bactopia modules - "LIST_OF_MODULES", - "plasmidid" -] - -WORKFLOW_RENAME = { - 'mlst': 'sequence-typing/mlst', - 'amrfinderplus': 'antimicrobial-resistance/amrfinderplus', - 'prokka': 'annotator/prokka', - 'bakta': 'annotator/bakta', -} - -def get_citations(citation_path): - """ """ - citations = {} - module_citations = {} - with open(f'{citation_path}/citations.yml', "rt") as citations_fh: - citations = yaml.safe_load(citations_fh) - for group, refs in citations.items(): - for ref, vals in refs.items(): - module_citations[ref] = vals - return [citations, module_citations] - -def get_bactopia_citations(docs_path): - citations = {} - with open(f'{docs_path}/docs/data/citations.yml', "rt") as citations_fh: - citations = yaml.safe_load(citations_fh) - return citations - -def get_enhancements(docs_path): - enhancements = {} - with open(f'{docs_path}/docs/data/contributions.yml', "rt") as enhancements_fh: - enhancements = yaml.safe_load(enhancements_fh) - return enhancements - -def 
get_generic_params(generic_path): - generic = {} - for params_json in sorted(Path(generic_path).glob('*.json')): - print(params_json) - with open(params_json, "rt") as params_fh: - generic[str(params_json.stem)] = json.load(params_fh) - return generic - -def get_subworkflows(subworkflow_path, is_subworkflow, is_module): - """ """ - subworkflows = {} - total_modules = 0 - total_subworkflows = 0 - for meta_yml in sorted(Path(subworkflow_path).rglob('*meta.yml')): - subworkflow_name = str(meta_yml).replace('/meta.yml', '').split('local/')[1] - with open(meta_yml, "rt") as meta_fh: - subworkflows[subworkflow_name] = yaml.safe_load(meta_fh) - if subworkflow_name in is_subworkflow: - total_subworkflows += 1 - subworkflows[subworkflow_name]['is_subworkflow'] = True - subworkflows[subworkflow_name]['is_module'] = False - elif subworkflow_name in is_module: - total_modules += 1 - subworkflows[subworkflow_name]['is_subworkflow'] = False - subworkflows[subworkflow_name]['is_module'] = True - else: - subworkflows[subworkflow_name]['is_subworkflow'] = False - subworkflows[subworkflow_name]['is_module'] = False - return [subworkflows, total_subworkflows, total_modules] - -def get_modules(module_path): - """ Find all modules in modules path. 
""" - modules = {} - for params_json in sorted(Path(module_path).rglob('*params.json')): - module_name = str(params_json).replace('/params.json', '') - if 'local' in module_name: - module_name = module_name.split('local/bactopia/')[1] - else: - # nf-core module - module_name = module_name.split('/nf-core/')[1].replace('/', '_').replace('_run', '') - if module_name in MODULES_RENAME: - module_name = MODULES_RENAME[module_name] - print(f"{module_name} - {params_json}") - try: - with open(params_json, "rt") as params_fh: - modules[module_name] = json.load(params_fh) - except Exception as e: - IGNORE_LIST.append(module_name) - print(f"skipping {module_name} - {params_json} - {e}") - return modules - -def read_nextflow_config(nf_config): - """ """ - config = {} - with open(nf_config, 'rt') as nf_fh: - for line in nf_fh: - if "=" in line: - k,v = line.rstrip().split(' = ') - config[k] = nf_to_list(v) if v.startswith('[') else v - return config - -def nf_to_list(param): - return param.replace("'","").replace('[', '').replace(' ', "").replace(']', '').split(',') - -def format_params(params, exclude=[]): - params_md = [] - for group in params["definitions"].keys(): - if group not in exclude: - if len(params["definitions"][group]["properties"].keys()): - params_md.append(f'\n### {params["definitions"][group]["title"]}') - params_md.append(f"{params['definitions'][group]['description']}\n") - params_md.append(f'| Parameter | Description |') - params_md.append(f'|:---|---|') - for parameter in params["definitions"][group]["properties"].keys(): - param = params["definitions"][group]["properties"][parameter] - default_val = param["default"] if "default" in param else "" - default_val = f', **Default:** `{default_val}`' if default_val else "" - params_md.append( - f'| ` --{parameter}` |' - f' {param["description"].rstrip()}
**Type:** `{param["type"]}`{default_val} |' - ) - return params_md - -if __name__ == '__main__': - import argparse as ap - import glob - import sys - import time - from jinja2 import Environment, FileSystemLoader - - parser = ap.ArgumentParser( - prog='bactopia-to-md', - conflict_handler='resolve', - description=('Convert various meta.yml and params.json files into markdown') - ) - - parser.add_argument('bactopia_repo', metavar="STR", type=str, - help='Directory for the Bactopia repo') - parser.add_argument('docs_repo', metavar="STR", type=str, - help='Directory for the Bactopia Docs repo') - parser.add_argument('--output', metavar='STR', type=str, default="./docs", - help='Where to output files') - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(0) - - args = parser.parse_args() - - nf_config = read_nextflow_config(f'{args.bactopia_repo}/nextflow_config.txt') - subworkflows, total_subworkflows, total_modules = get_subworkflows(f'{args.bactopia_repo}/subworkflows', nf_config[SUBWORKFLOWS], nf_config[MODULES]) - modules = get_modules(f'{args.bactopia_repo}/modules') - generic = get_generic_params(f'{args.bactopia_repo}/conf/schema') - citations, module_citations = get_citations(args.bactopia_repo) - enhancements = get_enhancements(args.docs_repo) - - file_loader = FileSystemLoader('templates') - env = Environment(loader=file_loader) - - # Build Bactopia Tools Page - template = env.get_template('bactopia-tools.j2') - output = template.render( - tools=subworkflows, - total_bactopia_tools=total_subworkflows + total_modules, - total_subworkflows=total_subworkflows, - total_modules=total_modules - ) - with open(f'{args.docs_repo}/docs/bactopia-tools/index.md', 'wt') as md_fh: - md_fh.write(output) - - # Build each Bactopia Tool Page - for name, vals in sorted(subworkflows.items()): - if name not in IGNORE_LIST: - is_bactopia_tool = True if name in nf_config[SUBWORKFLOWS] or name in nf_config[MODULES] else False - if is_bactopia_tool: - module_params = [] - for 
module in vals['modules']: - module_name = module - if module_name not in IGNORE_LIST: - if module_name in MODULES_RENAME: - module_name = MODULES_RENAME[module_name] - print(f"working on {name} - {module}") - module_params += format_params(modules[module_name]) - params = { - 'bactopia_tools': '\n'.join(format_params(generic["bactopia-tools"])), - 'module': '\n'.join(module_params), - 'generic': '\n'.join(format_params(generic["generic"])) - } - if "docs" in vals: - print(f"working on {name} - {module}") - template = env.get_template('bactopia-tools-single.j2') - output = template.render( - meta=vals, - params=params, - citations=module_citations - ) - with open(f'{args.docs_repo}/docs/bactopia-tools/{name}.md', 'wt') as md_fh: - md_fh.write(output) - - # Build Acknowledgements Page - template = env.get_template('acknowledgements.j2') - output = template.render( - citations=citations, - total=len(citations["datasets_ariba"])+len(citations["datasets_generic"])+len(citations["datasets_minmer"])+len(citations["tools"]), - total_datasets=len(citations["datasets_ariba"])+len(citations["datasets_generic"])+len(citations["datasets_minmer"]) - ) - with open(f'{args.docs_repo}/docs/impact-and-outreach/acknowledgements.md', 'wt') as md_fh: - md_fh.write(output) - - # Build enhancements Page - template = env.get_template('enhancements.j2') - output = template.render( - total_contributions=len(enhancements["tools"])+len(enhancements["conda_submissions"])+len(enhancements["conda_updates"])+len(enhancements["nfcore_modules"])+len(enhancements["other"]), - conda_submissions=enhancements["conda_submissions"], - conda_updates=enhancements["conda_updates"], - nfcore_modules=enhancements["nfcore_modules"], - tools=enhancements["tools"], - other=enhancements["other"] - ) - with open(f'{args.docs_repo}/docs/impact-and-outreach/enhancements.md', 'wt') as md_fh: - md_fh.write(output) - - # Build mkdocs.yml Page - template = env.get_template('mkdocs.j2') - output = 
template.render(tools=subworkflows) - with open(f'{args.docs_repo}/mkdocs.yml', 'wt') as md_fh: - md_fh.write(output) - - # Build beginner's guide - print(f"Working on Beginners Guide") - template = env.get_template('bactopia/bactopia-beginners.j2') - params = { - 'bactopia': '\n'.join(format_params(generic["bactopia"], exclude=['dataset_parameters'])), - 'module': '\n'.join(module_params), - 'generic': '\n'.join(format_params(generic["generic"])) - } - output = template.render( - params=params - ) - with open(f'{args.docs_repo}/docs/beginners-guide.md', 'wt') as md_fh: - md_fh.write(output) - - # Build workflow pages - for workflow, wf_vals in WORKFLOWS.items(): - print(f"Working on {workflow}") - template = env.get_template(wf_vals['template']) - params = { - 'bactopia': '\n'.join(format_params(generic["bactopia"])), - 'generic': '\n'.join(format_params(generic["generic"])) - } - vals = {} - for subworkflow in wf_vals['modules']: - print(f"Working on {subworkflow}") - vals[subworkflow] = subworkflows[subworkflow] - - module_params = [] - for module in vals[subworkflow]['modules']: - module_name = module - if module_name not in IGNORE_LIST: - if module_name in MODULES_RENAME: - module_name = MODULES_RENAME[module_name] - print(f"working on {subworkflow} - {module}") - module_params += format_params(modules[module_name]) - - params[subworkflow] = '\n'.join(module_params) - template2 = env.get_template('bactopia/bactopia-steps.j2') - output = template2.render( - meta=vals[subworkflow], - params={'module': params[subworkflow]}, - citations=module_citations - ) - - with open(f'{args.docs_repo}/docs/bactopia/{WORKFLOW_RENAME[subworkflow] if subworkflow in WORKFLOW_RENAME else subworkflow}.md', 'wt') as md_fh: - md_fh.write(output) - - output = template.render( - meta=vals, - params=params, - citations=module_citations - ) - with open(f'{args.docs_repo}/docs/full-guide.md', 'wt') as md_fh: - md_fh.write(output) - - # Build citations page - print(f"Working on Citations") 
- bactopia_citations = get_bactopia_citations(args.docs_repo) - template = env.get_template('citations.j2') - output = template.render( - citations=bactopia_citations, - total=len(bactopia_citations['citations']) - ) - with open(f'{args.docs_repo}/docs/impact-and-outreach/citations.md', 'wt') as md_fh: - md_fh.write(output) diff --git a/bin/convert-mkdocs-to-mdx.py b/bin/convert-mkdocs-to-mdx.py deleted file mode 100755 index 37cf19d5..00000000 --- a/bin/convert-mkdocs-to-mdx.py +++ /dev/null @@ -1,364 +0,0 @@ -#!/usr/bin/env python3 -"""Convert mkdocs-material markdown to Docusaurus-compatible markdown.""" - -import argparse -import difflib -import re -import sys -from pathlib import Path - -ADMONITION_TYPE_MAP = { - "note": "note", - "tip": "tip", - "info": "info", - "warning": "warning", - "danger": "danger", - "question": "info", - "error": "danger", - "success": "tip", - "failure": "danger", - "example": "info", - "abstract": "info", -} - -DEFAULT_EXCLUDES = ["blog", "custom", "data", "assets", "impact-and-outreach"] - - -def convert_code_fences(content: str) -> str: - """Convert ```{lang} and ``` { .lang .attr } to ```lang.""" - lines = content.split("\n") - result = [] - for line in lines: - # ```{bash} or ```{tsv} etc. 
- m = re.match(r"^(\s*)```\{(\w+)\}\s*$", line) - if m: - result.append(f"{m.group(1)}```{m.group(2)}") - continue - # ``` { .bash .copy } or ``` { .bash .no-copy } - m = re.match(r"^(\s*)```\s*\{\s*\.(\w+)(?:\s+\.\w+)*\s*\}\s*$", line) - if m: - result.append(f"{m.group(1)}```{m.group(2)}") - continue - result.append(line) - return "\n".join(result) - - -def convert_zoom_images(content: str) -> str: - """Remove wrappers around images.""" - return re.sub( - r'\s*\n(!\[[^\]]*\]\([^)]+\))\s*\n', - r"\1", - content, - ) - - -def _parse_grid_card(item_text: str) -> dict: - """Parse a single grid card item into title, description, and link.""" - lines = [l.strip() for l in item_text.strip().split("\n")] - - title = "" - description_parts = [] - link = "" - past_separator = False - - for line in lines: - if not title: - # First line: icon + __Title__ - m = re.search(r"__(.+?)__", line) - if m: - title = m.group(1) - continue - if line == "---": - past_separator = True - continue - if past_separator: - # Check for link line: [:octicons-...: Text](url) or [Text](url) - link_match = re.match( - r"\[:?[a-z]+-[a-z0-9-]+:\s*(.+?)\]\((.+?)\)$", line - ) - if not link_match: - link_match = re.match(r"\[(.+?)\]\((.+?)\)$", line) - if link_match: - link = f"[{link_match.group(1)}]({link_match.group(2)})" - elif line: - description_parts.append(line) - - return { - "title": title, - "description": " ".join(description_parts), - "link": link, - } - - -def convert_grid_cards(content: str) -> str: - """Convert
blocks to markdown lists.""" - pattern = re.compile( - r'
\s*\n(.*?)\n
', - re.DOTALL, - ) - - def replace_grid(m): - block = m.group(1) - # Split into individual card items (each starts with "- ") - items = re.split(r"\n- ", block) - # First item may start with "- " after leading whitespace - items = [items[0].lstrip().removeprefix("- ")] + items[1:] - items = [i for i in items if i.strip()] - - result_lines = [] - for item_text in items: - card = _parse_grid_card(item_text) - parts = [f"- **{card['title']}**"] - if card["description"]: - parts.append(f" -- {card['description']}") - if card["link"]: - if card["description"]: - parts.append(f". {card['link']}") - else: - parts.append(f" -- {card['link']}") - result_lines.append("".join(parts)) - - return "\n".join(result_lines) - - return pattern.sub(replace_grid, content) - - -def _collect_indented_block(lines: list[str], start: int) -> list[str]: - """Collect lines belonging to a 4-space-indented block. - - Tracks code fence state to avoid ending the block on blank lines - inside fenced code. - """ - block = [] - i = start - in_code_fence = False - - while i < len(lines): - line = lines[i] - stripped = line.rstrip() - - # Track code fences within the indented block - if stripped.startswith(" "): - dedented = stripped[4:] - if re.match(r"^```", dedented) and not in_code_fence: - in_code_fence = True - elif re.match(r"^```\s*$", dedented) and in_code_fence: - in_code_fence = False - - if in_code_fence: - block.append(line) - i += 1 - continue - - if stripped == "": - # Blank line: check if block continues after it - j = i + 1 - while j < len(lines) and lines[j].rstrip() == "": - j += 1 - if j < len(lines) and lines[j].startswith(" "): - block.append(line) - i += 1 - continue - else: - break - elif line.startswith(" "): - block.append(line) - i += 1 - else: - break - - return block - - -def _dedent_line(line: str, spaces: int = 4) -> str: - """Strip exactly `spaces` leading spaces from a line.""" - if line.startswith(" " * spaces): - return line[spaces:] - return line - - -def 
convert_admonitions(content: str) -> str: - """Convert !!! and ??? admonition syntax to Docusaurus format.""" - lines = content.split("\n") - result = [] - i = 0 - - while i < len(lines): - line = lines[i] - - # !!! type "Title" or !!! type - adm_match = re.match(r'^!!! (\w+)(?: "(.+)")?', line) - if adm_match: - raw_type = adm_match.group(1) - title = adm_match.group(2) - adm_type = ADMONITION_TYPE_MAP.get(raw_type, "note") - - if title: - result.append(f":::{adm_type}[{title}]") - else: - result.append(f":::{adm_type}") - - i += 1 - block = _collect_indented_block(lines, i) - for bl in block: - result.append(_dedent_line(bl)) - i += len(block) - - result.append(":::") - continue - - # ??? type "Title" - col_match = re.match(r'^\?\?\?\+? (\w+) "(.+)"', line) - if col_match: - title = col_match.group(2) - - result.append("
") - result.append(f"{title}") - result.append("") - - i += 1 - block = _collect_indented_block(lines, i) - for bl in block: - result.append(_dedent_line(bl)) - i += len(block) - - result.append("") - result.append("
") - continue - - result.append(line) - i += 1 - - return "\n".join(result) - - -def convert_fa_icons(content: str) -> str: - """Strip Font Awesome tags.""" - return re.sub(r'\s*', "", content) - - -def convert_icon_syntax(content: str) -> str: - """Strip :material-*:, :octicons-*:, :simple-*: icon syntax.""" - # Icon with attributes: :material-clock-fast:{ .lg .middle } - content = re.sub(r":[a-z]+-[a-z0-9-]+:\{[^}]*\}\s*", "", content) - # Standalone icon: :octicons-arrow-right-24: - # In link text: [:octicons-arrow-right-24: Text] -> [Text] - content = re.sub(r"\[:[a-z]+-[a-z0-9-]+:\s*", "[", content) - # Standalone outside links - content = re.sub(r":[a-z]+-[a-z0-9-]+:\s*", "", content) - return content - - -def convert_image_attributes(content: str) -> str: - """Convert ![alt](src){ width="X" } to tags.""" - - def replace_img(m): - alt = m.group(1).replace("\n", " ").strip() - src = m.group(2) - width = m.group(3) - return f'{alt}' - - return re.sub( - r'!\[([^\]]*)\]\(([^)]+)\)\{\s*width="([^"]+)"\s*\}', - replace_img, - content, - flags=re.DOTALL, - ) - - -def convert_file(content: str) -> str: - """Apply all conversions in pipeline order.""" - content = convert_code_fences(content) - content = convert_zoom_images(content) - content = convert_grid_cards(content) - content = convert_admonitions(content) - content = convert_fa_icons(content) - content = convert_icon_syntax(content) - content = convert_image_attributes(content) - return content - - -def find_files( - paths: list[str], exclude_dirs: list[str], base_dir: Path -) -> list[Path]: - """Find all .md files to process, respecting exclusions.""" - files = [] - for path_str in paths: - p = base_dir / path_str - if p.is_file(): - files.append(p) - elif p.is_dir(): - for md in sorted(p.rglob("*.md")): - try: - rel = md.relative_to(base_dir / "docs") - if any(rel.parts[0] == exc for exc in exclude_dirs): - continue - except ValueError: - pass - files.append(md) - return files - - -def main(): - parser = 
argparse.ArgumentParser( - description="Convert mkdocs-material markdown to Docusaurus format" - ) - parser.add_argument( - "paths", - nargs="*", - default=["docs", "cli", "impact"], - help="Files or directories to process (relative to project root)", - ) - parser.add_argument( - "--dry-run", - action="store_true", - help="Preview changes without modifying files", - ) - parser.add_argument( - "--verbose", "-v", action="store_true", help="Enable verbose logging" - ) - parser.add_argument( - "--exclude", - nargs="*", - default=DEFAULT_EXCLUDES, - help="Directories to exclude within docs/", - ) - args = parser.parse_args() - - base_dir = Path(__file__).resolve().parent.parent - files = find_files(args.paths, args.exclude, base_dir) - - if args.verbose: - print(f"Found {len(files)} files to process", file=sys.stderr) - - changed = 0 - for filepath in files: - original = filepath.read_text() - converted = convert_file(original) - - if original == converted: - if args.verbose: - print(f" (no changes) {filepath.relative_to(base_dir)}", file=sys.stderr) - continue - - changed += 1 - rel = filepath.relative_to(base_dir) - - if args.dry_run: - diff = difflib.unified_diff( - original.splitlines(keepends=True), - converted.splitlines(keepends=True), - fromfile=f"a/{rel}", - tofile=f"b/{rel}", - ) - sys.stdout.writelines(diff) - else: - filepath.write_text(converted) - if args.verbose: - print(f" converted {rel}", file=sys.stderr) - - print(f"\n{changed} file(s) {'would be ' if args.dry_run else ''}modified", file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/bin/generate-acknowledgements.py b/bin/generate-acknowledgements.py new file mode 100644 index 00000000..98fdf9cd --- /dev/null +++ b/bin/generate-acknowledgements.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +"""Generate Docusaurus MD page for Bactopia acknowledgements from bactopia.json.""" +import argparse +import json +import sys +from pathlib import Path + +from generator_utils import 
create_jinja_env + + +CATEGORY_ORDER = [ + ('datasets_ariba', 'Ariba Reference Datasets'), + ('datasets_minmer', 'Minmer Datasets'), + ('datasets_generic', 'Everything Else'), +] + + +def load_acknowledgements(json_path): + """Load citations from bactopia.json and group by category.""" + with open(json_path) as f: + data = json.load(f) + + citations = data.get('citations', {}) + + datasets = {} + tools = {} + influences = {} + + for key, entry in citations.items(): + cat = entry.get('category', '') + if cat.startswith('datasets_'): + datasets.setdefault(cat, []).append(entry) + elif cat == 'tools': + tools[key] = entry + elif cat == 'influences': + influences[key] = entry + + dataset_sections = [] + for cat_key, cat_title in CATEGORY_ORDER: + items = datasets.get(cat_key, []) + if items: + dataset_sections.append({'title': cat_title, 'entries': items}) + + total_datasets = sum(len(s['entries']) for s in dataset_sections) + + return { + 'influences': influences, + 'dataset_sections': dataset_sections, + 'total_datasets': total_datasets, + 'tools': tools, + 'total': total_datasets + len(tools) + len(influences), + } + + +def main(): + parser = argparse.ArgumentParser(description='Generate acknowledgements MD page') + parser.add_argument('bactopia_json', help='Path to bactopia.json') + parser.add_argument('--output', '-o', default='impact/acknowledgements.md', + help='Output file path (default: impact/acknowledgements.md)') + parser.add_argument('--template-dir', '-t', default='templates', + help='Template directory (default: templates)') + args = parser.parse_args() + + json_path = Path(args.bactopia_json) + if not json_path.exists(): + print(f'Error: {json_path} not found.', file=sys.stderr) + sys.exit(1) + + ack = load_acknowledgements(json_path) + + env = create_jinja_env(args.template_dir) + template = env.get_template('acknowledgements.j2') + + page = template.render(**ack) + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, 
exist_ok=True) + output_path.write_text(page) + + print(f'Generated acknowledgements page: {ack["total_datasets"]} datasets, ' + f'{len(ack["tools"])} tools, {len(ack["influences"])} influences at {output_path}') + + +if __name__ == '__main__': + main() diff --git a/bin/generate-citations.py b/bin/generate-citations.py new file mode 100644 index 00000000..8ba2608d --- /dev/null +++ b/bin/generate-citations.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +"""Generate Docusaurus MD page for Bactopia citations from citations.yml.""" +import argparse +import sys +from pathlib import Path + +import yaml + +from generator_utils import create_jinja_env + + +def load_citations(citations_path): + """Load citations from YAML and extract year from date.""" + with open(citations_path) as f: + raw = yaml.safe_load(f) + + citations = raw.get('citations', []) + for entry in citations: + d = entry.get('date', '') + if hasattr(d, 'strftime'): + entry['year'] = d.strftime('%Y') + else: + entry['year'] = str(d)[:4] + return citations + + +def main(): + parser = argparse.ArgumentParser(description='Generate citations MD page') + parser.add_argument('citations_yaml', help='Path to citations.yml') + parser.add_argument('--output', '-o', default='impact/citations.md', + help='Output file path (default: impact/citations.md)') + parser.add_argument('--template-dir', '-t', default='templates', + help='Template directory (default: templates)') + args = parser.parse_args() + + citations_path = Path(args.citations_yaml) + if not citations_path.exists(): + print(f'Error: {citations_path} not found.', file=sys.stderr) + sys.exit(1) + + citations = load_citations(citations_path) + + env = create_jinja_env(args.template_dir) + template = env.get_template('citations.j2') + + page = template.render(total=len(citations), citations=citations) + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(page) + + print(f'Generated citations page with 
{len(citations)} entries at {output_path}') + + +if __name__ == '__main__': + main() diff --git a/bin/generate-cli.py b/bin/generate-cli.py new file mode 100644 index 00000000..60694c45 --- /dev/null +++ b/bin/generate-cli.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +"""Generate Docusaurus MDX pages for bactopia-py CLI commands from parsed metadata.""" +import argparse +import json +import sys +from pathlib import Path + +from generator_utils import escape_mdx, format_default, create_jinja_env + + +def build_option_sections(cmd_data): + """Organize options into groups, using option_groups ordering if available.""" + groups_spec = cmd_data.get("option_groups", []) + options = {o["name"]: o for o in cmd_data.get("options", [])} + + if not groups_spec: + return [{"name": "Options", "options": list(options.values())}] + + result = [] + used = set() + for group in groups_spec: + group_opts = [] + for opt_flag in group.get("options", []): + for opt in options.values(): + if opt_flag in opt.get("opts", []) and opt["name"] not in used: + group_opts.append(opt) + used.add(opt["name"]) + break + if group_opts: + result.append({"name": group["name"], "options": group_opts}) + + ungrouped = [o for o in options.values() if o["name"] not in used] + if ungrouped: + result.append({"name": "Other Options", "options": ungrouped}) + + return result + + +def build_command_context(cmd_data): + """Build template context for a single CLI command page.""" + usage_parts = [cmd_data["name"]] + if cmd_data.get("is_group"): + usage_parts.append("COMMAND") + for arg in cmd_data.get("arguments", []): + usage_parts.append(arg["human_readable_name"]) + usage_parts.append("[OPTIONS]") + + option_groups = build_option_sections(cmd_data) + + if cmd_data.get("is_group") and cmd_data.get("subcommands"): + for sub in cmd_data["subcommands"].values(): + sub["grouped_options"] = build_option_sections(sub) + + return { + "cmd": cmd_data, + "usage": " ".join(usage_parts), + "option_groups": option_groups, 
+ } + + +def main(): + parser = argparse.ArgumentParser( + description="Generate CLI reference MDX pages from parsed cli.json" + ) + parser.add_argument("catalog", help="Path to cli.json") + parser.add_argument( + "--output-dir", "-o", default="developers/cli", + help="Output directory for MDX files", + ) + parser.add_argument( + "--template-dir", "-t", default="templates", + help="Template directory", + ) + args = parser.parse_args() + + catalog_path = Path(args.catalog) + if not catalog_path.exists(): + print(f"Error: {catalog_path} not found. Run parse-cli.py first.", file=sys.stderr) + sys.exit(1) + + with open(catalog_path) as f: + data = json.load(f) + + env = create_jinja_env(args.template_dir) + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + commands = data.get("commands", {}) + template = env.get_template("cli_command.j2") + + for key in sorted(commands.keys()): + context = build_command_context(commands[key]) + page = template.render(**context) + (output_dir / f"{key}.mdx").write_text(page) + + index_template = env.get_template("cli_index.j2") + index_page = index_template.render( + version=data["meta"]["version"], + categories=data["categories"], + commands=commands, + ) + (output_dir / "index.mdx").write_text(index_page) + + print(f"Generated {len(commands)} CLI pages + index in {output_dir}/") + + +if __name__ == "__main__": + main() diff --git a/bin/generate-enhancements.py b/bin/generate-enhancements.py new file mode 100644 index 00000000..33142adf --- /dev/null +++ b/bin/generate-enhancements.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +"""Generate Docusaurus MD page for Bactopia enhancements from contributions.yml.""" +import argparse +import sys +from pathlib import Path + +import yaml + +from generator_utils import create_jinja_env + + +def main(): + parser = argparse.ArgumentParser(description='Generate enhancements MD page') + parser.add_argument('contributions_yaml', help='Path to contributions.yml') + 
parser.add_argument('--output', '-o', default='impact/enhancements.md', + help='Output file path (default: impact/enhancements.md)') + parser.add_argument('--template-dir', '-t', default='templates', + help='Template directory (default: templates)') + args = parser.parse_args() + + yaml_path = Path(args.contributions_yaml) + if not yaml_path.exists(): + print(f'Error: {yaml_path} not found.', file=sys.stderr) + sys.exit(1) + + with open(yaml_path) as f: + data = yaml.safe_load(f) + + tools = data.get('tools', []) + conda_submissions = data.get('conda_submissions', []) + conda_updates = data.get('conda_updates', []) + nfcore_modules = data.get('nfcore_modules', []) + other = data.get('other', []) + + total = (len(tools) + len(conda_submissions) + len(conda_updates) + + len(nfcore_modules) + len(other)) + + env = create_jinja_env(args.template_dir) + template = env.get_template('enhancements.j2') + + page = template.render( + tools=tools, + conda_submissions=conda_submissions, + conda_updates=conda_updates, + nfcore_modules=nfcore_modules, + other=other, + total_contributions=total, + ) + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(page) + + print(f'Generated enhancements page: {len(tools)} tools, ' + f'{len(conda_submissions)} submissions, {len(conda_updates)} updates, ' + f'{len(nfcore_modules)} nf-core, {len(other)} other ({total} total) ' + f'at {output_path}') + + +if __name__ == '__main__': + main() diff --git a/bin/generate-llms-catalog.py b/bin/generate-llms-catalog.py new file mode 100644 index 00000000..11616ab5 --- /dev/null +++ b/bin/generate-llms-catalog.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python3 +"""Generate llms.txt and catalog.json for LLM consumption of Bactopia docs.""" +import argparse +import json +import re +import sys +from datetime import datetime, timezone +from pathlib import Path + +import yaml + + +SITE_NAME = "Bactopia" +SITE_TAGLINE = ( + "An extensive workflow to 
process Nanopore and Illumina sequencing " + "for bacterial genomes." +) + +SECTIONS = [ + { + "key": "bactopia", + "title": "Bactopia", + "content_dir": "docs", + "route_base": "", + "description": ( + "Core Bactopia documentation -- installation, quick start, " + "tutorials, and workflow reference." + ), + }, + { + "key": "bactopia-tools", + "title": "Bactopia Tools", + "content_dir": "bactopia-tools", + "route_base": "bactopia-tools", + "description": ( + "Additional analysis workflows that run specific tools on " + "existing Bactopia results." + ), + }, + { + "key": "bactopia-pipelines", + "title": "Bactopia Pipelines", + "content_dir": "bactopia-pipelines", + "route_base": "bactopia-pipelines", + "description": ( + "Complete analysis pipelines built from Bactopia's subworkflows " + "and modules." + ), + }, + { + "key": "developers", + "title": "Developers", + "content_dir": "developers", + "route_base": "developers", + "description": ( + "Developer reference for CLI commands, subworkflows, modules, " + "and AI skills." + ), + }, + { + "key": "impact", + "title": "Impact & Outreach", + "content_dir": "impact", + "route_base": "impact-and-outreach", + "description": ( + "Citations, acknowledgements, presentations, and community " + "contributions." 
+ ), + }, + { + "key": "blog", + "title": "Blog", + "content_dir": "blog", + "route_base": "blog", + "description": "News, tutorials, and updates from the Bactopia project.", + }, +] + +FRONTMATTER_RE = re.compile(r"^---\n(.+?)\n---", re.DOTALL) + + +def parse_frontmatter(file_path): + """Extract YAML frontmatter from a markdown file.""" + text = file_path.read_text(encoding="utf-8") + match = FRONTMATTER_RE.match(text) + if not match: + return None + try: + return yaml.safe_load(match.group(1)) + except yaml.YAMLError: + return None + + +def file_to_path(file_path, content_dir, route_base, frontmatter): + """Convert a file path to its URL path on the site.""" + slug = frontmatter.get("slug") + rel = file_path.relative_to(content_dir) + + # Blog posts always use slug + if route_base == "blog": + if slug: + return f"/blog/{slug}" + stem = rel.parent.name if rel.name.startswith("index") else rel.stem + return f"/blog/{stem}" + + # Docs sections + if slug is not None: + if slug == "/": + return f"/{route_base}" if route_base else "/" + if slug.startswith("/"): + return f"/{route_base}{slug}" if route_base else slug + return f"/{route_base}/{slug}" if route_base else f"/{slug}" + + # Index files without slug + if rel.stem == "index": + subdir = str(rel.parent) + if subdir == ".": + return f"/{route_base}" if route_base else "/" + return f"/{route_base}/{subdir}" if route_base else f"/{subdir}" + + # Regular files + name = str(rel.with_suffix("")) + return f"/{route_base}/{name}" if route_base else f"/{name}" + + +def first_sentence(text): + """Extract the first sentence from a description string.""" + if not text: + return "" + text = text.strip() + # Look for sentence-ending punctuation followed by space or end + match = re.match(r"(.+?[.!?])(?:\s|$)", text, re.DOTALL) + if match: + return match.group(1).strip() + return text + + +def collect_pages(base_dir, section): + """Collect all pages from a content section.""" + content_dir = base_dir / section["content_dir"] + 
if not content_dir.is_dir(): + return [] + + pages = [] + for file_path in sorted(content_dir.rglob("*")): + if file_path.suffix not in (".md", ".mdx"): + continue + # Skip tag pages generated by Docusaurus + if "tags" in file_path.parts: + continue + + fm = parse_frontmatter(file_path) + if fm is None: + print(f" warning: no frontmatter in {file_path}", file=sys.stderr) + continue + + title = fm.get("title", file_path.stem) + description = fm.get("description", "") + tags = fm.get("tags") or [] + sidebar_position = fm.get("sidebar_position") + date = fm.get("date") + url_path = file_to_path( + file_path, content_dir, section["route_base"], fm + ) + + page = { + "title": title, + "description": description, + "path": url_path, + "tags": tags, + "source_file": str(file_path.relative_to(base_dir)), + } + if sidebar_position is not None: + page["sidebar_position"] = sidebar_position + if date is not None: + page["date"] = str(date) + + pages.append(page) + + return _sort_pages(pages, section["route_base"]) + + +def _sort_pages(pages, route_base): + """Sort pages: index first, then by sidebar_position, then by title.""" + if route_base == "blog": + return sorted(pages, key=lambda p: p.get("date", ""), reverse=True) + + def sort_key(page): + is_index = page["path"].rstrip("/") == f"/{route_base}".rstrip("/") + pos = page.get("sidebar_position") + # Index pages first, then by sidebar_position (missing last), then title + return ( + 0 if is_index else 1, + pos if pos is not None else 99999, + page["title"].lower(), + ) + + return sorted(pages, key=sort_key) + + +def generate_llms_txt(site_url, sections_data): + """Generate llms.txt content following the llmstxt.org standard.""" + lines = [ + f"# {SITE_NAME}", + "", + f"> {SITE_TAGLINE}", + "", + ] + + for section in sections_data: + lines.append(f"## {section['title']}") + lines.append("") + lines.append(section["description"]) + lines.append("") + + for page in section["pages"]: + url = f"{site_url}{page['path']}" + desc 
= first_sentence(page["description"]) + if desc: + lines.append(f"- [{page['title']}]({url}): {desc}") + else: + lines.append(f"- [{page['title']}]({url})") + + lines.append("") + + return "\n".join(lines) + + +def generate_catalog_json(site_url, sections_data): + """Generate catalog.json structure.""" + total = sum(len(s["pages"]) for s in sections_data) + catalog = { + "site": { + "name": SITE_NAME, + "url": site_url, + "tagline": SITE_TAGLINE, + }, + "generated": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + "total_pages": total, + "sections": [], + } + + for section in sections_data: + pages_out = [] + for page in section["pages"]: + entry = { + "title": page["title"], + "description": page["description"], + "url": f"{site_url}{page['path']}", + "path": page["path"], + "tags": page["tags"], + "source_file": page["source_file"], + } + if "sidebar_position" in page: + entry["sidebar_position"] = page["sidebar_position"] + if "date" in page: + entry["date"] = page["date"] + pages_out.append(entry) + + catalog["sections"].append( + { + "key": section["key"], + "title": section["title"], + "route_base": f"/{section['route_base']}" if section["route_base"] else "/", + "description": section["description"], + "page_count": len(pages_out), + "pages": pages_out, + } + ) + + return catalog + + +def main(): + parser = argparse.ArgumentParser( + description="Generate llms.txt and catalog.json for Bactopia docs" + ) + parser.add_argument( + "--base-dir", + default=".", + help="Root directory of the docs repo (default: .)", + ) + parser.add_argument( + "--site-url", + default="https://bactopia.github.io", + help="Site base URL (default: https://bactopia.github.io)", + ) + parser.add_argument( + "--output-dir", + default="static", + help="Output directory for generated files (default: static)", + ) + args = parser.parse_args() + + base_dir = Path(args.base_dir).resolve() + output_dir = base_dir / args.output_dir + + sections_data = [] + for section in SECTIONS: 
+ print(f"Collecting {section['title']}...") + pages = collect_pages(base_dir, section) + sections_data.append( + { + "key": section["key"], + "title": section["title"], + "route_base": section["route_base"], + "description": section["description"], + "pages": pages, + } + ) + print(f" {len(pages)} pages") + + site_url = args.site_url.rstrip("/") + + llms_path = output_dir / "llms.txt" + llms_content = generate_llms_txt(site_url, sections_data) + llms_path.write_text(llms_content, encoding="utf-8") + print(f"\nWrote {llms_path}") + + catalog_path = output_dir / "catalog.json" + catalog = generate_catalog_json(site_url, sections_data) + catalog_json = json.dumps(catalog, indent=2, ensure_ascii=False) + "\n" + catalog_path.write_text(catalog_json, encoding="utf-8") + print(f"Wrote {catalog_path}") + + total = sum(len(s["pages"]) for s in sections_data) + print(f"\nTotal: {total} pages across {len(sections_data)} sections") + + +if __name__ == "__main__": + main() diff --git a/bin/generate-modules.py b/bin/generate-modules.py new file mode 100755 index 00000000..f37ee070 --- /dev/null +++ b/bin/generate-modules.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +""" +Generate Docusaurus MDX pages for Bactopia modules from parsed metadata. 
+""" +import argparse +import json +import sys +from pathlib import Path + +from generator_utils import ( + STATUS_BADGE, escape_mdx, normalize_tags, render_io_table, render_param_table, + render_citations, tags_yaml, + create_jinja_env, load_template, +) + + +def build_module_context(mod, data): + """Build template context for a single module page.""" + scope = mod.get('scope', 'sample') + tags = normalize_tags(mod.get('keywords', []) + [f'{scope}-scope']) + + badge_color = STATUS_BADGE.get(mod['status'], 'secondary') + tool = mod.get('tool', {}) + tool_name = tool.get('name', '') + tool_version = tool.get('version', '') + scope_label = scope + + badges = '' + tool_url = tool.get('url', '') + + desc_parts = [] + if mod['summary']: + desc_parts.append(escape_mdx(mod['summary'])) + if mod['description']: + desc_parts.append('') + desc_parts.append(escape_mdx(mod['description'])) + description = '\n'.join(desc_parts) + + # Notes + notes_parts = [] + for note in mod.get('notes', []): + title = note.get('title', '') + body = note.get('body', '') + if title: + notes_parts.append(f':::note[{title}]') + else: + notes_parts.append(':::note') + if body: + notes_parts.append(escape_mdx(body)) + notes_parts.append(':::') + notes_parts.append('') + notes_section = '\n'.join(notes_parts) + + # Inputs + inputs_section = render_io_table( + mod.get('inputs', []), header='Inputs', + nf_io=mod.get('nf_inputs'), + ) + + # Outputs + outputs_section = render_io_table( + mod.get('outputs', []), header='Outputs', + nf_io=mod.get('nf_outputs'), + ) + + # Parameters + params_parts = [] + params = mod.get('params', {}) + if params: + params_parts.append('## Parameters') + params_parts.append('') + for group_key, group in params.items(): + table = render_param_table(group) + if table: + params_parts.append(table) + params_section = '\n'.join(params_parts) + + # Used By + used_by_parts = [] + sw_list = mod.get('used_by_subworkflows', []) + wf_list = mod.get('used_by_workflows', []) + if 
sw_list or wf_list: + used_by_parts.append('## Used By') + used_by_parts.append('') + if sw_list: + used_by_parts.append('### Subworkflows') + used_by_parts.append('') + for sw_name in sorted(sw_list): + sw = data['subworkflows'].get(sw_name, {}) + sw_summary = sw.get('summary', '') + used_by_parts.append(f'- [{sw_name}](/developers/subworkflows/{sw_name}) - {sw_summary}') + used_by_parts.append('') + if wf_list: + used_by_parts.append('### Workflows') + used_by_parts.append('') + for wf_name in sorted(wf_list): + wf = data['workflows'].get(wf_name, {}) + wf_summary = wf.get('summary', '') + if wf_name == 'bactopia': + wf_path = '/full-guide' + elif wf.get('type') == 'tool': + wf_path = f'/bactopia-tools/{wf_name}' + else: + wf_path = f'/bactopia-pipelines/{wf_name}' + used_by_parts.append(f'- [{wf_name}]({wf_path}) - {wf_summary}') + used_by_parts.append('') + used_by_section = '\n'.join(used_by_parts) + + # Citations + citations_section = render_citations(mod.get('citations', []), data['citations']) + + return { + 'mod': mod, + 'data': data, + 'tags': tags, + 'tag_base': '/developers', + 'badges': badges, + 'tool_name': tool_name, + 'tool_version': tool_version, + 'tool_url': tool_url, + 'description': description, + 'notes_section': notes_section, + 'inputs_section': inputs_section, + 'outputs_section': outputs_section, + 'params_section': params_section, + 'used_by_section': used_by_section, + 'citations_section': citations_section, + } + + +def main(): + parser = argparse.ArgumentParser(description='Generate module MDX pages from parsed Bactopia metadata') + parser.add_argument('catalog', help='Path to parsed bactopia.json') + parser.add_argument('--output-dir', '-o', default='developers/modules', + help='Output directory for MDX files (default: developers/modules)') + parser.add_argument('--template-dir', '-t', default='templates', + help='Template directory (default: templates)') + args = parser.parse_args() + + catalog_path = Path(args.catalog) + if not 
catalog_path.exists(): + print(f'Error: {catalog_path} not found. Run parse-bactopia.py first.', file=sys.stderr) + sys.exit(1) + + with open(catalog_path) as f: + data = json.load(f) + + env = create_jinja_env(args.template_dir) + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + modules = data.get('modules', {}) + for key in sorted(modules.keys()): + mod = modules[key] + template = load_template(env, 'module', key) + context = build_module_context(mod, data) + page = template.render(**context) + (output_dir / f'{key}.mdx').write_text(page) + + # Index page + index_template = env.get_template('module_index.j2') + index_page = index_template.render( + total=len(modules), + modules=modules, + sorted_keys=sorted(modules.keys()), + ) + (output_dir / 'index.mdx').write_text(index_page) + + print(f'Generated {len(modules)} module pages + index in {output_dir}/') + + +if __name__ == '__main__': + main() diff --git a/bin/generate-skills.py b/bin/generate-skills.py new file mode 100644 index 00000000..cdb33948 --- /dev/null +++ b/bin/generate-skills.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +"""Extract structured metadata from Bactopia SKILL.md files for docs generation.""" +import argparse +import json +import re +import sys +from pathlib import Path + +import yaml + + +CATEGORY_MAP = { + "add-": "Scaffolding", + "update-": "Maintenance", + "merge-": "Maintenance", + "review-": "Review & Quality", + "run-": "Testing", + "project-": "Project", +} + +CATEGORY_ORDER = [ + "Scaffolding", + "Maintenance", + "Review & Quality", + "Testing", + "Project", +] + + +def categorize(name): + """Assign a category based on the skill name prefix.""" + for prefix, category in CATEGORY_MAP.items(): + if name.startswith(prefix): + return category + return "Other" + + +def first_sentence(text): + """Extract the first sentence from a description string.""" + text = re.sub(r"\s+", " ", text).strip() + match = re.match(r"(.+?\.)\s", text + " ") + return 
match.group(1) if match else text + + +def extract_summary(body): + """Extract the first paragraph after the top-level heading.""" + lines = body.strip().splitlines() + para_lines = [] + in_para = False + for line in lines: + stripped = line.strip() + if stripped.startswith("#"): + if in_para: + break + continue + if not stripped: + if in_para: + break + continue + in_para = True + para_lines.append(stripped) + return " ".join(para_lines) if para_lines else "" + + +def find_cli_command(skill_dir, body): + """Extract the CLI command name from the wrapper script or SKILL.md body.""" + scripts_dir = skill_dir / "scripts" + if scripts_dir.is_dir(): + for script in sorted(scripts_dir.glob("run-*.sh")): + text = script.read_text() + match = re.search(r"exec.*?(bactopia-[\w-]+)", text) + if match: + return match.group(1) + # Fallback: find the first bactopia-* command referenced in code blocks + for match in re.finditer(r"`(bactopia-[\w-]+)`", body): + cmd = match.group(1) + if cmd not in ("bactopia-path", "bactopia-tool", "bactopia-tools"): + return cmd + return None + + +def parse_skill(skill_dir): + """Parse a single SKILL.md and its wrapper script.""" + skill_md = skill_dir / "SKILL.md" + if not skill_md.exists(): + return None + + content = skill_md.read_text() + + fm_match = re.match(r"^---\n(.+?)\n---\n(.*)$", content, re.DOTALL) + if not fm_match: + print(f"Warning: no frontmatter in {skill_md}", file=sys.stderr) + return None + + frontmatter = yaml.safe_load(fm_match.group(1)) + body = fm_match.group(2) + + name = frontmatter.get("name", skill_dir.name) + description = frontmatter.get("description", "") + cli_command = find_cli_command(skill_dir, body) + + return { + "name": name, + "description": description, + "first_sentence": first_sentence(description), + "summary": extract_summary(body), + "category": categorize(name), + "cli_command": cli_command, + "cli_page": f"/developers/cli/{cli_command}" if cli_command else None, + } + + +def main(): + parser = 
argparse.ArgumentParser( + description="Extract metadata from Bactopia SKILL.md files" + ) + parser.add_argument( + "skills_dir", + help="Path to the .claude/skills/ directory in the bactopia repo", + ) + parser.add_argument( + "--json", action="store_true", dest="as_json", + help="Output as JSON (default: human-readable summary)", + ) + args = parser.parse_args() + + skills_dir = Path(args.skills_dir) + if not skills_dir.is_dir(): + print(f"Error: {skills_dir} is not a directory.", file=sys.stderr) + sys.exit(1) + + skills = [] + for child in sorted(skills_dir.iterdir()): + if not child.is_dir(): + continue + skill = parse_skill(child) + if skill: + skills.append(skill) + + if not skills: + print("No SKILL.md files found.", file=sys.stderr) + sys.exit(1) + + if args.as_json: + by_category = {} + for cat in CATEGORY_ORDER: + members = [s for s in skills if s["category"] == cat] + if members: + by_category[cat] = members + output = { + "total": len(skills), + "categories": CATEGORY_ORDER, + "by_category": by_category, + "skills": skills, + } + json.dump(output, sys.stdout, indent=2) + print() + else: + print(f"Found {len(skills)} skills:\n") + for cat in CATEGORY_ORDER: + members = [s for s in skills if s["category"] == cat] + if not members: + continue + print(f" {cat}:") + for s in members: + wraps = s["cli_command"] or "unknown" + print(f" {s['name']:30s} wraps {wraps}") + print() + + +if __name__ == "__main__": + main() diff --git a/bin/generate-subworkflows.py b/bin/generate-subworkflows.py new file mode 100755 index 00000000..70c4d12d --- /dev/null +++ b/bin/generate-subworkflows.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 +""" +Generate Docusaurus MDX pages for Bactopia subworkflows from parsed metadata. 
+""" +import argparse +import json +import sys +from pathlib import Path + +from generator_utils import ( + STATUS_BADGE, escape_mdx, normalize_tags, render_io_table, render_citations, tags_yaml, + create_jinja_env, load_template, +) + + +def build_subworkflow_context(sw, data): + """Build template context for a single subworkflow page.""" + scope = sw.get('scope', 'sample') + tags = normalize_tags(sw.get('keywords', []) + [f'{scope}-scope']) + + badges = '' + + desc_parts = [] + if sw['summary']: + desc_parts.append(escape_mdx(sw['summary'])) + if sw['description']: + desc_parts.append('') + desc_parts.append(escape_mdx(sw['description'])) + description = '\n'.join(desc_parts) + + # Take (inputs) + inputs_section = render_io_table( + sw.get('inputs', []), header='Take', + nf_io=sw.get('nf_inputs'), + ) + + # Emit (outputs) + PUBLISHED_NAMES = {'sample_outputs', 'run_outputs'} + outputs = sw.get('outputs', []) + outputs_section = '' + if outputs: + published = [o for o in outputs if o.get('name') in PUBLISHED_NAMES] + downstream = [o for o in outputs if o.get('name') not in PUBLISHED_NAMES] + + lines = ['## Emit', ''] + + def _render_emit_group(entries, lines): + for out in entries: + name = out.get('name', '') + fields = out.get('fields', []) + if name: + lines.append(f'#### `{name}`') + lines.append('') + if fields: + lines.append('| Output | Description |') + lines.append('|--------|-------------|') + for field in fields: + fname = f'`{field["name"]}`' if field['name'] else '' + lines.append(f'| {fname} | {field.get("description", "")} |') + lines.append('') + elif out.get('description'): + lines.append(out['description']) + lines.append('') + elif name: + scope = 'sample' if 'sample' in name else 'run' + lines.append(f'No {scope}-scope outputs.') + lines.append('') + + lines.append('### Published') + lines.append('') + lines.append('The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow.') + 
lines.append('') + _render_emit_group(published, lines) + + if downstream: + lines.append('### Downstream Inputs') + lines.append('') + lines.append('The following emissions are meant to be used as inputs to downstream subworkflows.') + lines.append('') + _render_emit_group(downstream, lines) + + outputs_section = '\n'.join(lines) + + # Composition + comp_parts = [] + sw_list = sw.get('subworkflows', []) + if sw_list: + comp_parts.append('## Subworkflow Composition') + comp_parts.append('') + comp_parts.append('This subworkflow calls the following subworkflows:') + comp_parts.append('') + for sw_name in sw_list: + sub = data['subworkflows'].get(sw_name, {}) + sub_summary = sub.get('summary', '') + comp_parts.append(f'- [{sw_name}](/developers/subworkflows/{sw_name}) - {sub_summary}') + comp_parts.append('') + + mod_list = sw.get('modules', []) + if mod_list: + comp_parts.append('## Module Composition') + comp_parts.append('') + comp_parts.append('This subworkflow calls the following modules:') + comp_parts.append('') + for mod_name in mod_list: + mod = data['modules'].get(mod_name, {}) + mod_summary = mod.get('summary', '') + comp_parts.append(f'- [{mod_name}](/developers/modules/{mod_name}) - {mod_summary}') + comp_parts.append('') + composition_section = '\n'.join(comp_parts) + + # Used By + used_by_parts = [] + wf_list = sw.get('used_by_workflows', []) + if wf_list: + used_by_parts.append('## Used By') + used_by_parts.append('') + used_by_parts.append('This subworkflow is used by the following workflows:') + used_by_parts.append('') + for wf_name in sorted(wf_list): + wf = data['workflows'].get(wf_name, {}) + wf_summary = wf.get('summary', '') + if wf_name == 'bactopia': + wf_path = '/full-guide' + elif wf.get('type') == 'tool': + wf_path = f'/bactopia-tools/{wf_name}' + else: + wf_path = f'/bactopia-pipelines/{wf_name}' + used_by_parts.append(f'- [{wf_name}]({wf_path}) - {wf_summary}') + used_by_parts.append('') + used_by_section = '\n'.join(used_by_parts) + + 
# Citations + citations_section = render_citations(sw.get('citations', []), data['citations']) + + return { + 'sw': sw, + 'data': data, + 'tags': tags, + 'tag_base': '/developers', + 'badges': badges, + 'description': description, + 'inputs_section': inputs_section, + 'outputs_section': outputs_section, + 'composition_section': composition_section, + 'used_by_section': used_by_section, + 'citations_section': citations_section, + } + + +def main(): + parser = argparse.ArgumentParser(description='Generate subworkflow MDX pages from parsed Bactopia metadata') + parser.add_argument('catalog', help='Path to parsed bactopia.json') + parser.add_argument('--output-dir', '-o', default='developers/subworkflows', + help='Output directory for MDX files (default: developers/subworkflows)') + parser.add_argument('--template-dir', '-t', default='templates', + help='Template directory (default: templates)') + args = parser.parse_args() + + catalog_path = Path(args.catalog) + if not catalog_path.exists(): + print(f'Error: {catalog_path} not found. 
Run parse-bactopia.py first.', file=sys.stderr) + sys.exit(1) + + with open(catalog_path) as f: + data = json.load(f) + + env = create_jinja_env(args.template_dir) + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + subworkflows = data.get('subworkflows', {}) + for key in sorted(subworkflows.keys()): + sw = subworkflows[key] + template = load_template(env, 'subworkflow', key) + context = build_subworkflow_context(sw, data) + page = template.render(**context) + (output_dir / f'{key}.mdx').write_text(page) + + # Index page + index_template = env.get_template('subworkflow_index.j2') + index_page = index_template.render( + total=len(subworkflows), + subworkflows=subworkflows, + sorted_keys=sorted(subworkflows.keys()), + ) + (output_dir / 'index.mdx').write_text(index_page) + + print(f'Generated {len(subworkflows)} subworkflow pages + index in {output_dir}/') + + +if __name__ == '__main__': + main() diff --git a/bin/generate-tools-index.py b/bin/generate-tools-index.py new file mode 100644 index 00000000..84936e31 --- /dev/null +++ b/bin/generate-tools-index.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +"""Generate the grouped bactopia-tools/index.mdx from tool-categories.yml.""" +import argparse +import sys +from pathlib import Path + +import yaml + + +def parse_description(mdx_path): + """Extract the description field from MDX frontmatter.""" + in_frontmatter = False + for line in mdx_path.read_text().splitlines(): + if line.strip() == '---': + if in_frontmatter: + break + in_frontmatter = True + continue + if in_frontmatter and line.startswith('description:'): + desc = line.split(':', 1)[1].strip().strip('"').strip("'") + return desc + return '' + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('categories_file', help='Path to tool-categories.yml') + parser.add_argument('--tools-dir', default='bactopia-tools/', + help='Directory containing tool MDX files') + 
parser.add_argument('--output', default='bactopia-tools/index.mdx', + help='Output index file path') + args = parser.parse_args() + + tools_dir = Path(args.tools_dir) + categories_data = yaml.safe_load(Path(args.categories_file).read_text()) + categories = categories_data['categories'] + + tool_files = { + p.stem for p in tools_dir.glob('*.mdx') + if not p.stem.startswith('index') + } + + mapped_tools = set() + for cat in categories: + for tool in cat['tools']: + mapped_tools.add(tool) + + missing = tool_files - mapped_tools + if missing: + print(f"Error: {len(missing)} tool(s) not in categories mapping: " + f"{', '.join(sorted(missing))}", file=sys.stderr) + sys.exit(1) + + stale = mapped_tools - tool_files + if stale: + print(f"Warning: {len(stale)} tool(s) in mapping but no MDX file found: " + f"{', '.join(sorted(stale))}", file=sys.stderr) + + total_tools = len(tool_files) + num_categories = len(categories) + + lines = [ + '---', + 'title: Bactopia Tools', + 'description: All available Bactopia Tool workflows', + 'sidebar_position: 2', + '---', + '', + '# Bactopia Tools', + '', + f'Bactopia Tools are additional analysis workflows that run specific tools on existing', + f'Bactopia results. 
There are {total_tools} Bactopia Tools available across ' + f'{num_categories} categories.', + 'You can also [browse by tag](/bactopia-tools/tags).', + '', + ] + + for cat in categories: + lines.append(f'## {cat["name"]}') + lines.append('') + lines.append(cat['description']) + lines.append('') + lines.append('| Workflow | Description |') + lines.append('|----------|-------------|') + + for tool_name in sorted(cat['tools']): + mdx_path = tools_dir / f'{tool_name}.mdx' + if not mdx_path.exists(): + continue + desc = parse_description(mdx_path) + lines.append(f'| [{tool_name}](/bactopia-tools/{tool_name}) | {desc} |') + + lines.append('') + + output = Path(args.output) + output.write_text('\n'.join(lines)) + print(f"Generated {output} ({total_tools} tools, {num_categories} categories)") + + +if __name__ == '__main__': + main() diff --git a/bin/generate-workflows.py b/bin/generate-workflows.py new file mode 100755 index 00000000..ef60b6e8 --- /dev/null +++ b/bin/generate-workflows.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python3 +""" +Generate Docusaurus MDX pages for Bactopia workflows from parsed metadata. 
+""" +import argparse +import json +import sys +from pathlib import Path + +from generator_utils import ( + STATUS_BADGE, escape_mdx, normalize_tags, render_param_table, + render_citations, render_shared_params, render_output_tree, tags_yaml, + create_jinja_env, load_template, +) + + +def build_workflow_context(wf, data): + """Build template context for a single workflow page.""" + tags = normalize_tags(wf.get('keywords', [])) + wf_type_tag = 'named-workflow' if wf['type'] == 'named' else 'bactopia-tool' + if wf_type_tag not in tags: + tags.append(wf_type_tag) + + badges = '' + + desc_parts = [] + if wf['summary']: + desc_parts.append(escape_mdx(wf['summary'])) + if wf['description']: + desc_parts.append('') + desc_parts.append(escape_mdx(wf['description'])) + description = '\n'.join(desc_parts) + + # Parameters: required first, then tool-specific, then remaining shared + params_parts = ['## Parameters', ''] + + shared_key = wf.get('params', {}).get('shared_schema', '') + shared_params = data.get('shared_params', {}) + schema = shared_params.get(shared_key, {}) + + # Required parameters first (always visible) + required = schema.get('input_parameters', {}) + required_table = render_param_table(required) if required else '' + if required_table: + params_parts.append(required_table) + + # Tool-specific parameters + tool_params = wf.get('params', {}).get('tool_specific', {}) + has_tool_params = False + for group_key, group in tool_params.items(): + table = render_param_table(group) + if table: + params_parts.append(table) + has_tool_params = True + + # Remaining shared parameters (skip input_parameters, already rendered) + shared_block = render_shared_params(shared_params, shared_key, skip_keys={'input_parameters'}) + if shared_block: + params_parts.append(shared_block) + + params_section = '\n'.join(params_parts) if (has_tool_params or shared_block or required_table) else '' + + # Outputs + outputs_parts = [] + output_tree = wf.get('output_tree', []) + published = 
wf.get('published_outputs', []) + if published or output_tree: + outputs_parts.append('## Outputs') + outputs_parts.append('') + tree_block = render_output_tree(output_tree) + if tree_block: + outputs_parts.append(tree_block) + skip_sections = {'Execution Logs', 'Versions'} + for section in published: + if section['name'] in skip_sections: + continue + outputs_parts.append(f'### {section["name"]}') + outputs_parts.append('') + if section.get('notes'): + for note in section['notes']: + outputs_parts.append(':::note') + outputs_parts.append(note) + outputs_parts.append(':::') + outputs_parts.append('') + if section.get('files'): + outputs_parts.append('| File | Description |') + outputs_parts.append('|------|-------------|') + for f in section['files']: + outputs_parts.append(f'| `{f["pattern"]}` | {f["description"]} |') + outputs_parts.append('') + outputs_section = '\n'.join(outputs_parts) + + # Composition + comp_parts = [] + sw_list = wf.get('uses_subworkflows', []) + if sw_list: + comp_parts.append('## Composition') + comp_parts.append('') + comp_parts.append('This workflow uses the following subworkflows:') + comp_parts.append('') + for sw_name in sorted(sw_list): + sw = data['subworkflows'].get(sw_name, {}) + sw_summary = sw.get('summary', '') + comp_parts.append(f'- [{sw_name}](/developers/subworkflows/{sw_name}) - {sw_summary}') + comp_parts.append('') + composition_section = '\n'.join(comp_parts) + + # Citations + citations_section = render_citations(wf.get('citations', []), data['citations']) + + tag_base = '/bactopia-tools' if wf['type'] == 'tool' else '/bactopia-pipelines' + + display_names = {'cleanyerreads': 'clean-yer-reads'} + display_name = display_names.get(wf['key'], wf['key']) + + if wf['path'] == '.': + nf_run = 'bactopia/bactopia' + else: + nf_run = f"bactopia/bactopia/{wf['path']}main.nf" + + return { + 'wf': wf, + 'data': data, + 'tags': tags, + 'tag_base': tag_base, + 'badges': badges, + 'description': description, + 'display_name': 
display_name, + 'nf_run': nf_run, + 'params_section': params_section, + 'outputs_section': outputs_section, + 'composition_section': composition_section, + 'citations_section': citations_section, + } + + +def main(): + parser = argparse.ArgumentParser(description='Generate workflow MDX pages from parsed Bactopia metadata') + parser.add_argument('catalog', help='Path to parsed bactopia.json') + parser.add_argument('--tools-dir', default='bactopia-tools', + help='Output directory for Bactopia Tools (default: bactopia-tools)') + parser.add_argument('--pipelines-dir', default='bactopia-pipelines', + help='Output directory for Bactopia Pipelines (default: bactopia-pipelines)') + parser.add_argument('--docs-dir', default='docs', + help='Output directory for main docs (default: docs)') + parser.add_argument('--template-dir', '-t', default='templates', + help='Template directory (default: templates)') + args = parser.parse_args() + + catalog_path = Path(args.catalog) + if not catalog_path.exists(): + print(f'Error: {catalog_path} not found. 
Run parse-bactopia.py first.', file=sys.stderr) + sys.exit(1) + + with open(catalog_path) as f: + data = json.load(f) + + env = create_jinja_env(args.template_dir) + tools_dir = Path(args.tools_dir) + tools_dir.mkdir(parents=True, exist_ok=True) + pipelines_dir = Path(args.pipelines_dir) + pipelines_dir.mkdir(parents=True, exist_ok=True) + docs_dir = Path(args.docs_dir) + + workflows = data.get('workflows', {}) + + for key in sorted(workflows.keys()): + wf = workflows[key] + template = load_template(env, 'workflow', key) + context = build_workflow_context(wf, data) + page = template.render(**context) + if wf['type'] == 'tool': + (tools_dir / f'{key}.mdx').write_text(page) + elif key == 'bactopia': + page = page.replace( + 'title: bactopia\n', + 'title: Full Guide\nsidebar_label: Full Guide\nsidebar_position: 5\n', + 1, + ) + page = page.replace(' - named-workflow\n', '', 1) + (docs_dir / 'full-guide.mdx').write_text(page) + else: + (pipelines_dir / f'{key}.mdx').write_text(page) + + # Index pages + named = {k: v for k, v in workflows.items() if v['type'] == 'named' and k != 'bactopia'} + tools = {k: v for k, v in workflows.items() if v['type'] == 'tool'} + + named_index_template = env.get_template('named_workflows_index.j2') + named_index_page = named_index_template.render( + total=len(named), + named=named, + named_keys=sorted(named.keys()), + ) + (pipelines_dir / 'index.mdx').write_text(named_index_page) + + tools_index_template = env.get_template('bactopia_tools_index.j2') + tools_index_page = tools_index_template.render( + total=len(tools), + tools=tools, + tools_keys=sorted(tools.keys()), + ) + (tools_dir / 'index.mdx').write_text(tools_index_page) + + print(f'Generated {len(named)} pipeline + {len(tools)} tool workflow pages + indexes') + + +if __name__ == '__main__': + main() diff --git a/bin/generator_utils.py b/bin/generator_utils.py new file mode 100644 index 00000000..96ca2319 --- /dev/null +++ b/bin/generator_utils.py @@ -0,0 +1,350 @@ +"""Shared 
utilities for Bactopia documentation generators.""" +from pathlib import Path + +from jinja2 import Environment, FileSystemLoader, ChoiceLoader, select_autoescape + + +STATUS_BADGE = { + 'stable': 'success', + 'testing': 'warning', + 'experimental': 'danger', +} + + +def escape_mdx(text): + """Escape characters that break MDX parsing.""" + text = text.replace('<', '<').replace('>', '>') + text = text.replace('{', '{').replace('}', '}') + return text + + +def normalize_tags(keywords): + """Normalize keywords into Docusaurus-compatible tag strings.""" + tags = [] + for kw in keywords: + tag = kw.strip().lower().replace(' ', '-') + if tag and tag not in tags: + tags.append(tag) + return tags + + +def format_default(value): + """Format a parameter default value for display.""" + if value is None: + return '' + if isinstance(value, bool): + return '`true`' if value else '`false`' + if isinstance(value, str): + return f'`{value}`' if value else '' + return f'`{value}`' + + +def render_param_table(group, include_heading=True): + """Render a parameter group as a markdown table.""" + props = group.get('properties', {}) + visible = {k: v for k, v in props.items() if not v.get('hidden', False)} + if not visible: + return '' + + lines = [] + if include_heading: + lines.append(f'### {group["title"]}') + lines.append('') + if group.get('description'): + lines.append(group['description']) + lines.append('') + + lines.append('| Parameter | Type | Default | Description |') + lines.append('|-----------|------|---------|-------------|') + for name, prop in visible.items(): + ptype = prop.get('type', 'string') + default = format_default(prop.get('default')) + desc = escape_mdx(prop.get('description', '')) + enum = prop.get('enum') + if enum: + choices = ', '.join(f'`{e}`' for e in enum) + desc = f'{desc} (choices: {choices})' + lines.append(f'| `--{name}` | {ptype} | {default} | {desc} |') + lines.append('') + return '\n'.join(lines) + + +def _render_record_code_block(record): + 
"""Render a fenced code block for a single NF record.""" + lines = ['```', 'record ('] + field_lines = [] + for field in record['fields']: + field_lines.append(f' {field["name"]}: {field["type"]}') + lines.append(',\n'.join(field_lines)) + lines.append(')') + lines.append('```') + lines.append('') + return lines + + +def _render_standalone_code_block(standalone): + """Render a fenced code block for standalone typed declarations.""" + lines = ['```'] + for sa in standalone: + lines.append(f'{sa["name"]}: {sa["type"]}') + lines.append('```') + lines.append('') + return lines + + +def _render_fields_table(fields): + """Render a fields table, choosing columns based on whether types are present.""" + lines = [] + has_types = any(f.get('type') for f in fields) + if has_types: + lines.append('| Field | Type | Description |') + lines.append('|-------|------|-------------|') + for field in fields: + fname = f'`{field["name"]}`' if field['name'] else '' + ftype = f'`{field["type"]}`' if field.get('type') else '' + fdesc = escape_mdx(field.get('description', '')) + lines.append(f'| {fname} | {ftype} | {fdesc} |') + else: + lines.append('| Field | Description |') + lines.append('|-------|-------------|') + for field in fields: + fname = f'`{field["name"]}`' if field['name'] else '' + fdesc = escape_mdx(field.get('description', '')) + lines.append(f'| {fname} | {fdesc} |') + lines.append('') + return lines + + +def render_io_table(io_list, header='Inputs', nf_io=None): + """Render @input or @output entries as a markdown section.""" + if not io_list: + return '' + + lines = [f'## {header}', ''] + + records = nf_io.get('records', []) if nf_io else [] + standalone = nf_io.get('standalone', []) if nf_io else [] + + record_entries = [e for e in io_list if e.get('fields')] + standalone_entries = [e for e in io_list if not e.get('fields')] + + # For take blocks (standalone only, no records): split by Channel vs others + if standalone and not records: + record_takes = [s for s in 
standalone if s['type'] == 'Channel'] + other_takes = [s for s in standalone if s['type'] != 'Channel'] + if record_takes: + lines.extend(_render_standalone_code_block(record_takes)) + else: + other_takes = standalone + + # Render record entries: code block then table for each + for i, entry in enumerate(record_entries): + if i < len(records): + lines.extend(_render_record_code_block(records[i])) + lines.extend(_render_fields_table(entry['fields'])) + + # Render standalone entries: code block + merged table + if standalone_entries: + if other_takes: + lines.extend(_render_standalone_code_block(other_takes)) + elif standalone and records: + lines.extend(_render_standalone_code_block(standalone)) + + has_types = any(e.get('type') for e in standalone_entries) + if has_types: + lines.append('| Name | Type | Description |') + lines.append('|------|------|-------------|') + for entry in standalone_entries: + name = entry.get('name', '') + etype = entry.get('type', '') + desc = entry.get('description', '') + lines.append(f'| `{name}` | `{etype}` | {desc} |') + else: + lines.append('| Name | Description |') + lines.append('|------|-------------|') + for entry in standalone_entries: + name = entry.get('name', '') + desc = entry.get('description', '') + lines.append(f'| `{name}` | {desc} |') + lines.append('') + + return '\n'.join(lines) + + +def render_citations(cite_keys, citations_db, include_bactopia=True): + """Render a citations section from citation keys.""" + if not cite_keys: + return '' + + lines = [ + '## Citations', + '', + 'If you use this in your analysis, please cite the following.', + '', + ] + + def _render_one(cite): + name = cite.get('name', '') + link = cite.get('link', '') + cite_text = cite.get('cite', '') + if link: + lines.append(f'- [{name}]({link}) ') + else: + lines.append(f'- **{name}** ') + if cite_text: + lines.append(f' {cite_text}') + lines.append('') + + if include_bactopia and 'bactopia' not in cite_keys: + bactopia_cite = 
citations_db.get('bactopia', {}) + if bactopia_cite: + _render_one(bactopia_cite) + + for ck in cite_keys: + cite = citations_db.get(ck, {}) + if cite: + _render_one(cite) + else: + lines.append(ck) + lines.append('') + + return '\n'.join(lines) + + +def tags_yaml(tags): + """Render a tags list as YAML for frontmatter.""" + return '\n'.join(f' - {t}' for t in tags) + + +def render_output_tree(raw_paths): + """Render a list of file paths as a decorated tree.""" + if not raw_paths: + return '' + + sample_name = raw_paths[0].split('/')[0] if raw_paths else '' + + # Collapse nf.command.* files into a single glob entry + nf_command_dirs = set() + filtered = [] + for p in raw_paths: + basename = p.rsplit('/', 1)[-1] + if basename.startswith('nf.command.'): + parent = p.rsplit('/', 1)[0] if '/' in p else '' + if parent not in nf_command_dirs: + nf_command_dirs.add(parent) + filtered.append(f'{parent}/nf.command.{{begin,err,log,out,run,sh,trace}}') + continue + filtered.append(p) + + # Identify the bactopia-runs subdirectory name to add + runs_subdir = '' + for p in filtered: + parts = p.split('/') + if len(parts) >= 2 and parts[0] == 'bactopia-runs': + runs_subdir = parts[1] + break + + cleaned = [] + for p in filtered: + if sample_name: + p = p.replace(sample_name, '') + if runs_subdir: + parts = p.split('/') + parts = [f'{part}-' if part == runs_subdir else part for part in parts] + p = '/'.join(parts) + cleaned.append(p) + + tree = {} + for p in cleaned: + parts = p.split('/') + node = tree + for part in parts: + if part not in node: + node[part] = {} + node = node[part] + + def _render_tree(node, prefix=''): + lines = [] + entries = sorted(node.keys()) + for i, name in enumerate(entries): + is_last = (i == len(entries) - 1) + connector = '└── ' if is_last else '├── ' + lines.append(f'{prefix}{connector}{name}') + child_prefix = prefix + (' ' if is_last else '│ ') + lines.extend(_render_tree(node[name], child_prefix)) + return lines + + tree_lines = [''] + root_entries 
= sorted(tree.keys()) + for i, name in enumerate(root_entries): + is_last = (i == len(root_entries) - 1) + connector = '└── ' if is_last else '├── ' + tree_lines.append(f'{connector}{name}') + child_prefix = ' ' if is_last else '│ ' + tree_lines.extend(_render_tree(tree[name], child_prefix)) + + return ( + '### Expected Output Files\n\n' + + '```\n' + + '\n'.join(tree_lines) + + '\n```\n' + ) + + +def render_shared_params(shared_params, schema_key, skip_keys=None): + """Render shared parameter groups in collapsible details blocks.""" + schema = shared_params.get(schema_key, {}) + generic = shared_params.get('generic', {}) + skip_keys = skip_keys or set() + + sections = [] + + for groups in [schema, generic]: + for group_key, group in groups.items(): + if group_key in skip_keys: + continue + table = render_param_table(group, include_heading=False) + if table: + title = group.get('title', group_key) + sections.append( + f'
\n{title}\n\n' + + table + + '
\n' + ) + + return '\n'.join(sections) + + +def create_jinja_env(template_dir='templates'): + """Create a Jinja2 environment configured for Bactopia doc generation.""" + template_path = Path(template_dir) + env = Environment( + loader=FileSystemLoader(str(template_path)), + keep_trailing_newline=True, + lstrip_blocks=True, + trim_blocks=True, + ) + + env.globals.update({ + 'STATUS_BADGE': STATUS_BADGE, + 'escape_mdx': escape_mdx, + 'normalize_tags': normalize_tags, + 'format_default': format_default, + 'render_param_table': render_param_table, + 'render_io_table': render_io_table, + 'render_citations': render_citations, + 'render_shared_params': render_shared_params, + 'render_output_tree': render_output_tree, + 'tags_yaml': tags_yaml, + }) + + return env + + +def load_template(env, component_type, key): + """Load a custom template if it exists, otherwise fall back to the base.""" + custom_path = f'custom/{component_type}s/{key}.j2' + try: + return env.get_template(custom_path) + except Exception: + return env.get_template(f'{component_type}.j2') diff --git a/bin/parse-bactopia.py b/bin/parse-bactopia.py new file mode 100755 index 00000000..00318c0c --- /dev/null +++ b/bin/parse-bactopia.py @@ -0,0 +1,709 @@ +#!/usr/bin/env python3 +""" +Parse Bactopia v4 metadata (catalog.json, groovydoc, schema.json, citations.yml) +into a unified JSON file for documentation generators. +""" +import argparse +import json +import re +import sys +from datetime import datetime, timezone +from pathlib import Path + +import yaml + + +def parse_groovydoc(text): + """Extract and parse the /** ... 
*/ groovydoc block from a main.nf file.""" + m = re.search(r'/\*\*(.*?)\*/', text, re.DOTALL) + if not m: + return {} + + raw = m.group(1) + lines = [] + for line in raw.split('\n'): + stripped = line.strip() + if stripped.startswith('* '): + lines.append(stripped[2:]) + elif stripped == '*': + lines.append('') + elif stripped.startswith('*'): + lines.append(stripped[1:]) + + result = { + 'summary': '', + 'description': '', + 'status': '', + 'keywords': [], + 'tags': {}, + 'citations': [], + 'notes': [], + 'subworkflows': [], + 'modules': [], + 'inputs': [], + 'outputs': [], + 'sections': [], + } + + summary_lines = [] + body_lines = [] + in_body = False + current_tag = None + current_tag_value = [] + current_section = None + + def flush_tag(): + nonlocal current_tag, current_tag_value, current_section + if current_tag is None: + return + + value = '\n'.join(current_tag_value).strip() + + if current_tag == 'status': + result['status'] = value + + elif current_tag == 'keywords': + result['keywords'] = [k.strip() for k in value.split(',') if k.strip()] + + elif current_tag == 'tags': + for token in value.split(): + if ':' in token: + k, v = token.split(':', 1) + k = k.strip() + v = v.strip() + if k == 'features': + result['tags'][k] = [f.strip() for f in v.split(',') if f.strip()] + else: + result['tags'][k] = v + + elif current_tag == 'citation': + for part in re.split(r'[,\n]', value): + part = part.strip() + if part: + result['citations'].append(part) + + elif current_tag == 'note': + first_line, _, rest = value.partition('\n') + result['notes'].append({ + 'title': first_line.strip(), + 'body': rest.strip(), + }) + + elif current_tag == 'subworkflows': + for part in re.split(r'[,\n]', value): + part = part.strip().split(' as ')[0].strip() + if part: + result['subworkflows'].append(part) + + elif current_tag == 'modules': + for part in re.split(r'[,\n]', value): + part = part.strip().split(' as ')[0].strip() + if part: + result['modules'].append(part) + + elif 
current_tag == 'input': + input_entry = parse_io_block(value) + result['inputs'].append(input_entry) + + elif current_tag == 'output': + output_entry = parse_io_block(value) + result['outputs'].append(output_entry) + + elif current_tag == 'section': + current_section = {'name': value, 'notes': [], 'files': []} + result['sections'].append(current_section) + + elif current_tag == 'publish': + if current_section is not None: + pattern, _, desc = value.partition(' ') + desc = desc.strip() + while ' ' in desc: + desc = desc.replace(' ', ' ') + current_section['files'].append({ + 'pattern': pattern.strip(), + 'description': desc, + }) + + current_tag = None + current_tag_value = [] + + for line in lines: + tag_match = re.match(r'^@(\w+)\s*(.*)', line) + + if tag_match: + flush_tag() + tag_name = tag_match.group(1) + tag_rest = tag_match.group(2).strip() + + if tag_name == 'note' and current_section is not None: + current_section['notes'].append(tag_rest) + elif tag_name == 'publish': + current_tag = 'publish' + current_tag_value = [tag_rest] + flush_tag() + else: + current_tag = tag_name + current_tag_value = [tag_rest] + in_body = False + elif current_tag is not None: + current_tag_value.append(line) + elif not in_body and line == '': + if summary_lines: + in_body = True + elif in_body: + body_lines.append(line) + else: + summary_lines.append(line) + + flush_tag() + + result['summary'] = ' '.join(summary_lines).strip() + result['description'] = '\n'.join(body_lines).strip() + + return result + + +def parse_io_block(value): + """Parse an @input or @output block into structured data.""" + lines = value.split('\n') + first_line = lines[0].strip().rstrip('?') + if first_line.startswith('record('): + name = first_line + else: + parts = first_line.split(None, 1) + name = parts[0] if parts else first_line + fields = [] + description_lines = [] + if not first_line.startswith('record('): + parts = first_line.split(None, 1) + if len(parts) > 1: + 
description_lines.append(parts[1]) + for line in lines[1:]: + line = line.strip() + field_match = re.match(r'^-\s*`(\w+\??)`\s*:\s*(.*)', line) + if field_match: + fields.append({ + 'name': field_match.group(1).rstrip('?'), + 'description': field_match.group(2).strip(), + }) + elif line.startswith('- '): + text = line[2:].strip() + fields.append({'name': '', 'description': text}) + elif line: + description_lines.append(line) + description = ' '.join(description_lines).strip() + return {'name': name, 'description': description, 'fields': fields} + + +COMMON_OUTPUT_DESCRIPTIONS = { + 'meta': 'Sample information record', + 'results': 'All output files to be published', + 'logs': 'Optional program specific log files', + 'nf_logs': 'Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace})', + 'versions': 'A YAML formatted file with program versions', +} + + +def parse_nf_input_block(text): + """Parse the Nextflow input: block to extract field type information.""" + m = re.search(r'\n\s+input:\s*\n(.*?)\n\s+(?:output|stage):', text, re.DOTALL) + if not m: + return {'records': [], 'standalone': []} + + block = m.group(1) + records = [] + standalone = [] + + for rec_match in re.finditer(r'record\s*\(\s*(.*?)\)', block, re.DOTALL): + rec_body = rec_match.group(1) + fields = [] + for field_match in re.finditer(r'(\w+)\s*:\s*([\w<>?]+)', rec_body): + fields.append({ + 'name': field_match.group(1), + 'type': field_match.group(2), + }) + if fields: + records.append({'fields': fields}) + + record_span_end = 0 + for rec_match in re.finditer(r'record\s*\(.*?\)', block, re.DOTALL): + record_span_end = max(record_span_end, rec_match.end()) + + remaining = block[record_span_end:] + for sa_match in re.finditer( + r'(\w+)\s*:\s*((?:Set<)?(?:Path|String|Integer|Boolean|Record)(?:>)?\??)', remaining + ): + standalone.append({ + 'name': sa_match.group(1), + 'type': sa_match.group(2), + }) + + return {'records': records, 'standalone': standalone} + + +def 
_derive_output_type(value_expr): + """Derive a type string from a Nextflow output value expression.""" + value_expr = value_expr.strip() + if re.match(r'^\w+$', value_expr) and not value_expr.startswith(('file', 'files')): + return 'Record' + if value_expr.startswith('file('): + if 'optional: true' in value_expr or 'optional:true' in value_expr: + return 'Path?' + return 'Path' + if value_expr.startswith('files('): + if 'optional: true' in value_expr or 'optional:true' in value_expr: + return 'Set' + return 'Set' + if value_expr.startswith('['): + return 'Set' + return 'Path' + + +def parse_nf_output_block(text): + """Parse the Nextflow output: block to extract field type information.""" + m = re.search(r'\n\s+output:\s*\n(.*?)\n\s+(?:script|exec|shell):', text, re.DOTALL) + if not m: + return {'records': []} + + block = m.group(1) + records = [] + + for rec_match in re.finditer(r'record\s*\(\s*(.*?)\n\s*\)', block, re.DOTALL): + rec_body = rec_match.group(1) + fields = [] + clean_lines = [] + in_list = 0 + for line in rec_body.split('\n'): + stripped = line.strip() + if stripped.startswith('//'): + continue + in_list += stripped.count('[') - stripped.count(']') + if in_list > 0 and not re.match(r'^\s*\w+\s*:', line): + continue + clean_lines.append(line) + clean_body = '\n'.join(clean_lines) + + for field_match in re.finditer( + r'(\w+)\s*:\s*(.+?)(?:,\s*$|\s*$)', clean_body, re.MULTILINE + ): + fname = field_match.group(1) + fvalue = field_match.group(2).strip().rstrip(',') + ftype = _derive_output_type(fvalue) + fields.append({'name': fname, 'type': ftype}) + + if fields: + records.append({'fields': fields}) + + return {'records': records} + + +def merge_io_types(groovydoc_inputs, nf_inputs): + """Merge NF-parsed type info into groovydoc @input entries.""" + type_map = {} + for rec in nf_inputs.get('records', []): + for field in rec.get('fields', []): + type_map[field['name']] = field['type'] + for sa in nf_inputs.get('standalone', []): + type_map[sa['name']] = 
sa['type'] + + result = [] + for entry in groovydoc_inputs: + new_entry = dict(entry) + new_fields = [] + for field in entry.get('fields', []): + new_field = dict(field) + fname = field['name'].rstrip('?') + if fname in type_map: + new_field['type'] = type_map[fname] + new_fields.append(new_field) + new_entry['fields'] = new_fields + # For standalone entries (no fields), attach the type directly + if not new_fields: + entry_name = entry.get('name', '').rstrip('?') + if entry_name in type_map: + new_entry['type'] = type_map[entry_name] + result.append(new_entry) + return result + + +def merge_io_types_output(groovydoc_outputs, nf_outputs): + """Merge NF-parsed type info into groovydoc @output entries, ordered by NF record.""" + nf_records = nf_outputs.get('records', []) + + # Build description lookup from groovydoc fields + desc_map = {} + for entry in groovydoc_outputs: + for field in entry.get('fields', []): + fname = field['name'].rstrip('?') + if field.get('description'): + desc_map[fname] = field['description'] + + result = [] + for i, entry in enumerate(groovydoc_outputs): + new_entry = dict(entry) + + if i < len(nf_records): + # Build fields in NF record order + new_fields = [] + for nf_field in nf_records[i]['fields']: + fname = nf_field['name'] + desc = desc_map.get(fname, COMMON_OUTPUT_DESCRIPTIONS.get(fname, '')) + new_fields.append({ + 'name': fname, + 'type': nf_field['type'], + 'description': desc, + }) + new_entry['fields'] = new_fields + else: + # Fallback: no matching NF record, keep groovydoc fields as-is + new_entry['fields'] = list(entry.get('fields', [])) + + result.append(new_entry) + return result + + +def parse_nf_take_block(text): + """Parse the Nextflow take: block from a subworkflow to extract parameter types.""" + m = re.search(r'\n\s+take:\s*\n(.*?)\n\s+main:', text, re.DOTALL) + if not m: + return {'standalone': []} + + block = m.group(1) + standalone = [] + for line_match in re.finditer(r'(\w+)\s*:\s*([\w<>?]+)', block): + 
standalone.append({ + 'name': line_match.group(1), + 'type': line_match.group(2), + }) + + return {'standalone': standalone} + + +def merge_take_types(groovydoc_inputs, take_inputs): + """Merge NF take: block types into groovydoc @input entries for subworkflows.""" + take_params = take_inputs.get('standalone', []) + + record_takes = [p for p in take_params if p['type'] == 'Channel'] + standalone_takes = {p['name']: p['type'] for p in take_params if p['type'] != 'Channel'} + + result = [] + rec_idx = 0 + for entry in groovydoc_inputs: + new_entry = dict(entry) + if entry.get('fields'): + if rec_idx < len(record_takes): + new_entry['take_name'] = record_takes[rec_idx]['name'] + new_entry['take_type'] = record_takes[rec_idx]['type'] + rec_idx += 1 + new_entry['fields'] = list(entry['fields']) + else: + entry_name = entry.get('name', '').rstrip('?') + if entry_name in standalone_takes: + new_entry['type'] = standalone_takes[entry_name] + result.append(new_entry) + return result + + +def load_schema(schema_path): + """Load a JSON schema and extract parameter definitions from $defs.""" + if not schema_path.exists(): + return {} + with open(schema_path) as f: + schema = json.load(f) + defs = schema.get('$defs', schema.get('definitions', {})) + params = {} + for group_key, group_val in defs.items(): + if not isinstance(group_val, dict) or 'properties' not in group_val: + continue + params[group_key] = { + 'title': group_val.get('title', group_key), + 'description': group_val.get('description', ''), + 'fa_icon': group_val.get('fa_icon', ''), + 'properties': {}, + } + for prop_key, prop_val in group_val.get('properties', {}).items(): + params[group_key]['properties'][prop_key] = { + 'type': prop_val.get('type', 'string'), + 'default': prop_val.get('default'), + 'description': prop_val.get('description', ''), + 'help': prop_val.get('help', ''), + 'fa_icon': prop_val.get('fa_icon', ''), + 'hidden': prop_val.get('hidden', False), + 'enum': prop_val.get('enum'), + 'pattern': 
prop_val.get('pattern'), + } + return params + + +def load_citations(citations_path): + """Load citations.yml and flatten all categories into a single lookup dict.""" + if not citations_path.exists(): + return {} + with open(citations_path) as f: + raw = yaml.safe_load(f) + flat = {} + for category_key, category_val in raw.items(): + if isinstance(category_val, dict): + for tool_key, tool_val in category_val.items(): + if isinstance(tool_val, dict): + flat[tool_key] = { + 'name': tool_val.get('name', tool_key), + 'link': tool_val.get('link', ''), + 'description': tool_val.get('description', ''), + 'cite': tool_val.get('cite', '').strip(), + 'category': category_key, + } + return flat + + +def clean_tool_info(tool): + """Clean tool name/version when conda build strings leak in (e.g. name=version build).""" + if not tool: + return tool + tool = dict(tool) + name = tool.get('name', '') + if '=' in name: + parts = name.split('=', 1) + tool['name'] = parts[0] + tool['version'] = parts[1] + return tool + + +def build_source_url(path): + """Construct a GitHub source URL from a relative path.""" + path = path.rstrip('/') + if path == '.': + return 'https://github.com/bactopia/bactopia' + return f'https://github.com/bactopia/bactopia/tree/main/{path}' + + +def parse_output_tree(snap_path): + """Parse a nf-test snapshot file to extract output file paths.""" + if not snap_path.exists(): + return [] + with open(snap_path) as f: + data = json.load(f) + first_key = next(iter(data), None) + if not first_key: + return [] + content = data[first_key].get('content', []) + if len(content) < 2 or not isinstance(content[1], list): + return [] + return content[1] + + +def parse_bactopia(repo_path): + """Parse all Bactopia v4 metadata into a unified data structure.""" + repo = Path(repo_path) + catalog_path = repo / 'catalog.json' + citations_path = repo / 'data' / 'citations.yml' + + with open(catalog_path) as f: + catalog = json.load(f) + + citations = load_citations(citations_path) + + 
shared_schemas = {} + for schema_name in ['generic', 'bactopia-tools', 'bactopia']: + schema_file = repo / 'conf' / 'schema' / f'{schema_name}.json' + if schema_file.exists(): + shared_schemas[schema_name] = load_schema(schema_file) + + shared_param_keys = set() + for schema_params in shared_schemas.values(): + shared_param_keys.update(schema_params.keys()) + + modules = {} + for key, val in catalog['modules'].items(): + main_nf = repo / val['path'] / 'main.nf' + groovydoc = {} + nf_inputs = {'records': [], 'standalone': []} + nf_outputs = {'records': []} + if main_nf.exists(): + nf_text = main_nf.read_text() + groovydoc = parse_groovydoc(nf_text) + nf_inputs = parse_nf_input_block(nf_text) + nf_outputs = parse_nf_output_block(nf_text) + + inputs_with_types = merge_io_types(groovydoc.get('inputs', []), nf_inputs) + outputs_with_types = merge_io_types_output(groovydoc.get('outputs', []), nf_outputs) + + schema_path = repo / val['path'] / 'schema.json' + params = load_schema(schema_path) + + modules[key] = { + 'key': key, + 'summary': groovydoc.get('summary', '') or val.get('description', ''), + 'description': groovydoc.get('description', ''), + 'path': val['path'], + 'source_url': build_source_url(val['path']), + 'scope': val.get('scope', ''), + 'process_name': val.get('process_name', ''), + 'tool': clean_tool_info(val.get('tool', {})), + 'status': groovydoc.get('status', ''), + 'keywords': groovydoc.get('keywords', []), + 'tags': groovydoc.get('tags', val.get('tags', {})), + 'citations': groovydoc.get('citations', []), + 'notes': groovydoc.get('notes', []), + 'inputs': inputs_with_types, + 'outputs': outputs_with_types, + 'nf_inputs': nf_inputs, + 'nf_outputs': nf_outputs, + 'params': params, + 'used_by_subworkflows': [], + 'used_by_workflows': [], + } + + subworkflows = {} + for key, val in catalog['subworkflows'].items(): + main_nf = repo / val['path'] / 'main.nf' + groovydoc = {} + nf_inputs = {'standalone': []} + if main_nf.exists(): + nf_text = 
main_nf.read_text() + groovydoc = parse_groovydoc(nf_text) + nf_inputs = parse_nf_take_block(nf_text) + + inputs_with_types = merge_take_types(groovydoc.get('inputs', []), nf_inputs) + + catalog_modules = val.get('calls', {}).get('modules', []) + groovydoc_modules = groovydoc.get('modules', []) + module_list = groovydoc_modules if groovydoc_modules else catalog_modules + + catalog_sws = val.get('calls', {}).get('subworkflows', []) + groovydoc_sws = groovydoc.get('subworkflows', []) + subworkflow_list = groovydoc_sws if groovydoc_sws else catalog_sws + + subworkflows[key] = { + 'key': key, + 'summary': groovydoc.get('summary', '') or val.get('description', ''), + 'description': groovydoc.get('description', ''), + 'path': val['path'], + 'source_url': build_source_url(val['path']), + 'scope': val.get('scope', ''), + 'status': groovydoc.get('status', ''), + 'keywords': groovydoc.get('keywords', []), + 'tags': groovydoc.get('tags', val.get('tags', {})), + 'citations': groovydoc.get('citations', []), + 'modules': module_list, + 'subworkflows': subworkflow_list, + 'inputs': inputs_with_types, + 'outputs': groovydoc.get('outputs', []), + 'nf_inputs': nf_inputs, + 'used_by_workflows': [], + } + + workflows = {} + for key, val in catalog['workflows'].items(): + main_nf = repo / val['path'] / 'main.nf' + groovydoc = {} + if main_nf.exists(): + groovydoc = parse_groovydoc(main_nf.read_text()) + + schema_path = repo / val['path'] / 'nextflow_schema.json' + all_params = load_schema(schema_path) + tool_params = {k: v for k, v in all_params.items() if k not in shared_param_keys} + + catalog_subworkflows = val.get('subworkflows', []) + groovydoc_subworkflows = groovydoc.get('subworkflows', []) + subworkflow_list = groovydoc_subworkflows if groovydoc_subworkflows else catalog_subworkflows + + wf_type = val.get('type', 'tool') + shared_schema_key = 'bactopia' if wf_type == 'named' else 'bactopia-tools' + + snap_path = repo / val['path'] / 'tests' / 'main.nf.test.snap' + output_tree = 
parse_output_tree(snap_path) + + workflows[key] = { + 'key': key, + 'type': wf_type, + 'summary': groovydoc.get('summary', '') or val.get('description', ''), + 'description': groovydoc.get('description', ''), + 'path': val['path'], + 'source_url': build_source_url(val['path']), + 'ext': val.get('ext', []), + 'status': groovydoc.get('status', ''), + 'keywords': groovydoc.get('keywords', []), + 'tags': groovydoc.get('tags', val.get('tags', {})), + 'citations': groovydoc.get('citations', []), + 'subworkflows': subworkflow_list, + 'inputs': groovydoc.get('inputs', []), + 'published_outputs': groovydoc.get('sections', []), + 'notes': groovydoc.get('notes', []), + 'output_tree': output_tree, + 'params': { + 'tool_specific': tool_params, + 'shared_schema': shared_schema_key, + }, + 'uses_subworkflows': [], + 'uses_modules': [], + } + + # Build reverse-lookup maps + for wf_key, wf in workflows.items(): + seen_modules = set() + for sw_name in wf.get('subworkflows', []): + if sw_name in subworkflows: + subworkflows[sw_name]['used_by_workflows'].append(wf_key) + wf['uses_subworkflows'].append(sw_name) + for mod_name in subworkflows[sw_name].get('modules', []): + if mod_name in modules: + modules[mod_name]['used_by_workflows'].append(wf_key) + seen_modules.add(mod_name) + wf['uses_modules'] = sorted(seen_modules) + + for sw_key, sw in subworkflows.items(): + for mod_name in sw.get('modules', []): + if mod_name in modules: + modules[mod_name]['used_by_subworkflows'].append(sw_key) + + # Deduplicate reverse-lookup lists + for mod in modules.values(): + mod['used_by_subworkflows'] = sorted(set(mod['used_by_subworkflows'])) + mod['used_by_workflows'] = sorted(set(mod['used_by_workflows'])) + for sw in subworkflows.values(): + sw['used_by_workflows'] = sorted(set(sw['used_by_workflows'])) + + return { + 'meta': { + 'bactopia_version': catalog.get('bactopia_version', ''), + 'generated': datetime.now(timezone.utc).isoformat(), + 'bactopia_repo': str(repo.resolve()), + }, + 
'citations': citations, + 'shared_params': shared_schemas, + 'modules': modules, + 'subworkflows': subworkflows, + 'workflows': workflows, + } + + +def main(): + parser = argparse.ArgumentParser(description='Parse Bactopia v4 metadata for doc generation') + parser.add_argument('repo', help='Path to the Bactopia v4 repository') + parser.add_argument('--output', '-o', default='data/bactopia.json', + help='Output JSON path (default: data/bactopia.json)') + args = parser.parse_args() + + repo = Path(args.repo) + if not (repo / 'catalog.json').exists(): + print(f'Error: {repo / "catalog.json"} not found', file=sys.stderr) + sys.exit(1) + + result = parse_bactopia(repo) + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w') as f: + json.dump(result, f, indent=2) + + n_mod = len(result['modules']) + n_sw = len(result['subworkflows']) + n_wf = len(result['workflows']) + n_cit = len(result['citations']) + print(f'Parsed {n_mod} modules, {n_sw} subworkflows, {n_wf} workflows, {n_cit} citations') + print(f'Output written to {output_path}') + + +if __name__ == '__main__': + main() diff --git a/bin/parse-cli.py b/bin/parse-cli.py new file mode 100644 index 00000000..f200a85f --- /dev/null +++ b/bin/parse-cli.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python3 +"""Parse bactopia-py CLI commands via Click introspection and write cli.json.""" +import argparse +import copy +import importlib +import json +import sys +from datetime import datetime, timezone +from pathlib import Path + +import click +import rich_click.rich_click as rc_globals + +import bactopia + +CONSOLE_SCRIPTS = { + "bactopia-citations": ("bactopia.cli.citations", "user"), + "bactopia-datasets": ("bactopia.cli.datasets", "user"), + "bactopia-download": ("bactopia.cli.download", "user"), + "bactopia-prepare": ("bactopia.cli.prepare", "user"), + "bactopia-search": ("bactopia.cli.search", "user"), + "bactopia-summary": ("bactopia.cli.summary", "user"), + 
"bactopia-update": ("bactopia.cli.update", "user"), + "bactopia-status": ("bactopia.cli.status", "user"), + "bactopia-sysinfo": ("bactopia.cli.sysinfo", "user"), + "bactopia-workflows": ("bactopia.cli.workflows", "user"), + "bactopia-atb-formatter": ("bactopia.cli.atb.atb_formatter", "user"), + "bactopia-atb-downloader": ("bactopia.cli.atb.atb_downloader", "user"), + "bactopia-merge-schemas": ("bactopia.cli.helpers.merge_schemas", "user"), + "bactopia-pubmlst-setup": ("bactopia.cli.pubmlst.setup", "user"), + "bactopia-prune": ("bactopia.cli.prune", "user"), + "bactopia-pubmlst-build": ("bactopia.cli.pubmlst.build", "user"), + "bactopia-test": ("bactopia.cli.testing", "user"), + "bactopia-lint": ("bactopia.cli.lint", "user"), + "bactopia-catalog": ("bactopia.cli.catalog", "user"), + "bactopia-review-tests": ("bactopia.cli.review", "user"), + "bactopia-docs": ("bactopia.cli.docs", "user"), + "bactopia-scaffold": ("bactopia.cli.scaffold", "user"), + "bactopia-check-fastqs": ("bactopia.cli.pipeline.check_fastqs", "pipeline"), + "bactopia-check-assembly-accession": ("bactopia.cli.pipeline.check_assembly_accession", "pipeline"), + "bactopia-cleanup-coverage": ("bactopia.cli.pipeline.cleanup_coverage", "pipeline"), + "bactopia-mask-consensus": ("bactopia.cli.pipeline.mask_consensus", "pipeline"), + "bactopia-kraken-bracken-summary": ("bactopia.cli.pipeline.kraken_bracken_summary", "pipeline"), + "bactopia-scrubber-summary": ("bactopia.cli.pipeline.scrubber_summary", "pipeline"), + "bactopia-teton-prepare": ("bactopia.cli.pipeline.teton_prepare", "pipeline"), + "bactopia-bracken-to-excel": ("bactopia.cli.pipeline.bracken_to_excel", "pipeline"), +} + +CATEGORIES = { + "user": { + "label": "User & Developer Commands", + "description": "Commands for preparing inputs, querying databases, and developing Bactopia components.", + }, + "pipeline": { + "label": "Pipeline Utility Scripts", + "description": "Internal scripts called by Nextflow modules during pipeline execution.", + 
}, +} + + +def find_click_command(module): + """Find the Click command object in a module.""" + for attr in vars(module).values(): + if isinstance(attr, click.Command): + return attr + return None + + +def format_type(param_type): + """Format a Click parameter type for display.""" + if hasattr(param_type, "choices"): + return "CHOICE" + name = str(param_type) + if name.startswith("<") or "object at" in name: + type_class = type(param_type).__name__.upper() + return type_class if type_class != "PARAMTYPE" else "STRING" + return name + + +def extract_param(param): + """Extract structured data from a Click parameter.""" + if isinstance(param, click.Argument): + return { + "kind": "argument", + "name": param.name, + "type": format_type(param.type), + "required": param.required, + "nargs": param.nargs, + "human_readable_name": param.human_readable_name, + "help": getattr(param, "help", None) or "", + } + default = param.default + if default is not None and "Sentinel" in str(type(default)): + default = None + + return { + "kind": "option", + "name": param.name, + "opts": list(param.opts) + list(param.secondary_opts or []), + "type": format_type(param.type), + "required": getattr(param, "required", False), + "default": default, + "is_flag": getattr(param, "is_flag", False), + "help": getattr(param, "help", "") or "", + "choices": list(param.type.choices) if hasattr(param.type, "choices") else None, + } + + +def extract_command(cmd_name, cmd, category, option_groups): + """Extract structured data from a Click command.""" + params = [extract_param(p) for p in cmd.params] + arguments = [p for p in params if p["kind"] == "argument"] + options = [p for p in params if p["kind"] == "option"] + + result = { + "name": cmd_name, + "help": cmd.help or "", + "is_group": isinstance(cmd, click.Group), + "category": category, + "arguments": arguments, + "options": options, + "option_groups": option_groups.get(cmd_name, []), + } + + if isinstance(cmd, click.Group): + subcommands = {} + 
for sub_name, sub_cmd in sorted(cmd.commands.items()): + sub_params = [extract_param(p) for p in sub_cmd.params] + sub_arguments = [p for p in sub_params if p["kind"] == "argument"] + sub_options = [p for p in sub_params if p["kind"] == "option"] + subcommands[sub_name] = { + "name": sub_name, + "help": sub_cmd.help or "", + "arguments": sub_arguments, + "options": sub_options, + "option_groups": option_groups.get(f"{cmd_name} {sub_name}", []), + } + result["subcommands"] = subcommands + + return result + + +def main(): + parser = argparse.ArgumentParser(description="Parse bactopia-py CLI commands into JSON") + parser.add_argument("--output", "-o", default="data/cli.json", help="Output JSON path") + args = parser.parse_args() + + commands = {} + category_commands = {"user": [], "pipeline": []} + errors = [] + + for cmd_name, (module_path, category) in sorted(CONSOLE_SCRIPTS.items()): + try: + rc_globals.OPTION_GROUPS = {} + mod = importlib.import_module(module_path) + option_groups = copy.deepcopy(rc_globals.OPTION_GROUPS) + + cmd = find_click_command(mod) + if cmd is None: + errors.append(f"{cmd_name}: no Click command found in {module_path}") + continue + commands[cmd_name] = extract_command(cmd_name, cmd, category, option_groups) + category_commands[category].append(cmd_name) + except Exception as e: + errors.append(f"{cmd_name}: {e}") + + if errors: + for err in errors: + print(f"Warning: {err}", file=sys.stderr) + + categories = {} + for key, meta in CATEGORIES.items(): + categories[key] = { + **meta, + "commands": sorted(category_commands[key]), + } + + output = { + "meta": { + "version": bactopia.__version__, + "generated": datetime.now(timezone.utc).isoformat(), + }, + "categories": categories, + "commands": commands, + } + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w") as f: + json.dump(output, f, indent=2, default=str) + + print(f"Parsed {len(commands)} CLI commands -> 
{output_path}") + + +if __name__ == "__main__": + main() diff --git a/bin/update-citations.py b/bin/update-citations.py new file mode 100644 index 00000000..cf515e94 --- /dev/null +++ b/bin/update-citations.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +"""Fetch papers citing Bactopia from OpenAlex and write data/citations.yml.""" +import argparse +import json +import sys +import urllib.request +import urllib.parse +from datetime import date +from pathlib import Path + +OPENALEX_WORK_ID = 'W3046929726' +MAILTO = 'robbie.petit@gmail.com' +PER_PAGE = 200 +SELECT_FIELDS = 'id,doi,title,authorships,publication_date,primary_location' + + +def fetch_citations(): + """Fetch all citing works via cursor pagination.""" + works = [] + cursor = '*' + + while cursor: + params = urllib.parse.urlencode({ + 'filter': f'cites:{OPENALEX_WORK_ID}', + 'per_page': PER_PAGE, + 'cursor': cursor, + 'mailto': MAILTO, + 'select': SELECT_FIELDS, + }) + url = f'https://api.openalex.org/works?{params}' + + req = urllib.request.Request(url, headers={'Accept': 'application/json'}) + with urllib.request.urlopen(req) as resp: + data = json.loads(resp.read()) + + results = data.get('results', []) + if not results: + break + + works.extend(results) + meta = data.get('meta', {}) + cursor = meta.get('next_cursor') + print(f' Fetched {len(works)} / {meta.get("count", "?")} citations...', file=sys.stderr) + + return works + + +def format_authors(authorships): + """Format OpenAlex authorships into a citation-style author string.""" + names = [] + for a in authorships: + display = a.get('author', {}).get('display_name', '') + if display: + names.append(display) + if not names: + return '' + if len(names) > 10: + return ', '.join(names[:10]) + ', et al.' + return ', '.join(names) + '.' 
def parse_work(work):
    """Extract citation fields from an OpenAlex work object.

    Missing or null fields fall back to empty values instead of raising.

    Args:
        work: A work dict as returned by the OpenAlex ``/works`` endpoint.

    Returns:
        dict: ``authors``, ``title``, ``url`` (the DOI), ``journal`` and
        ``date`` (publication date), all strings.
    """
    # `or []` covers an explicit null value, which .get's default does not.
    authors = format_authors(work.get('authorships') or [])
    title = (work.get('title') or '').strip()
    doi = work.get('doi') or ''
    pub_date = work.get('publication_date') or ''

    location = work.get('primary_location') or {}
    source = location.get('source') or {}
    journal = source.get('display_name') or ''

    return {
        'authors': authors,
        'title': title,
        'url': doi,
        'journal': journal,
        'date': pub_date,
    }


def _yaml_quote(value):
    """Return ``value`` as a double-quoted YAML scalar.

    A JSON string literal is used for this: it escapes backslashes, double
    quotes and control characters with escape sequences that YAML
    double-quoted scalars also accept.
    """
    import json  # local import keeps this helper self-contained

    return json.dumps(str(value), ensure_ascii=False)


def write_yaml(citations, output_path):
    """Write citations as YAML (hand-formatted to avoid PyYAML dependency here).

    Args:
        citations: Iterable of dicts with ``authors``, ``title``, ``url``,
            ``journal`` and ``date`` keys.
        output_path: ``pathlib.Path`` of the file to write; missing parent
            directories are created.

    ``authors``, ``title`` and ``journal`` are emitted as properly escaped
    double-quoted scalars; ``url`` and ``date`` are emitted unquoted.
    """
    lines = ['citations:']
    for c in citations:
        lines.append(f'  - authors: {_yaml_quote(c["authors"])}')
        lines.append(f'    title: {_yaml_quote(c["title"])}')
        lines.append(f'    url: {c["url"]}')
        lines.append(f'    journal: {_yaml_quote(c["journal"])}')
        lines.append(f'    date: {c["date"]}')
        lines.append('')

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Author names and titles are routinely non-ASCII; never rely on the
    # platform default encoding.
    output_path.write_text('\n'.join(lines) + '\n', encoding='utf-8')


def main():
    """Fetch the citing works, normalise them and write the YAML data file.

    Works without a title are dropped, the rest are sorted newest first by
    date string, and duplicates (same title, ignoring case and surrounding
    whitespace) are removed keeping the first, i.e. newest, occurrence.
    """
    parser = argparse.ArgumentParser(description='Fetch Bactopia citations from OpenAlex')
    parser.add_argument('--output', '-o', default='data/citations.yml',
                        help='Output YAML path (default: data/citations.yml)')
    args = parser.parse_args()

    print('Fetching citations from OpenAlex...', file=sys.stderr)
    works = fetch_citations()

    citations = [parse_work(w) for w in works]
    citations = [c for c in citations if c['title']]
    citations.sort(key=lambda c: c['date'], reverse=True)

    seen_titles = set()
    deduped = []
    for c in citations:
        key = c['title'].lower().strip()
        if key not in seen_titles:
            seen_titles.add(key)
            deduped.append(c)
    citations = deduped

    output_path = Path(args.output)
    write_yaml(citations, output_path)
    print(f'Wrote {len(citations)} citations to {output_path}', file=sys.stderr)
a/docs/blog/posts/2024-03-24-allthebateria.md b/blog/2024/03/24-allthebacteria-tutorial/index.md similarity index 55% rename from docs/blog/posts/2024-03-24-allthebateria.md rename to blog/2024/03/24-allthebacteria-tutorial/index.md index ba0f52f6..49e1d130 100644 --- a/docs/blog/posts/2024-03-24-allthebateria.md +++ b/blog/2024/03/24-allthebacteria-tutorial/index.md @@ -1,25 +1,17 @@ --- -authors: - - rpetit3 -categories: - - Community - - Tutorial +title: Using Bactopia with AllTheBacteria Assemblies +authors: [rpetit3] +tags: [community, tutorial] date: 2024-03-24 -draft: false -pin: true -links: - - installation.md -slug: bactopia-allthebacteria-tutorial +slug: allthebacteria-tutorial description: Learn how to use Bactopia to analyze nearly 2,000,000 bacterial assemblies from the AllTheBacteria project. --- -# Using Bactopia with AllTheBacteria Assemblies - [AllTheBacteria](https://github.com/iqbal-lab-org/AllTheBacteria) (ATB) is a collection of nearly 2,000,000 bacterial assemblies. In this post you'll learn how to use Bactopia to -seamlessly analyze these assemblies with the available [Bactopia Tools](../../bactopia-tools/index.md). +seamlessly analyze these assemblies with the available [Bactopia Tools](/bactopia-tools). - + ## AllTheBacteria @@ -27,7 +19,7 @@ seamlessly analyze these assemblies with the available [Bactopia Tools](../../ba has now taken it a step further with [AllTheBacteria](https://www.biorxiv.org/content/10.1101/2024.03.08.584059v1). As someone once tasked with assembling "all the _Staphylococcus aureus_ genomes" (_although, it was only about 700 samples in 2010!_), this is truly an impressive feat, and a valuable community resource! -With the latest assemblies, the collection is now nearly 2,000,000 bacterial assemblies! 🎉 +With the latest assemblies, the collection is now nearly 2,000,000 bacterial assemblies! 
Similar to their previous methods, the latest version of AllTheBacteria uses [Shovill](https://github.com/tseemann/shovill) for assembly. In addition, each assembly has basic metrics calculated, undergoes taxonomic @@ -38,11 +30,11 @@ please see: - GitHub: [AllTheBacteria](https://github.com/iqbal-lab-org/AllTheBacteria) Since Zamin revealed the latest updates on AllTheBacteria, I've been wondering: _How could Bactopia -users take advantage these assemblies? Especially, through available [Bactopia Tools](../../bactopia-tools/index.md)?_ +users take advantage of these assemblies? Especially, through available [Bactopia Tools](/bactopia-tools)?_ ## Why Bactopia Tools? -The really nice thing about Bactopia Tools is they make it super easy to run [60 additional analyses](../../bactopia-tools/index.md) +The really nice thing about Bactopia Tools is they make it super easy to run [60 additional analyses](/bactopia-tools) on your genomes. It's really as simple as adding `--wf ` to your Bactopia command, then Bactopia will then handle the rest for you, including container selection and audit trails. @@ -54,57 +46,65 @@ To give you an idea, there are currently 38 Bactopia Tools that use assemblies a In other words, each of these tools would be easy to run on the 2,000,000 AllTheBacteria assemblies. -??? tip "Expand to see the list of Bactopia Tools" - - Each of the tools listed below accepts a single assembly as input. 
- - | Tool | Description | - |------|-------------| - | [bakta](../../bactopia-tools/bakta.md) | Rapid annotation of bacterial genomes & plasmids | - | [fastani](../../bactopia-tools/fastani.md) | Fast alignment-free computation of whole-genome Average Nucleotide Identity (ANI) | - | [gtdb](../../bactopia-tools/gtdb.md) | Identify marker genes and assign taxonomic classifications | - | [mashtree](../../bactopia-tools/mashtree.md) | Create a trees using Mash distances | - | [abricate](../../bactopia-tools/abricate.md) | Mass screening of contigs for antimicrobial and virulence genes | - | [abritamr](../../bactopia-tools/abritamr.md) | A NATA accredited tool for reporting the presence of antimicrobial resistance genes | - | [agrvate](../../bactopia-tools/agrvate.md) | Rapid identification of Staphylococcus aureus agr locus type and agr operon variants | - | [amrfinderplus](../../bactopia-tools/amrfinderplus.md) | Identify antimicrobial resistance in genes or proteins | - | [btyper3](../../bactopia-tools/btyper3.md) | Taxonomic classification of Bacillus cereus group isolates | - | [busco](../../bactopia-tools/busco.md) | Assembly completeness based on evolutionarily informed expectations | - | [checkm](../../bactopia-tools/checkm.md) | Assess the assembly quality of your microbial samples | - | [ectyper](../../bactopia-tools/ectyper.md) | In-silico prediction of Escherichia coli serotype | - | [emmtyper](../../bactopia-tools/emmtyper.md) | emm-typing of Streptococcus pyogenes assemblies | - | [gamma](../../bactopia-tools/gamma.md) | Identification, classification, and annotation of translated gene matches | - | [hicap](../../bactopia-tools/hicap.md) | Identify cap locus serotype and structure in your Haemophilus influenzae assemblies | - | [hpsuissero](../../bactopia-tools/hpsuissero.md) | Rapid Haemophilus parasuis Serotyping of assemblies | - | [kleborate](../../bactopia-tools/kleborate.md) | Screen for MLST, sub-species, and other Klebsiella related genes of 
interest | - | [legsta](../../bactopia-tools/legsta.md) | Typing of Legionella pneumophila assemblies | - | [lissero](../../bactopia-tools/lissero.md) | Serogroup typing prediction for Listeria monocytogenes | - | [mashdist](../../bactopia-tools/mashdist.md) | Calculate Mash distances between sequences | - | [mcroni](../../bactopia-tools/mcroni.md) | Sequence variation in mobilized colistin resistance (mcr-1) genes | - | [meningotype](../../bactopia-tools/meningotype.md) | Serotyping of Neisseria meningitidis | - | [mlst](../../bactopia-tools/mlst.md) | Scan contig files against PubMLST typing schemes | - | [mobsuite](../../bactopia-tools/mobsuite.md) | Reconstruct and annotate plasmids in bacterial assemblies | - | [pasty](../../bactopia-tools/pasty.md) | Serogrouping of Pseudomonas aeruginosa isolates | - | [pbptyper](../../bactopia-tools/pbptyper.md) | Penicillin Binding Protein (PBP) typer for Streptococcus pneumoniae | - | [phispy](../../bactopia-tools/phispy.md) | Predict prophages in bacterial genomes | - | [plasmidfinder](../../bactopia-tools/plasmidfinder.md) | Plasmid identification from assemblies | - | [prokka](../../bactopia-tools/prokka.md) | Whole genome annotation of small genomes (bacterial, archeal, viral) | - | [quast](../../bactopia-tools/quast.md) | Assess the quality of assembled contigs | - | [rgi](../../bactopia-tools/rgi.md) | Predict antibiotic resistance from assemblies | - | [seqsero2](../../bactopia-tools/seqsero2.md) | Salmonella serotype prediction from reads or assemblies | - | [shigeifinder](../../bactopia-tools/shigeifinder.md) | Shigella and EIEC serotyping from assemblies | - | [sistr](../../bactopia-tools/sistr.md) | Serovar prediction of Salmonella assemblies | - | [spatyper](../../bactopia-tools/spatyper.md) | Computational method for finding spa types in Staphylococcus aureus | - | [staphopiasccmec](../../bactopia-tools/staphopiasccmec.md) | Primer based SCCmec typing of Staphylococcus aureus genomes | - | 
[stecfinder](../../bactopia-tools/stecfinder.md) | Serotyping Shigella toxin producing Escherichia coli genomes | - | [ssuissero](../../bactopia-tools/ssuissero.md) | Rapid Streptococcus suis Serotyping of assemblies | - -!!! failure "Bactopia Tools require samples processed with Bactopia" - - One of the key features of Bactopia Tools, is they utilize Bactopia outputs to rapidly - identify and begin analysis. AllTheBacteria assemblies were not processed by Bactopia, - so they aren't compatible with Bactopia Tools. But, no worries, with a little work we - can make this a possibility! +
+Expand to see the list of Bactopia Tools + +:::tip + +Each of the tools listed below accepts a single assembly as input. + +| Tool | Description | +|------|-------------| +| [bakta](/bactopia-tools/bakta) | Rapid annotation of bacterial genomes & plasmids | +| [fastani](/bactopia-tools/fastani) | Fast alignment-free computation of whole-genome Average Nucleotide Identity (ANI) | +| [gtdb](/bactopia-tools/gtdb) | Identify marker genes and assign taxonomic classifications | +| [mashtree](/bactopia-tools/mashtree) | Create trees using Mash distances | +| [abricate](/bactopia-tools/abricate) | Mass screening of contigs for antimicrobial and virulence genes | +| [abritamr](/bactopia-tools/abritamr) | A NATA accredited tool for reporting the presence of antimicrobial resistance genes | +| [agrvate](/bactopia-tools/agrvate) | Rapid identification of Staphylococcus aureus agr locus type and agr operon variants | +| [amrfinderplus](/bactopia-tools/amrfinderplus) | Identify antimicrobial resistance in genes or proteins | +| [btyper3](/bactopia-tools/btyper3) | Taxonomic classification of Bacillus cereus group isolates | +| [busco](/bactopia-tools/busco) | Assembly completeness based on evolutionarily informed expectations | +| [checkm](/bactopia-tools/checkm) | Assess the assembly quality of your microbial samples | +| [ectyper](/bactopia-tools/ectyper) | In-silico prediction of Escherichia coli serotype | +| [emmtyper](/bactopia-tools/emmtyper) | emm-typing of Streptococcus pyogenes assemblies | +| [gamma](/bactopia-tools/gamma) | Identification, classification, and annotation of translated gene matches | +| [hicap](/bactopia-tools/hicap) | Identify cap locus serotype and structure in your Haemophilus influenzae assemblies | +| [hpsuissero](/bactopia-tools/hpsuissero) | Rapid Haemophilus parasuis Serotyping of assemblies | +| [kleborate](/bactopia-tools/kleborate) | Screen for MLST, sub-species, and other Klebsiella related genes of interest | +| 
[legsta](/bactopia-tools/legsta) | Typing of Legionella pneumophila assemblies | +| [lissero](/bactopia-tools/lissero) | Serogroup typing prediction for Listeria monocytogenes | +| [mashdist](/bactopia-tools/mashdist) | Calculate Mash distances between sequences | +| [mcroni](/bactopia-tools/mcroni) | Sequence variation in mobilized colistin resistance (mcr-1) genes | +| [meningotype](/bactopia-tools/meningotype) | Serotyping of Neisseria meningitidis | +| [mlst](/bactopia-tools/mlst) | Scan contig files against PubMLST typing schemes | +| [mobsuite](/bactopia-tools/mobsuite) | Reconstruct and annotate plasmids in bacterial assemblies | +| [pasty](/bactopia-tools/pasty) | Serogrouping of Pseudomonas aeruginosa isolates | +| [pbptyper](/bactopia-tools/pbptyper) | Penicillin Binding Protein (PBP) typer for Streptococcus pneumoniae | +| [phispy](/bactopia-tools/phispy) | Predict prophages in bacterial genomes | +| [plasmidfinder](/bactopia-tools/plasmidfinder) | Plasmid identification from assemblies | +| [prokka](/bactopia-tools/prokka) | Whole genome annotation of small genomes (bacterial, archeal, viral) | +| [quast](/bactopia-tools/quast) | Assess the quality of assembled contigs | +| [rgi](/bactopia-tools/rgi) | Predict antibiotic resistance from assemblies | +| [seqsero2](/bactopia-tools/seqsero2) | Salmonella serotype prediction from reads or assemblies | +| [shigeifinder](/bactopia-tools/shigeifinder) | Shigella and EIEC serotyping from assemblies | +| [sistr](/bactopia-tools/sistr) | Serovar prediction of Salmonella assemblies | +| [spatyper](/bactopia-tools/spatyper) | Computational method for finding spa types in Staphylococcus aureus | +| [stecfinder](/bactopia-tools/stecfinder) | Serotyping Shigella toxin producing Escherichia coli genomes | +| [ssuissero](/bactopia-tools/ssuissero) | Rapid Streptococcus suis Serotyping of assemblies | + +::: + +
+ +:::danger[Bactopia Tools require samples processed with Bactopia] + +One of the key features of Bactopia Tools, is they utilize Bactopia outputs to rapidly +identify and begin analysis. AllTheBacteria assemblies were not processed by Bactopia, +so they aren't compatible with Bactopia Tools. But, no worries, with a little work we +can make this a possibility! + +::: ## `bactopia atb-formatter` @@ -115,7 +115,9 @@ command called [atb-formatter](https://github.com/bactopia/bactopia-py?tab=readm (_AllTheBacteria Formatter_). With `atb-formatter`, the necessary Bactopia output directory structure will be created from a directory of _AllTheBacteria assemblies. -!!! success "AllTheBacteria assemblies can be used with Bactopia Tools!" +:::tip[AllTheBacteria assemblies can be used with Bactopia Tools!] + +::: That's cool and all, but let's actually demonstrate the usage of `atb-formatter` on some _Legionella pneumophila_ assemblies from AllTheBacteria. @@ -125,17 +127,17 @@ _Legionella pneumophila_ assemblies from AllTheBacteria. To demonstrate the usage of `bactopia atb-formatter`, I will use assemblies for _Legionella pneumophila_ from AllTheBacteria and run [legsta](https://github.com/tseemann/legsta), a typing tool for _L. pneumophila_ assemblies, written by [Torsten Seeman](https://www.doherty.edu.au/people/associate-professor-torsten-seemann), -To be specific, I will run legsta from the available [Bactopia Tool](https://bactopia.github.io/latest/bactopia-tools/legsta/). +To be specific, I will run legsta from the available [Bactopia Tool](/bactopia-tools/legsta). ### Getting Setup Before we get started, you'll need to have Bactopia installed. If you haven't done this yet, -please see the [installation instructions](../../installation.md). +please see the [installation instructions](/installation). You will also want to make sure you are using at least version 3.0.1 of Bactopia, as this is the first release to have the `atb-formatter` command. 
-``` { .bash .no-copy } +```bash bactopia --version bactopia 3.0.1 ``` @@ -145,7 +147,7 @@ bactopia 3.0.1 First I will download the _L. pneumophila_ assemblies from AllTheBacteria, then extract them into a folder called `legionella-assemblies`. Easy enough! -``` { .bash .copy } +```bash mkdir atb-legionella cd atb-legionella @@ -169,24 +171,31 @@ With the assemblies extracted, now I need to create the required Bactopia direct use of Bactopia Tools. For this, I used `bactopia atb-formatter`, which creates a sample folder for each assembly that matches the BioSample accession. -``` { .bash .copy } +```bash # Create the Bactopia directory structure bactopia atb-formatter --path legionella-assemblies --recursive ``` -??? info "A few notes about `bactopia atb-formatter`" +
+A few notes about bactopia atb-formatter + +:::info - Please note the usage of `--recursive` here, this will traverse the `legionella-assemblies` directory - to find all assemblies contained. At this point, the `bactopia` directory structure has been - created for 5,393 assemblies and is ready for use with Bactopia Tools. +Please note the usage of `--recursive` here, this will traverse the `legionella-assemblies` directory +to find all assemblies contained. At this point, the `bactopia` directory structure has been +created for 5,393 assemblies and is ready for use with Bactopia Tools. - Also, by default the assemblies are not copied into the Bactopia directory structure, but - instead symbolic links are created. This is to save disk space, but if you would like to - copy the assemblies, you can use the `--publish-mode` parameter to change this behavior +Also, by default the assemblies are not copied into the Bactopia directory structure, but +instead symbolic links are created. This is to save disk space, but if you would like to +copy the assemblies, you can use the `--publish-mode` parameter to change this behavior + +::: + +
After running the above command, you should see something like the following: -``` { .bash .no-copy } +```bash 2024-03-22 14:30:07:root:INFO - Setting up Bactopia directory structure (use --verbose to see more details) 2024-03-22 14:30:08:root:INFO - Bactopia directory structure created at bactopia 2024-03-22 14:30:08:root:INFO - Total assemblies processed: 5393 @@ -196,27 +205,29 @@ After running the above command, you should see something like the following: Fancy! Now we have all the assemblies sym-linked into a Bactopia directory structure. It's time to let Bactopia Tools shine! To do this, I will run the -[legsta Bactopia Tool](https://bactopia.github.io/latest/bactopia-tools/legsta/) and demonstrate +[legsta Bactopia Tool](/bactopia-tools/legsta) and demonstrate how seamless it is to type 5,393 assemblies. With a simple addition of `--wf legsta` and pointing to the Bactopia directory, `legsta` will be executed on all 5,393 assemblies! It really is that simple! -``` { .bash .copy } +```bash # Run legsta bactopia --wf legsta -profile singularity ``` -!!! tip "Please use Docker or Singularity for these analyses" +:::tip[Please use Docker or Singularity for these analyses] + +I'm a big supporter of Conda, but for reproducibility, it is recommended to use Docker or +Singularity with Bactopia Tools. Conda environments can change depending on when they are +installed, however the containers will always be the same. - I'm a big supporter of Conda, but for reproducibility, it is recommended to use Docker or - Singularity with Bactopia Tools. Conda environments can change depending on when they are - installed, however the containers will always be the same. 
+::: After some time, the `legsta` tool will complete for all 5,393 assemblies, and you should be met with something like the following: -```{bash} +```bash [5d/d04297] process > BACTOPIATOOLS:LEGSTA:LEGSTA_MODULE (SAMN29911258) [100%] 5393 of 5393 ✔ [71/c63bf7] process > BACTOPIATOOLS:LEGSTA:CSVTK_CONCAT (legsta) [100%] 1 of 1 ✔ [16/833262] process > BACTOPIATOOLS:CUSTOM_DUMPSOFTWAREVERSIONS (1) [100%] 1 of 1 ✔ @@ -260,4 +271,4 @@ has convinced you that Bactopia can make this process much easier. If you have any questions or ideas for additional Bactopia Tools, please feel free to reach out to me! -__🎉 Also! This the first ever blog post for Bactopia! 🎉__ +**Also! This is the first ever blog post for Bactopia!** diff --git a/blog/authors.yml b/blog/authors.yml new file mode 100644 index 00000000..1a193344 --- /dev/null +++ b/blog/authors.yml @@ -0,0 +1,6 @@ +rpetit3: + name: Robert A. Petit III + description: The primary developer of Bactopia + avatar: https://avatars.githubusercontent.com/u/5334269 + slug: rpetit3 + url: https://www.robertpetit.com/ diff --git a/cli/bactopia.md b/cli/bactopia.md deleted file mode 100644 index b76d5371..00000000 --- a/cli/bactopia.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: bactopia -sidebar_position: 2 ---- - -# bactopia - -The main entry point for running Bactopia workflows. - -## Usage - -```bash -bactopia [options] -``` - -## Options - -| Option | Description | -|--------|-------------| -| `--input` | Path to input FASTQ files | -| `--datasets` | Path to pre-built datasets | -| `--species` | Species name for auto-download | - -*This is a sample page to verify routing. Full CLI docs will be generated in Phase 3.* diff --git a/cli/index.md b/cli/index.md deleted file mode 100644 index 8733c1d7..00000000 --- a/cli/index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: CLI Reference -sidebar_position: 1 ---- - -Command-line reference documentation for all Bactopia CLI commands and options. 
diff --git a/data/citations.yml b/data/citations.yml new file mode 100644 index 00000000..39be6783 --- /dev/null +++ b/data/citations.yml @@ -0,0 +1,1303 @@ +citations: + - authors: "Robert A. Petit, Chayse M. Rowley, Taylor R. Fearing, Stefaan Verwimp, Rob G. Christensen, Jim A. Mildenberger, Joseph M. Reed, Timothy D. Read." + title: "camlhmp: a simple framework for building reproducible microbial genome-based typing tools" + url: https://doi.org/10.1128/mra.00201-26 + journal: "Microbiology Resource Announcements" + date: 2026-04-20 + + - authors: "Noémie Vingadassalon, Jacques-Antoine Hennekinne, Yacine Nia, Sven Maurischat, Maria Borowiak, Brian Byrne, Lionel Kenneth Dygico, L. M. Ciupescu, Hugo Guedes, Gonçalo Almeida, et al." + title: "Impact of wet- and dry-lab workflows on genome assembly, typing and enterotoxin gene detection in whole-genome sequencing of foodborne Staphylococcus aureus" + url: https://doi.org/10.1016/j.ijfoodmicro.2026.111761 + journal: "International Journal of Food Microbiology" + date: 2026-04-03 + + - authors: "Laura Mondéjar, Victoria Ballén, Yaiza Gabasa, Laura Castellsagués, Anna Pinar-Méndez, Carles Vilaró, Belén Galofré, Aida González-Díaz, Sara Martí, Sergi Canyelles i Sanz, et al." + title: "Characterizing Aeromonas spp. as a Potential Sentinel Organism for Antimicrobial Resistance Dissemination in Wastewater and Drinking Water Treatment Systems: A Case Study in the Barcelona Metropolitan Area, Spain" + url: https://doi.org/10.3390/antibiotics15030301 + journal: "Antibiotics" + date: 2026-03-17 + + - authors: "Carla López-Causapé, Matias Bonet, Biel Taltavull, Paola Medina-Retiga, Miquel Àngel Sastre-Femenía, Sara Cortés-Lara, María A. Gomis-Font, Fernando Gómez-Romano, Antonio Oliver." 
+ title: "Pa REx: an open-source pipeline for the automated analysis of Pseudomonas aeruginosa resistomes from whole-genome sequences" + url: https://doi.org/10.1128/aac.01326-25 + journal: "Antimicrobial Agents and Chemotherapy" + date: 2026-03-16 + + - authors: "Gian Carlo González-Carballo, Christopher Mairena-Acuña, Cristian Pérez-Corrales, Javier Alfaro-Camacho, Cristina García-Marín, César Rodríguez." + title: "Novel ST557 Streptococcus equi subsp. zooepidemicus lineage associated with a human case of septic arthritis" + url: https://doi.org/10.1016/j.ijidoh.2026.100117 + journal: "IJID One Health" + date: 2026-03-16 + + - authors: "Guillem López de Egea, Aida González-Díaz, Virginia Aragón, Óscar Cabezón, Gérard Guédon, D. Ortiz Berbel, Irene Cadenas-Jiménez, Johan Espunyes, Marta Planellas, M. Ángeles Domínguez, et al." + title: "Macrolide resistance determinants and their associations in streptococci from selected livestock and wildlife species from Catalonia, Northeast Spain" + url: https://doi.org/10.1128/spectrum.02567-25 + journal: "Microbiology Spectrum" + date: 2026-03-16 + + - authors: "Víctor Fernández-Juárez, Francisco Salvà-Serra, G. Seguí, Alberto J. Martín-Rodríguez." + title: "Life in sediments fosters ‘sexual’ speciation in Shewanella baltica" + url: https://doi.org/10.64898/2026.03.14.705985 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2026-03-14 + + - authors: "mohamed eltorki, Oluwatimilehin O. Ajayi, Jina Seok, Jianling Xie, Francesco A. Rizzuti, Byron M. Berenger, Phillip I. Tarr, Andrew T. Pavia, K. Snedeker, Silviu Grisaru, et al." 
+ title: "Shiga Toxin–Producing Escherichia coli Outbreak in Canadian Daycare Centers" + url: https://doi.org/10.1001/jamanetworkopen.2026.1278 + journal: "JAMA Network Open" + date: 2026-03-10 + + - authors: "Covadonga Pérez-García, Joaquin Llorente, Maria Elena Aguirre Alustuey, Mirella Llamosí, Ruth Gil, Gabriel Laghlali, Farah El-Ayache, Vivian Yan, Michael Schotsaert, Jorge del Diego, et al." + title: "Outburst of serotype 4 IPD after COVID-19 is driven by ST15063/GPSC162 lineage associated with high-risk behaviors and greater virulence linked to influenza H3N2 virus coinfection and cigarette smoke" + url: https://doi.org/10.64898/2026.02.27.26346872 + journal: "medRxiv" + date: 2026-03-04 + + - authors: "Leslie M. Huggins, Rachel B. Sidebottom, William W. Johnson, Kurt Schwalm, Karissa Culbreath, Jesse Young, Meghan Brett, Darrell L. Dinwiddie, Daryl Domman." + title: "Genomic analysis of Klebsiella aerogenes circulating in New Mexico" + url: https://doi.org/10.1099/mgen.0.001650 + journal: "Microbial Genomics" + date: 2026-02-24 + + - authors: "Megan A Phillips, Robert A. Petit, Daniel B. Weissman, Timothy Read." + title: "Transition of Staphylococcus aureus tetracycline resistance plasmid pT181 from independent multicopy replicon to predominantly integrated chromosomal element over 65 years" + url: https://doi.org/10.7554/elife.109393.1 + journal: "eLife" + date: 2026-02-19 + + - authors: "Sebastian Bruno Ulrich Jordi, Isabel Baertschi, Jiaqi Li, Nadia Fasel, Benjamin Misselwitz, Bahtiyar Yilmaz." + title: "StrainCascade: An automated, modular workflow for high-throughput long-read bacterial genome reconstruction and characterization" + url: https://doi.org/10.64898/2026.02.04.698786 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2026-02-06 + + - authors: "Aida González-Díaz, M. Bessa Pinto, Irene Cadenas-Jiménez, Sara Lia Duarte, Carmen Ardanuy, M. Manuela Ribeiro, Sara Martí, Paula Bajanca-Lavado." 
+ title: "First identification and molecular characterization of CTX-M-15 extended-spectrum β-lactamase and OXA-9 β-lactamase in Haemophilus influenzae in the Iberian Peninsula" + url: https://doi.org/10.1128/aac.01649-25 + journal: "Antimicrobial Agents and Chemotherapy" + date: 2026-01-30 + + - authors: "H.A. Alshammari, Ahmed Albarrag, Ihab M. Moussa, Jaffar A. Al-Tawfiq, Sumayh A. Aldakeel, Ali M. Somily." + title: "Molecular analysis of carbapenemase-producing Pseudomonas aeruginosa strains in a tertiary care hospital in Riyadh" + url: https://doi.org/10.1016/j.jiph.2026.103159 + journal: "Journal of Infection and Public Health" + date: 2026-01-21 + + - authors: "Belson Kutambe, Priyanka D. Patel, Kenneth Chizani, Niza Silungwe, Christopher Kukacha, Megan E. Carey, James Meiring, Matthew B. Laurens, Melita A. Gordon, Philip Ashton." + title: "Genomic Analysis of Salmonella Typhi from a Typhoid Conjugate Vaccine Trial" + url: https://doi.org/10.64898/2026.01.06.25342561 + journal: "medRxiv" + date: 2026-01-11 + + - authors: "Dalal M. Alkuraythi, Manal Muhammed Alkhulaifi, Dina A. Altwiley, Mohammed Alarwi, Mohammed I. Mujallad, Mohammad Alshomrani, Takashi Gojobori, Sulaiman M. Alajel." + title: "Genomic and epidemiological insights into the emergence and dominance of MRSA clones in Riyadh’s healthcare facilities" + url: https://doi.org/10.1038/s41598-025-34001-7 + journal: "Scientific Reports" + date: 2026-01-06 + + - authors: "Ismam Ahmed Protic, Md. Nasir Uddin, Andrew Gorzalski, Rashidul Islam, David Alvarez-Ponce." + title: "Comparative genomic analyses shed light on the introduction routes of rice-pathogenic Burkholderia gladioli strains into Bangladesh" + url: https://doi.org/10.1186/s12864-025-12430-y + journal: "BMC Genomics" + date: 2026-01-03 + + - authors: "Gerald Mboowa, Ivan Sserwadda, Stephen Kanyerezi." 
+ title: "rMAP 2.0: a modular, reproducible, and scalable WDL–Cromwell–Docker workflow for genomic analysis of ESKAPEE pathogens" + url: https://doi.org/10.1093/bioadv/vbag046 + journal: "Bioinformatics Advances" + date: 2026-01-01 + + - authors: "Virudhagiri Elamurugan, Subbiah Thamotharan, Vigneshwar Ramakrishnan." + title: "Pa VarDB: a database and web platform for missense variant analysis in Pseudomonas aeruginosa using an end-to-end BVbase pipeline" + url: https://doi.org/10.1093/database/baag014 + journal: "Database" + date: 2026-01-01 + + - authors: "Shuo Jiang, Peng Gao, Ping Shen, Suying Hou, Chenlu Xiao, Richard Yi Tsun Kao, Ho Pak-Leung, Yonghong xiao, Huiluo Cao." + title: "Global emergence and evolution of Staphylococcus aureus clonal complex 59" + url: https://doi.org/10.1128/msystems.01492-25 + journal: "mSystems" + date: 2025-12-31 + + - authors: "Chantisa Keeratipusana, Weerawat Phuklia, Vanheuang Phommadeechack, Janjira Thaipadungpanit, Vilada Chansamouth, Koukeo Phommasone, Sayaphet Rattanavong, Catrin E. Moore, Matthew T. Robinson, Allen L. Richards, et al." + title: "Complete genomes of Rickettsia typhi reveal a clonal population" + url: https://doi.org/10.1371/journal.pntd.0013828 + journal: "PLoS neglected tropical diseases" + date: 2025-12-29 + + - authors: "Nicholas J. Hathaway, Kathryn Murie, Maxwell Murphy, Alfred Simkin, Jorge Amaya-Romero, Alfred Hubbard, Jessica Briggs, Andrés Aranda-Díaz, Angela M. Early, Amy Wesolowski, et al." + title: "The Portable Microhaplotype Object and Tools" + url: https://doi.org/10.64898/2025.12.10.693568 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2025-12-12 + + - authors: "Massimiliano S. Tagliamonte, Abhinav Sharma, Alberto Riva, Monika Moir, Marco Salemi, Cheryl Baxter, Túlio de Oliveira, Carla Mavian, Eduan Wilkinson." 
+ title: "CholeraSeq: a comprehensive genomic pipeline for cholera surveillance and near real-time outbreak investigation" + url: https://doi.org/10.1093/bioinformatics/btaf665 + journal: "Bioinformatics" + date: 2025-12-11 + + - authors: "Kaitlin E. Creamer, Gabriel Castro‐Falcón, Ebru İnce, Victoria Vasilat, David Vereau Gorbitz, Alyssa M. Demko, Paul R. Jensen." + title: "Taxonomic and biosynthetic diversity of the marine actinomycete Salinispora across spatial scales" + url: https://doi.org/10.1128/aem.02171-25 + journal: "Applied and Environmental Microbiology" + date: 2025-12-08 + + - authors: "Cheikh Tidiane Houmenou, Cheikh Sokhna, Florence Fenollar, Oleg Mediannikov." + title: "Advancements and challenges in bioinformatics tools for microbial genomics in the last decade: Toward the smart integration of bioinformatics tools, digital resources, and emerging technologies for the analysis of complex biological data" + url: https://doi.org/10.1016/j.meegid.2025.105859 + journal: "Infection Genetics and Evolution" + date: 2025-11-24 + + - authors: "Casey Vieni, Tanner E. Rothstein, Stephen L. Johnson, Audrey N. Schuetz, Andrew P. Norgan, Robin Patel." + title: "Whole genome sequence analysis of Helicobacter pylori isolates reveals incomplete characterization of antimicrobial resistance mechanisms" + url: https://doi.org/10.1016/j.isci.2025.114077 + journal: "iScience" + date: 2025-11-20 + + - authors: "Ismam Ahmed Protic, Mohammed Nasir Uddin, Andrew Gorzalski, Md. Rashidul Islam, David Alvarez‐Ponce." + title: "Comparative genomic analyses shed light on the introduction routes of rice-pathogenic Burkholderia gladioli strains into Bangladesh" + url: https://doi.org/10.1101/2025.11.17.688954 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2025-11-18 + + - authors: "Alison Yee-Ting Lam, Chun-Hei Lau, Wing-Yin Tam, Chloe Toi-Mei Chan, Tsun-Ming Lok, Lorna Kwai Ping Suen, Lam-Kwong Lee, Elaine Yin-Ying Yeung, Tsz-Kei Lam, Wai-Kam Cheung, et al." 
+ title: "Targeted probe capture metagenomics-enabled surveillance of multidrug-resistant organisms and antimicrobial resistance genes in post-handwashing areas of public washrooms" + url: https://doi.org/10.1186/s40793-025-00806-2 + journal: "Environmental Microbiome" + date: 2025-11-17 + + - authors: "Jonathan Strysko, Tefelo Thela, Andries Feder, Janet Thubuka, Tichaona Machiya, Jack Mkubwa, Kagiso Mochankana, Celda Tiroyakgosi, Kgomotso Kgomanyane, Tlhalefo Dudu Ntereke, et al." + title: "Carbapenem-resistant Acinetobacter baumannii at a hospital in Botswana: detecting a protracted outbreak using whole genome sequencing" + url: https://doi.org/10.1128/spectrum.01768-25 + journal: "Microbiology Spectrum" + date: 2025-11-17 + + - authors: "Clayton W. Hall, Nicholas Waglechner, E C Choi, Patryk Aftanas, Kevin Katz, Christie Vermeiren, Finlay Maguire, Robert Kozak, Xena X. Li." + title: "Evaluation of Phenotypic and Genotypic Susceptibility Testing Methods for Newer β-lactam/β-lactamase Inhibitor Combinations in Multidrug Resistant Pseudomonas aeruginosa" + url: https://doi.org/10.1093/infdis/jiaf585 + journal: "The Journal of Infectious Diseases" + date: 2025-11-14 + + - authors: "Mohammed Alarawi, Musaad A. Altammami, Mohammed H. Abutarboush, Maxat Kulmanov, Dalal M. Alkuraithy, Şenay Kafkas, Robert Radley, Marwa Abdelhakim, Hind Aldakhil, Reema A. Bawazeer, et al." + title: "Genomic diversity and antimicrobial resistance of Staphylococcus aureus in Saudi Arabia: a nationwide study using whole-genome sequencing" + url: https://doi.org/10.1099/mgen.0.001540 + journal: "Microbial Genomics" + date: 2025-11-12 + + - authors: "Mingkwan Yingkajorn, Thunchanok Yaikhan, Worawut Duangsi-Ngoen, Chollachai Klaysubun, Thitaporn Dechathai, Sarunyou Chusri, Kamonnut Singkhamanan, Rattanaruji Pomwised, Monwadee Wonglapsuwan, Komwit Surachat." 
+ title: "Phylogenomics and Antimicrobial Resistance of Clinical Bacteroides Isolates from a Tertiary Hospital in Southern Thailand" + url: https://doi.org/10.3390/antibiotics14111143 + journal: "Antibiotics" + date: 2025-11-11 + + - authors: "Nattarika Chaichana, Jirasa Boonsan, Sirikan Suwannasin, Kamonnut Singkhamanan, Monwadee Wonglapsuwan, Rattanaruji Pomwised, Thunchanok Yaikhan, Chollachai Klaysubun, Siriwimon Konglue, Rusneeta Chema, et al." + title: "Genomic insights and functional characterization of Lactiplantibacillus pentosus CLP10 isolated from the traditional fermented Thai stinky beans" + url: https://doi.org/10.1016/j.jafr.2025.102509 + journal: "Journal of Agriculture and Food Research" + date: 2025-11-06 + + - authors: "Idowu B. Olawoye, David Alexander, Jennifer L. Guthrie." + title: "Population structure of Mycobacterium avium subsp. hominissuis provides new insights into genomic evolution" + url: https://doi.org/10.1099/mgen.0.001543 + journal: "Microbial Genomics" + date: 2025-11-04 + + - authors: "Michael B. Hall, Chenxi Zhou, Lachlan Coin." + title: "Genome size estimation from long read overlaps" + url: https://doi.org/10.1093/bioinformatics/btaf593 + journal: "Bioinformatics" + date: 2025-11-01 + + - authors: "Zachary Foster, Martha A. Sudermann, Camilo H. Parada-Rojas, Logan Blair, Fernanda Iruegas Bocardo, Upasana Dhakal, Ricardo I. Alcalá-Briseño, Hung Phan, Tori R. Schummer, Alexandra J. Weisberg, et al." + title: "PathogenSurveillance : an automated pipeline for population genomic analyses and pathogen identification" + url: https://doi.org/10.1101/2025.10.31.685798 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2025-10-31 + + - authors: "Camryn M. Bonn-Dunbar, Idowu B. Olawoye, Akanksha Thukral, Jennifer L. Guthrie, Martin J. McGavin." 
+ title: "Widespread emergence of Staphylococcus aureus with variant FarR regulators and enhanced resistance to antimicrobial fatty acids within clonal complex CC5, CC8, and CC97 strains from human and bovine hosts" + url: https://doi.org/10.1128/spectrum.02278-25 + journal: "Microbiology Spectrum" + date: 2025-10-31 + + - authors: "Gillian A.M. Tarr, William Finical, Joshua Rounds, Anna Panek, Kirk Smith." + title: "The conundrum of Shiga toxin-producing Escherichia coli O157:H7 persistence: Evidence for locally persistent lineages" + url: https://doi.org/10.1101/2025.09.29.679259 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2025-10-01 + + - authors: "Emanuel Becerra Soto, Adam James Oliver, Marcos H. de Moraes." + title: "Pandoomain, a scalable pipeline for genomic and protein domain context analysis, reveals widespread PT-TG domain architectural diversity and novel polymorphic toxins" + url: https://doi.org/10.1101/2025.10.01.679605 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2025-10-01 + + - authors: "Brittany N. Ross, Emma Evans, Frances L. Diggle, Paul Briaud, Marvin Whiteley." + title: "The role of colony morphotype in shaping gene essentiality in Mycobacteroides abscessus" + url: https://doi.org/10.1073/pnas.2500719122 + journal: "Proceedings of the National Academy of Sciences" + date: 2025-09-30 + + - authors: "Donovan H. Parks, Pierre-Alain Chaumeil, Aaron J. Mussig, Christian Rinke, Maria Chuvochina, Philip Hugenholtz." + title: "GTDB release 10: a complete and systematic taxonomy for 715 230 bacterial and 17 245 archaeal genomes" + url: https://doi.org/10.1093/nar/gkaf1040 + journal: "Nucleic Acids Research" + date: 2025-09-26 + + - authors: "Daniele Smedile, Elena Lavinia Diaconu, Matteo Grelloni, Barbara Middei, Virginia Carfora, Antonio Battisti, Patricia Alba, Alessia Franco." 
+ title: "Enteroflow: Automated Pipeline for In Silico Characterization of Enterococcus faecium/faecalis Isolates from Short Reads" + url: https://doi.org/10.3390/ijms26199441 + journal: "International Journal of Molecular Sciences" + date: 2025-09-26 + + - authors: "Kara K. Tsang, Iana Amke, David M. Aanensen, Jabir Abdulahi, Alexander M. Aiken, Michael A. Bachman, Stephen Baker, Katherine Barry, Gherard Batisti Biffignandi, Emília Maria Medeiros de Andrade Belitardo, et al." + title: "Ciprofloxacin resistance in Klebsiella pneumoniae : phenotype prediction from genotype and global distribution of resistance determinants" + url: https://doi.org/10.1101/2025.09.24.678318 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2025-09-25 + + - authors: "Harry C. H. Hung, Narender Kumar, Victoria Dyster, Corin Yeats, Benjamin J. Metcalf, Yuan Li, Paulina A. Hawkins, Lesley McGee, Stephen D. Bentley, Stephanie W. Lo." + title: "GPS Pipeline: portable, scalable genomic pipeline for Streptococcus pneumoniae surveillance from Global Pneumococcal Sequencing Project" + url: https://doi.org/10.1038/s41467-025-64018-5 + journal: "Nature Communications" + date: 2025-09-24 + + - authors: "Megan Phillips, Robert A. Petit, Daniel B. Weissman, Timothy D. Read." + title: "Transition of Staphylococcus aureus tetracycline resistance plasmid pT181 from independent multicopy replicon to predominantly integrated chromosomal element over 65 years" + url: https://doi.org/10.1101/2025.09.14.675889 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2025-09-15 + + - authors: "Michelle Su, Kim L. Hoang, McKenna J. Penley, Michelle H. Davis, Jennifer D. Gresham, Levi T. Morran, Timothy D. Read." + title: "Host and antibiotic jointly select for greater virulence in Staphylococcus aureus" + url: https://doi.org/10.7554/elife.107936 + journal: "" + date: 2025-09-09 + + - authors: "Qiaoxin Zhang, Junle Ren, Siyao Wu, Yadi Tan, Wenliang Wang, Feng Cai, Liqian Zhao, Zhiwei Zhu." 
+ title: "Plasmid‐Free CRISPR/Cpf1 Genome Editing With In Vivo T7 RNA Polymerase‐Transcribed CRISPR RNA From Short Double‐Stranded DNA" + url: https://doi.org/10.1002/bit.70062 + journal: "Biotechnology and Bioengineering" + date: 2025-09-09 + + - authors: "Adesola Olalekan, Sébastien Boutin, Christopher M. Watson, Luiza H. Galarion, Solayide A. Adesida, Bamidele Iwalokun, Olayiwola Popoola, Seraphine Nkie Esemu, Richard A. Adegbola, Dennis Nurjadi." + title: "Molecular landscape of methicillin-resistant Staphylococcus aureus strains in clinical infections from hospitals in Lagos, Nigeria" + url: https://doi.org/10.1093/jacamr/dlaf161 + journal: "JAC-Antimicrobial Resistance" + date: 2025-09-03 + + - authors: "Victoria Ballén, Laura Mondéjar, Yaiza Gabasa, Laura Castellsagués, Manuel Alcalde‐Rico, Anna Pinar-Méndez, Carles Vilaró, Belén Galofré, Sara M. Soto." + title: "Integrated metagenomic, culture-based, and whole genome sequencing analyses of antimicrobial resistance in wastewater and drinking water treatment plants in Barcelona, Spain" + url: https://doi.org/10.1016/j.ijheh.2025.114664 + journal: "International Journal of Hygiene and Environmental Health" + date: 2025-09-01 + + - authors: "Francesca Sivori, Massimo Francalancia, Mauro Truglio, Ilaria Cavallo, Carmelina Pronesti, Giorgia Fabrizio, Ilaria Celesti, Andrea Cazzani, Lorenzo Furzi, Fulvia Pimpinelli, et al." + title: "Meropenem/vaborbactam activity against carbapenem-resistant Klebsiella pneumoniae from catheter-related bloodstream infections" + url: https://doi.org/10.3389/fcimb.2025.1616353 + journal: "Frontiers in Cellular and Infection Microbiology" + date: 2025-07-31 + + - authors: "Margaret Free, Nicole Soper, James C. Slaughter, Andries Feder, Colleen Bianco, Ahmed M. Moustafa, Paul J. Planet, C. Buddy Creech, Isaac Thomsen." 
+ title: "Evolution of strain diversity and virulence factor repertoire in pediatric Staphylococcus aureus isolates" + url: https://doi.org/10.1371/journal.pone.0326353 + journal: "PLoS ONE" + date: 2025-07-31 + + - authors: "Emily A. Felton, Mary-Elizabeth Jobson, Nathanial Torres, Rachel M Washburn, Ariana M. Virgillio, Joshua Alvior, Eleonora Cella, Amorce Lima, Deanna Becker, Suzane Silbert, et al." + title: "Emergence of ST3390: A Novel Apigmented MRSA Clone From the CC5 Lineage" + url: https://doi.org/10.1093/infdis/jiaf410 + journal: "The Journal of Infectious Diseases" + date: 2025-07-31 + + - authors: "Karyn Havas, Roy Edler, Laura Ruesch, Marlee Braun, Peter Ferm, Noelle Noyes, Laura B. Goodman, H.M. Scott, Joel Nerem, Taylor Spronk, et al." + title: "Assessment of a program for monitoring antimicrobial purchase and resistance in Escherichia coli and Salmonella enterica on pig farms in the Midwestern United States from May 2020 through October 2023" + url: https://doi.org/10.3389/fvets.2025.1586008 + journal: "Frontiers in Veterinary Science" + date: 2025-07-25 + + - authors: "Ana Rafaela Kruemmel, Jessica L. Halpin, Victoria Foltz, Janet K. Dykes, Carolina Lúquez." + title: "Detection of a streptogramin A O-acetyltransferase gene ( vatD ) in the chromosome of Clostridium botulinum isolated from infants in the United States" + url: https://doi.org/10.1128/aem.00090-25 + journal: "Applied and Environmental Microbiology" + date: 2025-07-22 + + - authors: "Brooke M. Talbot, Natasia F Jacko, Katrina S. Hofstetter, Tara Alahakoon, Kévin Bouiller, Timothy D. Read, Michael David." 
+ title: "Methicillin-Resistant Staphylococcus aureus Bacteremia Relapses Show Diverse Genomic Profiles but Convergence in Bacteremia-Associated Genes" + url: https://doi.org/10.1093/infdis/jiaf352 + journal: "The Journal of Infectious Diseases" + date: 2025-07-22 + + - authors: "Thunchanok Yaikhan, Mingkwan Yingkajorn, Worawut Duangsi-Ngoen, Ei Phway Thant, Nattarika Chaichana, Sirikan Suwannasin, Kamonnut Singkhamanan, Saranyou Churi, Komwit Surachat." + title: "Genomic characterization of a clinical Enterocloster aldenensis strain: First report in Thailand" + url: https://doi.org/10.1016/j.meegid.2025.105800 + journal: "Infection Genetics and Evolution" + date: 2025-07-16 + + - authors: "Thomas D. Stanton, Marit Andrea Klokkhammer Hetland, Iren H. Löhr, Kathryn E. Holt, Kelly L. Wyres." + title: "Fast and accurate in silico antigen typing with Kaptive 3" + url: https://doi.org/10.1099/mgen.0.001428 + journal: "Microbial Genomics" + date: 2025-06-24 + + - authors: "Martin Larralde, Georg Zeller, Laura M. Carroll." + title: "PyOrthoANI, PyFastANI, and Pyskani: a suite of Python libraries for computation of average nucleotide identity" + url: https://doi.org/10.1093/nargab/lqaf095 + journal: "NAR Genomics and Bioinformatics" + date: 2025-06-21 + + - authors: "Nattarika Chaichana, Sirikan Suwannasin, Jirasa Boonsan, Thunchanok Yaikhan, Chollachai Klaysubun, Kamonnut Singkhamanan, Monwadee Wonglapsuwan, Rattanaruji Pomwised, Siriwimon Konglue, Rusneeta Chema, et al." + title: "Genomic Characterization and Safety Evaluation of Enterococcus lactis RB10 Isolated from Goat Feces" + url: https://doi.org/10.3390/antibiotics14060612 + journal: "Antibiotics" + date: 2025-06-16 + + - authors: "Ahmed Yousef Alhejaili, Ge Zhou, Heba Halawa, Jiayi Huang, Omniya Fallatah, Raneem Hirayban, Sara Iftikhar, Abrar AlAsmari, Mathew Milner, Manuel Banzhaf, et al." 
+ title: "Methicillin-resistant Staphylococcus aureus in Saudi Arabia: genomic evidence of recent clonal expansion and plasmid-driven resistance dissemination" + url: https://doi.org/10.3389/fmicb.2025.1602985 + journal: "Frontiers in Microbiology" + date: 2025-06-13 + + - authors: "Josiah Bennett, Sarah Lohsen, Elizabeth Y. Kim, Pranavi Ravichandran, William K. Blake, Kimberly Workowski, Sarah W. Satola, Matthew H. Collins, Danielle Barrios Steed." + title: "A Haemophilus influenzae -associated mycotic aneurysm months after treated bacteremia" + url: https://doi.org/10.1128/asmcr.00027-25 + journal: "ASM Case Reports" + date: 2025-06-12 + + - authors: "Chantisa Keeratipusana, Weerawat Phuklia, Vanheuang Phommadeechack, Janjira Thaipadungpanit, Vilada Chansamouth, Koukeo Phommasone, Sayaphet Rattanavong, Catrin E. Moore, Matthew T. Robinson, Allen L. Richards, et al." + title: "Complete genomes of Rickettsia typhi reveal a clonal population" + url: https://doi.org/10.1101/2025.05.25.654783 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2025-05-27 + + - authors: "Sarah Shrum Davis, Paris S. Hamm, Karen Edge, Tim Hanosh, Jessica P. Houston, Anastacia Griego-Fisher, F. Lugo, Nicholas Wenzel, D’Eldra Malone, Carol Bradford, et al." + title: "Multidrug-resistant Shigella flexneri outbreak affecting humans and non-human primates in New Mexico, USA" + url: https://doi.org/10.1038/s41467-025-59766-3 + journal: "Nature Communications" + date: 2025-05-20 + + - authors: "Helena M. B. Seth-Smith, Ashley M. Rooney, Adrian Egli." + title: "Strain Typing (Bacterial, Viral, Fungal, and Mycobacterial)" + url: https://doi.org/10.1002/9781683674597.ch4 + journal: "" + date: 2025-05-19 + + - authors: "Agnetor Mutheo, Carren Bosire, Victor Tunje Jeza, George Githinji, Sammy Wambua." 
+ title: "Metagenomic exploration of antimicrobial resistance genes in marine bacteria ecosystem" + url: https://doi.org/10.12688/wellcomeopenres.24091.1 + journal: "Wellcome Open Research" + date: 2025-05-19 + + - authors: "Xiaobao Xie, Rong Li, Qiang Yu, Qiang Yu, Xue Zhang, Dongbo Chen, Yuchen Li, Feng Xue, Qinghua Yu, Qinghua Yu." + title: "The high utilization of human milk oligosaccharides with glycoside hydrolase family exsH gene in Akkermansia species" + url: https://doi.org/10.1016/j.carpta.2025.100845 + journal: "Carbohydrate Polymer Technologies and Applications" + date: 2025-05-14 + + - authors: "Silvia García-Cobos, Natalia Seco Alberca, Blanca Bravo, Verónica Casquero-García, Eva Ramírez de Arellano, Cristina Calvo, Guillermo Ruíz-Carrascoso, Iker Falces‐Romero, Nieves Larrosa, Belen Viñado-Pérez, et al." + title: "Genomic analysis of community-associated methicillin-resistant Staphylococcus aureus (CA-MRSA) causing infections in children—a Spanish multicenter study" + url: https://doi.org/10.3389/fmicb.2025.1534840 + journal: "Frontiers in Microbiology" + date: 2025-05-09 + + - authors: "Simon Goldie, Laurie C. Lau, Huw Jones, Philip G. Harries, Andrew F. Walls, Rami J. Salib." + title: "Identification of Novel Staphylococcus aureus Core and Accessory Virulence Patterns in Chronic Rhinosinusitis" + url: https://doi.org/10.3390/ijms26083711 + journal: "International Journal of Molecular Sciences" + date: 2025-04-14 + + - authors: "João Vitor Wagner Ordine, E Silva, Lívia S. Zaramela." + title: "Global cross-genome comparison of carbohydrate transporters in Staphylococcus aureus with distinct antimicrobial susceptibility profiles" + url: https://doi.org/10.1016/j.nexres.2025.100284 + journal: "Next research." + date: 2025-04-01 + + - authors: "Aaron Asmus, K. M. Heimer, Kristin Davis, Peter Ferm, K. E. Belk, Randall S. Singer, Timothy J. Johnson, Noelle Noyes." 
+ title: "Temporality and Genetic Relatedness of Salmonella in a Pork Processing Facility" + url: https://doi.org/10.1016/j.jfp.2025.100500 + journal: "Journal of Food Protection" + date: 2025-03-28 + + - authors: "Brooke M. Talbot, Natasia F Jacko, Katrina S. Hofstetter, Tara Alahakoon, Kévin Bouiller, Timothy D. Read, Michael David." + title: "Genomic investigation of MRSA bacteremia relapse reveals diverse genomic profiles but convergence in bacteremia-associated genes" + url: https://doi.org/10.1101/2025.03.24.25324140 + journal: "medRxiv" + date: 2025-03-28 + + - authors: "Alba Frias‐De‐Diego, Manuel Jara, Cristina Lanzas." + title: "Influence of Sequencing Technology on Pangenome-Level Analysis and Detection of Antimicrobial Resistance Genes in ESKAPE Pathogens" + url: https://doi.org/10.1093/ofid/ofaf183 + journal: "Open Forum Infectious Diseases" + date: 2025-03-26 + + - authors: "Brianna J. Bixler, Charlotte J. Royer, Robert A. Petit, Abraham Moller, Samantha Sefton, Stepy Thomas, Amy Tunali, Lauren F. Collins, Monica M. Farley, Sarah W. Satola, et al." + title: "Comparative genomic analysis of emerging non-typeable Haemophilus influenzae (NTHi) causing emerging septic arthritis in Atlanta" + url: https://doi.org/10.7717/peerj.19081 + journal: "PeerJ" + date: 2025-03-21 + + - authors: "Emily A. Felton, Mary-Elizabeth Jobson, Ariana M. Virgillio, Joshua Alvior, Eleonora Cella, Amorce Lima, Deanna Becker, Suzane Silbert, Taj Azarian, Kami Kim, et al." + title: "Emergence of ST3390: A non-pigmented HA-MRSA clone with enhanced virulence" + url: https://doi.org/10.1101/2025.03.18.643911 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2025-03-18 + + - authors: "Carter N. Abbott, Aditi Dhillon, Sushma Timalsina, Elise Furr, Patrick Vélicitat, Adam Belley, Navaneeth Narayanan, Keith S. Kaye, David S. Weiss." 
+ title: "The association between undetected heteroresistance and antibiotic treatment failure in complicated urinary tract infection" + url: https://doi.org/10.1101/2025.03.11.25323422 + journal: "medRxiv" + date: 2025-03-13 + + - authors: "Kat Steinke, Karina Gravgaard Thomsen, Silje Vermedal Hoegh, Sanne Løkkegaard Larsen, Karina Kubel Vilhelmsen, Thøger Gorm Jensen, Marianne Nielsine Skov, Thomas Vognbjerg Sydenham." + title: "RSYD-BASIC: a bioinformatic pipeline for routine sequence analysis and data processing of bacterial isolates for clinical microbiology" + url: https://doi.org/10.1099/acmi.0.000646.v6 + journal: "Access Microbiology" + date: 2025-03-01 + + - authors: "Daichi Morita, Teruo Kuroda." + title: "Recent Antimicrobial Resistance Situation and Mechanisms of Resistance to Key Antimicrobials in Enterotoxigenic <i>Escherichia coli</i>" + url: https://doi.org/10.1248/bpb.b24-00649 + journal: "Biological and Pharmaceutical Bulletin" + date: 2025-02-28 + + - authors: "Sirikan Suwannasin, Kamonnut Singkhamanan, Rattanaruji Pomwised, Nattarika Chaichana, Monwadee Wonglapsuwan, Thunchanok Yaikhan, Komwit Surachat." + title: "Phenotypic and genomic analysis of Enterococcus avium MC09 pathogenicity isolated from Scylla spp. (mud crab) in a Thai market" + url: https://doi.org/10.1016/j.micpath.2025.107415 + journal: "Microbial Pathogenesis" + date: 2025-02-26 + + - authors: "Joseph J. Zeppa, Ellen G. Avery, Patryk Aftanas, Erin Choi, Simone Uleckas, Prachi Patel, Nicholas Waglechner, Hubert Jimenez, Christie Vermeiren, Kevin Katz, et al." + title: "Comparison of pharyngeal and invasive isolates of Streptococcus pyogenes by whole-genome sequencing in Toronto, Canada" + url: https://doi.org/10.1128/spectrum.02141-24 + journal: "Microbiology Spectrum" + date: 2025-02-13 + + - authors: "Thomas D. Stanton, Marit Andrea Klokkhammer Hetland, Iren H. Löhr, Kathryn E. Holt, Kelly L. Wyres." 
+ title: "Fast and Accurate in silico Antigen Typing with Kaptive 3" + url: https://doi.org/10.1101/2025.02.05.636613 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2025-02-08 + + - authors: "Ahmed Yousef Alhejaili, Ge Zhou, Heba Halawa, Jiayi Huang, Omniya Fallatah, Raneem Hirayban, Sara Iftikhar, Abrar AlAsmari, Mathew Milner, Manuel Banzhaf, et al." + title: "Methicillin-resistant Staphylococcus aureus in Saudi Arabia: genomic evidence of recent clonal expansion and plasmid-driven resistance dissemination" + url: https://doi.org/10.1101/2025.01.31.25321315 + journal: "medRxiv" + date: 2025-02-03 + + - authors: "Emanuele Nicitra, Morena Terrana, Dafne Bongiorno, Saveria Dodaro, Francesca Greco, Sonia Greco, Nadia Marascio, Maria Vittoria Mauro, Marta Pantanella, Grete Francesca Privitera, et al." + title: "Circulation of a Unique Klebsiella pneumoniae Clone, ST147 NDM-1/OXA-48, in Two Diverse Hospitals in Calabria (Italy)" + url: https://doi.org/10.3390/antibiotics14020128 + journal: "Antibiotics" + date: 2025-01-26 + + - authors: "Christine Marie George, Alves Namunesha, Kelly Endres, Willy Felicien, Presence Sanvura, Jean‐Claude Bisimwa, Jamie Perin, Justin Bengehya, Ghislain Maheshe, Cirhuza Cikomola, et al." + title: "Epidemiologic and Genomic Surveillance of Vibrio cholerae and Effectiveness of Single-Dose Oral Cholera Vaccine, Democratic Republic of the Congo" + url: https://doi.org/10.3201/eid3102.241777 + journal: "Emerging infectious diseases" + date: 2025-01-17 + + - authors: "Gillian A.M. Tarr, Linda Chui, Kim Stanford, Emmanuel W. Bumunang, Rahat Zaheer, Vincent Li, Stephen B. Freedman, Chad Laing, Tim A. McAllister." 
+ title: "Persistent cross-species transmission systems dominate Shiga toxin-producing Escherichia coli O157:H7 epidemiology in a high incidence region: A genomic epidemiology study" + url: https://doi.org/10.7554/elife.97643.3 + journal: "eLife" + date: 2025-01-17 + + - authors: "Timo Saratto, Kerkko Visuri, Jonatan Lehtinen, Irene Ortega-Sanz, Jacob L. Steenwyk, Samuel Sihvonen." + title: "Solu: a cloud platform for real-time genomic pathogen surveillance" + url: https://doi.org/10.1186/s12859-024-06005-z + journal: "BMC Bioinformatics" + date: 2025-01-13 + + - authors: "Guillem López de Egea, Aída González-Díaz, Randall J. Olsen, Gérard Guédon, Dàmaris Berbel, Inmaculada Grau, Jordi Cámara, Lucía Saiz-Escobedo, Sara Calvo-Silveria, Irene Cadenas-Jiménez, et al." + title: "Emergence of invasive Streptococcus dysgalactiae subsp. equisimilis in Spain (2012-2022): genomic insights and clinical correlations" + url: https://doi.org/10.1016/j.ijid.2025.107778 + journal: "International Journal of Infectious Diseases" + date: 2025-01-11 + + - authors: "Olusola Olagoke, Ammar Aziz, Lucile H Zhu, Timothy D. Read, Deborah Dean." + title: "Whole-genome automated assembly pipeline for Chlamydia trachomatis strains from reference, in vitro and clinical samples using the integrated CtGAP pipeline" + url: https://doi.org/10.1093/nargab/lqae187 + journal: "NAR Genomics and Bioinformatics" + date: 2025-01-07 + + - authors: "Gaetano Maugeri, Maddalena Calvo, Dafne Bongiorno, Dalida Bivona, Giuseppe Migliorisi, Grete Francesca Privitera, Guido Scalia, Stefania Stefani." + title: "Sequencing Analysis of Invasive Carbapenem-Resistant Klebsiella pneumoniae Isolates Secondary to Gastrointestinal Colonization" + url: https://doi.org/10.3390/microorganisms13010089 + journal: "Microorganisms" + date: 2025-01-05 + + - authors: "Christian Zirbes, Andries Feder, Anthony J. Pamatmat, Alyssa R. Bartels, Nicholas J. Pitcher, Alexis L. Rozen, M. Teresi, J. Krogh, Margaret Regan, Erin A. 
Arnold, et al." + title: "Genetic Concordance of Staphylococcus aureus From Oropharyngeal and Sputum Cultures in People With Cystic Fibrosis" + url: https://doi.org/10.1002/ppul.27475 + journal: "Pediatric Pulmonology" + date: 2025-01-01 + + - authors: "Nattarika Chaichana, Jirasa Boonsan, Sirikan Suwannasin, Kamonnut Singkhamanan, Monwadee Wonglapsuwan, Rattanaruji Pomwised, Thunchanok Yaikhan, Chollachai Klaysubun, Siriwimon Konglue, Rusneeta Chema, et al." + title: "Integrative Genomic and Phenotypic Profiling of Lactiplantibacillus pentosus CLP10: A Probiotic Candidate from Traditional Fermented Thai Stinky Beans" + url: https://doi.org/10.2139/ssrn.5432733 + journal: "SSRN Electronic Journal" + date: 2025-01-01 + + - authors: "Derek Kong Lam, Sree Vadlamudi, Ria Poluru, Ashley Fang, Chujing Zheng, C.H. Lee, Anna Zhang, Yujie Men, Linda Shi." + title: "Automation and Comparative Evaluation of the Bactopia Pipeline for Environmental Tracking of Antibiotic Resistance Genes Using Genomic and Visualization Frameworks" + url: https://doi.org/10.11159/jbeb.2025.016 + journal: "Journal of Biomedical Engineering and Biosciences" + date: 2025-01-01 + + - authors: "Liviu-Iulian Rotaru, Marius Surleac." + title: "PeGAS: a versatile bioinformatics pipeline for antimicrobial resistance, virulence and pangenome analysis" + url: https://doi.org/10.1093/bioadv/vbaf165 + journal: "Bioinformatics Advances" + date: 2024-12-26 + + - authors: "Grete Francesca Privitera, Adriana Antonella Cannata, Floriana Campanile, Salvatore Alaimo, Dafne Bongiorno, Alfredo Pulvirenti." + title: "BacExplorer: an integrated platform for de novo bacterial genome annotation" + url: https://doi.org/10.1093/bioadv/vbaf281 + journal: "Bioinformatics Advances" + date: 2024-12-26 + + - authors: "Mohamed M. H. Abdelbary, Maximilian Hatting, Andrea Dahlhausen, Alexandra Bott, Georg Conrads." 
+ title: "Insights into Within-Host Evolution and Dynamics of Oral and Intestinal Streptococci Unveil Niche Adaptation" + url: https://doi.org/10.3390/ijms252413507 + journal: "International Journal of Molecular Sciences" + date: 2024-12-17 + + - authors: "Christine Marie George, Alves Namunesha, Kelly Endres, Willy Felicien, Presence Sanvura, Jean‐Claude Bisimwa, Jamie Perin, Justin Bengehya, Ghislain Maheshe, Cirhuza Cikomola, et al." + title: "Effectiveness of a single dose of oral cholera vaccine: findings from epidemiological and genomic surveillance of Vibrio Cholerae in the Democratic Republic of the Congo (PICHA7 Program)" + url: https://doi.org/10.1101/2024.12.16.24318874 + journal: "medRxiv" + date: 2024-12-16 + + - authors: "Daichi Morita, Asish K. Mukhopadhyay, Goutam Chowdhury, Fumito Maruyama, Miyuki Kanda, Yuki Yamamoto, Hidetoshi Tahara, Piyali Mukherjee, Mainak Bardhan, Takanori Kumagai, et al." + title: "Genomic epidemiology and genetic characteristics of clinical Campylobacter species cocirculating in West Bengal, India, 2019, using whole genome analysis" + url: https://doi.org/10.1128/aac.01108-24 + journal: "Antimicrobial Agents and Chemotherapy" + date: 2024-12-04 + + - authors: "Tu‐Hsuan Chang, Hung-Jen Tang, Chi‐Chung Chen, Chi-Chung Chen, Chih-Jung Chen, Chih-Jung Chen." + title: "Clinical characteristics and genomic changes of recurrent Methicillin‐Resistant Staphylococcus aureus bacteremia" + url: https://doi.org/10.1016/j.jmii.2024.11.008 + journal: "Journal of Microbiology Immunology and Infection" + date: 2024-11-30 + + - authors: "Harry C. H. Hung, Narender Kumar, Victoria Dyster, Corin Yeats, Benjamin J. Metcalf, Yuan Li, Paulina A. Hawkins, Lesley McGee, Stephen D. Bentley, Stephanie W. Lo." 
+ title: "A Portable and Scalable Genomic Analysis Pipeline for Streptococcus pneumoniae Surveillance: GPS Pipeline" + url: https://doi.org/10.1101/2024.11.27.625679 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2024-11-29 + + - authors: "Amy Campbell, Chris Hauton, Ronny van Aerle, Jaime Martínez-Urtaza." + title: "Eco-Evolutionary Drivers of Vibrio parahaemolyticus Sequence Type 3 Expansion: Retrospective Machine Learning Approach" + url: https://doi.org/10.2196/62747 + journal: "JMIR Bioinformatics and Biotechnology" + date: 2024-11-28 + + - authors: "Aunchalee Thanwisai, Ricardo A. R. Machado, Aashaq Hussain Bhat, Sacha J. Pidot, Sarunporn Tandhavanant, Chanakan Subkrasae, Wipanee Meesil, Jiranun Ardpairin, Supawan Pansri, Apichat Vitta." + title: "Xenorhabdus bharatensis sp. nov., Xenorhabdus entomophaga sp. nov., Xenorhabdus siamensis sp. nov., and Xenorhabdus thailandensis sp. nov. Isolated from Steinernema Entomopathogenic Nematodes" + url: https://doi.org/10.1007/s00284-024-03972-7 + journal: "Current Microbiology" + date: 2024-11-25 + + - authors: "Ashley M. Alexander, Hui Qi Loo, Lauren C. Askew, Vishnu Raghuram, Timothy D. Read, Joanna B. Goldberg." + title: "Intraspecific Diversity of Staphylococcus aureus Populations Isolated from Cystic Fibrosis Respiratory Infections" + url: https://doi.org/10.1101/2024.11.16.623925 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2024-11-16 + + - authors: "Amy Campbell, Ronnie G. Gavilán, Chris Hauton, Ronny van Aerle, Jaime Martínez-Urtaza." + title: "Identifying gene-level mechanisms of successful dispersal of Vibrio parahaemolyticus during El Niño events" + url: https://doi.org/10.1099/mgen.0.001317 + journal: "Microbial Genomics" + date: 2024-11-08 + + - authors: "Tamalee Roberts, Clare Ling, Wanitda Watthanaworawit, Chanvoleak Cheav, Amphonesavanh Sengduangphachanh, Joy Silisouk, Jill Hopkins, Koukeo Phommasone, Elizabeth M. Batty, Paul Turner, et al." 
+ title: "AmpC β-lactamases detected in Southeast Asian Escherichia coli and Klebsiella pneumoniae" + url: https://doi.org/10.1093/jacamr/dlae195 + journal: "JAC-Antimicrobial Resistance" + date: 2024-10-30 + + - authors: "Ricardo A. R. Machado, Antoinette P. Malan, Joaquín Abolafia, Jaspher Ewany, Aashaq Hussain Bhat, S. Patricia Stock." + title: "Photorhabdus viridis sp. nov. Isolated from Heterorhabditis zealandica Entomopathogenic Nematodes" + url: https://doi.org/10.1007/s00284-024-03935-y + journal: "Current Microbiology" + date: 2024-10-23 + + - authors: "Amy Campbell, Ronnie G. Gavilán, Michel A Marín, Chao Yang, Chris Hauton, Ronny van Aerle, Jaime Martínez-Urtaza." + title: "Evolutionary dynamics of the successful expansion of pandemic Vibrio parahaemolyticus ST3 in Latin America" + url: https://doi.org/10.1038/s41467-024-52159-y + journal: "Nature Communications" + date: 2024-09-07 + + - authors: "Ainhize Maruri-Aransolo, Marta Hernández-García, Raquel Barbero, Malkoa Michelena, María Dolores Pastor‐Vivero, Pedro Mondéjar-López, Amparó Solé, Rafael Cantón, Juan de Dios Caballero-Pérez, GEIFQ study group, et al." + title: "Genomic characterization of MRSA recovered from people with cystic fibrosis during two Spanish multicentre studies (2013 and 2021)" + url: https://doi.org/10.1093/jacamr/dlae160 + journal: "JAC-Antimicrobial Resistance" + date: 2024-09-03 + + - authors: "Dalida Bivona, Emanuele Nicitra, Carmelo Bonomo, Maddalena Calvo, Giuseppe Migliorisi, Marianna Perez, Grete Francesca Privitera, Nicolò Musso, Stefania Stefani, Dafne Bongiorno." + title: "Molecular diversity in fusidic acid–resistant Methicillin Susceptible Staphylococcus  aureus" + url: https://doi.org/10.1093/jacamr/dlae154 + journal: "JAC-Antimicrobial Resistance" + date: 2024-09-03 + + - authors: "Noémie Vingadassalon, Déborah Merda, Arnaud Felten, Virginie Chesnais, Christos Kourtis, Tom Van Nieuwenhuysen, Yacine Nia, Jacques‐Antoine Hennekinne, Marina Cavaiuolo." 
+ title: "Epidemiology of Staphylococcus aureus food isolates: Comparison of conventional methods with whole genome sequencing typing methods" + url: https://doi.org/10.1016/j.fm.2024.104625 + journal: "Food Microbiology" + date: 2024-09-02 + + - authors: "Michelle Su, Kim L. Hoang, McKenna J. Penley, Michelle H. Davis, Jennifer D. Gresham, Levi T. Morran, Timothy Read." + title: "Host and antibiotic jointly select for greater virulence in Staphylococcus aureus" + url: https://doi.org/10.1101/2024.08.31.610628 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2024-08-31 + + - authors: "Alberto J. Martín‐Rodríguez, Víctor Fernández‐Juárez, Valerie Diane Valeriano, Indiwari Mihindukulasooriya, Livia Ceresnova, Enrique Joffré, Susanne Jensie-Markopoulos, Edward R. B. Moore, Åsa Sjöling." + title: "A hotspot of diversity: novel Shewanella species isolated from Baltic Sea sediments delineate a sympatric species complex" + url: https://doi.org/10.1099/ijsem.0.006480 + journal: "INTERNATIONAL JOURNAL OF SYSTEMATIC AND EVOLUTIONARY MICROBIOLOGY" + date: 2024-08-16 + + - authors: "Duc Quang Le, Tam Nguyen, Canh Hao Nguyen, Tho Huu Ho, Nam S. Vo, Trang Thu Nguyen, Hoang Anh Nguyen, Lê Sỹ Vinh, Thanh Hai Dang, Minh Duc Cao, et al." + title: "AMRomics: a scalable workflow to analyze large microbial genome collections" + url: https://doi.org/10.1186/s12864-024-10620-8 + journal: "BMC Genomics" + date: 2024-07-22 + + - authors: "Carl M. Kobel, Velma T. E. Aho, Ove Øyås, Niels Nørskov‐Lauritsen, Ben J. Woodcroft, Phillip B. Pope." + title: "CompareM2 is a genomes-to-report pipeline for comparing microbial genomes" + url: https://doi.org/10.1101/2024.07.12.603264 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2024-07-16 + + - authors: "Daryl Domman, Sarah Shrum Davis, Paris S. Hamm, Karen Edge, Tim Hanosh, Jessica P. Houston, Anastacia Griego-Fisher, F. Lugo, Nicholas Wenzel, D’Eldra Malone, et al." 
+ title: "Multidrug-resistant Shigella flexneri outbreak associated with a high-mortality spillover event into nonhuman primates" + url: https://doi.org/10.21203/rs.3.rs-4682172/v1 + journal: "Research Square" + date: 2024-07-12 + + - authors: "Bastian Marquis, Trestan Pillonel, Alessia Carrara, Claire Bertelli." + title: "zDB: bacterial comparative genomics made easy" + url: https://doi.org/10.1128/msystems.00473-24 + journal: "mSystems" + date: 2024-06-28 + + - authors: "Vishnu Raghuram, Robert A. Petit, Zach Karol, R. J. MEHTA, Daniel B. Weissman, Timothy D. Read." + title: "Average nucleotide identity-based Staphylococcus aureus strain grouping allows identification of strain-specific genes in the pangenome" + url: https://doi.org/10.1128/msystems.00143-24 + journal: "mSystems" + date: 2024-06-27 + + - authors: "Ricardo A. R. Machado, Antoinette P. Malan, Anja Boss, Nicholle J. Claasen, Aashaq Hussain Bhat, Joaquín Abolafia." + title: "Photorhabdus africana sp. nov. isolated from Heterorhabditis entomopathogenic nematodes" + url: https://doi.org/10.1007/s00284-024-03744-3 + journal: "Current Microbiology" + date: 2024-06-23 + + - authors: "Lenora Kepler, Manuel Jara, Bejan Mahmud, Gautam Dantas, Erik R. Dubberke, Cristina Lanzas, David A. Rasmussen." + title: "Quantifying the genomic determinants of fitness in E. coli ST131 using phylodynamics" + url: https://doi.org/10.1101/2024.06.10.598183 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2024-06-10 + + - authors: "Timo J Moilanen, Kerkko Visuri, Jonatan Lehtinen, Irene Ortega-Sanz, Jacob L. Steenwyk, Samuel Sihvonen." + title: "Solu – a Cloud Platform for Real-Time Genomic Pathogen Surveillance" + url: https://doi.org/10.1101/2024.05.30.596434 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2024-06-02 + + - authors: "Amy Campbell, Chris Hauton, Ronny van Aerle, Jaime Martínez-Urtaza." 
+ title: "Eco-Evolutionary Drivers of Vibrio parahaemolyticus Sequence Type 3 Expansion: Retrospective Machine Learning Approach (Preprint)" + url: https://doi.org/10.2196/preprints.62747 + journal: "" + date: 2024-05-30 + + - authors: "Xuechen Tian, Wee Fei Aaron Teo, Yixin Yang, Linyinxue Dong, Aloysius Wong, Li Chen, Halah Ahmed, Siew Woh Choo, Nicholas S. Jakubovics, Geok Yuan Annie Tan." + title: "Genome characterisation and comparative analysis of Schaalia dentiphila sp. nov. and its subspecies, S. dentiphila subsp. denticola subsp. nov., from the human oral cavity" + url: https://doi.org/10.1186/s12866-024-03346-w + journal: "BMC Microbiology" + date: 2024-05-28 + + - authors: "Liqin Cheng, Mário S. P. Correia, Shawn M. Higdon, Fabricio Romero García, Ioanna Tsiara, Enrique Joffré, Åsa Sjöling, Fredrik Boulund, Elisabeth Norin, Lars Engstrand, et al." + title: "The protective role of commensal gut microbes and their metabolites against bacterial pathogens" + url: https://doi.org/10.1080/19490976.2024.2356275 + journal: "Gut Microbes" + date: 2024-05-26 + + - authors: "Y-C Huang, C-J Chen, A.-J. Kuo, K.Y. Hwang, C-C Chien, Chia‐Yin Lee, Tai‐Hsi Wu, Wen‐Chien Ko, Po‐Ren Hsueh." + title: "Dissemination of meticillin-resistant Staphylococcus aureus sequence type 8 (USA300) in Taiwan" + url: https://doi.org/10.1016/j.jhin.2024.04.024 + journal: "Journal of Hospital Infection" + date: 2024-05-21 + + - authors: "Duc Quang Le, Son Hoang Nguyen, Tam Nguyen, Canh Hao Nguyen, Tho Huu Ho, Nam S. Vo, Trang Nguyen, Hoang Anh Nguyen, Minh Duc Cao." + title: "AMRViz enables seamless genomics analysis and visualization of antimicrobial resistance" + url: https://doi.org/10.1186/s12859-024-05792-9 + journal: "BMC Bioinformatics" + date: 2024-05-16 + + - authors: "George Bouras, Ghais Houtak, Ryan R. Wick, Vijini Mallawaarachchi, Michael J. Roach, Bhavya Papudeshi, Louise M. Judd, Anna E. Sheppard, Robert A. Edwards, Sarah Vreugde." 
+ title: "Hybracter: enabling scalable, automated, complete and accurate bacterial genome assemblies" + url: https://doi.org/10.1099/mgen.0.001244 + journal: "Microbial Genomics" + date: 2024-05-08 + + - authors: "Katrina S. Hofstetter, Natasia F Jacko, Margot J. Shumaker, Brooke M. Talbot, Robert A. Petit, Timothy D. Read, Michael David." + title: "Strain Differences in Bloodstream and Skin Infection: Methicillin-Resistant Staphylococcus aureus Isolated in 2018–2021 in a Single Health System" + url: https://doi.org/10.1093/ofid/ofae261 + journal: "Open Forum Infectious Diseases" + date: 2024-05-03 + + - authors: "Sverre Branders, Manfred Grabherr, Rafi Ahmad." + title: "Real-time Taxonomic Characterization of Long-read Mixed-species Sequencing Samples in Sorted Motif Distance Space: Voyager" + url: https://doi.org/10.1101/2024.04.13.589333 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2024-04-15 + + - authors: "Matin Nuhamunada, Omkar S. Mohite, Patrick V. Phaneuf, Bernhard Ø. Palsson, Tilmann Weber." + title: "BGCFlow: systematic pangenome workflow for the analysis of biosynthetic gene clusters across large genomic datasets" + url: https://doi.org/10.1093/nar/gkae314 + journal: "Nucleic Acids Research" + date: 2024-04-11 + + - authors: "Gillian A.M. Tarr, Linda Chui, Kim Stanford, Emmanuel W. Bumunang, Rahat Zaheer, Vincent Li, Stephen B. Freedman, Chad Laing, Tim A. McAllister." + title: "Persistent cross-species transmission systems dominate Shiga toxin-producing Escherichia coli O157:H7 epidemiology in a high incidence region: a genomic epidemiology study" + url: https://doi.org/10.1101/2024.04.05.588308 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2024-04-07 + + - authors: "Marissa Duckett, Megan Taylor, Claire Bowman, Nic M. Vega." 
+ title: "Parallel evolution of alternate morphotypes of Chryseobacterium gleum during experimental evolution with Caenorhabditis elegans" + url: https://doi.org/10.1093/femsec/fiae039 + journal: "FEMS Microbiology Ecology" + date: 2024-03-28 + + - authors: "Xiaoyan Guo, Zijun Zhang, Qiankun Chen, Leying Wang, Xizhan Xu, Zhenyu Wei, Yang Zhang, Kexin Chen, Zhi­qun Wang, Xinxin Lu, et al." + title: "Whole Genome Sequencing Highlights the Pathogenic Profile in Nocardia Keratitis" + url: https://doi.org/10.1167/iovs.65.3.26 + journal: "Investigative Ophthalmology & Visual Science" + date: 2024-03-19 + + - authors: "Guillermo Reyes, Irma Betancourt, Martha Borbor, Bonny Bayot." + title: "Draft genome sequence of Bacillus velezensis strain 3TSA-3, a potential probiotic for Pacific white shrimp Penaeus vannamei postlarvae isolated from commercial hatchery tanks" + url: https://doi.org/10.1128/mra.01208-23 + journal: "Microbiology Resource Announcements" + date: 2024-03-19 + + - authors: "Ignasi Roca, Kathya Espinoza, Cinthia Irigoin-Lovera, Maria Piquet, Luciano A. Palomino-Kobayashi, Angie K. Castillo, Diego D. Gonzales-DelCarpio, Joaquim Viñes, Laura Muñoz, Barbara Ymaña, et al." + title: "Clonal dissemination of Acinetobacter radioresistens among Humboldt penguins (Spheniscus humboldti) inhabiting a barren northern Peruvian island" + url: https://doi.org/10.1556/1886.2023.00066 + journal: "European Journal of Microbiology and Immunology" + date: 2024-03-14 + + - authors: "Kévin Bouiller, Natasia F Jacko, Margot J. Shumaker, Brooke M. Talbot, Timothy D. Read, Michael David." + title: "Factors associated with foreign body infection in methicillin-resistant Staphylococcus aureus bacteremia" + url: https://doi.org/10.3389/fimmu.2024.1335867 + journal: "Frontiers in Immunology" + date: 2024-02-16 + + - authors: "Leah Cuthbertson, Ulrike Löber, Jonathan Ish-Horowicz, Claire McBrien, Colin Churchward, Jeremy C. Parker, Michael Olanipekun, Conor M. 
Burke, Aisling McGowan, Gwyneth A. Davies, et al." + title: "Genomic attributes of airway commensal bacteria and mucosa" + url: https://doi.org/10.1038/s42003-024-05840-3 + journal: "Communications Biology" + date: 2024-02-12 + + - authors: "Vishnu Raghuram, Robert A. Petit, Zach Karol, R. J. MEHTA, Daniel B. Weissman, Timothy D. Read." + title: "Average Nucleotide Identity based Staphylococcus aureus strain grouping allows identification of strain-specific genes in the pangenome" + url: https://doi.org/10.1101/2024.01.29.577756 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2024-02-01 + + - authors: "Edwin Sakyi Kyei-Baffour, Kwabena Owusu-Boateng, Abiola Isawumi, Lydia Mosi." + title: "Pseudogenomic insights into the evolution of Mycobacterium ulcerans" + url: https://doi.org/10.1186/s12864-024-10001-1 + journal: "BMC Genomics" + date: 2024-01-22 + + - authors: "Dalal M. Alkuraythi, Manal M. Alkhulaifi, Abdulwahab Z. Binjomah, Mohammed Alarwi, Mohammed I. Mujallad, Saleh Ali Alharbi, Mohammad Alshomrani, Takashi Gojobori, Sulaiman M. Alajel." + title: "Comparative genomic analysis of antibiotic resistance and virulence genes in Staphylococcus aureus isolates from patients and retail meat" + url: https://doi.org/10.3389/fcimb.2023.1339339 + journal: "Frontiers in Cellular and Infection Microbiology" + date: 2024-01-12 + + - authors: "Jelly Vanderwoude, Sheyda Azimi, Timothy D. Read, Stephen P. Diggle." + title: "The role of hypermutation and collateral sensitivity in antimicrobial resistance diversity of Pseudomonas aeruginosa populations in cystic fibrosis lung infection" + url: https://doi.org/10.1128/mbio.03109-23 + journal: "mBio" + date: 2024-01-03 + + - authors: "Adrien Le Meur, Rima Zein-Eddine, Ombeline Lamer, Fiona Hak, Gaëtan Senelle, Jean‐Philippe Vernadet, Samuel O’Donnell, Ricardo C. Rodŕıguez de la Vega, Guislaine Refrégier." 
+ title: "Tools for short variant calling and the way to deal with big datasets" + url: https://doi.org/10.1016/b978-0-323-99886-4.00007-7 + journal: "Elsevier eBooks" + date: 2024-01-01 + + - authors: "Ana Rafaela Kruemmel, Jessica L. Halpin, Victoria Foltz, Janet K. Dykes, Carolina Lúquez." + title: "Detection of a Streptogramin A O-acetyltransferase vot(D)-Like Gene in the Chromosome of Clostridium botulinum Isolated From Infants in the United States" + url: https://doi.org/10.1016/j.toxicon.2024.107432 + journal: "Toxicon" + date: 2024-01-01 + + - authors: "Haley Sanderson, Aaron P. White." + title: "Methods for Genomic Epidemiology of Bacterial Pathogens: Example Salmonella" + url: https://doi.org/10.1007/978-1-0716-3890-3_2 + journal: "Methods in molecular biology" + date: 2024-01-01 + + - authors: "Alina D. Matsvay, V. M. Bezrukov, P.A. Nikolaeva, И.Ф. Стеценко, Varvara Nurmukanova, G.S. Dikaya, Maria A. Gordukova, E. V. Galeeva, German A. Shipulin." + title: "Cultivation and molecular characterization of complete genome sequences of M. pneumoniae isolated in Russia" + url: https://doi.org/10.36488/cmac.2024.3.274-284 + journal: "Clinical Microbiology and Antimicrobial Chemotherapy" + date: 2024-01-01 + + - authors: "Ghadeer Jdeed, Vera V. Morozova, Yuliya Kozlova, Artem Tikunov, Tatyana Ushakova, Alevtina Bardashevа, Andrey D. Manakhov, Maria Mitina, Elena V. Zhirakovskaya, Nina V. Tikunova." + title: "StM171, a Stenotrophomonas maltophilia Bacteriophage That Affects Sensitivity to Antibiotics in Host Bacteria and Their Biofilm Formation" + url: https://doi.org/10.3390/v15122455 + journal: "Viruses" + date: 2023-12-18 + + - authors: "Dalal M. Alkuraythi, Manal M. Alkhulaifi, Abdulwahab Z. Binjomah, Mohammed Alarwi, Hind Aldakhil, Mohammed I. Mujallad, Saleh Ali Alharbi, Mohammad Alshomrani, Saeed Mastour Alshahrani, Takashi Gojobori, et al." 
+ title: "Clonal Flux and Spread of Staphylococcus aureus Isolated from Meat and Its Genetic Relatedness to Staphylococcus aureus Isolated from Patients in Saudi Arabia" + url: https://doi.org/10.3390/microorganisms11122926 + journal: "Microorganisms" + date: 2023-12-06 + + - authors: "Xuechen Tian, Wee Fei Aaron Teo, Wei Yee Wee, Yixin Yang, Halah Ahmed, Nicholas S. Jakubovics, Siew Woh Choo, Geok Yuan Annie Tan." + title: "Genome characterization and taxonomy of Actinomyces acetigenes sp. nov., and Actinomyces stomatis sp. nov., previously isolated from the human oral cavity" + url: https://doi.org/10.1186/s12864-023-09831-2 + journal: "BMC Genomics" + date: 2023-12-04 + + - authors: "Claudio Neidhöfer, Marcel Neuenhoff, Robert Jozić, Brenda Atangcho, Sandra Unsleber, Ulrike Neder, Silke Grumaz, Marijo Parčina." + title: "Exploring clonality and virulence gene associations in bloodstream infections using whole-genome sequencing and clinical data" + url: https://doi.org/10.3389/fcimb.2023.1274573 + journal: "Frontiers in Cellular and Infection Microbiology" + date: 2023-11-14 + + - authors: "Vishnu Raghuram, Jessica J. Gunoskey, Katrina S. Hofstetter, Natasia F Jacko, Margot J. Shumaker, Yi‐Juan Hu, Timothy D. Read, Michael David." + title: "Comparison of genomic diversity between single and pooled Staphylococcus aureus colonies isolated from human colonization cultures" + url: https://doi.org/10.1099/mgen.0.001111 + journal: "Microbial Genomics" + date: 2023-11-07 + + - authors: "Kamalika H. Ubeyratne, Roshan P. Madalagama, Xin Liu, Sujatha Pathirage, Sujeewa Ariyawansa, Matthew K.L. Wong, Hein M. Tun." 
+ title: "Phenotypic and genotypic characterization of antibiotic-resistant Salmonella isolated from humans, aquaculture, and poultry in Sri Lanka: A retrospective study" + url: https://doi.org/10.1016/j.jiph.2023.10.028 + journal: "Journal of Infection and Public Health" + date: 2023-10-31 + + - authors: "Juliette Hayer, Jacques Dainat, Ella Marcy, Anne‐Laure Bañuls." + title: "Baargin: a Nextflow workflow for the automatic analysisof bacterial genomics data with a focus on AntimicrobialResistance" + url: https://doi.org/10.21105/joss.05397 + journal: "The Journal of Open Source Software" + date: 2023-10-19 + + - authors: "Hamza A. Babiker, Amina Al‐Jardani, Saleh Al-Azri, Robert A. Petit, Eltaib Saad, Sarah Al-Mahrouqi, Reham A.H. Mohamed, Salama Al-Hamidhi, Abdullah Balkhair, Najma Al Kharusi, et al." + title: "Mycobacterium tuberculosis epidemiology in Oman: whole-genome sequencing uncovers transmission pathways" + url: https://doi.org/10.1128/spectrum.02420-23 + journal: "Microbiology Spectrum" + date: 2023-09-28 + + - authors: "Felipe Marques de Almeida, Tatiana Amabile de Campos, Γεώργιος Παππάς." + title: "Scalable and versatile container-based pipelines for de novo genome assembly and bacterial annotation." + url: https://doi.org/10.12688/f1000research.139488.1 + journal: "F1000Research" + date: 2023-09-25 + + - authors: "Nicholas J. Pitcher, Andries Feder, Nicholas C. Bolden, Christian Zirbes, Anthony J. Pamatmat, L. Boyken, Jared J. Hill, Alyssa R. Bartels, Andrew L. Thurman, Valérie Reeb, et al." + title: "Parallel evolution of linezolid-resistant Staphylococcus aureus in patients with cystic fibrosis" + url: https://doi.org/10.1128/spectrum.02084-23 + journal: "Microbiology Spectrum" + date: 2023-09-19 + + - authors: "Dan Wang, Jon Palmer, Graham C. Fletcher, Stephen L. W. On, Dragana Gagić, Steve Flint." 
+ title: "Efficacy of commercial peroxyacetic acid on Vibrio parahaemolyticus planktonic cells and biofilms on stainless steel and Greenshell™ mussel (Perna canaliculus) surfaces" + url: https://doi.org/10.1016/j.ijfoodmicro.2023.110372 + journal: "International Journal of Food Microbiology" + date: 2023-08-29 + + - authors: "Ricardo A. R. Machado, Aashaq Hussain Bhat, Carlos Castañeda-Álvarez, Tarique Hassan Askary, Vladimı́r Půža, Sylvie Pagès, Joaquín Abolafia." + title: "Xenorhabdus aichiensis sp. nov., Xenorhabdus anantnagensis sp. nov., and Xenorhabdus yunnanensis sp. nov., Isolated from Steinernema Entomopathogenic Nematodes" + url: https://doi.org/10.1007/s00284-023-03373-2 + journal: "Current Microbiology" + date: 2023-07-26 + + - authors: "Dalal M. Alkuraythi, Manal M. Alkhulaifi, Abdulwahab Z. Binjomah, Mohammed Alarawi, Hind Aldakhil, Mohammed I. Mujallad, Saleh Ali Alharbi, Mohammed Alshomrani, Saeed Mastour Alshahrani, Takashi Gojobori, et al." + title: "Genetic characterization and dissemination of Staphylococcus aureus and Staphylococci genus: food and health perspective" + url: https://doi.org/10.1101/2023.07.24.550257 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2023-07-24 + + - authors: "Irene Ortega-Sanz, José A. Barbero‐Aparicio, Antonio Canepa, Jordi Rovira, Beatriz Melero." + title: "CamPype: an open-source workflow for automated bacterial whole-genome sequencing analysis focused on Campylobacter" + url: https://doi.org/10.1186/s12859-023-05414-w + journal: "BMC Bioinformatics" + date: 2023-07-20 + + - authors: "Claire K. A. Elek, Teagan L. Brown, Thanh Le-Viet, Rhiannon Evans, David Baker, Andrea Telatin, Sumeet K. Tiwari, Haider Al-Khanaq, Gaëtan Thilliez, Robert A. Kingsley, et al." 
+ title: "A hybrid and poly-polish workflow for the complete and accurate assembly of phage genomes: a case study of ten przondoviruses" + url: https://doi.org/10.1099/mgen.0.001065 + journal: "Microbial Genomics" + date: 2023-07-18 + + - authors: "Kevin Libuit, Emma L. Doughty, James R. Otieno, Frank Ambrosio, Curtis J. Kapsak, Emily Smith, Sage M. Wright, Michelle R. Scribner, Robert A. Petit, Catarina Inês Mendes, et al." + title: "Accelerating bioinformatics implementation in public health" + url: https://doi.org/10.1099/mgen.0.001051 + journal: "Microbial Genomics" + date: 2023-07-10 + + - authors: "Maho Tokuda, Masahiro Yuki, Moriya Ohkuma, Kazuhide Kimbara, Haruo Suzuki, Masaki Shintani." + title: "Transconjugant range of PromA plasmids in microbial communities is predicted by sequence similarity with the bacterial host chromosome" + url: https://doi.org/10.1099/mgen.0.001043 + journal: "Microbial Genomics" + date: 2023-06-21 + + - authors: "Ricardo A. R. Machado, Aashaq Hussain Bhat, Carlos Castañeda-Álvarez, Vladimı́r Půža, Ernesto San‐Blas." + title: "Photorhabdus aballayi sp. nov. and Photorhabdus luminescens subsp. venezuelensis subsp. nov., isolated from Heterorhabditis amazonensis entomopathogenic nematodes" + url: https://doi.org/10.1099/ijsem.0.005872 + journal: "INTERNATIONAL JOURNAL OF SYSTEMATIC AND EVOLUTIONARY MICROBIOLOGY" + date: 2023-05-12 + + - authors: "Nicholas J. Pitcher, Andries Feder, Nicholas C. Bolden, Christian Zirbes, Anthony J. Pamatmat, L. Boyken, Jared J. Hill, Andrew L. Thurman, Valérie Reeb, Harry S. Porterfield, et al." + title: "Parallel Evolution of Linezolid Resistant Staphylococcus aureus in Patients with Cystic Fibrosis" + url: https://doi.org/10.1101/2023.05.02.539145 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2023-05-02 + + - authors: "Ricardo A. R. Machado, Aashaq Hussain Bhat, Patrick Fallet, Ted C. J. Turlings, Joëlle Kajuga, Xun Yan, Stefan Toepfer." + title: "Xenorhabdus bovienii subsp. 
africana subsp. nov., isolated from Steinernema africanum entomopathogenic nematodes" + url: https://doi.org/10.1099/ijsem.0.005795 + journal: "INTERNATIONAL JOURNAL OF SYSTEMATIC AND EVOLUTIONARY MICROBIOLOGY" + date: 2023-04-27 + + - authors: "Alberto J. Martín‐Rodríguez, Kaisa Thorell, Enrique Joffré, Susanne Jensie-Markopoulos, Edward R. B. Moore, Åsa Sjöling." + title: "Shewanella septentrionalis sp. nov. and Shewanella holmiensis sp. nov., isolated from Baltic Sea water and sediments" + url: https://doi.org/10.1099/ijsem.0.005767 + journal: "INTERNATIONAL JOURNAL OF SYSTEMATIC AND EVOLUTIONARY MICROBIOLOGY" + date: 2023-04-21 + + - authors: "Ekaterina Avershina, Abdolrahman Khezri, Rafi Ahmad." + title: "Clinical Diagnostics of Bacterial Infections and Their Resistance to Antibiotics—Current State and Whole Genome Sequencing Implementation Perspectives" + url: https://doi.org/10.3390/antibiotics12040781 + journal: "Antibiotics" + date: 2023-04-19 + + - authors: "Chalita Chomkatekaew, Janjira Thaipadungpanit, Pasco Hearn, Sona Soeng, Sreymom Pol, Leakhena Neou, Jill Hopkins, Paul Turner, Elizabeth M. Batty." + title: "Detection of maternal transmission of resistant Gram-negative bacteria in a Cambodian hospital setting" + url: https://doi.org/10.3389/fmicb.2023.1158056 + journal: "Frontiers in Microbiology" + date: 2023-04-14 + + - authors: "Michelle Gompelman, Guus T J Wezendonk, Yannick Wouters, Judith Beurskens-Meijerink, Konstantinos C. Fragkos, Farooq Rahman, Jordy P. M. Coolen, Ingrid J.M. van Weerdenburg, Heiman Wertheim, Wietske Kievit, et al." + title: "Randomized clinical trial: Long-term Staphylococcus aureus decolonization in patients on home parenteral nutrition" + url: https://doi.org/10.1016/j.clnu.2023.03.010 + journal: "Clinical Nutrition" + date: 2023-03-17 + + - authors: "Rajnish Kumar, Garima Yadav, Mohammed Kuddus, Ghulam Md Ashraf, Rachana Singh." 
+ title: "Unlocking the microbial studies through computational approaches: how far have we reached?" + url: https://doi.org/10.1007/s11356-023-26220-0 + journal: "Environmental Science and Pollution Research" + date: 2023-03-15 + + - authors: "Ricardo A. R. Machado, Ameni Loulou, Aashaq Hussain Bhat, Maristella Mastore, Céline Terrettaz, Maurizio Francesco Brivio, Sadreddine Kallel." + title: "Acinetobacter nematophilus sp. nov., Alcaligenes nematophilus sp. nov., Enterobacter nematophilus sp. nov., and Kaistia nematophila sp. nov., Isolated from Soil-Borne Nematodes and Proposal for the Elevation of Alcaligenes faecalis subsp. faecalis, Alcaligenes faecalis subsp. parafaecalis, and Alcaligenes faecalis subsp. phenolicus to the Species Level" + url: https://doi.org/10.3390/taxonomy3010012 + journal: "Taxonomy" + date: 2023-03-09 + + - authors: "Eve Beauchemin, Claire Hunter, Corinne F. Maurice." + title: "Actively replicating gut bacteria identified by 5-ethynyl-2’-deoxyuridine (EdU) click chemistry and cell sorting" + url: https://doi.org/10.1080/19490976.2023.2180317 + journal: "Gut Microbes" + date: 2023-02-23 + + - authors: "Ameni Loulou, Maristella Mastore, Sara Caramella, Aashaq Hussain Bhat, Maurizio Francesco Brivio, Ricardo A. R. Machado, Sadreddine Kallel." + title: "Entomopathogenic potential of bacteria associated with soil-borne nematodes and insect immune responses to their infection" + url: https://doi.org/10.1371/journal.pone.0280675 + journal: "PLoS ONE" + date: 2023-01-23 + + - authors: "Daichi Morita, Hiroki Arai, Junko Isobe, Emi Maenishi, Takanori Kumagai, Fumito Maruyama, Teruo Kuroda." + title: "Whole-Genome and Plasmid Comparative Analysis of Campylobacter jejuni from Human Patients in Toyama, Japan, from 2015 to 2019" + url: https://doi.org/10.1128/spectrum.02659-22 + journal: "Microbiology Spectrum" + date: 2023-01-09 + + - authors: "Rahul Jain, Priyanka Bhardwaj, Shweta Guleria, Anita Pandey, Sanjay Kumar." 
+ title: "Polyamine metabolizing rhizobacteria Pseudomonas sp. GBPI_506 modulates hormone signaling to enhance lateral roots and nicotine biosynthesis in Nicotiana benthamiana" + url: https://doi.org/10.1016/j.plaphy.2023.01.010 + journal: "Plant Physiology and Biochemistry" + date: 2023-01-09 + + - authors: "Masato Suzuki, Yusuke Hashimoto, Aki Hirabayashi, Koji Yahara, Mitsunori Yoshida, Hanako Fukano, Yoshihiko Hoshino, Keigo Shibayama, Haruyoshi Tomita." + title: "Genomic Epidemiological Analysis of Antimicrobial-Resistant Bacteria with Nanopore Sequencing" + url: https://doi.org/10.1007/978-1-0716-2996-3_16 + journal: "Methods in molecular biology" + date: 2023-01-01 + + - authors: "Sibasree Hojaisa, Anupam Nath Jha." + title: "Databases and Tools for Microbial Genome and Human Microbiome Studies" + url: https://doi.org/10.1007/978-981-99-2816-3_3 + journal: "" + date: 2023-01-01 + + - authors: "Irene Ortega-Sanz, José A. Barbero‐Aparicio, Antonio Canepa, Jordi Rovira, Beatriz Melero." + title: "Additional file 1 of CamPype: an open-source workflow for automated bacterial whole-genome sequencing analysis focused on Campylobacter" + url: https://doi.org/10.6084/m9.figshare.23722209 + journal: "Figshare" + date: 2023-01-01 + + - authors: "Luc Cornet, Benoit Durieu, Frederik Baert, Elizabet D’hooge, David Colignon, Loïc Meunier, Valérian Lupo, Ilse Cleenwerck, Heide‐Marie Daniel, Leen Rigouts, et al." + title: "The GEN-ERA toolbox: unified and reproducible workflows for research in microbial genomics" + url: https://doi.org/10.1093/gigascience/giad022 + journal: "GigaScience" + date: 2022-12-28 + + - authors: "Ilaria Cavallo, Francesca Sivori, Mauro Truglio, Flavio De Maio, Federica Lucantoni, Giorgia Cardinali, Martina Pontone, Thierry Bernardi, Maurizio Sanguinetti, B. Capitanio, et al." 
+ title: "Skin dysbiosis and Cutibacterium acnes biofilm in inflammatory acne lesions of adolescents" + url: https://doi.org/10.1038/s41598-022-25436-3 + journal: "Scientific Reports" + date: 2022-12-06 + + - authors: "Aitor Atxaerandio-Landa, Ainhoa Arrieta-Gisasola, Lorena Laorden, Joseba Bikandi, Javier Garaizar, Irati Martínez-Malaxetxebarria, Ilargi Martínez-Ballesteros." + title: "A Practical Bioinformatics Workflow for Routine Analysis of Bacterial WGS Data" + url: https://doi.org/10.3390/microorganisms10122364 + journal: "Microorganisms" + date: 2022-11-29 + + - authors: "Poonam Chitale, Alexander Lemenze, Emily C. Fogarty, Avi Shah, Courtney Grady, Aubrey R. Odom-Mabey, W. Evan Johnson, Jason H. Yang, A. Murat Eren, Roland Brosch, et al." + title: "A comprehensive update to the Mycobacterium tuberculosis H37Rv reference genome" + url: https://doi.org/10.1038/s41467-022-34853-x + journal: "Nature Communications" + date: 2022-11-18 + + - authors: "Dan Wang, Graham C. Fletcher, Stephen L. W. On, Jon Palmer, Dragana Gagić, Steve Flint." + title: "Biofilm formation, sodium hypochlorite susceptibility and genetic diversity of Vibrio parahaemolyticus" + url: https://doi.org/10.1016/j.ijfoodmicro.2022.110011 + journal: "International Journal of Food Microbiology" + date: 2022-11-07 + + - authors: "Priyanka Bhardwaj, Rahul Jain, Sanjay Kumar." + title: "Draft Genome Sequence of Endophytic Sphingomonas faeni Strain ALB2, Isolated from the Leaf of a Cold-Desert Medicinal Plant" + url: https://doi.org/10.1128/mra.00687-22 + journal: "Microbiology Resource Announcements" + date: 2022-10-13 + + - authors: "Jon G. Sanders, Weiwei Yan, Deus Mjungu, Elizabeth V. Lonsdorf, John Hart, Crickette Sanz, David Morgan, Martine Peeters, Beatrice H. Hahn, Andrew H. Moeller." 
+ title: "A low-cost genomics workflow enables isolate screening and strain-level analyses within microbiomes" + url: https://doi.org/10.1186/s13059-022-02777-w + journal: "Genome biology" + date: 2022-10-12 + + - authors: "Megan Taylor, Lili Janasky, Nic M. Vega." + title: "Convergent structure with divergent adaptations in combinatorial microbiome communities" + url: https://doi.org/10.1093/femsec/fiac115 + journal: "FEMS Microbiology Ecology" + date: 2022-09-28 + + - authors: "Leah Cuthbertson, Ulrike Löber, Jonathan Ish-Horowicz, Claire McBrien, Colin Churchward, Jeremy C. Parker, Michael Olanipekun, Conor M. Burke, Orla O’Carroll, John L. Faul, et al." + title: "Genomic and ecologic characteristics of the airway microbial-mucosal complex" + url: https://doi.org/10.1101/2022.09.08.507073 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2022-09-08 + + - authors: "Ahmed Babiker, Chris Bower, Joseph D. Lutgring, Robert A. Petit, Jessica Howard‐Anderson, Uzma Ansari, Gillian McAllister, Michelle Adamczyk, Erin Breaker, Sarah W. Satola, et al." + title: "Clinical and Genomic Epidemiology of mcr-9 -Carrying Carbapenem-Resistant Enterobacterales Isolates in Metropolitan Atlanta, 2012 to 2017" + url: https://doi.org/10.1128/spectrum.02522-21 + journal: "Microbiology Spectrum" + date: 2022-07-20 + + - authors: "Poonam Chitale, Alexander Lemenze, Emily C. Fogarty, Avi Shah, Courtney Grady, Aubrey R. Odom-Mabey, W. Evan Johnson, Jason H. Yang, A. Murat Eren, Roland Brosch, et al." + title: "A comprehensive update to the Mycobacterium tuberculosis H37Rv reference genome" + url: https://doi.org/10.1101/2022.07.15.500236 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2022-07-16 + + - authors: "Daichi Morita, Hiroki Arai, Junko Isobe, Emi Maenishi, Takanori Kumagai, Fumito Maruyama, Teruo Kuroda." 
+ title: "Diversity and characteristics of pTet family plasmids revealed by genomic epidemiology of Campylobacter jejuni from human patients in Toyama, Japan from 2015 to 2019" + url: https://doi.org/10.1101/2022.06.28.498051 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2022-06-29 + + - authors: "Caiyan Wang, Yang Xia, Yunfei Liu, Kang Chen, N. Lu, Di Tian, Hui Lu, Fuhai Han, Jian Xu, Tetsuya Yomo." + title: "CleanSeq: A Pipeline for Contamination Detection, Cleanup, and Mutation Verifications from Microbial Genome Sequencing Data" + url: https://doi.org/10.3390/app12126209 + journal: "Applied Sciences" + date: 2022-06-18 + + - authors: "Felicita Dure, Melise Chaves Silveira, Cláudio Marcos Rocha-de-Souza, Robson Souza Leão, Ivson Cassiano de Oliveira Santos, Rodolpho Mattos Albano, Elizabeth Andrade Marques, Ana Paula D’Alincourt Carvalho-Assef, Fabrício Alves Barbosa da Silva." + title: "CABGen: A Web Application for the Bioinformatic Analysis of Bacterial Genomes" + url: https://doi.org/10.3389/fmicb.2022.893474 + journal: "Frontiers in Microbiology" + date: 2022-05-27 + + - authors: "Brooke M. Talbot, Natasia F Jacko, Robert A. Petit, David A. Pegues, Margot J. Shumaker, Timothy D. Read, Michael David." + title: "Unsuspected Clonal Spread of Methicillin-Resistant Staphylococcus aureus Causing Bloodstream Infections in Hospitalized Adults Detected Using Whole Genome Sequencing" + url: https://doi.org/10.1093/cid/ciac339 + journal: "Clinical Infectious Diseases" + date: 2022-04-28 + + - authors: "Jon G. Sanders, Weiwei Yan, Andrew H. Moeller." + title: "Low-cost genomics enable high-throughput isolate screening and strain-level microbiome profiling" + url: https://doi.org/10.1101/2022.04.11.487950 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2022-04-11 + + - authors: "Audun Sivertsen, Ruben Dyrhovden, Marit Gjerde Tellevik, Torbjørn S. Bruvold, Eirik Nybakken, Dag Harald Skutlaberg, Ingerid Skarstein, Øyvind Kommedal." 
+ title: "Escherichia marmotae—a Human Pathogen Easily Misidentified as Escherichia coli" + url: https://doi.org/10.1128/spectrum.02035-21 + journal: "Microbiology Spectrum" + date: 2022-04-05 + + - authors: "Xinrong Li, Liying Zhu, Xin Wang, Jinjun Li, Biao Tang." + title: "Evaluation of IR Biotyper for Lactiplantibacillus plantarum Typing and Its Application Potential in Probiotic Preliminary Screening" + url: https://doi.org/10.3389/fmicb.2022.823120 + journal: "Frontiers in Microbiology" + date: 2022-03-24 + + - authors: "Vishnu Raghuram, Ashley M. Alexander, Hui Qi Loo, Robert A. Petit, Joanna B. Goldberg, Timothy D. Read." + title: "Species-Wide Phylogenomics of the Staphylococcus aureus Agr Operon Revealed Convergent Evolution of Frameshift Mutations" + url: https://doi.org/10.1128/spectrum.01334-21 + journal: "Microbiology Spectrum" + date: 2022-01-19 + + - authors: "Vincenzo Pennone, José F. Cobo‐Díaz, Miguel Prieto Maradona, Avelino Álvarez‐Ordóñez." + title: "Application of genomics and metagenomics to improve food safety based on an enhanced characterisation of antimicrobial resistance" + url: https://doi.org/10.1016/j.cofs.2021.12.002 + journal: "Current Opinion in Food Science" + date: 2021-12-16 + + - authors: "Georg Conrads, Tim Klomp, Dongmei Deng, Johannes-Simon Wenzler, Andreas Braun, Mohamed M. H. Abdelbary." + title: "The Antimicrobial Susceptibility of Porphyromonas gingivalis: Genetic Repertoire, Global Phenotype, and Review of the Literature" + url: https://doi.org/10.3390/antibiotics10121438 + journal: "Antibiotics" + date: 2021-11-24 + + - authors: "Oliver Schwengers, Lukas Jelonek, Marius Alfred Dieckmann, Sebastian Beyvers, Jochen Blom, Alexander Goesmann." + title: "Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification" + url: https://doi.org/10.1099/mgen.0.000685 + journal: "Microbial Genomics" + date: 2021-11-05 + + - authors: "Karen L. Bell, Robert A. Petit, Anya Cutler, Emily K. 
Dobbs, J. Michael Macpherson, Timothy D. Read, Kevin S. Burgess, Berry J. Brosi." + title: "Comparing whole‐genome shotgun sequencing and DNA metabarcoding approaches for species identification and quantification of pollen species mixtures" + url: https://doi.org/10.1002/ece3.8281 + journal: "Ecology and Evolution" + date: 2021-11-01 + + - authors: "Oliver Schwengers, Lukas Jelonek, Marius Alfred Dieckmann, Sebastian Beyvers, Jochen Blom, Alexander Goesmann." + title: "Bakta: Rapid & standardized annotation of bacterial genomes via alignment-free sequence identification" + url: https://doi.org/10.1101/2021.09.02.458689 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2021-09-02 + + - authors: "Vishnu Raghuram, Ashley M. Alexander, Hui Qi Loo, Robert A. Petit, Joanna B. Goldberg, Timothy D. Read." + title: "Species-wide phylogenomics of the Staphylococcus aureus agr operon reveals convergent evolution of frameshift mutations" + url: https://doi.org/10.1101/2021.07.29.454156 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2021-07-30 + + - authors: "Samantha L. Goldman, Jon G. Sanders, Weiwei Yan, Anthony Denice, Margaret Cornwall, Kathleen Ivey, Emily N. Taylor, Alex R. Gunderson, Michael J. Sheehan, Deus Mjungu, et al." + title: "Culture‐enriched community profiling improves resolution of the vertebrate gut microbiota" + url: https://doi.org/10.1111/1755-0998.13456 + journal: "Molecular Ecology Resources" + date: 2021-06-26 + + - authors: "Valentine Murigneux, Leah W. Roberts, Brian M. Forde, Minh‐Duy Phan, Nguyen Thi Khanh Nhu, Adam Irwin, Patrick N. A. Harris, David L. Paterson, Mark A. Schembri, David M. Whiley, et al." + title: "MicroPIPE: validating an end-to-end workflow for high-quality complete bacterial genome construction" + url: https://doi.org/10.1186/s12864-021-07767-z + journal: "BMC Genomics" + date: 2021-06-25 + + - authors: "Natasha Pavlovikj, João Carlos Gomes‐Neto, Jitender S. Deogun, Andrew K. Benson." 
+ title: "ProkEvo: an automated, reproducible, and scalable framework for high-throughput bacterial population genomics analyses" + url: https://doi.org/10.7717/peerj.11376 + journal: "PeerJ" + date: 2021-05-21 + + - authors: "" + title: "Peer Review #1 of \"ProkEvo: an automated, reproducible, and scalable framework for high-throughput bacterial population genomics analyses (v0.2)\"" + url: https://doi.org/10.7287/peerj.11376v0.2/reviews/1 + journal: "" + date: 2021-05-21 + + - authors: "" + title: "Peer Review #2 of \"ProkEvo: an automated, reproducible, and scalable framework for high-throughput bacterial population genomics analyses (v0.1)\"" + url: https://doi.org/10.7287/peerj.11376v0.1/reviews/2 + journal: "" + date: 2021-05-21 + + - authors: "Natasha Pavlovikj, Joao Gomes-Neto, Jitender Deogun, Andrew Benson, S Quainoo, Jpm Coolen, Saft Van Hijum, M Huynen, Wjg Melchers, W Van Schaik, et al." + title: "Peer Review #1 of \"ProkEvo: an automated, reproducible, and scalable framework for high-throughput bacterial population genomics analyses (v0.1)\"" + url: https://doi.org/10.7287/peerj.11376v0.1/reviews/1 + journal: "" + date: 2021-05-21 + + - authors: "Aleksei A. Korzhenkov." + title: "ZGA: a flexible pipeline for read processing, de novo assembly and annotation of prokaryotic genomes" + url: https://doi.org/10.1101/2021.04.27.441618 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2021-04-28 + + - authors: "Carlus Deneke, Holger Brendebach, Laura Uelze, Maria Borowiak, Burkhard Malorny, Simon H. Tausch." + title: "Species-Specific Quality Control, Assembly and Contamination Detection in Microbial Isolate Sequences with AQUAMIS" + url: https://doi.org/10.3390/genes12050644 + journal: "Genes" + date: 2021-04-26 + + - authors: "Ivan Sserwadda, Gerald Mboowa." 
+ title: "rMAP: the Rapid Microbial Analysis Pipeline for ESKAPE bacterial group whole-genome sequence data" + url: https://doi.org/10.1099/mgen.0.000583 + journal: "Microbial Genomics" + date: 2021-03-24 + + - authors: "Grace A. Beggs, Julio C. Ayala, Logan G. Kavanaugh, Timothy D. Read, Grace M. Hooks, Maria A. Schumacher, William M. Shafer, Richard G. Brennan." + title: "Structures of Neisseria gonorrhoeae MtrR-operator complexes reveal molecular mechanisms of DNA recognition and antibiotic resistance-conferring clinical mutations" + url: https://doi.org/10.1093/nar/gkab213 + journal: "Nucleic Acids Research" + date: 2021-03-17 + + - authors: "Marcelle Oliveira de Almeida, Rodrigo Dias de Oliveira Carvalho, Flávia Figueira Aburjaile, Fábio Malcher Miranda, Janaína Canário Cerqueira, Bertram Brenig, Preetam Ghosh, Rommel Thiago Jucá Ramos, Rodrigo Bentes Kato, Siomar de Castro Soares, et al." + title: "Characterization of the first vaginal Lactobacillus crispatus genomes isolated in Brazil" + url: https://doi.org/10.7717/peerj.11079 + journal: "PeerJ" + date: 2021-03-10 + + - authors: "Valentine Murigneux, Leah W. Roberts, Brian M. Forde, Minh‐Duy Phan, Nguyen Thi Khanh Nhu, Adam Irwin, Patrick N. A. Harris, David L. Paterson, Mark A. Schembri, David M. Whiley, et al." + title: "MicroPIPE: An end-to-end solution for high-quality complete bacterial genome construction" + url: https://doi.org/10.1101/2021.02.02.429319 + journal: "bioRxiv (Cold Spring Harbor Laboratory)" + date: 2021-02-03 + + - authors: "Ricardo A. R. Machado, Aashaq Hussain Bhat, Joaquín Abolafia, Arthur Muller, Pamela Bruno, Patrick Fallet, Carla C. M. Arce, Ted C. J. Turlings, Julio S. Bernal, Joëlle Kajuga, et al." + title: "Multi-locus phylogenetic analyses uncover species boundaries and reveal the occurrence of two new entomopathogenic nematode species, Heterorhabditis ruandica n. sp. and Heterorhabditis zacatecana n. sp." 
+ url: https://doi.org/10.21307/jofnem-2021-089 + journal: "Journal of Nematology" + date: 2021-01-01 + + - authors: "Marcelle Oliveira de Almeida, Marcus Vinícius Canário Viana, Janaína Canário Cerqueira, Flávia Figueira Aburjaile, Andrey A. Zamyatnin, Vasco Azevedo, Rodrigo Dias de Oliveira Carvalho." + title: "Novel insights in bacterial vaginosis etiology through genomic approaches" + url: https://doi.org/10.1590/0001-3765202120200945 + journal: "Anais da Academia Brasileira de Ciências" + date: 2021-01-01 + + - authors: "António Lourenço, Kieran Jordan, Olivia McAuliffe." + title: "Phylogenomics of Foodborne Pathogens: The Case of Listeria monocytogenes" + url: https://doi.org/10.1016/b978-0-12-816395-5.00007-1 + journal: "Elsevier eBooks" + date: 2020-07-01 + + - authors: "Xiaojun Zhou, Duanduan Xuan, Shoukui Hu, Jiang Du, Ji Pu, Dong Jin, Fan Zhao, Feifei Yin, Xiuji Cui, Yi Huang, et al." + title: "Pelistega ratti sp. nov. from Rattus norvegicus of Hainan island" + url: https://doi.org/10.1099/ijsem.0.004733 + journal: "INTERNATIONAL JOURNAL OF SYSTEMATIC AND EVOLUTIONARY MICROBIOLOGY" + date: 2019-06-06 + + - authors: "António Lourenço." + title: "OBSOLETE: Phylogenomics of Foodborne Pathogens: The Case of Listeria monocytogenes" + url: https://doi.org/10.1016/b978-0-08-100596-5.00007-x + journal: "Elsevier eBooks" + date: 2015-11-30 + diff --git a/docs/data/contributions.yml b/data/contributions.yml similarity index 100% rename from docs/data/contributions.yml rename to data/contributions.yml diff --git a/data/tool-categories.yml b/data/tool-categories.yml new file mode 100644 index 00000000..22270dec --- /dev/null +++ b/data/tool-categories.yml @@ -0,0 +1,128 @@ +## Tool-to-category mapping for the bactopia-tools index page. +## The generator (bin/generate-tools-index.py) reads this file and groups tools +## by category. Every MDX file in bactopia-tools/ (except index.mdx) must appear +## here -- the build fails otherwise. 
+ +categories: + - name: Antimicrobial Resistance + description: >- + Detect and characterize antimicrobial resistance genes, point mutations, + and mobilized resistance determinants across bacterial genomes. + tools: + - abricate + - abritamr + - amrfinderplus + - mcroni + - mykrobe + - rgi + - tbprofiler + + - name: Annotation + description: >- + Annotate genome features including genes, proteins, functional domains, + prophages, and defense systems. + tools: + - bakta + - defensefinder + - eggnog + - gamma + - phispy + - prokka + + - name: Assembly Quality + description: >- + Evaluate genome assembly completeness, contamination, and overall + quality metrics. + tools: + - busco + - checkm + - checkm2 + - quast + + - name: BLAST + description: >- + Search nucleotide and protein sequences against BLAST databases using + all five standard BLAST programs. + tools: + - blastn + - blastp + - blastx + - tblastn + - tblastx + + - name: Comparative Genomics + description: >- + Compare genomes through distance estimation, average nucleotide identity, + variant calling, pangenome analysis, and phylogenetic tree construction. + tools: + - fastani + - mashdist + - mashtree + - pangenome + - snippy + + - name: Metagenomics & Taxonomy + description: >- + Classify reads taxonomically, estimate species abundances in metagenomic + samples, and remove contaminant sequences. + tools: + - bracken + - gtdb + - kraken2 + - midas + - scrubber + - sylph + + - name: Plasmids & Mobile Elements + description: >- + Identify, reconstruct, and type plasmids and insertion sequences from + bacterial genome assemblies. + tools: + - ismapper + - mobsuite + - plasmidfinder + + - name: Typing + description: >- + Perform multi-locus sequence typing (MLST), phylotyping, and other + general sequence-based typing methods. 
+ tools: + - ariba + - gigatyper + - mlst + + - name: Serotyping & Species-Specific Analysis + description: >- + Predict serotypes, serogroups, and perform species-specific characterization + including specialized typing, virulence screening, and lineage assignment. + Many of these tools can be automatically selected and run with + [merlin](/bactopia-tools/merlin) based on species identification. + tools: + - agrvate + - btyper3 + - clermontyping + - ectyper + - emmtyper + - genotyphi + - hicap + - hpsuissero + - kleborate + - legsta + - lissero + - meningotype + - merlin + - ngmaster + - pasty + - pbptyper + - pneumocat + - sccmec + - seqsero2 + - seroba + - shigapass + - shigatyper + - shigeifinder + - sistr + - spatyper + - ssuissero + - staphtyper + - stecfinder diff --git a/developers/ai-skills/index.mdx b/developers/ai-skills/index.mdx new file mode 100644 index 00000000..8e7d1c7a --- /dev/null +++ b/developers/ai-skills/index.mdx @@ -0,0 +1,292 @@ +--- +title: AI Skills +description: Reference for AI skills that automate Bactopia development tasks +--- + +# AI Skills + +These 12 skills automate common Bactopia development tasks through AI-assisted +coding tools. Each skill wraps one or more +[bactopia-py](/developers/cli) CLI commands with interactive guidance, +guardrails, and multi-step orchestration. Skills live in the bactopia repo at +`.claude/skills/` and are invoked with `/skill-name`. + +[View skills on GitHub](https://github.com/bactopia/bactopia/tree/main/.claude/skills) + +## Overview + +| Skill | Category | Description | +|-------|----------|-------------| +| [`/add-bactopia-tool`](#add-bactopia-tool) | Scaffolding | Scaffold a complete Bactopia Tool across all three tiers -- module, subworkflow, and workflow. | +| [`/add-module`](#add-module) | Scaffolding | Scaffold a new Bactopia module from a bioconda/conda-forge package. 
| +| [`/add-subworkflow`](#add-subworkflow) | Scaffolding | Scaffold a new Bactopia subworkflow that orchestrates existing modules. | +| [`/merge-schemas`](#merge-schemas) | Maintenance | Regenerate nextflow.config and nextflow_schema.json for Bactopia workflows. | +| [`/update-catalog`](#update-catalog) | Maintenance | Regenerate catalog.json and llms.txt. | +| [`/update-module`](#update-module) | Maintenance | Check for newer versions of tools used in Bactopia modules and apply updates. | +| [`/review-groovydoc`](#review-groovydoc) | Review & Quality | Review GroovyDoc accuracy across modules and subworkflows. | +| [`/review-citations`](#review-citations) | Review & Quality | Review citation integrity across data/citations.yml and @citation tags. | +| [`/review-docs`](#review-docs) | Review & Quality | Review staleness of reference docs under .claude/docs/. | +| [`/review-tests`](#review-tests) | Review & Quality | Review nf-test run results with grouped error analysis. | +| [`/run-tests`](#run-tests) | Testing | Run Bactopia nf-tests and produce a timestamped logs directory. | +| [`/project-status`](#project-status) | Project | Show a live snapshot of project state, coverage, and structural issues. | + +## Scaffolding + +Skills that create new Bactopia components from bioconda/conda-forge packages. + +### `/add-bactopia-tool` + +Scaffold a complete Bactopia Tool pipeline from a bioconda/conda-forge package, +creating all three tiers (module, subworkflow, workflow) in one shot. 
+ +**Wraps:** [`bactopia-scaffold`](/developers/cli/bactopia-scaffold) + +**When to use:** +- Add a new analysis tool to Bactopia Tools from a bioconda package +- Create all three tiers (module, subworkflow, workflow) in one shot +- Prefer this over `/add-module` or `/add-subworkflow` when the goal is a complete tool + +**Examples:** +``` +/add-bactopia-tool fastp +/add-bactopia-tool checkm2 +``` + +**Related skills:** [`/add-module`](#add-module), [`/add-subworkflow`](#add-subworkflow), +[`/run-tests`](#run-tests), [`/update-catalog`](#update-catalog) + +### `/add-module` + +Scaffold a complete Bactopia module for a bioconda/conda-forge package, creating +all required files with GroovyDoc documentation and nf-test tests. + +**Wraps:** [`bactopia-scaffold`](/developers/cli/bactopia-scaffold) + +**When to use:** +- Add a new module without the full bactopia-tool scaffolding +- Create module files (main.nf, module.config, schema.json, tests) +- Scaffold just the process layer for a tool + +**Examples:** +``` +/add-module snippy +/add-module fastqc +``` + +**Related skills:** [`/add-bactopia-tool`](#add-bactopia-tool), [`/add-subworkflow`](#add-subworkflow) + +### `/add-subworkflow` + +Scaffold a Bactopia subworkflow that orchestrates one or more existing modules. +Subworkflows wire modules together, aggregate results, and provide a clean +interface for workflows. + +**Wraps:** [`bactopia-scaffold`](/developers/cli/bactopia-scaffold) + +**When to use:** +- Create a subworkflow to orchestrate existing modules +- Wire up modules into a reusable analysis unit +- Add the glue layer between modules and workflows + +**Examples:** +``` +/add-subworkflow snippy +/add-subworkflow amrfinderplus +``` + +**Related skills:** [`/add-bactopia-tool`](#add-bactopia-tool), [`/add-module`](#add-module) + +## Maintenance + +Skills that keep existing components up to date. 
+ +### `/merge-schemas` + +Regenerate the `nextflow.config` and `nextflow_schema.json` files for one or more +Bactopia workflows. Auto-discovers each workflow's output directory from +`catalog.json` so the caller never hand-computes paths. + +**Wraps:** [`bactopia-merge-schemas`](/developers/cli/bactopia-merge-schemas) + +**When to use:** +- Sync workflow configs after module schema changes +- Rebuild nextflow_schema.json for a specific workflow +- Regenerate configs after adding or modifying parameters + +**Examples:** +``` +/merge-schemas teton +/merge-schemas teton and staphopia +/merge-schemas all tools +``` + +**Related skills:** [`/update-module`](#update-module), [`/update-catalog`](#update-catalog) + +### `/update-catalog` + +Regenerate the machine-readable Bactopia component index (`catalog.json`) and the +AI-discovery surface (`llms.txt`). + +**Wraps:** [`bactopia-catalog`](/developers/cli/bactopia-catalog) + +**When to use:** +- Sync catalog.json after adding or removing components +- Refresh llms.txt after GroovyDoc edits that affect descriptions +- Update the component index after tool version bumps + +**Examples:** +``` +/update-catalog +``` + +**Related skills:** [`/project-status`](#project-status), [`/merge-schemas`](#merge-schemas) + +### `/update-module` + +Check for newer versions of bioconda tools used in Bactopia modules and apply +updates to module.config files. + +**Wraps:** [`bactopia-update`](/developers/cli/bactopia-update) + +**When to use:** +- Check if any modules have outdated tool versions +- Bump container versions to the latest bioconda release +- Update a specific module's tool version + +**Examples:** +``` +/update-module +/update-module snippy +``` + +**Related skills:** [`/merge-schemas`](#merge-schemas), [`/project-status`](#project-status) + +## Review & Quality + +Skills that audit documentation, citations, and code quality. + +### `/review-groovydoc` + +Run bactopia-lint focused on GroovyDoc accuracy rules. 
Checks @output/@input +field matching, @modules/@subworkflows lists, citation keys, tag ordering, +and formatting. + +**Wraps:** [`bactopia-lint`](/developers/cli/bactopia-lint) + +**When to use:** +- Validate documentation accuracy across modules and subworkflows +- Check @output/@input fields match actual channel declarations +- Audit GroovyDoc after editing module or subworkflow docs + +**Examples:** +``` +/review-groovydoc +/review-groovydoc snippy +``` + +**Related skills:** [`/review-citations`](#review-citations), [`/review-docs`](#review-docs) + +### `/review-citations` + +Run `bactopia-citations --validate` and present the integrity report. Detects +orphan citation keys (defined but never referenced) and workflow @citation keys +that don't resolve to a citations.yml entry. + +**Wraps:** [`bactopia-citations`](/developers/cli/bactopia-citations) + +**When to use:** +- Audit citations.yml for orphan or missing entries +- Validate that workflow @citation tags resolve correctly +- Clean up unused citations after removing tools + +**Examples:** +``` +/review-citations +``` + +**Related skills:** [`/review-groovydoc`](#review-groovydoc), [`/review-docs`](#review-docs) + +### `/review-docs` + +Run `bactopia-docs --validate` and present the staleness report. Detects +deprecated patterns and ground-truth violations (stale counts, wrong Nextflow +version, references to nonexistent commands or lint rule IDs). + +**Wraps:** [`bactopia-docs`](/developers/cli/bactopia-docs) + +**When to use:** +- Check reference docs for outdated information after migrations +- Verify doc claims match current repo state +- Scan .claude/docs for drift after adding or removing components + +**Examples:** +``` +/review-docs +``` + +**Related skills:** [`/review-groovydoc`](#review-groovydoc), [`/review-citations`](#review-citations), +[`/project-status`](#project-status) + +### `/review-tests` + +Review nf-test run results and present a diagnostic summary with grouped error +analysis. 
+ +**Wraps:** [`bactopia-review-tests`](/developers/cli/bactopia-review-tests) + +**When to use:** +- Analyze test output after running `/run-tests` +- Investigate why tests failed with grouped error summaries +- Review a specific test run by timestamp + +**Examples:** +``` +/review-tests +/review-tests 20260324_081306 +``` + +**Related skills:** [`/run-tests`](#run-tests) + +## Testing + +### `/run-tests` + +Run the Bactopia nf-test suite through `bactopia-test` for a specific component +and produce a timestamped `logs/` directory that `/review-tests` can interpret. + +**Wraps:** [`bactopia-test`](/developers/cli/bactopia-test) + +**When to use:** +- Run tests for a specific module, subworkflow, or workflow +- Validate changes before committing +- Generate test snapshots for newly scaffolded components + +**Examples:** +``` +/run-tests snippy +/run-tests abricate module +/run-tests amrfinderplus subworkflow +``` + +**Related skills:** [`/review-tests`](#review-tests) + +## Project + +### `/project-status` + +Show a live snapshot of the Bactopia project state -- component counts, GroovyDoc +coverage, nf-test coverage, and structural issues. 
+ +**Wraps:** [`bactopia-status`](/developers/cli/bactopia-status) + +**When to use:** +- Check overall project state and coverage metrics +- Find components missing documentation or tests +- Get a quick summary of what needs attention + +**Examples:** +``` +/project-status +``` + +**Related skills:** [`/update-catalog`](#update-catalog), [`/run-tests`](#run-tests) diff --git a/developers/cli/bactopia-atb-downloader.mdx b/developers/cli/bactopia-atb-downloader.mdx new file mode 100644 index 00000000..e4ff9bc9 --- /dev/null +++ b/developers/cli/bactopia-atb-downloader.mdx @@ -0,0 +1,48 @@ +--- +title: bactopia-atb-downloader +description: "Download All-the-Bacteria assemblies based on input query" +--- + +# bactopia-atb-downloader + +Download All-the-Bacteria assemblies based on input query + +## Usage + +```bash +bactopia-atb-downloader [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--query, -q` | STRING | | The species name, taxid, accession to query and download | + +## ATB Download Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--outdir, -o` | STRING | `./atb-assemblies` | Directory to download ATB assemblies to | +| `--atb-file-list-url, -a` | STRING | `https://osf.io/download/4yv85/` | The URL to the ATB file list | +| `--dry-run, -d` | BOOL | `false` | Do not download any files, just show what would be downloaded | +| `--progress, -p` | BOOL | `false` | Show download progress bar | +| `--cpus` | INT | `4` | The total number of cpus to use for downloading and compressing | +| `--uncompressed, -u` | BOOL | `false` | Do not compress the downloaded files | +| `--remove-archives, -r` | BOOL | `false` | Remove the downloaded tar.xz archives after extracting samples | + +## NCBI API Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--ncbi-api-key, -k` | STRING | | The API key to use 
for the NCBI API | +| `--chunk-size, -c` | INT | `200` | The size of the chunks to split the list into | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--force` | BOOL | `false` | Overwrite existing files | +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-atb-formatter.mdx b/developers/cli/bactopia-atb-formatter.mdx new file mode 100644 index 00000000..954d34c8 --- /dev/null +++ b/developers/cli/bactopia-atb-formatter.mdx @@ -0,0 +1,37 @@ +--- +title: bactopia-atb-formatter +description: "Restructure All-the-Bacteria assemblies to allow usage with Bactopia Tools" +--- + +# bactopia-atb-formatter + +Restructure All-the-Bacteria assemblies to allow usage with Bactopia Tools + +## Usage + +```bash +bactopia-atb-formatter [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--path, -p` | STRING | | Directory where ATB assemblies are stored | + +## Bactopia Directory Structure Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-dir, -b` | STRING | `bactopia` | The path you would like to place bactopia structure | +| `--publish-mode, -m` | CHOICE (symlink, copy) | `symlink` | Specifies how assemblies will be saved in the Bactopia directory | +| `--recursive, -r` | BOOL | `false` | Traverse recursively through provided path | +| `--extension, -e` | STRING | `.fa` | The extension of the FASTA files | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show 
the version and exit. | diff --git a/developers/cli/bactopia-bracken-to-excel.mdx b/developers/cli/bactopia-bracken-to-excel.mdx new file mode 100644 index 00000000..eeb93224 --- /dev/null +++ b/developers/cli/bactopia-bracken-to-excel.mdx @@ -0,0 +1,29 @@ +--- +title: bactopia-bracken-to-excel +description: "Write Bracken abundances to an Excel file." +--- + +# bactopia-bracken-to-excel + +Write Bracken abundances to an Excel file. + +## Usage + +```bash +bactopia-bracken-to-excel PREFIX BRACKEN_ABUNDANCES [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `PREFIX` | STRING | Yes | Prefix to use for output files. | +| `BRACKEN_ABUNDANCES` | STRING | Yes | The Bracken output with abundances. | + +## Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--version, -V` | BOOL | `false` | Show the version and exit. | +| `--limit` | INT | `5` | Limit the result to the top N rows. | +| `--include_unclassified` | BOOL | `false` | Include results for unclassified reads. | diff --git a/developers/cli/bactopia-catalog.mdx b/developers/cli/bactopia-catalog.mdx new file mode 100644 index 00000000..8eb2c2d9 --- /dev/null +++ b/developers/cli/bactopia-catalog.mdx @@ -0,0 +1,45 @@ +--- +title: bactopia-catalog +description: "Generate machine-readable catalog of all Bactopia components." +--- + +# bactopia-catalog + +Generate machine-readable catalog of all Bactopia components. + +Produces catalog.json containing workflows, subworkflows, and modules +with their contracts (takes/emits), dependencies, and metadata. +Replaces data/workflows.yml as the authoritative component index. + +Optionally also renders llms.txt from a Jinja2 template when +--llms-output is provided. 
+ + +## Usage + +```bash +bactopia-catalog [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | + +## Output Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `-o, --output` | STRING | | Output path for catalog.json (default: stdout) | +| `--pretty` | BOOL | `false` | Pretty-print JSON output | +| `--llms-output` | PATH | | Also render llms.txt to this path. Uses the bundled template at bactopia/templates/bactopia/llms.txt.j2 unless --llms-template is provided | +| `--llms-template` | PATH | | Jinja2 template for llms.txt. Defaults to the template bundled inside bactopia-py | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-check-assembly-accession.mdx b/developers/cli/bactopia-check-assembly-accession.mdx new file mode 100644 index 00000000..98e8a404 --- /dev/null +++ b/developers/cli/bactopia-check-assembly-accession.mdx @@ -0,0 +1,26 @@ +--- +title: bactopia-check-assembly-accession +description: "Verify NCBI Assembly accession is latest and still available." +--- + +# bactopia-check-assembly-accession + +Verify NCBI Assembly accession is latest and still available. + +## Usage + +```bash +bactopia-check-assembly-accession REFERENCE [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `REFERENCE` | STRING | Yes | The assembly accession to verify. 
| + +## Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-check-fastqs.mdx b/developers/cli/bactopia-check-fastqs.mdx new file mode 100644 index 00000000..9864c344 --- /dev/null +++ b/developers/cli/bactopia-check-fastqs.mdx @@ -0,0 +1,27 @@ +--- +title: bactopia-check-fastqs +description: "Verify input FASTQs meet minimum requirements." +--- + +# bactopia-check-fastqs + +Verify input FASTQs meet minimum requirements. + +## Usage + +```bash +bactopia-check-fastqs [OPTIONS] +``` + +## Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--version, -V` | BOOL | `false` | Show the version and exit. | +| `--sample` | STRING | | Name of the input sample. | +| `--fq1` | STRING | | Stats for SE or R1 FASTQ in JSON format. | +| `--fq2` | STRING | | Stats for R2 FASTQ in JSON format. | +| `--min_proportion` | FLOAT | `0.0` | Minimum basepair proportion for R1/R2. | +| `--min_reads` | INT | `0` | Minimum number of reads. | +| `--min_basepairs` | INT | `0` | Minimum number of sequenced basepairs. | +| `--runtype` | STRING | `illumina` | The input technology of the FASTQs. | diff --git a/developers/cli/bactopia-citations.mdx b/developers/cli/bactopia-citations.mdx new file mode 100644 index 00000000..1109c0e4 --- /dev/null +++ b/developers/cli/bactopia-citations.mdx @@ -0,0 +1,44 @@ +--- +title: bactopia-citations +description: "Print or validate citations used throughout Bactopia." +--- + +# bactopia-citations + +Print or validate citations used throughout Bactopia. + +Default mode prints the full citation list (or one entry with --name). +Pass --validate to instead scan the repo for orphan keys (defined but +never referenced) and workflow @citation keys that don't resolve to +an entry in citations.yml. 
Module and subworkflow @citation keys are +validated by bactopia-lint (rules M035 and S019). + + +## Usage + +```bash +bactopia-citations [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path, -b` | STRING | | Directory where Bactopia repository is stored | + +## Output Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--name, -n` | STRING | | Only print citation matching a given name | +| `--plain-text, -p` | BOOL | `false` | Disable rich formatting | +| `--validate` | BOOL | `false` | Validate citation integrity: orphan keys + workflow @citation references | +| `--json` | BOOL | `false` | Emit validation results as JSON (use with --validate) | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-cleanup-coverage.mdx b/developers/cli/bactopia-cleanup-coverage.mdx new file mode 100644 index 00000000..9f20de4f --- /dev/null +++ b/developers/cli/bactopia-cleanup-coverage.mdx @@ -0,0 +1,26 @@ +--- +title: bactopia-cleanup-coverage +description: "Reduce redundancy in per-base coverage from genomeCoverageBed output." +--- + +# bactopia-cleanup-coverage + +Reduce redundancy in per-base coverage from genomeCoverageBed output. + +## Usage + +```bash +bactopia-cleanup-coverage COVERAGE [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `COVERAGE` | STRING | Yes | The per-base coverage file from genomeCoverageBed. 
| + +## Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-datasets.mdx b/developers/cli/bactopia-datasets.mdx new file mode 100644 index 00000000..cc8c9023 --- /dev/null +++ b/developers/cli/bactopia-datasets.mdx @@ -0,0 +1,42 @@ +--- +title: bactopia-datasets +description: "Download optional datasets to supplement your analyses with Bactopia" +--- + +# bactopia-datasets + +Download optional datasets to supplement your analyses with Bactopia + +## Usage + +```bash +bactopia-datasets UNKNOWN [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `UNKNOWN` | UNPROCESSED | No | | + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | + +## Download Related Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--datasets_cache` | STRING | `~/.bactopia` | Base directory to download datasets to (Defaults to env variable BACTOPIA_CACHEDIR, a subfolder called datasets will be created) | +| `--force` | BOOL | `false` | Force overwrite of existing pre-built environments | +| `--max_retry` | INT | `3` | Maximum times to attempt creating Conda environment. (Default: 3) | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. 
| diff --git a/developers/cli/bactopia-docs.mdx b/developers/cli/bactopia-docs.mdx new file mode 100644 index 00000000..67f49cd9 --- /dev/null +++ b/developers/cli/bactopia-docs.mdx @@ -0,0 +1,61 @@ +--- +title: bactopia-docs +description: "Validate reference-doc staleness across a Bactopia repo." +--- + +# bactopia-docs + +Validate reference-doc staleness across a Bactopia repo. + +Two checks run against every .md file under **--docs-path**: + +**Deprecated patterns (D0xx)**: regex matches against +**--patterns-file** entries — phrases retired by past migrations +(e.g. ``flattenPaths``, the 4-channel emission framing). + +**Ground-truth assertions (D1xx)**: counts (D101-D103), Nextflow +version (D104), bactopia-py CLI references (D105), lint rule IDs +(D106), markdown link targets (D108). + +Suppress a rule on a single line with +an inline suppression marker naming the rule ID (or a comma-separated list). + +Exits 1 if any FAIL is found. + + +## Usage + +```bash +bactopia-docs [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path, -b` | STRING | | Directory where Bactopia repository is stored | + +## Validation Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--docs-path` | STRING | `.claude/docs` | Docs directory relative to --bactopia-path | +| `--patterns-file` | STRING | `data/docs-patterns.yml` | Deprecated-patterns YAML relative to --bactopia-path | +| `--bactopia-py-path` | PATH | | Path to bactopia-py repo (for D105 CLI / D106 lint-rule checks). 
Defaults to `<bactopia-path>/../bactopia-py` | +| `--skip-path-check` | BOOL | `false` | Skip D108 markdown-link target resolution | +| `--validate` | BOOL | `false` | Run validation (default action; flag is accepted for parity with bactopia-citations) | + +## Output Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--json` | BOOL | `false` | Emit results as JSON | +| `--plain-text, -p` | BOOL | `false` | Disable rich formatting | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-download.mdx b/developers/cli/bactopia-download.mdx new file mode 100644 index 00000000..dcae7054 --- /dev/null +++ b/developers/cli/bactopia-download.mdx @@ -0,0 +1,50 @@ +--- +title: bactopia-download +description: "Builds Bactopia environments for use with Nextflow." +--- + +# bactopia-download + +Builds Bactopia environments for use with Nextflow. 
+ +## Usage + +```bash +bactopia-download UNKNOWN [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `UNKNOWN` | UNPROCESSED | No | | + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | + +## Build Related Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--envtype` | CHOICE (conda, docker, singularity, all) | `conda` | The type of environment to build | +| `--wf` | STRING | `bactopia` | Build an environment for the given workflow | +| `--build-all` | BOOL | `false` | Builds all environments for Bactopia workflows | +| `--condadir` | STRING | `/home/rpetit3/.bactopia/conda` | Directory to create Conda environments (NXF_CONDA_CACHEDIR env variable takes precedence) | +| `--use-conda` | BOOL | `false` | Use Conda for building Conda environments instead of Mamba | +| `--registry` | STRING | `quay.io` | Registry to pull Docker containers from | +| `--singularity_cache` | STRING | `/home/rpetit3/.bactopia/singularity` | Directory to download Singularity images (NXF_SINGULARITY_CACHEDIR env variable takes precedence) | +| `--singularity_pull_docker_container` | BOOL | `false` | Force conversion of Docker containers, instead of downloading Singularity images directly | +| `--force_rebuild` | BOOL | `false` | Force overwrite of existing pre-built environments | +| `--max_retry` | INT | `3` | Maximum times to attempt creating Conda environment. 
(Default: 3) | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--dry-run` | BOOL | `false` | Show environments that would be built, without building them | +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-kraken-bracken-summary.mdx b/developers/cli/bactopia-kraken-bracken-summary.mdx new file mode 100644 index 00000000..16ee6385 --- /dev/null +++ b/developers/cli/bactopia-kraken-bracken-summary.mdx @@ -0,0 +1,30 @@ +--- +title: bactopia-kraken-bracken-summary +description: "Update the Bracken abundances with unclassified counts." +--- + +# bactopia-kraken-bracken-summary + +Update the Bracken abundances with unclassified counts. + +## Usage + +```bash +bactopia-kraken-bracken-summary PREFIX KRAKEN2_REPORT BRACKEN_REPORT BRACKEN_ABUNDANCES [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `PREFIX` | STRING | Yes | Prefix to use for output files. | +| `KRAKEN2_REPORT` | STRING | Yes | The Kraken2 report. | +| `BRACKEN_REPORT` | STRING | Yes | The Bracken updated Kraken2 report. | +| `BRACKEN_ABUNDANCES` | STRING | Yes | The Bracken output with abundances. | + +## Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--version, -V` | BOOL | `false` | Show the version and exit. | +| `--max_secondary_percent` | FLOAT | `0.01` | The maximum percent abundance for the secondary species, if exceeded, sample will remain unclassified. 
| diff --git a/developers/cli/bactopia-lint.mdx b/developers/cli/bactopia-lint.mdx new file mode 100644 index 00000000..16eee438 --- /dev/null +++ b/developers/cli/bactopia-lint.mdx @@ -0,0 +1,51 @@ +--- +title: bactopia-lint +description: "Lint Bactopia pipeline components against style guidelines." +--- + +# bactopia-lint + +Lint Bactopia pipeline components against style guidelines. + +Checks modules, subworkflows, and workflows for compliance with +Bactopia's GroovyDoc, structural, and configuration standards. + + +## Usage + +```bash +bactopia-lint [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | + +## Scope Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--modules, --no-modules` | BOOL | `true` | Lint modules (default: on) | +| `--subworkflows, --no-subworkflows` | BOOL | `true` | Lint subworkflows (default: on) | +| `--workflows, --no-workflows` | BOOL | `true` | Lint workflows (default: on) | +| `--module` | STRING | | Lint a single module by name (e.g. 'mlst', 'bakta/run') | +| `--subworkflow` | STRING | | Lint a single subworkflow by name (e.g. 'mlst') | +| `--workflow` | STRING | | Lint a single workflow by name (e.g. 
'mlst', 'bactopia-tools/mlst') | + +## Output Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `-q, --quiet` | BOOL | `false` | Only show components with warnings or failures | +| `--json` | BOOL | `false` | Output as JSON | +| `--pretty` | BOOL | `false` | Pretty-print JSON output (implies --json) | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-mask-consensus.mdx b/developers/cli/bactopia-mask-consensus.mdx new file mode 100644 index 00000000..2568d0bf --- /dev/null +++ b/developers/cli/bactopia-mask-consensus.mdx @@ -0,0 +1,31 @@ +--- +title: bactopia-mask-consensus +description: "Snippy consensus (subs) with coverage masking." +--- + +# bactopia-mask-consensus + +Snippy consensus (subs) with coverage masking. + +## Usage + +```bash +bactopia-mask-consensus SAMPLE REFERENCE FASTA VCF COVERAGE [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `SAMPLE` | STRING | Yes | Name of the input sample. | +| `REFERENCE` | STRING | Yes | The reference assembly accession. | +| `FASTA` | STRING | Yes | The consensus FASTA file. | +| `VCF` | STRING | Yes | The VCF file with called substitutions. | +| `COVERAGE` | STRING | Yes | The per-base coverage file. | + +## Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--version, -V` | BOOL | `false` | Show the version and exit. | +| `--mincov` | INT | `10` | Minimum required coverage to not mask. 
| diff --git a/developers/cli/bactopia-merge-schemas.mdx b/developers/cli/bactopia-merge-schemas.mdx new file mode 100644 index 00000000..9f409759 --- /dev/null +++ b/developers/cli/bactopia-merge-schemas.mdx @@ -0,0 +1,42 @@ +--- +title: bactopia-merge-schemas +description: "Builds a Nextflow Schema and/or Nextflow config for a given workflow." +--- + +# bactopia-merge-schemas + +Builds a Nextflow Schema and/or Nextflow config for a given workflow. + +## Usage + +```bash +bactopia-merge-schemas UNKNOWN [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `UNKNOWN` | UNPROCESSED | No | | + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | +| `--wf` | STRING | | The workflow to create a nextflow_schema.json for | + +## Output Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--outdir` | PATH | `.` | Directory to write output files to | +| `--force` | BOOL | `false` | Overwrite existing output files | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. 
| diff --git a/developers/cli/bactopia-prepare.mdx b/developers/cli/bactopia-prepare.mdx new file mode 100644 index 00000000..f6c48a24 --- /dev/null +++ b/developers/cli/bactopia-prepare.mdx @@ -0,0 +1,54 @@ +--- +title: bactopia-prepare +description: "Create a 'file of filenames' (FOFN) of samples to be processed by Bactopia" +--- + +# bactopia-prepare + +Create a 'file of filenames' (FOFN) of samples to be processed by Bactopia + +## Usage + +```bash +bactopia-prepare [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--path, -p` | STRING | | Directory where FASTQ files are stored | + +## Matching Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--assembly-ext, -a` | STRING | `.fna.gz` | Extension of the FASTA assemblies | +| `--fastq-ext, -f` | STRING | `.fastq.gz` | Extension of the FASTQs | +| `--fastq-separator` | STRING | `_` | Split FASTQ name on the last occurrence of the separator | +| `--pe1-pattern` | STRING | `[Aa]\|[Rr]1\|1` | Pattern designating the first set of paired-end reads | +| `--pe2-pattern` | STRING | `[Bb]\|[Rr]2\|2` | Pattern designating the second set of paired-end reads | +| `--merge` | BOOL | `false` | Flag samples with multiple read sets to be merged by Bactopia | +| `--ont` | BOOL | `false` | Single-end reads should be treated as Oxford Nanopore reads | +| `--hybrid` | BOOL | `false` | Samples with paired and single-end reads will be set to Illumina-first hybrid assembly (requires --ont) | +| `--short-polish` | BOOL | `false` | Samples with paired and single-end reads will be set to Nanopore-first hybrid assembly (requires --ont) | +| `--recursive, -r` | BOOL | `false` | Directories will be traversed recursively | +| `--prefix` | STRING | | Prefix to add to the path | + +## Sample Information Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--metadata` | STRING | | 
Metadata per sample with genome size and species information | +| `--genome-size, -gsize` | INT | `0` | Genome size to use for all samples | +| `--species, -s` | STRING | `UNKNOWN_SPECIES` | Species to use for all samples (If available, can be used to determine genome size) | +| `--taxid` | STRING | | Use the genome size of the Taxon ID for all samples | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--examples` | BOOL | `false` | Print example usage | +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-prune.mdx b/developers/cli/bactopia-prune.mdx new file mode 100644 index 00000000..9831d0b1 --- /dev/null +++ b/developers/cli/bactopia-prune.mdx @@ -0,0 +1,40 @@ +--- +title: bactopia-prune +description: "Removes stale Bactopia environments that no longer match current module versions." +--- + +# bactopia-prune + +Removes stale Bactopia environments that no longer match current module versions. 
+ +## Usage + +```bash +bactopia-prune [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | + +## Prune Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--envtype` | CHOICE (conda, docker, singularity, all) | `all` | The type of environment to check for stale items | +| `--wf` | STRING | | Only check environments for the given workflow (default: all workflows) | +| `--condadir` | STRING | `/home/rpetit3/.bactopia/conda` | Directory where Conda environments are stored (NXF_CONDA_CACHEDIR env variable takes precedence) | +| `--registry` | STRING | `quay.io` | Registry to match Docker containers against | +| `--singularity_cache` | STRING | `/home/rpetit3/.bactopia/singularity` | Directory where Singularity images are stored (NXF_SINGULARITY_CACHEDIR env variable takes precedence) | +| `--singularity_pull_docker_container` | BOOL | `false` | Use Docker-based naming for Singularity images | +| `--execute` | BOOL | `false` | Actually remove stale environments. Default is dry-run (report only) | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-pubmlst-build.mdx b/developers/cli/bactopia-pubmlst-build.mdx new file mode 100644 index 00000000..47c8e2b2 --- /dev/null +++ b/developers/cli/bactopia-pubmlst-build.mdx @@ -0,0 +1,45 @@ +--- +title: bactopia-pubmlst-build +description: "Build PubMLST databases for use with the 'mlst' Bactopia Tool." +--- + +# bactopia-pubmlst-build + +Build PubMLST databases for use with the 'mlst' Bactopia Tool. 
+ +## Usage + +```bash +bactopia-pubmlst-build [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--database, -d` | STRING | | A known organism database to download. (Use 'all' to download all databases.) | + +## Build Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--ignore` | STRING | `afumigatus,blastocystis,calbicans,cbotulinum,cglabrata,ckrusei,ctropicalis,csinensis,kseptempunctata,rmlst,sparasitica,test,tpallidum,tvaginalis` | A comma separated list of databases to ignore | +| `--skip-download` | BOOL | `false` | Skip downloading the database files | +| `--skip-blast` | BOOL | `false` | Skip building the BLAST database | +| `--force` | BOOL | `false` | Force overwrite of existing files | + +## API Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--site, -s` | CHOICE (pubmlst, pasteur) | `pubmlst` | Only print citation matching a given name | +| `--token-dir, -t` | STRING | `/home/rpetit3/.bactopia` | The directory where the token file is saved | +| `--out-dir, -o` | STRING | `./bactopia-mlst` | The directory where the database files will be saved | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. 
| diff --git a/developers/cli/bactopia-pubmlst-setup.mdx b/developers/cli/bactopia-pubmlst-setup.mdx new file mode 100644 index 00000000..a5ad256f --- /dev/null +++ b/developers/cli/bactopia-pubmlst-setup.mdx @@ -0,0 +1,38 @@ +--- +title: bactopia-pubmlst-setup +description: "One-time setup for interacting with the PubMLST API" +--- + +# bactopia-pubmlst-setup + +One-time setup for interacting with the PubMLST API + +## Usage + +```bash +bactopia-pubmlst-setup [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--client-id, -ci` | STRING | | The client ID for the site | +| `--client-secret, -cs` | STRING | | The client secret for the site | + +## API Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--site, -s` | CHOICE (pubmlst, pasteur) | `pubmlst` | Only print citation matching a given name | +| `--database, -d` | STRING | `pubmlst_yersinia_seqdef` | The organism database to interact with for setup. Note: the default is available from both PubMLST and Pasteur | +| `--save-dir, -sd` | STRING | `/home/rpetit3/.bactopia` | The directory to save the token | +| `--force` | BOOL | `false` | Force overwrite of existing token files | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-review-tests.mdx b/developers/cli/bactopia-review-tests.mdx new file mode 100644 index 00000000..52a9287d --- /dev/null +++ b/developers/cli/bactopia-review-tests.mdx @@ -0,0 +1,54 @@ +--- +title: bactopia-review-tests +description: "Review nf-test results with grouped error analysis and timing checks." 
+--- + +# bactopia-review-tests + +Review nf-test results with grouped error analysis and timing checks. + +Analyzes test run logs, classifies failures by error pattern, +and optionally checks durations against expected baselines. + + +## Usage + +```bash +bactopia-review-tests [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | + +## Run Selection + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--run` | STRING | | Specific test run timestamp (YYYYMMDD_HHMMSS). Default: latest | +| `--logs-dir` | STRING | | Directory containing test run logs. Default: `{bactopia-path}/logs` | + +## Timing Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--baselines` | STRING | | Path to test-times baseline JSON file. Default: `{bactopia-path}/conf/test-times.json` | +| `--tolerance` | FLOAT | `2.0` | Tolerance factor for timing anomaly detection | +| `--update-baselines` | BOOL | `false` | Write/update the baselines file from current run results | + +## Output Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--json` | BOOL | `false` | Output as JSON | +| `--pretty` | BOOL | `false` | Pretty-print JSON output (implies --json) | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. 
| diff --git a/developers/cli/bactopia-scaffold.mdx b/developers/cli/bactopia-scaffold.mdx new file mode 100644 index 00000000..bc22fb3b --- /dev/null +++ b/developers/cli/bactopia-scaffold.mdx @@ -0,0 +1,164 @@ +--- +title: bactopia-scaffold +description: "Scaffold Bactopia components from bioconda/conda-forge packages." +--- + +# bactopia-scaffold + +Scaffold Bactopia components from bioconda/conda-forge packages. + +## Usage + +```bash +bactopia-scaffold COMMAND [OPTIONS] +``` + +## Subcommands + +| Command | Description | +|---------|-------------| +| [`bactopia-scaffold lookup`](#lookup) | Look up package info from Anaconda and check for existing components. | +| [`bactopia-scaffold module`](#module) | Generate module files from a design config. | +| [`bactopia-scaffold subworkflow`](#subworkflow) | Generate subworkflow files from a design config. | +| [`bactopia-scaffold test-data`](#test-data) | Discover test data paths from existing module tests. | +| [`bactopia-scaffold tool`](#tool) | Generate all three tiers (module + subworkflow + workflow) for a bactopia-tool. | + +--- + +### lookup + +Look up package info from Anaconda and check for existing components. + +```bash +bactopia-scaffold lookup PACKAGE [OPTIONS] +``` + +#### Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `PACKAGE` | STRING | Yes | | + +#### Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | + +#### Query Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--channel` | STRING | | Force a specific channel (bioconda or conda-forge). Default: try bioconda first, then conda-forge | +| `--max-retry` | INT | `3` | Maximum times to attempt API queries. 
(Default: 3) | + +#### Output Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--json` | BOOL | `false` | Output flat JSON | +| `--pretty` | BOOL | `false` | Output pretty-printed JSON | + +#### Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | + +--- + +### module + +Generate module files from a design config. + +```bash +bactopia-scaffold module [OPTIONS] +``` + +#### Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--config` | PATH | | JSON design config file | +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | +| `--dry-run` | BOOL | `false` | Show what would be created without writing files | +| `--json` | BOOL | `false` | Output flat JSON | +| `--pretty` | BOOL | `false` | Output pretty-printed JSON | +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | + +--- + +### subworkflow + +Generate subworkflow files from a design config. + +```bash +bactopia-scaffold subworkflow [OPTIONS] +``` + +#### Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--config` | PATH | | JSON design config file | +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | +| `--dry-run` | BOOL | `false` | Show what would be created without writing files | +| `--json` | BOOL | `false` | Output flat JSON | +| `--pretty` | BOOL | `false` | Output pretty-printed JSON | +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | + +--- + +### test-data + +Discover test data paths from existing module tests. 
+ +```bash +bactopia-scaffold test-data [OPTIONS] +``` + +#### Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--input-type` | CHOICE (assembly, assembly_reads, genbank, gff, proteins, reads) | | Input type to search for in existing tests | +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | + +#### Output Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--json` | BOOL | `false` | Output flat JSON | +| `--pretty` | BOOL | `false` | Output pretty-printed JSON | + +#### Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | + +--- + +### tool + +Generate all three tiers (module + subworkflow + workflow) for a bactopia-tool. + +```bash +bactopia-scaffold tool [OPTIONS] +``` + +#### Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--config` | PATH | | JSON design config file | +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | +| `--dry-run` | BOOL | `false` | Show what would be created without writing files | +| `--json` | BOOL | `false` | Output flat JSON | +| `--pretty` | BOOL | `false` | Output pretty-printed JSON | +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | diff --git a/developers/cli/bactopia-scrubber-summary.mdx b/developers/cli/bactopia-scrubber-summary.mdx new file mode 100644 index 00000000..c890b181 --- /dev/null +++ b/developers/cli/bactopia-scrubber-summary.mdx @@ -0,0 +1,28 @@ +--- +title: bactopia-scrubber-summary +description: "Create a before-and-after report from human read scrubbing." 
+--- + +# bactopia-scrubber-summary + +Create a before-and-after report from human read scrubbing. + +## Usage + +```bash +bactopia-scrubber-summary SAMPLE ORIGINAL SCRUBBED [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `SAMPLE` | STRING | Yes | Name of the input sample. | +| `ORIGINAL` | STRING | Yes | Original FASTQ stats in JSON format. | +| `SCRUBBED` | STRING | Yes | Scrubbed FASTQ stats in JSON format. | + +## Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-search.mdx b/developers/cli/bactopia-search.mdx new file mode 100644 index 00000000..067f4cfb --- /dev/null +++ b/developers/cli/bactopia-search.mdx @@ -0,0 +1,51 @@ +--- +title: bactopia-search +description: "Query against ENA and SRA for public accessions to process with Bactopia" +--- + +# bactopia-search + +Query against ENA and SRA for public accessions to process with Bactopia + +## Usage + +```bash +bactopia-search [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--query, -q` | STRING | | Taxon ID or Study, BioSample, or Run accession (can also be comma separated or a file of accessions) | + +## Query Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--exact-taxon` | BOOL | `false` | Exclude Taxon ID descendants | +| `--limit, -l` | INT | `1000000` | Maximum number of results (per query) to return | +| `--accession-limit, -al` | INT | `5000` | Maximum number of accessions to query at once | +| `--biosample-subset` | INT | `0` | If a BioSample has multiple Experiments, maximum number to randomly select (0 = disabled) | +| `--include-empty` | BOOL | `false` | Include metadata columns that are empty for all rows | + +## Filtering Options + +| 
Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--min-base-count, -mbc` | INT | `0` | Filters samples based on minimum base pair count (0 = disabled) | +| `--min-read-length, -mrl` | INT | `0` | Filters samples based on minimum mean read length (0 = disabled) | +| `--min-coverage, -mc` | INT | `0` | Filters samples based on minimum coverage (requires --genome-size, 0 = disabled) | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--genome-size, -gsize` | INT | `0` | Genome size to be used for all samples, and for calculating min coverage | +| `--use-ncbi-genome-size` | BOOL | `false` | If available, use NCBI genome size for species | +| `--outdir, -o` | STRING | `./` | Directory to write output | +| `--prefix, -p` | STRING | `bactopia` | Prefix to use for output file names | +| `--force` | BOOL | `false` | Overwrite existing reports | +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-status.mdx b/developers/cli/bactopia-status.mdx new file mode 100644 index 00000000..3701a18a --- /dev/null +++ b/developers/cli/bactopia-status.mdx @@ -0,0 +1,39 @@ +--- +title: bactopia-status +description: "Show a snapshot of the Bactopia project state." +--- + +# bactopia-status + +Show a snapshot of the Bactopia project state. + +Reports component counts, GroovyDoc coverage, nf-test coverage, +missing required files, and structural issues. 
+ + +## Usage + +```bash +bactopia-status [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | + +## Output Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--json` | BOOL | `false` | Output as JSON | +| `--pretty` | BOOL | `false` | Pretty-print JSON output (implies --json) | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-summary.mdx b/developers/cli/bactopia-summary.mdx new file mode 100644 index 00000000..a763f3f5 --- /dev/null +++ b/developers/cli/bactopia-summary.mdx @@ -0,0 +1,60 @@ +--- +title: bactopia-summary +description: "Generate a summary table from the Bactopia results." +--- + +# bactopia-summary + +Generate a summary table from the Bactopia results. 
+ +## Usage + +```bash +bactopia-summary [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path, -b` | STRING | | Directory where Bactopia results are stored | + +## Gold Cutoffs + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--gold-coverage, -gcov` | INT | `100` | Minimum amount of coverage required for Gold status | +| `--gold-quality, -gqual` | INT | `30` | Minimum per-read mean quality score required for Gold status | +| `--gold-read-length, -glen` | INT | `95` | Minimum mean read length required for Gold status | +| `--gold-contigs, -gcontigs` | INT | `100` | Maximum contig count required for Gold status | + +## Silver Cutoffs + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--silver-coverage, -scov` | INT | `50` | Minimum amount of coverage required for Silver status | +| `--silver-quality, -squal` | INT | `20` | Minimum per-read mean quality score required for Silver status | +| `--silver-read-length, -slen` | INT | `75` | Minimum mean read length required for Silver status | +| `--silver-contigs, -scontigs` | INT | `200` | Maximum contig count required for Silver status | + +## Fail Cutoffs + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--min-coverage, -mincov` | INT | `20` | Minimum amount of coverage required to pass | +| `--min-quality, -minqual` | INT | `12` | Minimum per-read mean quality score required to pass | +| `--min-read-length, -minlen` | INT | `49` | Minimum mean read length required to pass | +| `--max-contigs` | INT | `500` | Maximum contig count required to pass | +| `--min-assembled-size` | INT | | Minimum assembled genome size | +| `--max-assembled-size` | INT | | Maximum assembled genome size | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| 
`--outdir, -o` | PATH | `./` | Directory to write output | +| `--prefix, -p` | STRING | `bactopia` | Prefix to use for output files | +| `--force` | BOOL | `false` | Overwrite existing reports | +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-sysinfo.mdx b/developers/cli/bactopia-sysinfo.mdx new file mode 100644 index 00000000..ce724513 --- /dev/null +++ b/developers/cli/bactopia-sysinfo.mdx @@ -0,0 +1,35 @@ +--- +title: bactopia-sysinfo +description: "Auto-detect host RAM and CPUs, emit Nextflow CLI fragments for local profiles." +--- + +# bactopia-sysinfo + +Auto-detect host RAM and CPUs, emit Nextflow CLI fragments for local profiles. + +Reads the bactopia wrapper's argv as a passthrough. Emits to stdout the +additional `--max_memory <N>.GB` / `--max_cpus <N>` flags that should be +appended to the `nextflow run` command line. Emits nothing when: + + - a custom config is supplied (`-c` or `--nfconfig`) + - any `-profile` value is not in the local-executor allow-list + - both `--max_memory` and `--max_cpus` are already set by the user + - the invocation is informational (`--help`, `--help_all`, `--list_wfs`) + + +## Usage + +```bash +bactopia-sysinfo ARGS [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `ARGS` | UNPROCESSED | No | | + +## Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| diff --git a/developers/cli/bactopia-test.mdx b/developers/cli/bactopia-test.mdx new file mode 100644 index 00000000..fa7daad0 --- /dev/null +++ b/developers/cli/bactopia-test.mdx @@ -0,0 +1,69 @@ +--- +title: bactopia-test +description: "Run nf-test suites for Bactopia components." +--- + +# bactopia-test + +Run nf-test suites for Bactopia components. 
+ +Discovers and executes nf-test files across modules, subworkflows, and +workflows. Results are classified by status and displayed as a summary +table. Per-test logs are saved to a logs/ directory. + + +## Usage + +```bash +bactopia-test [OPTIONS] +``` + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where the Bactopia repository is stored | +| `--test-data` | STRING | | Directory containing bactopia-tests data (sets BACTOPIA_TESTS env). Required unless --cleanup | + +## Cleanup + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--cleanup` | BOOL | `false` | Find and remove all .nf-test/ temp files, then exit (no tests run) | +| `--dry-run` | BOOL | `false` | With --cleanup, list what would be removed without deleting | +| `--keep` | BOOL | `false` | Keep .nf-test/ directories and logs after tests pass (useful for debugging) | + +## Test Selection + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--tier` | CHOICE (modules, subworkflows, workflows, all) | `all` | Which component tier to test | +| `--include` | STRING | | Comma-separated list of component names to include | +| `--exclude` | STRING | | Comma-separated list of component names to exclude | + +## Execution Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--profile` | CHOICE (docker, singularity, conda) | `docker` | Nextflow profile to use for tests | +| `--condadir` | STRING | `~/.bactopia/conda` | Directory where Conda environments are stored (NXF_CONDA_CACHEDIR env variable takes precedence) | +| `--singularity_cache` | STRING | `~/.bactopia/singularity` | Directory where Singularity images are stored (NXF_SINGULARITY_CACHEDIR env variable takes precedence) | +| `--generate` | BOOL | `false` | Generate mode: delete snapshots and run twice to 
verify reproducibility | +| `--jobs` | INT | `64` | Number of parallel test workers | +| `--fail-fast` | BOOL | `false` | Stop on the first test failure instead of continuing | +| `--timeout` | INT | `90` | Per-test timeout in minutes. Each nf-test subprocess is killed after this duration. Set to 0 to disable | + +## Output Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--outdir` | STRING | `.` | Directory to write the logs/ folder into | +| `--json` | BOOL | `false` | Output results as JSON | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-teton-prepare.mdx b/developers/cli/bactopia-teton-prepare.mdx new file mode 100644 index 00000000..74ab886a --- /dev/null +++ b/developers/cli/bactopia-teton-prepare.mdx @@ -0,0 +1,30 @@ +--- +title: bactopia-teton-prepare +description: "Prepare sample sheets for downstream analysis in the Teton workflow." +--- + +# bactopia-teton-prepare + +Prepare sample sheets for downstream analysis in the Teton workflow. + +## Usage + +```bash +bactopia-teton-prepare PREFIX SIZEMEUP RUN_TYPE FASTQS OUTDIR [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `PREFIX` | STRING | Yes | Prefix to use for output files. | +| `SIZEMEUP` | STRING | Yes | The SizeMeUp genome size estimate. | +| `RUN_TYPE` | STRING | Yes | The input run type (e.g. paired-end, single-end, ont). | +| `FASTQS` | STRING | Yes | Comma-separated list of FASTQ filenames. | +| `OUTDIR` | STRING | Yes | The output directory for results. 
| + +## Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/bactopia-update.mdx b/developers/cli/bactopia-update.mdx new file mode 100644 index 00000000..44c738e6 --- /dev/null +++ b/developers/cli/bactopia-update.mdx @@ -0,0 +1,48 @@ +--- +title: bactopia-update +description: "Check if modules used by Bactopia Tools have newer versions available" +--- + +# bactopia-update + +Check if modules used by Bactopia Tools have newer versions available + +## Usage + +```bash +bactopia-update UNKNOWN [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `UNKNOWN` | UNPROCESSED | No | | + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | + +## Module Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--module` | STRING | | Only check a specific module for updates (e.g. 'fastp') | +| `--max_retry` | INT | `3` | Maximum times to attempt API queries. (Default: 3) | + +## Output Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--json` | BOOL | `false` | Output flat JSON | +| `--pretty` | BOOL | `false` | Output pretty-printed JSON | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. 
| diff --git a/developers/cli/bactopia-workflows.mdx b/developers/cli/bactopia-workflows.mdx new file mode 100644 index 00000000..1f25fb29 --- /dev/null +++ b/developers/cli/bactopia-workflows.mdx @@ -0,0 +1,41 @@ +--- +title: bactopia-workflows +description: "Output the path to a Bactopia workflow main.nf file." +--- + +# bactopia-workflows + +Output the path to a Bactopia workflow main.nf file. + +## Usage + +```bash +bactopia-workflows UNKNOWN [OPTIONS] +``` + +## Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `UNKNOWN` | UNPROCESSED | No | | + +## Required Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--bactopia-path` | STRING | | Directory where Bactopia repository is stored | + +## Workflow Related Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--wf` | STRING | `bactopia` | Build an environment for the given workflow | +| `--list_wfs` | BOOL | `false` | List available Bactopia workflows and exit | + +## Additional Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--verbose` | BOOL | `false` | Print debug related text | +| `--silent` | BOOL | `false` | Only critical errors will be printed | +| `--version, -V` | BOOL | `false` | Show the version and exit. | diff --git a/developers/cli/index.mdx b/developers/cli/index.mdx new file mode 100644 index 00000000..8a5bc4ac --- /dev/null +++ b/developers/cli/index.mdx @@ -0,0 +1,54 @@ +--- +title: CLI Reference +description: Command-line reference for all bactopia-py CLI commands +slug: /cli +--- + +# CLI Reference + +Command-line reference for all [bactopia-py](https://github.com/bactopia/bactopia-py) CLI commands (v2.1.2). + +## User & Developer Commands + +Commands for preparing inputs, querying databases, and developing Bactopia components. 
+ +| Command | Description | +|---------|-------------| +| [`bactopia-atb-downloader`](/developers/cli/bactopia-atb-downloader) | Download All-the-Bacteria assemblies based on input query | +| [`bactopia-atb-formatter`](/developers/cli/bactopia-atb-formatter) | Restructure All-the-Bacteria assemblies to allow usage with Bactopia Tools | +| [`bactopia-catalog`](/developers/cli/bactopia-catalog) | Generate machine-readable catalog of all Bactopia components. | +| [`bactopia-citations`](/developers/cli/bactopia-citations) | Print or validate citations used throughout Bactopia. | +| [`bactopia-datasets`](/developers/cli/bactopia-datasets) | Download optional datasets to supplement your analyses with Bactopia | +| [`bactopia-docs`](/developers/cli/bactopia-docs) | Validate reference-doc staleness across a Bactopia repo. | +| [`bactopia-download`](/developers/cli/bactopia-download) | Builds Bactopia environments for use with Nextflow. | +| [`bactopia-lint`](/developers/cli/bactopia-lint) | Lint Bactopia pipeline components against style guidelines. | +| [`bactopia-merge-schemas`](/developers/cli/bactopia-merge-schemas) | Builds a Nextflow Schema and/or Nextflow config for a given workflow. | +| [`bactopia-prepare`](/developers/cli/bactopia-prepare) | Create a 'file of filenames' (FOFN) of samples to be processed by Bactopia | +| [`bactopia-prune`](/developers/cli/bactopia-prune) | Removes stale Bactopia environments that no longer match current module versions. | +| [`bactopia-pubmlst-build`](/developers/cli/bactopia-pubmlst-build) | Build PubMLST databases for use with the 'mlst' Bactopia Tool. | +| [`bactopia-pubmlst-setup`](/developers/cli/bactopia-pubmlst-setup) | One-time setup for interacting with the PubMLST API | +| [`bactopia-review-tests`](/developers/cli/bactopia-review-tests) | Review nf-test results with grouped error analysis and timing checks. 
| +| [`bactopia-scaffold`](/developers/cli/bactopia-scaffold) | Scaffold Bactopia components from bioconda/conda-forge packages. | +| [`bactopia-search`](/developers/cli/bactopia-search) | Query against ENA and SRA for public accessions to process with Bactopia | +| [`bactopia-status`](/developers/cli/bactopia-status) | Show a snapshot of the Bactopia project state. | +| [`bactopia-summary`](/developers/cli/bactopia-summary) | Generate a summary table from the Bactopia results. | +| [`bactopia-sysinfo`](/developers/cli/bactopia-sysinfo) | Auto-detect host RAM and CPUs, emit Nextflow CLI fragments for local profiles. | +| [`bactopia-test`](/developers/cli/bactopia-test) | Run nf-test suites for Bactopia components. | +| [`bactopia-update`](/developers/cli/bactopia-update) | Check if modules used by Bactopia Tools have newer versions available | +| [`bactopia-workflows`](/developers/cli/bactopia-workflows) | Output the path to a Bactopia workflow main.nf file. | + +## Pipeline Utility Scripts + +Internal scripts called by Nextflow modules during pipeline execution. + +| Command | Description | +|---------|-------------| +| [`bactopia-bracken-to-excel`](/developers/cli/bactopia-bracken-to-excel) | Write Bracken abundances to an Excel file. | +| [`bactopia-check-assembly-accession`](/developers/cli/bactopia-check-assembly-accession) | Verify NCBI Assembly accession is latest and still available. | +| [`bactopia-check-fastqs`](/developers/cli/bactopia-check-fastqs) | Verify input FASTQs meet minimum requirements. | +| [`bactopia-cleanup-coverage`](/developers/cli/bactopia-cleanup-coverage) | Reduce redundancy in per-base coverage from genomeCoverageBed output. | +| [`bactopia-kraken-bracken-summary`](/developers/cli/bactopia-kraken-bracken-summary) | Update the Bracken abundances with unclassified counts. | +| [`bactopia-mask-consensus`](/developers/cli/bactopia-mask-consensus) | Snippy consensus (subs) with coverage masking. 
| +| [`bactopia-scrubber-summary`](/developers/cli/bactopia-scrubber-summary) | Create a before-and-after report from human read scrubbing. | +| [`bactopia-teton-prepare`](/developers/cli/bactopia-teton-prepare) | Prepare sample sheets for downstream analysis in the Teton workflow. | + diff --git a/developers/index.mdx b/developers/index.mdx new file mode 100644 index 00000000..ba81cdf4 --- /dev/null +++ b/developers/index.mdx @@ -0,0 +1,45 @@ +--- +title: Developers +description: Bactopia developer reference for CLI commands, subworkflows, and modules +slug: / +--- + +# Developers + +This section provides detailed reference documentation for the individual +components that make up Bactopia workflows. + +## CLI Reference + +Full command-line reference for all Bactopia commands and their options. + +[Browse CLI Reference](/developers/cli) + +## nf-bactopia Plugin + +Developer reference for the nf-bactopia Nextflow plugin -- utility functions +and operators used across Bactopia workflows. + +[Browse nf-bactopia docs](/developers/nf-bactopia) + +## AI Skills + +Automation skills that orchestrate Bactopia CLI commands through AI-assisted +coding tools for scaffolding, maintenance, review, and testing tasks. +There are 12 skills available. + +[Browse AI Skills](/developers/ai-skills) + +## Subworkflows + +Subworkflows orchestrate multiple modules into reusable analysis units. +There are 86 subworkflows available. + +[Browse all subworkflows](/developers/subworkflows) + +## Modules + +Modules are individual processes that perform specific analysis tasks. +There are 97 modules available. + +[Browse all modules](/developers/modules) diff --git a/developers/modules/abricate_run.mdx b/developers/modules/abricate_run.mdx new file mode 100644 index 00000000..a8c11121 --- /dev/null +++ b/developers/modules/abricate_run.mdx @@ -0,0 +1,105 @@ +--- +title: abricate_run +description: "Mass screening of contigs for antimicrobial and virulence genes." 
+tags: + - bacteria + - assembly + - fasta + - antimicrobial-resistance + - virulence + - plasmid + - mobile-genetic-elements + - sample-scope +--- + +# abricate_run + +**Tags:** bacteria assembly fasta antimicrobial-resistance virulence plasmid mobile-genetic-elements sample-scope + +Mass screening of contigs for antimicrobial and virulence genes. + +Screens assemblies for antimicrobial resistance and virulence genes using +[Abricate](https://github.com/tseemann/abricate). It bundles several databases +including NCBI, CARD, ResFinder, PlasmidFinder, ARG-ANNOT, and VFDB. + +:::note[Database Included] +Abricate bundles multiple databases including NCBI, CARD, ResFinder, PlasmidFinder, +ARG-ANNOT, and VFDB. +::: + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | A tab-delimited report of hits, for full details please see [Abricate - Output](https://github.com/tseemann/abricate#output) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
`.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Abricate Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--abricate_db` | string | `ncbi` | Database to use | +| `--abricate_minid` | integer | `80` | Minimum DNA percent identity | +| `--abricate_mincov` | integer | `80` | Minimum DNA percent coverage | + +## Used By + +### Subworkflows + +- [abricate](/developers/subworkflows/abricate) - Mass screening of contigs for antimicrobial and virulence genes. + +### Workflows + +- [abricate](/bactopia-tools/abricate) - Mass screening of contigs for antimicrobial resistance and virulence genes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Abricate](https://github.com/tseemann/abricate) + Seemann T [Abricate: mass screening of contigs for antimicrobial and virulence genes](https://github.com/tseemann/abricate) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/abricate/run) + +## Version + +```yaml +ABRICATE_RUN: + - abricate: 1.4.0 +``` diff --git a/developers/modules/abricate_summary.mdx b/developers/modules/abricate_summary.mdx new file mode 100644 index 00000000..7283aa91 --- /dev/null +++ b/developers/modules/abricate_summary.mdx @@ -0,0 +1,87 @@ +--- +title: abricate_summary +description: "Summarize Abricate screening results." +tags: + - bacteria + - tab-delimited + - antimicrobial-resistance + - run-scope +--- + +# abricate_summary + +**Tags:** bacteria tab-delimited antimicrobial-resistance run-scope + +Summarize Abricate screening results. 
+ +Uses [Abricate](https://github.com/tseemann/abricate) to aggregate the +per-sample screening reports into a single tab-delimited summary file. + +## Inputs + +``` +record ( + meta: Record, + reports: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing aggregation information | +| `reports` | `Set` | A collection of Abricate report files from multiple samples | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Aggregated tab-delimited summary of Abricate results from all samples | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [abricate](/developers/subworkflows/abricate) - Mass screening of contigs for antimicrobial and virulence genes. + +### Workflows + +- [abricate](/bactopia-tools/abricate) - Mass screening of contigs for antimicrobial resistance and virulence genes. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Abricate](https://github.com/tseemann/abricate) + Seemann T [Abricate: mass screening of contigs for antimicrobial and virulence genes](https://github.com/tseemann/abricate) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/abricate/summary) + +## Version + +```yaml +ABRICATE_SUMMARY: + - abricate: 1.4.0 +``` diff --git a/developers/modules/abritamr_run.mdx b/developers/modules/abritamr_run.mdx new file mode 100644 index 00000000..984fbf07 --- /dev/null +++ b/developers/modules/abritamr_run.mdx @@ -0,0 +1,107 @@ +--- +title: abritamr_run +description: "Detect antimicrobial resistance and virulence genes." +tags: + - bacteria + - assembly + - fasta + - antimicrobial-resistance + - nata + - amrfinderplus + - sample-scope +--- + +# abritamr_run + +**Tags:** bacteria assembly fasta antimicrobial-resistance nata amrfinderplus sample-scope + +Detect antimicrobial resistance and virulence genes. + +Uses [abriTAMR](https://github.com/MDU-PHL/abritamr), a NATA (National Association of +Testing Authorities) accredited pipeline, to report the presence of reportable AMR +genes. It acts as a wrapper for AMRFinderPlus, formatted for clinical reporting standards +used in Victoria, Australia. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + summary: Path?, + matches: Path, + partials: Path, + virulence: Path, + amrfinder: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `summary` | `Path?` | Tab-delimited NATA-accredited AMR report summary | +| `matches` | `Path` | Tab-delimited list of matched AMR genes | +| `partials` | `Path` | Tab-delimited list of partially matched AMR genes | +| `virulence` | `Path` | Tab-delimited list of detected virulence genes | +| `amrfinder` | `Path` | Raw AMRFinderPlus output | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### abriTAMR Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--abritamr_species` | string | | Use species specific point mutations, must provide a valid species | +| `--abritamr_identity` | integer | | Minimum identity of matches with amrfinder (0 - 1.0), defaults to amrfinder preset | + +## Used By + +### Subworkflows + +- [abritamr](/developers/subworkflows/abritamr) - Identify antimicrobial resistance genes using AMRFinderPlus. + +### Workflows + +- [abritamr](/bactopia-tools/abritamr) - A NATA accredited tool for reporting the presence of antimicrobial resistance genes. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [abriTAMR](https://github.com/MDU-PHL/abritamr) + Sherry NL, Horan KA, Ballard SA, Gonçalves da Silva A, Gorrie CL, Schultz MB, Stevens K, Valcanis M, Sait ML, Stinear TP, Howden BP, and Seemann T [An ISO-certified genomics workflow for identification and surveillance of antimicrobial resistance.](https://doi.org/10.1038/s41467-022-35713-4) _Nature Communications_, 14(1), 60. (2023) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/abritamr/run) + +## Version + +```yaml +ABRITAMR_RUN: + - abritamr: 1.2.0 +``` diff --git a/developers/modules/agrvate.mdx b/developers/modules/agrvate.mdx new file mode 100644 index 00000000..07e646fb --- /dev/null +++ b/developers/modules/agrvate.mdx @@ -0,0 +1,98 @@ +--- +title: agrvate +description: "Determine the agr locus type and operon variants in Staphylococcus aureus." +tags: + - bacteria + - assembly + - fasta + - typing + - virulence + - staphylococcus + - aureus + - agr + - sample-scope +--- + +# agrvate + +**Tags:** bacteria assembly fasta typing virulence staphylococcus aureus agr sample-scope + +Determine the agr locus type and operon variants in Staphylococcus aureus. + +Uses [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) to type the accessory gene +regulator (agr) locus, a quorum sensing system critical for *Staphylococcus aureus* virulence. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled Staphylococcus aureus contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited summary of agr locus type and operon variants | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### AgrVATE Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--agrvate_typing_only` | boolean | `false` | agr typing only. Skips agr operon extraction and frameshift detection | + +## Used By + +### Subworkflows + +- [agrvate](/developers/subworkflows/agrvate) - Identify Staphylococcus aureus agr locus type and operon variants. + +### Workflows + +- [agrvate](/bactopia-tools/agrvate) - Rapid identification of Staphylococcus aureus agr locus type and agr operon variants. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) + Raghuram V. 
[AgrVATE: Rapid identification of Staphylococcus aureus agr locus type and agr operon variants.](https://github.com/VishnuRaghuram94/AgrVATE) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/agrvate) + +## Version + +```yaml +AGRVATE: + - agrvate: 1.0.2 +``` diff --git a/developers/modules/amrfinderplus_run.mdx b/developers/modules/amrfinderplus_run.mdx new file mode 100644 index 00000000..3b5312dd --- /dev/null +++ b/developers/modules/amrfinderplus_run.mdx @@ -0,0 +1,119 @@ +--- +title: amrfinderplus_run +description: "Identify antimicrobial resistance and virulence genes in gene or protein sequences." +tags: + - bacteria + - fasta + - antimicrobial-resistance + - virulence + - ncbi + - amr + - genes + - proteins + - sample-scope +--- + +# amrfinderplus_run + +**Tags:** bacteria fasta antimicrobial-resistance virulence ncbi amr genes proteins sample-scope + +Identify antimicrobial resistance and virulence genes in gene or protein sequences. + +Uses [AMRFinder+](https://github.com/ncbi/amr) to screen nucleotide or protein +sequences against NCBI's [Reference Gene Database](https://www.ncbi.nlm.nih.gov/pathogens/isolates#/refgene/). +It identifies AMR genes, resistance-associated point mutations, and select other classes of +genes using protein annotations and/or assembled nucleotide sequences. 
+ +:::note[Requires external database to be available] +::: + +## Inputs + +``` +record ( + meta: Record, + fna: Path, + faa: Path, + gff: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Nucleotide sequences of genes in FASTA format | +| `faa` | `Path` | Optional amino acid sequences of proteins in FASTA format | +| `gff` | `Path` | Optional genome annotation in GFF3 format | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | A compressed tarball of the AMRFinderPlus database to query | + +## Outputs + +``` +record ( + meta: Record, + report: Path, + mutation_report: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `report` | `Path` | A tab-delimited report of identified AMR genes and virulence factors | +| `mutation_report` | `Path?` | Organism-specific point mutations associated with antimicrobial resistance | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### AMRFinder+ Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--amrfinderplus_opts` | string | | Extra AMRFinder+ options in quotes. | + +## Used By + +### Subworkflows + +- [amrfinderplus](/developers/subworkflows/amrfinderplus) - Find antimicrobial resistance genes and point mutations. + +### Workflows + +- [amrfinderplus](/bactopia-tools/amrfinderplus) - Bactopia Tool: Amrfinderplus. 
+- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [AMRFinderPlus](https://github.com/ncbi/amr) + Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. Agents Chemother._ (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/amrfinderplus/run) + +## Version + +```yaml +AMRFINDERPLUS_RUN: + - ncbi-amrfinderplus: 4.2.7 +``` diff --git a/developers/modules/amrfinderplus_update.mdx b/developers/modules/amrfinderplus_update.mdx new file mode 100644 index 00000000..59062481 --- /dev/null +++ b/developers/modules/amrfinderplus_update.mdx @@ -0,0 +1,63 @@ +--- +title: amrfinderplus_update +description: "Download and index the latest AMRFinder+ database." +tags: + - bacteria + - database + - antimicrobial-resistance + - update + - download + - ncbi + - sample-scope +--- + +# amrfinderplus_update + +**Tags:** bacteria database antimicrobial-resistance update download ncbi sample-scope + +Download and index the latest AMRFinder+ database. + +Fetches the most recent [AMRFinder+](https://github.com/ncbi/amr) databases from NCBI, +indexes them, and packages them into a tarball. 
+ +:::note[Internal Maintenance] +This process is primarily used internally by Bactopia to build and update the +built-in datasets. +::: + +## Outputs + +``` +record ( + db: Path, + logs: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `db` | `Path` | A compressed tarball of the latest AMRFinder+ database | +| `logs` | `Set` | Optional program specific log files | + +## Parameters + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [AMRFinderPlus](https://github.com/ncbi/amr) + Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. Agents Chemother._ (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/amrfinderplus/update) + +## Version + +```yaml +AMRFINDERPLUS_UPDATE: + - ncbi-amrfinderplus: 4.2.7 +``` diff --git a/developers/modules/ariba_getref.mdx b/developers/modules/ariba_getref.mdx new file mode 100644 index 00000000..a367d08d --- /dev/null +++ b/developers/modules/ariba_getref.mdx @@ -0,0 +1,93 @@ +--- +title: ariba_getref +description: "Download and prepare reference databases for ARIBA analysis." 
+tags: + - bacteria + - database + - download + - antimicrobial-resistance + - virulence + - ariba + - setup + - run-scope +--- + +# ariba_getref + +**Tags:** bacteria database download antimicrobial-resistance virulence ariba setup run-scope + +Download and prepare reference databases for ARIBA analysis. + +Uses [ARIBA](https://github.com/sanger-pathogens/ariba) to fetch curated reference databases +(e.g., CARD, ResFinder, VFDB, PlasmidFinder) and prepare them for local assembly-based +gene detection. The database is indexed and packaged into a tarball for use with `ariba run`. + +:::note[Internet Required] +This process requires an active internet connection to fetch the specified database. +::: + +## Inputs + +``` +db_name: String +``` + +| Name | Type | Description | +|------|------|-------------| +| `db_name` | `String` | Name of the database to download (e.g., 'card', 'resfinder', 'vfdb_core', 'plasmidfinder') | + +## Outputs + +``` +record ( + db: Path, + logs: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `db` | `Path` | A compressed tarball containing the prepared ARIBA database | +| `logs` | `Set` | Optional program specific log files | + +## Parameters + +## Used By + +### Subworkflows + +- [ariba](/developers/subworkflows/ariba) - Rapidly identify genes by creating local assemblies from paired-end reads. + +### Workflows + +- [ariba](/bactopia-tools/ariba) - Gene identification through local assemblies. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Ariba](https://github.com/sanger-pathogens/ariba) + Hunt M, Mather AE, Sánchez-Busó L, Page AJ, Parkhill J, Keane JA, Harris SR [ARIBA: rapid antimicrobial resistance genotyping directly from sequencing reads](http://dx.doi.org/10.1099/mgen.0.000131). _Microb Genom_ 3, e000131 (2017) + +- [MEGARes](https://megares.meglab.org/) + Lakin SM, Dean C, Noyes NR, Dettenwanger A, Ross AS, Doster E, Rovira P, Abdo Z, Jones KL, Ruiz J, Belk KE, Morley PS, Boucher C [MEGARes: an antimicrobial resistance database for high throughput sequencing.](https://doi.org/10.1093/nar/gkw1009) _Nucleic Acids Res._ 45, D574-D580 (2017) + +- [SRST2](https://github.com/katholt/srst2) + Inouye M, Dashnow H, Raven L-A, Schultz MB, Pope BJ, Tomita T, Zobel J, Holt KE [SRST2: Rapid genomic surveillance for public health and hospital microbiology labs.](https://doi.org/10.1186/s13073-014-0090-6) _Genome Med._ 6, 90 (2014) + +- [VirulenceFinder](https://cge.food.dtu.dk/services/VirulenceFinder/) + Joensen KG, Scheutz F, Lund O, Hasman H, Kaas RS, Nielsen EM, Aarestrup FM [Real-time whole-genome sequencing for routine typing, surveillance, and outbreak detection of verotoxigenic _Escherichia coli_.](https://doi.org/10.1128/jcm.03617-13) _J. Clin. Microbiol._ 52, 1501-1510 (2014) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/ariba/getref) + +## Version + +```yaml +ARIBA_GETREF: + - ariba: 2.14.7 +``` diff --git a/developers/modules/ariba_run.mdx b/developers/modules/ariba_run.mdx new file mode 100644 index 00000000..409d2f96 --- /dev/null +++ b/developers/modules/ariba_run.mdx @@ -0,0 +1,113 @@ +--- +title: ariba_run +description: "Identify genes by local assembly of reads." 
+tags: + - fastq + - local-assembly + - antimicrobial-resistance + - virulence + - ariba + - sample-scope +--- + +# ariba_run + +**Tags:** fastq local-assembly antimicrobial-resistance virulence ariba sample-scope + +Identify genes by local assembly of reads. + +Uses [ARIBA](https://github.com/sanger-pathogens/ariba) (Antimicrobial Resistance Identification +By Assembly) to detect AMR and virulence genes by creating local assemblies from paired-end reads. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2) where each read slot is Path + +## Inputs + +``` +record ( + meta: Record, + r1: Path, + r2: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path` | Illumina R1 reads (paired-end) | +| `r2` | `Path` | Illumina R2 reads (paired-end) | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | An [ARIBA](https://github.com/sanger-pathogens/ariba) prepared database | + +## Outputs + +``` +record ( + meta: Record, + report: Path, + summary: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `report` | `Path` | Tab-delimited detailed report of gene detection results | +| `summary` | `Path` | Comma-separated condensed summary of detected genes | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Ariba Run Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--ariba_db` | string | | A database to query, if unavailable it will be downloaded to the path given by --datasets_cache (choices: `argannot`, `card`, `ncbi`, `megares`, `plasmidfinder`, `resfinder`, `srst2_argannot`, `vfdb_core`, `vfdb_full`, `virulencefinder`) | +| `--ariba_nucmer_min_id` | integer | `90` | Minimum alignment identity (delta-filter -i) | +| `--ariba_nucmer_min_len` | integer | `20` | Minimum alignment length (delta-filter -l) | +| `--ariba_assembly_cov` | integer | `50` | Target read coverage when sampling reads for assembly | + +## Used By + +### Subworkflows + +- [ariba](/developers/subworkflows/ariba) - Rapidly identify genes by creating local assemblies from paired-end reads. + +### Workflows + +- [ariba](/bactopia-tools/ariba) - Gene identification through local assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Ariba](https://github.com/sanger-pathogens/ariba) + Hunt M, Mather AE, Sánchez-Busó L, Page AJ, Parkhill J, Keane JA, Harris SR [ARIBA: rapid antimicrobial resistance genotyping directly from sequencing reads](http://dx.doi.org/10.1099/mgen.0.000131).
_Microb Genom_ 3, e000131 (2017) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/ariba/run) + +## Version + +```yaml +ARIBA_RUN: + - ariba: 2.14.7 +``` diff --git a/developers/modules/bactopia_assembler.mdx b/developers/modules/bactopia_assembler.mdx new file mode 100644 index 00000000..fe5c978a --- /dev/null +++ b/developers/modules/bactopia_assembler.mdx @@ -0,0 +1,191 @@ +--- +title: bactopia_assembler +description: "Assemble bacterial genomes using short read, long read, or hybrid strategies." +tags: + - bacteria + - assembly + - hybrid + - shovill + - dragonflye + - unicycler + - illumina + - nanopore + - sample-scope +--- + +# bactopia_assembler + +**Tags:** bacteria assembly hybrid shovill dragonflye unicycler illumina nanopore sample-scope + +Assemble bacterial genomes using short read, long read, or hybrid strategies. + +Automatically selects the appropriate assembler based on input read types: +- **Short Paired-End Reads:** Uses [Shovill](https://github.com/tseemann/shovill) (SKESA/SPAdes wrapper). +- **Short Single-End Reads:** Uses [Shovill-SE](https://github.com/rpetit3/shovill) (SKESA/SPAdes wrapper). +- **Long Reads:** Uses [Dragonflye](https://github.com/rpetit3/dragonflye) (Flye/Miniasm wrapper). +- **Hybrid:** Uses [Unicycler](https://github.com/rrwick/Unicycler) or Dragonflye (with polishing). + +Summary statistics for each assembly are generated using [assembly-scan](https://github.com/rpetit3/assembly-scan). + +Uses named record input with explicit read slots (r1, r2, se, lr, fna) as Path?. + +:::note[When runtype is 'assembly' or 'assembly_accession' and --reassemble is not set,] +the original assembly is used without re-assembly. +::: + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path?, + fna: Path?
+) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT/PacBio) for long-read or hybrid assembly | +| `fna` | `Path?` | Assembly file (FASTA) for assembly-based runtypes | + +## Outputs + +``` +record ( + meta: Record, + fna: Path?, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path?, + tsv: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `fna` | `Path?` | Assembled contigs in FASTA format | +| `r1` | `Path?` | Passthrough Illumina R1 reads | +| `r2` | `Path?` | Passthrough Illumina R2 reads | +| `se` | `Path?` | Passthrough single-end reads | +| `lr` | `Path?` | Passthrough long reads | +| `tsv` | `Path?` | Tab-delimited report of assembly statistics (N50, length, coverage) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [bactopia_assembler](/developers/subworkflows/bactopia_assembler) - Assemble bacterial genomes using automated assembler selection. + +### Workflows + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [any2fasta](https://github.com/tseemann/any2fasta) + Seemann T [any2fasta: Convert various sequence formats to FASTA](https://github.com/tseemann/any2fasta) (GitHub) + +- [assembly-scan](https://github.com/rpetit3/assembly-scan) + Petit III RA [assembly-scan: generate basic stats for an assembly](https://github.com/rpetit3/assembly-scan) (GitHub) + +- [BWA](https://github.com/lh3/bwa/) + Li H [Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM](http://arxiv.org/abs/1303.3997). _arXiv_ [q-bio.GN] (2013) + +- [Dragonflye](https://github.com/rpetit3/dragonflye) + Petit III RA [Dragonflye: Assemble bacterial isolate genomes from Nanopore reads.](https://github.com/rpetit3/dragonflye) (GitHub) + +- [FLASH](https://ccb.jhu.edu/software/FLASH/) + Magoč T, Salzberg SL [FLASH: fast length adjustment of short reads to improve genome assemblies.](https://doi.org/10.1093/bioinformatics/btr507) _Bioinformatics_ 27.21 2957-2963 (2011) + +- [Flye](https://github.com/fenderglass/Flye) + Kolmogorov M, Yuan J, Lin Y, Pevzner P [Assembly of Long Error-Prone Reads Using Repeat Graphs](https://doi.org/10.1038/s41587-019-0072-8) _Nature Biotechnology_ (2019) + +- [Medaka](https://github.com/nanoporetech/medaka) + ONT Research [Medaka: Sequence correction provided by ONT Research](https://github.com/nanoporetech/medaka) (GitHub) + +- [MEGAHIT](https://github.com/voutcn/megahit) + Li D, Liu C-M, Luo R, Sadakane K, Lam T-W [MEGAHIT: an ultra-fast single-node solution for large and complex metagenomics assembly via succinct de Bruijn graph.](https://doi.org/10.1093/bioinformatics/btv033) _Bioinformatics_ 31.10 1674-1676 (2015) + +- [Miniasm](https://github.com/lh3/miniasm) + Li H [Miniasm: Ultrafast de novo assembly for long noisy 
reads](https://github.com/lh3/miniasm) (GitHub) + +- [Minimap2](https://github.com/lh3/minimap2) + Li H [Minimap2: pairwise alignment for nucleotide sequences.](https://doi.org/10.1093/bioinformatics/bty191) _Bioinformatics_ 34:3094-3100 (2018) + +- [Nanoq](https://github.com/esteinig/nanoq) + Steinig E [Nanoq: Minimal but speedy quality control for nanopore reads in Rust](https://github.com/esteinig/nanoq) (GitHub) + +- [Pigz](https://zlib.net/pigz/) + Adler M. [pigz: A parallel implementation of gzip for modern multi-processor, multi-core machines.](https://zlib.net/pigz/) _Jet Propulsion Laboratory_ (2015) + +- [Pilon](https://github.com/broadinstitute/pilon/) + Walker BJ, Abeel T, Shea T, Priest M, Abouelliel A, Sakthikumar S, Cuomo CA, Zeng Q, Wortman J, Young SK, Earl AM [Pilon: an integrated tool for comprehensive microbial variant detection and genome assembly improvement.](https://doi.org/10.1371/journal.pone.0112963) _PloS one_ 9.11 e112963 (2014) + +- [Racon](https://github.com/lbcb-sci/racon) + Vaser R, Sović I, Nagarajan N, Šikić M [Fast and accurate de novo genome assembly from long uncorrected reads.](http://dx.doi.org/10.1101/gr.214270.116) _Genome Res_ 27, 737-746 (2017) + +- [Rasusa](https://github.com/mbhall88/rasusa) + Hall MB [Rasusa: Randomly subsample sequencing reads to a specified coverage.](https://doi.org/10.5281/zenodo.3731394) (2019). 
+ +- [Raven](https://github.com/lbcb-sci/raven) + Vaser R, Šikić M [Time- and memory-efficient genome assembly with Raven.](https://doi.org/10.1038/s43588-021-00073-4) _Nat Comput Sci_ 1, 332-336 (2021) + +- [samclip](https://github.com/tseemann/samclip) + Seemann T [Samclip: Filter SAM file for soft and hard clipped alignments](https://github.com/tseemann/samclip) (GitHub) + +- [Samtools](https://github.com/samtools/samtools) + Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R [The Sequence Alignment/Map format and SAMtools](http://dx.doi.org/10.1093/bioinformatics/btp352). _Bioinformatics_ 25, 2078-2079 (2009) + +- [Shovill](https://github.com/tseemann/shovill) + Seemann T [Shovill: De novo assembly pipeline for Illumina paired reads](https://github.com/tseemann/shovill) (GitHub) + +- [Shovill-SE](https://github.com/rpetit3/shovill) + Petit III RA [Shovill-SE: A fork of Shovill that includes support for single end reads.](https://github.com/rpetit3/shovill) (GitHub) + +- [SKESA](https://github.com/ncbi/SKESA) + Souvorov A, Agarwala R, Lipman DJ [SKESA: strategic k-mer extension for scrupulous assemblies.](https://doi.org/10.1186/s13059-018-1540-z) _Genome Biology_ 19:153 (2018) + +- [SPAdes](https://github.com/ablab/spades) + Bankevich A, Nurk S, Antipov D, Gurevich AA, Dvorkin M, Kulikov AS, Lesin VM, Nikolenko SI, Pham S, Prjibelski AD, Pyshkin AV, Sirotkin AV, Vyahhi N, Tesler G, Alekseyev MA, Pevzner PA [SPAdes: a new genome assembly algorithm and its applications to single-cell sequencing.](https://doi.org/10.1089/cmb.2012.0021) _Journal of computational biology_ 19.5 455-477 (2012) + +- [Unicycler](https://github.com/rrwick/Unicycler) + Wick RR, Judd LM, Gorrie CL, Holt KE [Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads.](http://dx.doi.org/10.1371/journal.pcbi.1005595) _PLoS Comput. 
Biol._ 13, e1005595 (2017) + +- [Velvet](https://github.com/dzerbino/velvet) + Zerbino DR, Birney E [Velvet: algorithms for de novo short read assembly using de Bruijn graphs.](http://www.genome.org/cgi/doi/10.1101/gr.074492.107) _Genome research_ 18.5 821-829 (2008) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/bactopia/assembler) + +## Version + +```yaml +BACTOPIA_ASSEMBLER: + - bactopia-assembler: 1.0.5 +``` diff --git a/developers/modules/bactopia_datasets.mdx b/developers/modules/bactopia_datasets.mdx new file mode 100644 index 00000000..6974df6d --- /dev/null +++ b/developers/modules/bactopia_datasets.mdx @@ -0,0 +1,90 @@ +--- +title: bactopia_datasets +description: "Download pre-compiled datasets required by Bactopia." +tags: + - download + - database + - setup + - amr + - mlst + - minhash + - sourmash + - gtdb + - run-scope +--- + +# bactopia_datasets + +**Tags:** download database setup amr mlst minhash sourmash gtdb run-scope + +Download pre-compiled datasets required by Bactopia. + +Fetches the core datasets (AMR, MLST, Mash, Sourmash) hosted by the Bactopia project. +These are used to populate the local cache for offline use. + +:::note[Internet Required] +This process requires an active internet connection to fetch files from `datasets.bactopia.com`. 
+::: + +## Outputs + +``` +record ( + amrfinderplus_db: Path, + mlst_db: Path, + mash_db: Path, + sourmash_db: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `amrfinderplus_db` | `Path` | A compressed tarball of the [AMRFinderPlus](https://github.com/ncbi/amr) database | +| `mlst_db` | `Path` | A compressed tarball of the [PubMLST](https://pubmlst.org/) schemes | +| `mash_db` | `Path` | Pre-computed [Mash](https://github.com/marbl/Mash) sketches (RefSeq) | +| `sourmash_db` | `Path` | Pre-computed [Sourmash](https://github.com/sourmash-bio/sourmash) signatures (GTDB) | + +## Parameters + +## Used By + +### Subworkflows + +- [bactopia_datasets](/developers/subworkflows/bactopia_datasets) - Download and provide pre-compiled datasets required by Bactopia. + +### Workflows + +- [amrfinderplus](/bactopia-tools/amrfinderplus) - Bactopia Tool: Amrfinderplus. +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [merlin](/bactopia-tools/merlin) - MinMER-assisted species-specific tool selection and execution. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [AMRFinderPlus](https://github.com/ncbi/amr) + Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. 
Agents Chemother._ (2019) + +- [mlst](https://github.com/tseemann/mlst) + Seemann T [mlst: scan contig files against PubMLST typing schemes](https://github.com/tseemann/mlst) (GitHub) + +- [Mash](https://github.com/marbl/Mash) + Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). _Genome Biol_ 17, 132 (2016) + +- [Sourmash](https://github.com/dib-lab/sourmash) + Brown CT, Irber L [sourmash: a library for MinHash sketching of DNA](http://dx.doi.org/10.21105/joss.00027). _JOSS_ 1, 27 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/bactopia/datasets) + +## Version + +```yaml +BACTOPIA_DATASETS: + - gnu-wget: 1.18 +``` diff --git a/developers/modules/bactopia_gather.mdx b/developers/modules/bactopia_gather.mdx new file mode 100644 index 00000000..f29bcfe2 --- /dev/null +++ b/developers/modules/bactopia_gather.mdx @@ -0,0 +1,133 @@ +--- +title: bactopia_gather +description: "Search, validate, gather, or simulate input samples." +tags: + - fastq + - validation + - sra + - ena + - download + - merging + - simulation + - art + - ncbi + - sample-scope +--- + +# bactopia_gather + +**Tags:** fastq validation sra ena download merging simulation art ncbi sample-scope + +Search, validate, gather, or simulate input samples. + +This process is the entry point for data ingestion. It handles: +- **Validation:** Verifies FASTQ formatting and gzip integrity. +- **Merging:** Combines multiple runs (lanes) into a single sample. +- **Downloading:** Fetches reads (SRA/ENA) or assemblies (NCBI) from accessions. +- **Simulation:** Generates synthetic reads from assemblies using [ART](https://www.niehs.nih.gov/research/resources/software/biostatistics/art) to enable read-based analysis. 
+ +Uses explicit named slots for input and output reads: +- Input accepts `Set<Path>` for each slot (pre-merge, supports multiple files) +- Output emits `Path?` for each slot (post-merge, single consolidated file or null) + +## Inputs + +``` +record ( + meta: Record, + r1_files: Set, + r2_files: Set, + se_files: Set, + lr_files: Set, + fna_files: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1_files` | `Set` | Illumina R1 read files (Set, elements may be null) | +| `r2_files` | `Set` | Illumina R2 read files (Set, elements may be null) | +| `se_files` | `Set` | Single-end read files (Set, elements may be null) | +| `lr_files` | `Set` | Long read files (ONT) or assembly for simulation (Set, elements may be null) | +| `fna_files` | `Set` | Input or downloaded assembly file (Set, elements may be null) | + +## Outputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path?, + fna: Path?, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `r1` | `Path?` | Merged Illumina R1 read file | +| `r2` | `Path?` | Merged Illumina R2 read file | +| `se` | `Path?` | Merged single-end read file | +| `lr` | `Path?` | Merged long read file (ONT) | +| `fna` | `Path?` | Assembly file | +| `tsv` | `Path` | A tab-delimited metadata file describing the valid samples | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g.
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [bactopia_gather](/developers/subworkflows/bactopia_gather) - Search, validate, gather, and standardize input samples. + +### Workflows + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [cleanyerreads](/bactopia-pipelines/cleanyerreads) - Quality control and optional host read removal from raw sequencing reads. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. +- [teton](/bactopia-pipelines/teton) - Taxonomic classification and abundance profiling of metagenomic reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ART](https://www.niehs.nih.gov/research/resources/software/biostatistics/art/index.cfm) + Huang W, Li L, Myers JR, Marth GT [ART: a next-generation sequencing read simulator.](http://dx.doi.org/10.1093/bioinformatics/btr708) _Bioinformatics_ 28, 593-594 (2012) + +- [fastq-dl](https://github.com/rpetit3/fastq-dl) + Petit III RA [fastq-dl: Download FASTQ files from SRA or ENA repositories.](https://github.com/rpetit3/fastq-dl) (GitHub) + +- [fastq-scan](https://github.com/rpetit3/fastq-scan) + Petit III RA [fastq-scan: generate summary statistics of input FASTQ sequences.](https://github.com/rpetit3/fastq-scan) (GitHub) + +- [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) + Blin K [ncbi-genome-download: Scripts to download genomes from the NCBI FTP servers](https://github.com/kblin/ncbi-genome-download) (GitHub) + +- [Pigz](https://zlib.net/pigz/) + Adler M. 
[pigz: A parallel implementation of gzip for modern multi-processor, multi-core machines.](https://zlib.net/pigz/) _Jet Propulsion Laboratory_ (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/bactopia/gather) + +## Version + +```yaml +BACTOPIA_GATHER: + - bactopia-gather: 1.0.5 +``` diff --git a/developers/modules/bactopia_qc.mdx b/developers/modules/bactopia_qc.mdx new file mode 100644 index 00000000..3a1e6250 --- /dev/null +++ b/developers/modules/bactopia_qc.mdx @@ -0,0 +1,164 @@ +--- +title: bactopia_qc +description: "Automated quality control, error correction, and read subsampling." +tags: + - fastq + - qc + - adapter-removal + - error-correction + - subsampling + - fastp + - bbduk + - lighter + - porechop + - nanoq + - fastqc + - nanoplot + - sample-scope +--- + +# bactopia_qc + +**Tags:** fastq qc adapter-removal error-correction subsampling fastp bbduk lighter porechop nanoq fastqc nanoplot sample-scope + +Automated quality control, error correction, and read subsampling. 
+ +A comprehensive QC pipeline that adapts to the input read type: +- **Illumina:** Adapter/PhiX removal ([Fastp](https://github.com/OpenGene/fastp) or + [BBDuk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/)), Error Correction + ([Lighter](https://github.com/mourisl/Lighter)), and Subsampling ([Rasusa](https://github.com/mbhall88/rasusa)) +- **Nanopore:** Adapter removal ([Porechop](https://github.com/rrwick/Porechop)), Quality filtering + ([Nanoq](https://github.com/esteinig/nanoq)), and Subsampling ([Rasusa](https://github.com/mbhall88/rasusa)) +- **Hybrid:** Processes both short and long reads through their respective pipelines +- **Assembly:** Passes through simulated reads from assemblies + +Generates quality metrics using [fastq-scan](https://github.com/rpetit3/fastq-scan) and optional +quality reports using [FastQC](https://github.com/s-andrews/FastQC) (Illumina) and +[NanoPlot](https://github.com/wdecoster/NanoPlot) (ONT). + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path?, + fna: Path? +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information (must include `runtype`, `genome_size`, `species`) | +| `r1` | `Path?` | Illumina R1 reads (paired-end forward) | +| `r2` | `Path?` | Illumina R2 reads (paired-end reverse) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT) | +| `fna` | `Path?` | Assembly file (FASTA) for assembly-based simulations | + +``` +adapters: Path? +phix: Path? 
+``` + +| Name | Type | Description | +|------|------|-------------| +| `adapters` | `Path?` | Filepath for custom adapter sequences (FASTA) | +| `phix` | `Path?` | Filepath for custom PhiX sequences (FASTA) | + +## Outputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path?, + fna: Path?, + reads_grouped: Set, + error: Set, + skipped: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `r1` | `Path?` | QC'd Illumina R1 reads (paired-end forward) | +| `r2` | `Path?` | QC'd Illumina R2 reads (paired-end reverse) | +| `se` | `Path?` | QC'd single-end Illumina reads | +| `lr` | `Path?` | QC'd long reads (ONT) | +| `fna` | `Path?` | Assembly file (FASTA) | +| `reads_grouped` | `Set` | All output FASTQs for publishing | +| `error` | `Set` | Captured error messages if QC failed (e.g., reads empty after trimming) | +| `skipped` | `Path?` | Marker file indicating QC was skipped for this sample | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [bactopia_qc](/developers/subworkflows/bactopia_qc) - Perform comprehensive quality control on sequencing reads. + +### Workflows + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [cleanyerreads](/bactopia-pipelines/cleanyerreads) - Quality control and optional host read removal from raw sequencing reads. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BBTools](https://jgi.doe.gov/data-and-tools/bbtools/) + Bushnell B [BBMap short read aligner, and other bioinformatic tools.](http://sourceforge.net/projects/bbmap/) (Link) + +- [fastp](https://github.com/OpenGene/fastp) + Chen S, Zhou Y, Chen Y, and Gu J [fastp: an ultra-fast all-in-one FASTQ preprocessor.](https://doi.org/10.1093/bioinformatics/bty560) _Bioinformatics_, 34(17), i884-i890. (2018) + +- [FastQC](https://github.com/s-andrews/FastQC) + Andrews S [FastQC: a quality control tool for high throughput sequence data.](http://www.bioinformatics.babraham.ac.uk/projects/fastqc) (WebLink) + +- [fastq-scan](https://github.com/rpetit3/fastq-scan) + Petit III RA [fastq-scan: generate summary statistics of input FASTQ sequences.](https://github.com/rpetit3/fastq-scan) (GitHub) + +- [Lighter](https://github.com/mourisl/Lighter) + Song L, Florea L, Langmead B [Lighter: Fast and Memory-efficient Sequencing Error Correction without Counting](https://doi.org/10.1186/s13059-014-0509-9). _Genome Biol._ 15(11):509 (2014) + +- [NanoPlot](https://github.com/wdecoster/NanoPlot) + De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C [NanoPack: visualizing and processing long-read sequencing data](https://doi.org/10.1093/bioinformatics/bty149) _Bioinformatics_ Volume 34, Issue 15 (2018) + +- [Nanoq](https://github.com/esteinig/nanoq) + Steinig E [Nanoq: Minimal but speedy quality control for nanopore reads in Rust](https://github.com/esteinig/nanoq) (GitHub) + +- [Porechop](https://github.com/rrwick/Porechop) + Wick RR, Judd LM, Gorrie CL, Holt KE. 
[Completing bacterial genome assemblies with multiplex MinION sequencing.](https://doi.org/10.1099/mgen.0.000132) _Microb Genom._ 3(10):e000132 (2017) + +- [Rasusa](https://github.com/mbhall88/rasusa) + Hall MB [Rasusa: Randomly subsample sequencing reads to a specified coverage.](https://doi.org/10.5281/zenodo.3731394) (2019). + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/bactopia/qc) + +## Version + +```yaml +BACTOPIA_QC: + - bactopia-qc: 1.0.4 +``` diff --git a/developers/modules/bactopia_sketcher.mdx b/developers/modules/bactopia_sketcher.mdx new file mode 100644 index 00000000..eadaf3c7 --- /dev/null +++ b/developers/modules/bactopia_sketcher.mdx @@ -0,0 +1,126 @@ +--- +title: bactopia_sketcher +description: "Create genomic sketches and perform rapid taxonomic classification." +tags: + - bacteria + - taxonomy + - classification + - minhash + - sketch + - mash + - sourmash + - refseq + - gtdb + - sample-scope +--- + +# bactopia_sketcher + +**Tags:** bacteria taxonomy classification minhash sketch mash sourmash refseq gtdb sample-scope + +Create genomic sketches and perform rapid taxonomic classification. + +Uses [Mash](https://mash.readthedocs.io/) and [Sourmash](https://sourmash.readthedocs.io/) to +create MinHash sketches of the input sequences. These sketches are then queried against +pre-built databases ([RefSeq](https://www.ncbi.nlm.nih.gov/refseq/) and +[GTDB](https://gtdb.ecogenomic.org/)) to identify the closest reference genomes. + +:::note[Databases Required] +Requires the pre-compiled RefSeq (Mash) and GTDB (Sourmash) databases, usually downloaded +by the `datasets` module. 
+::: + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +``` +mash_db: Path +sourmash_db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `mash_db` | `Path` | Path to the Mash RefSeq database | +| `sourmash_db` | `Path` | Path to the Sourmash GTDB LCA database | + +## Outputs + +``` +record ( + meta: Record, + sig: Path, + msh: Set, + mash: Path, + sourmash: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `sig` | `Path` | The Sourmash signature file (*.sig) | +| `msh` | `Set` | The Mash sketch files for k=21 and k=31 (*.msh) | +| `mash` | `Path` | A classification report of Mash Screen results against RefSeq database | +| `sourmash` | `Path` | A classification report from Sourmash LCA against GTDB database | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [bactopia_sketcher](/developers/subworkflows/bactopia_sketcher) - Create genomic sketches and perform rapid taxonomic classification. + +### Workflows + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mash](https://github.com/marbl/Mash) + Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). _Genome Biol_ 17, 132 (2016) + +- [Sourmash](https://github.com/dib-lab/sourmash) + Brown CT, Irber L [sourmash: a library for MinHash sketching of DNA](http://dx.doi.org/10.21105/joss.00027). _JOSS_ 1, 27 (2016) + +- [Mash Screen](https://github.com/marbl/Mash) + Ondov BD, Starrett GJ, Sappington A, Kostic A, Koren S, Buck CB, Phillippy AM [Mash Screen: high-throughput sequence containment estimation for genome discovery](https://doi.org/10.1186/s13059-019-1841-x) _Genome Biol_ 20, 232 (2019) + +- [NCBI RefSeq Database](https://www.ncbi.nlm.nih.gov/refseq/) + O'Leary NA, Wright MW, Brister JR, Ciufo S, Haddad D, McVeigh R, Rajput B, Robbertse B, Smith-White B, Ako-Adjei D, Astashyn A, Badretdin A, Bao Y, Blinkova O, Brover V, Chetvernin V, Choi J, Cox E, Ermolaeva O, Farrell CM, Goldfarb T, Gupta T, Haft D, Hatcher E, Hlavina W, Joardar VS, Kodali VK, Li W, Maglott D, Masterson P, McGarvey KM, Murphy MR, O'Neill K, Pujar S, Rangwala SH, Rausch D, Riddick LD, Schoch C, Shkeda A, Storz SS, Sun H, Thibaud-Nissen F, Tolstoy I, Tully RE, Vatsan AR, Wallin C, Webb D, Wu W, Landrum MJ, Kimchi A, Tatusova T, DiCuccio M, Kitts P, Murphy TD, Pruitt KD [Reference sequence (RefSeq) database at NCBI: current status, taxonomic expansion, and functional annotation.](https://doi.org/10.1093/nar/gkv1189) _Nucleic Acids Res._ 44, D733-45 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/bactopia/sketcher) + +## Version + +```yaml +BACTOPIA_SKETCHER: + - bactopia-sketcher: 1.0.3 +``` diff 
--git a/developers/modules/bactopia_teton.mdx b/developers/modules/bactopia_teton.mdx new file mode 100644 index 00000000..40789ace --- /dev/null +++ b/developers/modules/bactopia_teton.mdx @@ -0,0 +1,99 @@ +--- +title: bactopia_teton +description: "Predict genome size and route samples based on taxonomic classification." +tags: + - taxonomy + - genome-size + - routing + - filtering + - bacteria + - sizemeup + - bracken + - sample-scope +--- + +# bactopia_teton + +**Tags:** taxonomy genome-size routing filtering bacteria sizemeup bracken sample-scope + +Predict genome size and route samples based on taxonomic classification. + +Uses [SizeMeUp](https://github.com/rpetit3/sizemeup) to parse [Bracken](https://github.com/jenniferlu717/Bracken) +abundance reports, estimate the genome size for the identified species, and split samples +into "Bacteria" (for downstream analysis with Bactopia) and "Non-Bacteria" lists. + +## Inputs + +``` +record ( + meta: Record, + classification: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `classification` | `Path` | Bracken species abundance report | + +## Outputs + +``` +record ( + meta: Record, + bacteria_tsv: Path, + nonbacteria_tsv: Path, + sizemeup: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `bacteria_tsv` | `Path` | A tab-delimited samplesheet compatible with Bactopia (--samples) for samples identified as Bacteria | +| `nonbacteria_tsv` | `Path` | A tab-delimited samplesheet for samples NOT identified as Bacteria | +| `sizemeup` | `Path` | A text file containing the predicted species and genome size | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
`.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [teton](/developers/subworkflows/teton) - Perform taxonomic classification and estimate bacterial genome sizes. + +### Workflows + +- [teton](/bactopia-pipelines/teton) - Taxonomic classification and abundance profiling of metagenomic reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Bracken](https://github.com/jenniferlu717/Bracken) + Lu J, Breitwieser FP, Thielen P, and Salzberg SL [Bracken: estimating species abundance in metagenomics data.](https://doi.org/10.7717/peerj-cs.104) _PeerJ Computer Science_, 3, e104. (2017) + +- [sizemeup](https://github.com/rpetit3/sizemeup) + Petit III RA [sizemeup: A simple tool to retrieve the genome size for a given species name or tax ID](https://github.com/rpetit3/sizemeup) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/bactopia/teton) + +## Version + +```yaml +BACTOPIA_TETON: + - bactopia-teton: 1.1.3 +``` diff --git a/developers/modules/bakta_download.mdx b/developers/modules/bakta_download.mdx new file mode 100644 index 00000000..fe325162 --- /dev/null +++ b/developers/modules/bakta_download.mdx @@ -0,0 +1,88 @@ +--- +title: bakta_download +description: "Download the Bakta annotation database." +tags: + - bacteria + - database + - download + - annotation + - bakta + - setup + - run-scope +--- + +# bakta_download + +**Tags:** bacteria database download annotation bakta setup run-scope + +Download the Bakta annotation database. + +Fetches the pre-compiled database required by [Bakta](https://github.com/oschwengers/bakta) +for genome annotation. 
The database contains UniProt clusters, AMR genes, and other +reference data needed for comprehensive bacterial genome annotation. + +:::note[Internet & Storage Required] +This process requires an active internet connection and significant disk space +to store the database files. The 'light' database is ~1.5GB, while the 'full' +database is ~30GB uncompressed. +::: + +## Outputs + +``` +record ( + db: Path?, + db_tarball: Path?, + logs: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `db` | `Path?` | The Bakta database directory containing annotation reference data | +| `db_tarball` | `Path?` | A compressed tarball of the database (if requested via parameters) | +| `logs` | `Set` | Optional program specific log files | + +## Parameters + +### Bakta Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bakta_db` | string | | Tarball or path to the Bakta database | +| `--bakta_db_type` | string | `full` | Which Bakta DB to download 'full' (~30GB) or 'light' (~2GB) (choices: `full`, `light`) | +| `--bakta_save_as_tarball` | boolean | `false` | Save the Bakta database as a tarball | +| `--download_bakta` | boolean | `false` | Download the Bakta database to the path given by --bakta_db | + +## Used By + +### Subworkflows + +- [bakta](/developers/subworkflows/bakta) - Rapid bacterial genome annotation. + +### Workflows + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [bakta](/bactopia-tools/bakta) - Rapid annotation of bacterial genomes and plasmids. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Bakta](https://github.com/oschwengers/bakta) + Schwengers O, Jelonek L, Dieckmann MA, Beyvers S, Blom J, Goesmann A [Bakta - rapid and standardized annotation of bacterial genomes via alignment-free sequence identification.](https://doi.org/10.1099/mgen.0.000685) _Microbial Genomics_ 7(11) (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/bakta/download) + +## Version + +```yaml +BAKTA_DOWNLOAD: + - bakta: 1.12.0 +``` diff --git a/developers/modules/bakta_run.mdx b/developers/modules/bakta_run.mdx new file mode 100644 index 00000000..d6719794 --- /dev/null +++ b/developers/modules/bakta_run.mdx @@ -0,0 +1,162 @@ +--- +title: bakta_run +description: "Rapid and standardized annotation of bacterial genomes and plasmids." +tags: + - bacteria + - annotation + - genome + - assembly + - prodigal + - compliant + - genbank + - ena + - sample-scope +--- + +# bakta_run + +**Tags:** bacteria annotation genome assembly prodigal compliant genbank ena sample-scope + +Rapid and standardized annotation of bacterial genomes and plasmids. + +Uses [Bakta](https://github.com/oschwengers/bakta) to annotate genomes via alignment-free +sequence identification. It detects CDS, sORFs, tRNAs, tmRNAs, rRNAs, ncRNAs, and CRISPR +arrays, assigning functions from a comprehensive database. + +:::note[Database Required] +Requires a Bakta database (directory or tarball) to be available. +::: + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +``` +db: Path +proteins: Path? +prodigal_tf: Path? +replicons: Path? 
+``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | Path to the Bakta database (Directory or compressed tarball) | +| `proteins` | `Path?` | FASTA file of trusted proteins to use for first-pass annotation | +| `prodigal_tf` | `Path?` | Prodigal training file for CDS prediction | +| `replicons` | `Path?` | Table (TSV/CSV) of replicon information for origin detection | + +## Outputs + +``` +record ( + meta: Record, + blastdb: Path, + faa: Path, + ffn: Path, + fna: Path, + gbff: Path, + gff: Path, + hypotheticals_tsv: Path, + hypotheticals_faa: Path, + inference_tsv: Path, + json: Path, + png: Path, + svg: Path, + tsv: Path, + txt: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `blastdb` | `Path` | A compressed tar.gz archive of BLAST+ databases of the contigs, genes, and proteins | +| `faa` | `Path` | CDS/sORF amino acid sequences as FASTA | +| `ffn` | `Path` | Feature nucleotide sequences as FASTA | +| `fna` | `Path` | Replicon/contig DNA sequences as FASTA | +| `gbff` | `Path` | Annotations and sequences in GenBank format | +| `gff` | `Path` | Annotations and sequences in GFF3 format | +| `hypotheticals_tsv` | `Path` | Further information on hypothetical protein CDS as tab-separated values | +| `hypotheticals_faa` | `Path` | Hypothetical protein CDS amino acid sequences as FASTA | +| `inference_tsv` | `Path` | Detailed annotation evidence and database hit information | +| `json` | `Path` | Machine-readable annotations and metadata in JSON format | +| `png` | `Path` | Circular genome plot as PNG image | +| `svg` | `Path` | Circular genome plot as SVG image | +| `tsv` | `Path` | Annotations as simple human readable tab-separated values | +| `txt` | `Path` | Broad summary of Bakta annotations | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program 
specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Bakta Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--bakta_proteins` | string | | FASTA file of trusted proteins to first annotate from | +| `--bakta_prodigal_tf` | string | | Training file to use for Prodigal | +| `--bakta_replicons` | string | | Replicon information table (tsv/csv) | + +## Used By + +### Subworkflows + +- [bakta](/developers/subworkflows/bakta) - Rapid bacterial genome annotation. + +### Workflows + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [bakta](/bactopia-tools/bakta) - Rapid annotation of bacterial genomes and plasmids. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Bakta](https://github.com/oschwengers/bakta) + Schwengers O, Jelonek L, Dieckmann MA, Beyvers S, Blom J, Goesmann A [Bakta - rapid and standardized annotation of bacterial genomes via alignment-free sequence identification.](https://doi.org/10.1099/mgen.0.000685) _Microbial Genomics_ 7(11) (2021) + +- [Aragorn](http://130.235.244.92/ARAGORN/Downloads/) + Laslett D, Canback B [ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide sequences.](https://doi.org/10.1093/nar/gkh152) _Nucleic Acids Res_. 
32(1):11-6 (2004) + +- [DIAMOND](https://github.com/bbuchfink/diamond) + Buchfink B, Xie C, Huson DH [Fast and sensitive protein alignment using DIAMOND.](http://dx.doi.org/10.1038/nmeth.3176) _Nat. Methods._ 12, 59-60 (2015) + +- [HMMER](http://hmmer.org/) + Eddy SR [Accelerated Profile HMM Searches.](https://doi.org/10.1371/journal.pcbi.1002195) _PLoS Comput. Biol._ 7, e1002195 (2011) + +- [Infernal](http://eddylab.org/infernal/) + Nawrocki EP, Eddy SR [Infernal 1.1: 100-fold faster RNA homology searches.](https://doi.org/10.1093/bioinformatics/btt509) _Bioinformatics_ 29(22), 2933-2935 (2013) + +- [Prodigal](https://github.com/hyattpd/Prodigal) + Hyatt D, Chen G-L, LoCascio PF, Land ML, Larimer FW, Hauser LJ [Prodigal: prokaryotic gene recognition and translation initiation site identification.](https://doi.org/10.1186/1471-2105-11-119) _BMC Bioinformatics_ 11.1 119 (2010) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/bakta/run) + +## Version + +```yaml +BAKTA_RUN: + - bakta: 1.12.0 +``` diff --git a/developers/modules/blast_blastn.mdx b/developers/modules/blast_blastn.mdx new file mode 100644 index 00000000..1e06a5e5 --- /dev/null +++ b/developers/modules/blast_blastn.mdx @@ -0,0 +1,109 @@ +--- +title: blast_blastn +description: "Search a nucleotide database using a nucleotide query." +tags: + - blast + - blastn + - alignment + - dna + - search + - fasta + - sample-scope +--- + +# blast_blastn + +**Tags:** blast blastn alignment dna search fasta sample-scope + +Search a nucleotide database using a nucleotide query. + +Uses [BLASTN](https://blast.ncbi.nlm.nih.gov/Blast.cgi) to align nucleotide query sequences +(FASTA) against a nucleotide BLAST database. It is optimized for finding highly similar sequences. 
+ +## Inputs + +``` +record ( + meta: Record, + blastdb: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `blastdb` | `Path` | A compressed tarball containing the nucleotide BLAST database | + +``` +query: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `query` | `Path` | FASTA file containing nucleotide query sequences | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited nucleotide alignment results (BLAST outfmt 6) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### BLASTN Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--blastn_query` | string | | A fasta file containing the query sequences to BLAST against the database | +| `--blastn_outfmt` | string | `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | The columns to include with -outfmt 6 | +| `--blastn_opts` | string | | Additional options to pass to BLASTN | +| `--blastn_perc_identity` | integer | `50` | Percent identity | +| `--blastn_qcov_hsp_perc` | integer | `50` | Percent query coverage per hsp | +| `--blastn_max_target_seqs` | integer | `2000` | Maximum number of aligned sequences to keep | + +## Used By + +### Subworkflows + +- [blastn](/developers/subworkflows/blastn) - Search a nucleotide database using nucleotide query sequences. 
+ +### Workflows + +- [blastn](/bactopia-tools/blastn) - Search against nucleotide BLAST databases using nucleotide queries. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/blast/blastn) + +## Version + +```yaml +BLAST_BLASTN: + - blast: 2.17.0 +``` diff --git a/developers/modules/blast_blastp.mdx b/developers/modules/blast_blastp.mdx new file mode 100644 index 00000000..1cd37c39 --- /dev/null +++ b/developers/modules/blast_blastp.mdx @@ -0,0 +1,109 @@ +--- +title: blast_blastp +description: "Search a protein database using a protein query." +tags: + - blast + - blastp + - alignment + - protein + - amino-acid + - search + - fasta + - sample-scope +--- + +# blast_blastp + +**Tags:** blast blastp alignment protein amino-acid search fasta sample-scope + +Search a protein database using a protein query. + +Uses [BLASTP](https://blast.ncbi.nlm.nih.gov/Blast.cgi) to align amino acid query sequences +(FASTA) against a protein BLAST database. It is used to identify homologous proteins. 
+ +## Inputs + +``` +record ( + meta: Record, + blastdb: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `blastdb` | `Path` | A compressed tarball containing the protein BLAST database | + +``` +query: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `query` | `Path` | FASTA file containing amino acid query sequences | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited protein alignment results (BLAST outfmt 6) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### BLASTP Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--blastp_query` | string | | A fasta file containing the query sequences to BLAST against the database | +| `--blastp_outfmt` | string | `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | The columns to include with -outfmt 6 | +| `--blastp_opts` | string | | Additional options to pass to BLASTP | +| `--blastp_qcov_hsp_perc` | integer | `50` | Percent query coverage per hsp | +| `--blastp_max_target_seqs` | integer | `2000` | Maximum number of aligned sequences to keep | + +## Used By + +### Subworkflows + +- [blastp](/developers/subworkflows/blastp) - Search protein sequences against protein database. 
+ +### Workflows + +- [blastp](/bactopia-tools/blastp) - Search against protein BLAST databases using protein queries. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/blast/blastp) + +## Version + +```yaml +BLAST_BLASTP: + - blast: 2.17.0 +``` diff --git a/developers/modules/blast_blastx.mdx b/developers/modules/blast_blastx.mdx new file mode 100644 index 00000000..fb790aa6 --- /dev/null +++ b/developers/modules/blast_blastx.mdx @@ -0,0 +1,111 @@ +--- +title: blast_blastx +description: "Search a protein database using a translated nucleotide query." +tags: + - blast + - blastx + - alignment + - translation + - protein + - dna + - search + - fasta + - sample-scope +--- + +# blast_blastx + +**Tags:** blast blastx alignment translation protein dna search fasta sample-scope + +Search a protein database using a translated nucleotide query. + +Uses [BLASTX](https://blast.ncbi.nlm.nih.gov/Blast.cgi) to translate nucleotide query sequences +(FASTA) in all six reading frames and align them against a protein BLAST database. This is useful +for identifying potential coding regions in unannotated DNA. 
+ +## Inputs + +``` +record ( + meta: Record, + blastdb: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `blastdb` | `Path` | A compressed tarball containing the protein BLAST database | + +``` +query: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `query` | `Path` | FASTA file containing nucleotide query sequences | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited translated nucleotide-to-protein alignment results (BLAST outfmt 6) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### BLASTX Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--blastx_query` | string | | A fasta file containing the query sequences to BLAST against the database | +| `--blastx_outfmt` | string | `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | The columns to include with -outfmt 6 | +| `--blastx_opts` | string | | Additional options to pass to BLASTX | +| `--blastx_qcov_hsp_perc` | integer | `50` | Percent query coverage per hsp | +| `--blastx_max_target_seqs` | integer | `2000` | Maximum number of aligned sequences to keep | + +## Used By + +### Subworkflows + +- [blastx](/developers/subworkflows/blastx) - Translate nucleotide sequences and search protein database. 
+ +### Workflows + +- [blastx](/bactopia-tools/blastx) - Search against protein BLAST databases using translated nucleotide queries. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/blast/blastx) + +## Version + +```yaml +BLAST_BLASTX: + - blast: 2.17.0 +``` diff --git a/developers/modules/blast_tblastn.mdx b/developers/modules/blast_tblastn.mdx new file mode 100644 index 00000000..86db8a7c --- /dev/null +++ b/developers/modules/blast_tblastn.mdx @@ -0,0 +1,111 @@ +--- +title: blast_tblastn +description: "Search a translated nucleotide database using a protein query." +tags: + - blast + - tblastn + - alignment + - translation + - protein + - dna + - search + - fasta + - sample-scope +--- + +# blast_tblastn + +**Tags:** blast tblastn alignment translation protein dna search fasta sample-scope + +Search a translated nucleotide database using a protein query. + +Uses [TBLASTN](https://blast.ncbi.nlm.nih.gov/Blast.cgi) to align amino acid query sequences +(FASTA) against a nucleotide BLAST database that has been dynamically translated in all six +reading frames. This is useful for finding gene homologs in unannotated genomic DNA. 
+ +## Inputs + +``` +record ( + meta: Record, + blastdb: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `blastdb` | `Path` | A compressed tarball containing the nucleotide BLAST database | + +``` +query: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `query` | `Path` | FASTA file containing amino acid query sequences | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited protein-to-translated nucleotide alignment results (BLAST outfmt 6) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### TBLASTN Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--tblastn_query` | string | | A fasta file containing the query sequences to BLAST against the database | +| `--tblastn_outfmt` | string | `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | The columns to include with -outfmt 6 | +| `--tblastn_opts` | string | | Additional options to pass to TBLASTN | +| `--tblastn_qcov_hsp_perc` | integer | `50` | Percent query coverage per hsp | +| `--tblastn_max_target_seqs` | integer | `2000` | Maximum number of aligned sequences to keep | + +## Used By + +### Subworkflows + +- [tblastn](/developers/subworkflows/tblastn) - Search protein query sequences against nucleotide database. 
+ +### Workflows + +- [tblastn](/bactopia-tools/tblastn) - Search against translated nucleotide databases using protein queries. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/blast/tblastn) + +## Version + +```yaml +BLAST_TBLASTN: + - blast: 2.17.0 +``` diff --git a/developers/modules/blast_tblastx.mdx b/developers/modules/blast_tblastx.mdx new file mode 100644 index 00000000..403dc193 --- /dev/null +++ b/developers/modules/blast_tblastx.mdx @@ -0,0 +1,111 @@ +--- +title: blast_tblastx +description: "Search a translated nucleotide database using a translated nucleotide query." +tags: + - blast + - tblastx + - alignment + - translation + - dna + - search + - fasta + - sample-scope +--- + +# blast_tblastx + +**Tags:** blast tblastx alignment translation dna search fasta sample-scope + +Search a translated nucleotide database using a translated nucleotide query. + +Uses [TBLASTX](https://blast.ncbi.nlm.nih.gov/Blast.cgi) to align nucleotide query sequences +(translated in all six frames) against a nucleotide BLAST database (also translated in all +six frames). This is useful for identifying distant relationships between nucleotide sequences +that have significant divergence but conserved protein structure. 
+ +## Inputs + +``` +record ( + meta: Record, + blastdb: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `blastdb` | `Path` | A compressed tarball containing the nucleotide BLAST database | + +``` +query: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `query` | `Path` | FASTA file containing nucleotide query sequences | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited translated nucleotide-to-translated nucleotide alignment results (BLAST outfmt 6) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### TBLASTX Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--tblastx_query` | string | | A fasta file containing the query sequences to BLAST against the database | +| `--tblastx_outfmt` | string | `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | The columns to include with -outfmt 6 | +| `--tblastx_opts` | string | | Additional options to pass to TBLASTX | +| `--tblastx_qcov_hsp_perc` | integer | `50` | Percent query coverage per hsp | +| `--tblastx_max_target_seqs` | integer | `2000` | Maximum number of aligned sequences to keep | + +## Used By + +### Subworkflows + +- [tblastx](/developers/subworkflows/tblastx) - Translate nucleotide query sequences and search nucleotide database. 
+ +### Workflows + +- [tblastx](/bactopia-tools/tblastx) - Search against translated nucleotide databases using translated nucleotide queries. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/blast/tblastx) + +## Version + +```yaml +BLAST_TBLASTX: + - blast: 2.17.0 +``` diff --git a/developers/modules/bracken.mdx b/developers/modules/bracken.mdx new file mode 100644 index 00000000..dd4219cd --- /dev/null +++ b/developers/modules/bracken.mdx @@ -0,0 +1,150 @@ +--- +title: bracken +description: "Taxonomic classification and abundance estimation." +tags: + - metagenomics + - classification + - taxonomy + - abundance + - kraken2 + - bracken + - krona + - sample-scope +--- + +# bracken + +**Tags:** metagenomics classification taxonomy abundance kraken2 bracken krona sample-scope + +Taxonomic classification and abundance estimation. + +Uses [Kraken2](https://github.com/DerrickWood/kraken2) to classify reads against a +taxonomic database, followed by [Bracken](https://github.com/jenniferlu717/Bracken) +(Bayesian Reestimation of Abundance with KrakEN) to estimate relative abundances at +a specific taxonomic level. It also generates an interactive [Krona](https://github.com/marbl/Krona/wiki) +plot for visualization. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? 
+ +:::note[Database Required] +Requires a compatible Kraken2/Bracken database (tarball). +::: + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path? +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT/PacBio) - not typically used | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | A compressed tarball containing the Kraken2/Bracken database | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + special_meta: Record, + classified: Set, + unclassified: Set, + kraken2_report: Path, + kraken2_output: Path?, + bracken_report: Path, + krona: Set, + abundances: Path, + classification: Path, + adjusted_abundances: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited summary of Bracken primary and secondary species abundances | +| `special_meta` | `Record` | A simplified metadata record for internal use | +| `classified` | `Set` | Reads classified to belong to any of the taxa on the Kraken2 database | +| `unclassified` | `Set` | Reads not classified to belong to any of the taxa on the Kraken2 database | +| `kraken2_report` | `Path` | Kraken2 report containing stats about classified and not classified reads | +| `kraken2_output` | `Path?` | Kraken2 output file containing the taxonomic classification of each read | +| `bracken_report` | `Path` | Bracken report containing stats about classified and not classified reads | +| `krona` | `Set` | Interactive Krona HTML visualization | +| `abundances` | `Path` | Bracken abundance 
estimates for each taxon | +| `classification` | `Path` | Bracken per-read classification details | +| `adjusted_abundances` | `Path` | Bracken abundance estimates adjusted for unclassified reads | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Kraken2 and Bracken Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--kraken2_db` | string | | A single tarball or path to a Kraken2 formatted database | +| `--kraken2_confidence` | number | `0.0` | Confidence score threshold between 0 and 1 | +| `--kraken2_use_mpa_style` | boolean | `false` | Format report output like Kraken 1's kraken-mpa-report | +| `--kraken2_report_zero_counts` | boolean | `false` | Report counts for ALL taxa, even if counts are zero | + +## Used By + +### Subworkflows + +- [bracken](/developers/subworkflows/bracken) - Estimate species abundance from metagenomic reads. + +### Workflows + +- [bracken](/bactopia-tools/bracken) - Estimate taxonomic abundance of metagenomic samples. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Bracken](https://github.com/jenniferlu717/Bracken) + Lu J, Breitwieser FP, Thielen P, and Salzberg SL [Bracken: estimating species abundance in metagenomics data.](https://doi.org/10.7717/peerj-cs.104) _PeerJ Computer Science_, 3, e104. 
(2017) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) + +- [Krona](https://github.com/marbl/Krona) + Ondov BD, Bergman NH, and Phillippy AM [Interactive metagenomic visualization in a Web browser.](https://doi.org/10.1186/1471-2105-12-385) _BMC Bioinformatics_, 12, 385. (2011) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/bracken) + +## Version + +```yaml +BRACKEN: + - bactopia-teton: 1.1.3 +``` diff --git a/developers/modules/btyper3.mdx b/developers/modules/btyper3.mdx new file mode 100644 index 00000000..3a4bde5a --- /dev/null +++ b/developers/modules/btyper3.mdx @@ -0,0 +1,101 @@ +--- +title: btyper3 +description: "In silico typing and characterization of *Bacillus cereus* group genomes." +tags: + - bacteria + - bacillus + - cereus + - typing + - virulence + - toxin + - amr + - mlst + - sample-scope +--- + +# btyper3 + +**Tags:** bacteria bacillus cereus typing virulence toxin amr mlst sample-scope + +In silico typing and characterization of *Bacillus cereus* group genomes. + +Uses [BTyper3](https://github.com/lmc297/BTyper3) to classify *B. cereus* group isolates. +It determines the PanC clade, Multi-Locus Sequence Type (MLST), and screens for virulence +factors, crystal toxins (Bt), and antimicrobial resistance genes. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited Bacillus cereus group typing results including PanC clade and virulence factors | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### BTyper3 Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--btyper3_virulence_identity` | integer | `70` | Minimum percent amino acid/nucleotide identity threshold for a virulence gene to be considered present | +| `--btyper3_identity` | integer | `50` | Minimum percent amino acid identity threshold for a Bt toxin gene to be considered present | +| `--btyper3_opts` | string | | Additional options to pass to BTyper3 | + +## Used By + +### Subworkflows + +- [btyper3](/developers/subworkflows/btyper3) - In silico taxonomic classification of Bacillus cereus group genomes. + +### Workflows + +- [btyper3](/bactopia-tools/btyper3) - Taxonomic classification of Bacillus cereus group isolates. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BTyper3](https://github.com/lmc297/BTyper3) + Carroll LM, Cheng RA, Kovac J [No Assembly Required: Using BTyper3 to Assess the Congruency of a Proposed Taxonomic Framework for the Bacillus cereus Group With Historical Typing Methods.](https://doi.org/10.3389/fmicb.2020.580691) _Frontiers in Microbiology_, 11, 580691. (2020) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/btyper3) + +## Version + +```yaml +BTYPER3: + - btyper3: 3.4.0 +``` diff --git a/developers/modules/busco.mdx b/developers/modules/busco.mdx new file mode 100644 index 00000000..401cdd4c --- /dev/null +++ b/developers/modules/busco.mdx @@ -0,0 +1,99 @@ +--- +title: busco +description: "Assess genome assembly completeness using single-copy orthologs." +tags: + - quality-control + - completeness + - genome + - assembly + - orthologs + - busco + - sample-scope +--- + +# busco + +**Tags:** quality-control completeness genome assembly orthologs busco sample-scope + +Assess genome assembly completeness using single-copy orthologs. + +Uses [BUSCO](https://gitlab.com/ezlab/busco) (Benchmarking Universal Single-Copy Orthologs) +to measure the completeness of genome assemblies, gene sets, or transcriptomes by matching +them against a lineage-specific set of conserved orthologs. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Text summary report of the completeness score (C/S/D/F/M%) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### BUSCO Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--busco_lineage` | string | `bacteria_odb10` | Specify the name of the BUSCO lineage to be used | +| `--busco_evalue` | string | `1e-03` | E-value cutoff for BLAST searches. Allowed formats, 0.001 or 1e-03 | +| `--busco_limit` | integer | `3` | Total candidate regions to consider per BUSCO | + +## Used By + +### Subworkflows + +- [busco](/developers/subworkflows/busco) - Assess genome assembly completeness using BUSCO. + +### Workflows + +- [busco](/bactopia-tools/busco) - Assessment of genome assembly completeness using evolutionarily informed expectations. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BUSCO](https://gitlab.com/ezlab/busco) + Manni M, Berkeley MR, Seppey M, Simão FA, Zdobnov EM [BUSCO Update: Novel and Streamlined Workflows along with Broader and Deeper Phylogenetic Coverage for Scoring of Eukaryotic, Prokaryotic, and Viral Genomes.](https://doi.org/10.1093/molbev/msab199) _Molecular Biology and Evolution_ 38(10), 4647-4654. (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/busco) + +## Version + +```yaml +BUSCO: + - busco: 6.0.0 +``` diff --git a/developers/modules/checkm2_download.mdx b/developers/modules/checkm2_download.mdx new file mode 100644 index 00000000..bcfe01a7 --- /dev/null +++ b/developers/modules/checkm2_download.mdx @@ -0,0 +1,80 @@ +--- +title: checkm2_download +description: "Download the pre-trained CheckM2 database." +tags: + - checkm2 + - download + - database + - diamond + - machine-learning + - sample-scope +--- + +# checkm2_download + +**Tags:** checkm2 download database diamond machine-learning sample-scope + +Download the pre-trained CheckM2 database. + +Fetches the required Diamond database used by [CheckM2](https://github.com/chklovski/CheckM2) +for genome quality prediction. This database contains the machine learning model training data. + +:::note[Internet Required] +This process requires an active internet connection to fetch the database. 
+::: + +## Outputs + +``` +record ( + db: Path, + json: Path, + logs: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `db` | `Path` | The CheckM2 Diamond database file (*.dmnd) | +| `json` | `Path` | Metadata file describing the database contents | +| `logs` | `Set` | Optional program specific log files | + +## Parameters + +### CheckM2 Database Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--checkm2_db` | string | | Path to a folder containing CheckM2 database (or where it should be downloaded to). | +| `--download_checkm2` | boolean | `false` | Download the CheckM2 database to the path given by --checkm2_db | + +## Used By + +### Subworkflows + +- [checkm2](/developers/subworkflows/checkm2) - Assess metagenome bin completeness using CheckM2. + +### Workflows + +- [checkm2](/bactopia-tools/checkm2) - Machine learning-based assessment of microbial genome assembly quality. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [CheckM2](https://github.com/chklovski/CheckM2) + Chklovski A [Rapid assessment of genome bin quality using machine learning](https://github.com/chklovski/CheckM2) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/checkm2/download) + +## Version + +```yaml +CHECKM2_DOWNLOAD: + - aria2: 1.36.0 +``` diff --git a/developers/modules/checkm2_predict.mdx b/developers/modules/checkm2_predict.mdx new file mode 100644 index 00000000..c33886db --- /dev/null +++ b/developers/modules/checkm2_predict.mdx @@ -0,0 +1,115 @@ +--- +title: checkm2_predict +description: "Assess genome quality using machine learning." 
+tags: + - quality-control + - completeness + - contamination + - machine-learning + - bacteria + - archaea + - sample-scope +--- + +# checkm2_predict + +**Tags:** quality-control completeness contamination machine-learning bacteria archaea sample-scope + +Assess genome quality using machine learning. + +Uses [CheckM2](https://github.com/chklovski/CheckM2) to predict the completeness and +contamination of genome assemblies. Unlike the original CheckM, it uses a gradient boost +machine learning model to predict quality without relying on lineage-specific marker sets, +making it more accurate for novel or reduced genomes. + +:::note[Database Required] +Requires the CheckM2 database (Diamond database file) to be available. +::: + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | The CheckM2 database file (*.dmnd) | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited report of quality metrics (Completeness, Contamination) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### CheckM2 Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--checkm2_lowmem` | boolean | | Low memory mode. 
Reduces DIAMOND blocksize to significantly reduce RAM usage at the expense of longer runtime | +| `--checkm2_general` | boolean | | Force the use of the general quality prediction model (gradient boost) | +| `--checkm2_specific` | boolean | | Force the use of the specific quality prediction model (neural network) | +| `--checkm2_allmodels` | boolean | | Output quality prediction for both models for each genome. | +| `--checkm2_genes` | boolean | | Treat input files as protein files. [Default: False] | +| `--checkm2_opts` | string | | Additional options to pass to CheckM2 | + +## Used By + +### Subworkflows + +- [checkm2](/developers/subworkflows/checkm2) - Assess metagenome bin completeness using CheckM2. + +### Workflows + +- [checkm2](/bactopia-tools/checkm2) - Machine learning-based assessment of microbial genome assembly quality. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [CheckM2](https://github.com/chklovski/CheckM2) + Chklovski A [Rapid assessment of genome bin quality using machine learning](https://github.com/chklovski/CheckM2) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/checkm2/predict) + +## Version + +```yaml +CHECKM2_PREDICT: + - checkm2: 1.1.0 +``` diff --git a/developers/modules/checkm_lineagewf.mdx b/developers/modules/checkm_lineagewf.mdx new file mode 100644 index 00000000..05bdbdbb --- /dev/null +++ b/developers/modules/checkm_lineagewf.mdx @@ -0,0 +1,110 @@ +--- +title: checkm_lineagewf +description: "Assess genome quality using lineage-specific marker sets." 
+tags: + - quality-control + - completeness + - contamination + - marker-genes + - lineage + - bacteria + - archaea + - sample-scope +--- + +# checkm_lineagewf + +**Tags:** quality-control completeness contamination marker-genes lineage bacteria archaea sample-scope + +Assess genome quality using lineage-specific marker sets. + +Uses [CheckM](https://github.com/Ecogenomics/CheckM) to estimate the completeness and +contamination of genome assemblies. It places the genome into a reference tree to select +an appropriate set of single-copy marker genes, then calculates quality metrics based on +the recovery of these markers. + +:::note[Database Required] +Requires the CheckM reference database (~275GB uncompressed) to be configured via the +`CHECKM_DATA_PATH` environment variable or pre-installed in the container. +::: + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited genome quality report with completeness and contamination estimates | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### CheckM Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--checkm_unique` | integer | `10` | Minimum number of unique phylogenetic markers required to use lineage-specific marker set. 
| +| `--checkm_multi` | integer | `10` | Maximum number of multi-copy phylogenetic markers before defaulting to domain-level marker set. | +| `--checkm_aai_strain` | number | `0.9` | AAI threshold used to identify strain heterogeneity | +| `--checkm_length` | number | `0.7` | Percent overlap between target and query | + +## Used By + +### Subworkflows + +- [checkm](/developers/subworkflows/checkm) - Assess metagenome bin completeness using CheckM. + +### Workflows + +- [checkm](/bactopia-tools/checkm) - Assessment of microbial genome assembly quality. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [CheckM](https://github.com/Ecogenomics/CheckM) + Parks DH, Imelfort M, Skennerton CT, Hugenholtz P, Tyson GW [CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes.](http://dx.doi.org/10.1101/gr.186072.114) _Genome Res_ 25, 1043-1055 (2015) + +- [pplacer](https://github.com/matsen/pplacer) + Matsen FA, Kodner RB, Armbrust EV [pplacer: linear time maximum-likelihood and Bayesian phylogenetic placement of sequences onto a fixed reference tree.](https://doi.org/10.1186/1471-2105-11-538) _BMC Bioinformatics_ 11, 538 (2010) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/checkm/lineagewf) + +## Version + +```yaml +CHECKM_LINEAGEWF: + - checkm-genome: 1.2.5 +``` diff --git a/developers/modules/clermontyping.mdx b/developers/modules/clermontyping.mdx new file mode 100644 index 00000000..9844f967 --- /dev/null +++ b/developers/modules/clermontyping.mdx @@ -0,0 +1,97 @@ +--- +title: clermontyping +description: "Determine the phylogroup of Escherichia coli isolates." 
+tags: + - bacteria + - escherichia-coli + - typing + - phylotyping + - pcr + - phylogroup + - sample-scope +--- + +# clermontyping + +**Tags:** bacteria escherichia-coli typing phylotyping pcr phylogroup sample-scope + +Determine the phylogroup of Escherichia coli isolates. + +Uses [ClermonTyping](https://github.com/A-BN/ClermonTyping) to perform in silico PCR +detection of specific marker genes (arpA, chuA, yjaA, TspE4.C2). This assigns the +isolate to one of the main *E. coli* phylogroups (A, B1, B2, C, D, E, F, G, or Cryptic). + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited E. coli phylogroup assignment with detected marker genes | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### ClermonTyping Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--clermontyping_threshold` | integer | `0` | Do not use contigs under this size | + +## Used By + +### Subworkflows + +- [clermontyping](/developers/subworkflows/clermontyping) - Predict phylogroups of Escherichia coli from genome assemblies. + +### Workflows + +- [clermontyping](/bactopia-tools/clermontyping) - In silico phylotyping of Escherichia genus. 
+ +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ClermonTyping](https://github.com/happykhan/ClermonTyping) + Beghain J, Bridier-Nahmias A, Le Nagard H, Denamur E, Clermont O. [ClermonTyping: an easy-to-use and accurate in silico method for Escherichia genus strain phylotyping.](https://doi.org/10.1099/mgen.0.000192) Microbial Genomics, 4(7), e000192. (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/clermontyping) + +## Version + +```yaml +CLERMONTYPING: + - clermontyping: 24.02 +``` diff --git a/developers/modules/clonalframeml.mdx b/developers/modules/clonalframeml.mdx new file mode 100644 index 00000000..33edf514 --- /dev/null +++ b/developers/modules/clonalframeml.mdx @@ -0,0 +1,116 @@ +--- +title: clonalframeml +description: "Inference of recombination in bacterial genomes." +tags: + - bacteria + - recombination + - phylogeny + - alignment + - msa + - evolution + - run-scope +--- + +# clonalframeml + +**Tags:** bacteria recombination phylogeny alignment msa evolution run-scope + +Inference of recombination in bacterial genomes. + +Uses [ClonalFrameML](https://github.com/xavierdidelot/ClonalFrameML) to detect recombination +events in bacterial genomes. It corrects the phylogenetic tree for recombination and produces +a "masked" alignment where recombinant regions are removed, allowing for more accurate +phylogenetic inference. 
+ +## Inputs + +``` +record ( + meta: Record, + aln: Path, + nwk: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `aln` | `Path` | Multiple sequence alignment in FASTA format | +| `nwk` | `Path` | Initial phylogenetic tree in Newick format | + +## Outputs + +``` +record ( + meta: Record, + emsim: Path?, + em: Path, + status: Path, + nwk: Path, + fasta: Path, + pos_ref: Path, + masked_aln: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `emsim` | `Path?` | Uncertainty estimation results (if requested) | +| `em` | `Path` | Final parameter estimates from the EM algorithm | +| `status` | `Path` | Tab-delimited list of predicted recombination events (importations) | +| `nwk` | `Path` | The input tree with internal nodes labelled | +| `fasta` | `Path` | Reconstructed ancestral sequences (*.fasta.gz) | +| `pos_ref` | `Path` | Position cross-reference table (*.txt.gz) | +| `masked_aln` | `Path` | The input alignment with recombinant regions masked (*.aln.gz) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### ClonalFrameML Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--clonalframeml_emsim` | integer | `100` | Number of simulations to estimate uncertainty in the EM results | +| `--skip_recombination` | boolean | `false` | Skip ClonalFrameML execution in subworkflows | + +## Used By + +### Subworkflows + +- [clonalframeml](/developers/subworkflows/clonalframeml) - Detect and mask recombination events in bacterial phylogenies. + +### Workflows + +- [pangenome](/bactopia-tools/pangenome) - Pangenome analysis with optional core-genome phylogeny. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ClonalFrameML](https://github.com/xavierdidelot/ClonalFrameML) + Didelot X, Wilson DJ [ClonalFrameML: Efficient Inference of Recombination in Whole Bacterial Genomes.](https://doi.org/10.1371/journal.pcbi.1004041) _PLoS Comput Biol_ 11(2) e1004041 (2015) + +- [maskrc-svg](https://github.com/kwongj/maskrc-svg) + Kwong J [maskrc-svg - Masks recombination as detected by ClonalFrameML or Gubbins and draws an SVG.](https://github.com/kwongj/maskrc-svg) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/clonalframeml) + +## Version + +```yaml +CLONALFRAMEML: + - clonalframeml: 1.12 +``` diff --git a/developers/modules/csvtk_concat.mdx b/developers/modules/csvtk_concat.mdx new file mode 100644 index 00000000..90122960 --- /dev/null +++ b/developers/modules/csvtk_concat.mdx @@ -0,0 +1,216 @@ +--- +title: csvtk_concat +description: "Concatenate multiple CSV or TSV files into a single table." 
+tags: + - utility + - table + - merge + - concat + - csv + - tsv + - csvtk + - run-scope +--- + +# csvtk_concat + +**Tags:** utility table merge concat csv tsv csvtk run-scope + +Concatenate multiple CSV or TSV files into a single table. + +Uses [csvtk concat](https://github.com/shenwei356/csvtk) to merge a list of delimited files +by row. It handles header processing (keeping only one header) and supports format conversion +(e.g., merging CSVs but outputting a TSV). + +## Inputs + +``` +record ( + meta: Record, + csv: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `csv` | `Set` | A list of CSV/TSV files to be concatenated | + +``` +in_format: String +out_format: String +``` + +| Name | Type | Description | +|------|------|-------------| +| `in_format` | `String` | Input format string ('csv', 'tsv', or a specific delimiter character) | +| `out_format` | `String` | Output format string ('csv', 'tsv', or a specific delimiter character) | + +## Outputs + +``` +record ( + meta: Record, + csv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `csv` | `Path` | Concatenated results from all samples in the specified output format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [abritamr](/developers/subworkflows/abritamr) - Identify antimicrobial resistance genes using AMRFinderPlus. +- [agrvate](/developers/subworkflows/agrvate) - Identify Staphylococcus aureus agr locus type and operon variants. 
+- [amrfinderplus](/developers/subworkflows/amrfinderplus) - Find antimicrobial resistance genes and point mutations. +- [ariba](/developers/subworkflows/ariba) - Rapidly identify genes by creating local assemblies from paired-end reads. +- [bactopia_assembler](/developers/subworkflows/bactopia_assembler) - Assemble bacterial genomes using automated assembler selection. +- [bactopia_gather](/developers/subworkflows/bactopia_gather) - Search, validate, gather, and standardize input samples. +- [blastn](/developers/subworkflows/blastn) - Search a nucleotide database using nucleotide query sequences. +- [blastp](/developers/subworkflows/blastp) - Search protein sequences against protein database. +- [blastx](/developers/subworkflows/blastx) - Translate nucleotide sequences and search protein database. +- [bracken](/developers/subworkflows/bracken) - Estimate species abundance from metagenomic reads. +- [btyper3](/developers/subworkflows/btyper3) - In silico taxonomic classification of Bacillus cereus group genomes. +- [busco](/developers/subworkflows/busco) - Assess genome assembly completeness using BUSCO. +- [checkm](/developers/subworkflows/checkm) - Assess metagenome bin completeness using CheckM. +- [checkm2](/developers/subworkflows/checkm2) - Assess metagenome bin completeness using CheckM2. +- [clermontyping](/developers/subworkflows/clermontyping) - Predict phylogroups of Escherichia coli from genome assemblies. +- [defensefinder](/developers/subworkflows/defensefinder) - Systematically search for anti-phage defense systems. +- [ectyper](/developers/subworkflows/ectyper) - In silico prediction of Escherichia coli serotype. +- [emmtyper](/developers/subworkflows/emmtyper) - Predict emm types of Streptococcus pyogenes from genome assemblies. +- [fastani](/developers/subworkflows/fastani) - Calculate Average Nucleotide Identity (ANI) between genomes. +- [gamma](/developers/subworkflows/gamma) - Gene Allele Mutation Microbial Assessment. 
+- [genotyphi](/developers/subworkflows/genotyphi) - Assign genotypes to Salmonella Typhi genomes. +- [gigatyper](/developers/subworkflows/gigatyper) - Run all available MLST schemes for a species against an assembly. +- [gtdb](/developers/subworkflows/gtdb) - Taxonomic classification with the Genome Taxonomy Database. +- [hicap](/developers/subworkflows/hicap) - In silico serotyping of the Haemophilus influenzae capsule locus. +- [hpsuissero](/developers/subworkflows/hpsuissero) - Rapid Haemophilus parasuis serotyping. +- [kleborate](/developers/subworkflows/kleborate) - Genotyping tool for Klebsiella pneumoniae and its related species complex. +- [legsta](/developers/subworkflows/legsta) - In silico Legionella pneumophila Sequence Based Typing. +- [lissero](/developers/subworkflows/lissero) - In silico serotype prediction for Listeria monocytogenes. +- [mashdist](/developers/subworkflows/mashdist) - Calculate Mash distances between sequences and a reference. +- [mcroni](/developers/subworkflows/mcroni) - Scripts for finding and processing promoter variants upstream of mcr-1. +- [meningotype](/developers/subworkflows/meningotype) - Predict serotypes of Neisseria meningitidis from genome assemblies. +- [midas](/developers/subworkflows/midas) - Species-level profiling from metagenomic data. +- [mlst](/developers/subworkflows/mlst) - Determine multilocus sequence types (MLST) from bacterial assemblies. +- [mobsuite](/developers/subworkflows/mobsuite) - Reconstruct and type plasmids from bacterial genome assemblies. +- [mykrobe](/developers/subworkflows/mykrobe) - Predict antibiotic resistance from sequence reads. +- [ngmaster](/developers/subworkflows/ngmaster) - Perform multi-antigen sequence typing of Neisseria gonorrhoeae from genome assemblies. +- [pasty](/developers/subworkflows/pasty) - Predict serogroups of Pseudomonas aeruginosa from assemblies.
+- [pbptyper](/developers/subworkflows/pbptyper) - Predict penicillin binding protein (PBP) types of Streptococcus pneumoniae from genome assemblies. +- [phispy](/developers/subworkflows/phispy) - Prediction of prophages from bacterial genomes. +- [plasmidfinder](/developers/subworkflows/plasmidfinder) - Identify plasmid replicons in bacterial genome assemblies. +- [quast](/developers/subworkflows/quast) - Evaluate assembly quality using QUAST. +- [rgi](/developers/subworkflows/rgi) - Predict antimicrobial resistance from protein or nucleotide data. +- [sccmec](/developers/subworkflows/sccmec) - Identify SCCmec elements in Staphylococcus aureus genomes. +- [scrubber](/developers/subworkflows/scrubber) - Remove contaminant sequences from metagenomic data. +- [seqsero2](/developers/subworkflows/seqsero2) - Predict Salmonella serotypes from genome assemblies. +- [seroba](/developers/subworkflows/seroba) - k-mer based pipeline to identify the serotype of Streptococcus pneumoniae. +- [shigapass](/developers/subworkflows/shigapass) - Predict serotypes of Shigella from assemblies. +- [shigatyper](/developers/subworkflows/shigatyper) - Predict serotypes of Shigella from reads or assemblies. +- [shigeifinder](/developers/subworkflows/shigeifinder) - Predict serotypes of Shigella and EIEC from assemblies. +- [sistr](/developers/subworkflows/sistr) - Salmonella In Silico Typing Resource command-line tool. +- [spatyper](/developers/subworkflows/spatyper) - Predict spa types of Staphylococcus aureus from genome assemblies. +- [ssuissero](/developers/subworkflows/ssuissero) - Predict serotypes of Streptococcus suis from genome assemblies. +- [staphopiasccmec](/developers/subworkflows/staphopiasccmec) - Identify SCCmec elements in Staphylococcus aureus genomes using Staphopia method. +- [stecfinder](/developers/subworkflows/stecfinder) - Identify and serotype Shiga toxin-producing E. coli (STEC) from assemblies. 
+- [sylph](/developers/subworkflows/sylph) - Profile microbial composition using Sylph. +- [tblastn](/developers/subworkflows/tblastn) - Search protein query sequences against nucleotide database. +- [tblastx](/developers/subworkflows/tblastx) - Translate nucleotide query sequences and search nucleotide database. +- [teton](/developers/subworkflows/teton) - Perform taxonomic classification and estimate bacterial genome sizes. + +### Workflows + +- [abritamr](/bactopia-tools/abritamr) - A NATA accredited tool for reporting the presence of antimicrobial resistance genes. +- [agrvate](/bactopia-tools/agrvate) - Rapid identification of Staphylococcus aureus agr locus type and agr operon variants. +- [amrfinderplus](/bactopia-tools/amrfinderplus) - Bactopia Tool: Amrfinderplus. +- [ariba](/bactopia-tools/ariba) - Gene identification through local assemblies. +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [blastn](/bactopia-tools/blastn) - Search against nucleotide BLAST databases using nucleotide queries. +- [blastp](/bactopia-tools/blastp) - Search against protein BLAST databases using protein queries. +- [blastx](/bactopia-tools/blastx) - Search against protein BLAST databases using translated nucleotide queries. +- [bracken](/bactopia-tools/bracken) - Estimate taxonomic abundance of metagenomic samples. +- [btyper3](/bactopia-tools/btyper3) - Taxonomic classification of Bacillus cereus group isolates. +- [busco](/bactopia-tools/busco) - Assessment of genome assembly completeness using evolutionarily informed expectations. +- [checkm](/bactopia-tools/checkm) - Assessment of microbial genome assembly quality. +- [checkm2](/bactopia-tools/checkm2) - Machine learning-based assessment of microbial genome assembly quality. +- [cleanyerreads](/bactopia-pipelines/cleanyerreads) - Quality control and optional host read removal from raw sequencing reads. 
+- [clermontyping](/bactopia-tools/clermontyping) - In silico phylotyping of Escherichia genus. +- [defensefinder](/bactopia-tools/defensefinder) - Systematic identification of anti-phage defense systems. +- [ectyper](/bactopia-tools/ectyper) - In silico prediction of Escherichia coli serotype. +- [emmtyper](/bactopia-tools/emmtyper) - emm-typing of Streptococcus pyogenes assemblies. +- [fastani](/bactopia-tools/fastani) - Fast alignment-free computation of whole-genome Average Nucleotide Identity. +- [gamma](/bactopia-tools/gamma) - Identification, classification, and annotation of translated gene matches. +- [genotyphi](/bactopia-tools/genotyphi) - Salmonella Typhi genotyping with lineage assignment. +- [gigatyper](/bactopia-tools/gigatyper) - Run all available MLST schemes for a species against an assembly. +- [gtdb](/bactopia-tools/gtdb) - Identify marker genes and assign taxonomic classifications using GTDB. +- [hicap](/bactopia-tools/hicap) - Identify cap locus serotype and structure in Haemophilus influenzae assemblies. +- [hpsuissero](/bactopia-tools/hpsuissero) - Serotype prediction of Haemophilus parasuis assemblies. +- [kleborate](/bactopia-tools/kleborate) - Comprehensive screening of Klebsiella genomes for virulence and resistance determinants. +- [legsta](/bactopia-tools/legsta) - Sequence Based Typing (SBT) of Legionella pneumophila. +- [lissero](/bactopia-tools/lissero) - Serogroup typing prediction for Listeria monocytogenes. +- [mashdist](/bactopia-tools/mashdist) - Calculate Mash distances between sequences and reference genomes. +- [mcroni](/bactopia-tools/mcroni) - Sequence variation analysis of mcr-1 genes (mobilized colistin resistance). +- [meningotype](/bactopia-tools/meningotype) - Comprehensive typing of Neisseria meningitidis. +- [midas](/bactopia-tools/midas) - Estimate species abundances from metagenomic samples. +- [mlst](/bactopia-tools/mlst) - Automatic Multi-Locus Sequence Type (MLST) calling from assembled contigs.
+- [mobsuite](/bactopia-tools/mobsuite) - Reconstruction and annotation of plasmids from bacterial genome assemblies. +- [mykrobe](/bactopia-tools/mykrobe) - Antimicrobial resistance detection for specific bacterial species. +- [ngmaster](/bactopia-tools/ngmaster) - Multi-antigen sequence typing of Neisseria gonorrhoeae. +- [pasty](/bactopia-tools/pasty) - In silico serogrouping of Pseudomonas aeruginosa isolates. +- [pbptyper](/bactopia-tools/pbptyper) - Penicillin Binding Protein (PBP) typing for Streptococcus pneumoniae. +- [phispy](/bactopia-tools/phispy) - Prediction of prophages in bacterial and archaeal genomes. +- [plasmidfinder](/bactopia-tools/plasmidfinder) - Bactopia Tool: Plasmidfinder. +- [quast](/bactopia-tools/quast) - Quality assessment of assembled contigs using QUAST. +- [rgi](/bactopia-tools/rgi) - Prediction of antibiotic resistance genes using RGI. +- [sccmec](/bactopia-tools/sccmec) - Typing of SCCmec cassettes in Staphylococcus aureus assemblies. +- [scrubber](/bactopia-tools/scrubber) - Removal of human and contaminant sequences from metagenomic reads. +- [seqsero2](/bactopia-tools/seqsero2) - Salmonella serotype prediction from sequencing reads or assemblies. +- [seroba](/bactopia-tools/seroba) - Serotyping of Streptococcus pneumoniae from Illumina paired-end reads. +- [shigapass](/bactopia-tools/shigapass) - Prediction of Shigella serotypes and differentiation from EIEC. +- [shigatyper](/bactopia-tools/shigatyper) - Rapid determination of Shigella serotypes from sequencing reads. +- [shigeifinder](/bactopia-tools/shigeifinder) - In silico serotype prediction for Shigella and Enteroinvasive E. coli (EIEC). +- [sistr](/bactopia-tools/sistr) - Serovar prediction of Salmonella enterica from assemblies. +- [spatyper](/bactopia-tools/spatyper) - spa typing of Staphylococcus aureus assemblies. +- [ssuissero](/bactopia-tools/ssuissero) - Serotype prediction of Streptococcus suis assemblies. 
+- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. +- [stecfinder](/bactopia-tools/stecfinder) - Serotype identification of Shiga toxin-producing E. coli. +- [sylph](/bactopia-tools/sylph) - Taxonomic profiling by abundance-corrected MinHash. +- [tblastn](/bactopia-tools/tblastn) - Search against translated nucleotide databases using protein queries. +- [tblastx](/bactopia-tools/tblastx) - Search against translated nucleotide databases using translated nucleotide queries. +- [teton](/bactopia-pipelines/teton) - Taxonomic classification and abundance profiling of metagenomic reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/csvtk/concat) + +## Version + +```yaml +CSVTK_CONCAT: + - csvtk: 0.31.0 +``` diff --git a/developers/modules/csvtk_join.mdx b/developers/modules/csvtk_join.mdx new file mode 100644 index 00000000..50b699aa --- /dev/null +++ b/developers/modules/csvtk_join.mdx @@ -0,0 +1,107 @@ +--- +title: csvtk_join +description: "Join two CSV or TSV files based on common fields." +tags: + - utility + - table + - join + - merge + - csv + - tsv + - csvtk + - relational + - run-scope +--- + +# csvtk_join + +**Tags:** utility table join merge csv tsv csvtk relational run-scope + +Join two CSV or TSV files based on common fields. 
+ +Uses [csvtk join](https://github.com/shenwei356/csvtk) to merge two tabular files horizontally +by matching values in a specified key column (similar to a SQL JOIN). It supports inner, left, +right, and outer joins via optional arguments. + +## Inputs + +``` +record ( + meta: Record, + csv1: Path, + csv2: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `csv1` | `Path` | The first CSV/TSV file (Left table) | +| `csv2` | `Path` | The second CSV/TSV file (Right table) | + +``` +in_format: String +out_format: String +key: String +``` + +| Name | Type | Description | +|------|------|-------------| +| `in_format` | `String` | Input format string ('csv', 'tsv', or a specific delimiter character) | +| `out_format` | `String` | Output format string ('csv', 'tsv', or a specific delimiter character) | +| `key` | `String` | The column name(s) or index(es) to use as the join key (e.g., "sample_id" or "1") | + +## Outputs + +``` +record ( + meta: Record, + csv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `csv` | `Path` | The joined tabular file (*.csv or *.tsv) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [teton](/developers/subworkflows/teton) - Perform taxonomic classification and estimate bacterial genome sizes. + +### Workflows + +- [teton](/bactopia-pipelines/teton) - Taxonomic classification and abundance profiling of metagenomic reads. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [csvtk](https://bioinf.shenwei.me/csvtk/) + Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/csvtk/join) + +## Version + +```yaml +CSVTK_JOIN: + - csvtk: 0.31.0 +``` diff --git a/developers/modules/defensefinder_run.mdx b/developers/modules/defensefinder_run.mdx new file mode 100644 index 00000000..4979b444 --- /dev/null +++ b/developers/modules/defensefinder_run.mdx @@ -0,0 +1,122 @@ +--- +title: defensefinder_run +description: "Detect anti-phage defense systems using HMM profiles." +tags: + - bacteria + - defense-systems + - antiviral + - phage + - crispr + - restriction-modification + - hmm + - macsyfinder + - sample-scope +--- + +# defensefinder_run + +**Tags:** bacteria defense-systems antiviral phage crispr restriction-modification hmm macsyfinder sample-scope + +Detect anti-phage defense systems using HMM profiles. + +Uses [DefenseFinder](https://github.com/mdmparis/defense-finder) to systematically search +protein sequences for known antiviral defense systems (e.g., CRISPR-Cas, Restriction-Modification, +TA systems, CBASS) using MacSyFinder and a dedicated database of HMM models. + +:::note[Database Required] +Requires the DefenseFinder HMM database to be available. 
+::: + +## Inputs + +``` +record ( + meta: Record, + faa: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `faa` | `Path` | Protein sequences in FASTA format (amino acids) | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | Directory containing the DefenseFinder models database | + +## Outputs + +``` +record ( + meta: Record, + genes_tsv: Path, + hmmer_tsv: Path, + systems_tsv: Path, + proteins: Path?, + proteins_index: Path?, + macsydata_raw: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `genes_tsv` | `Path` | Tab-delimited list of detected defense genes | +| `hmmer_tsv` | `Path` | Tab-delimited list of HMMER hits used for detection | +| `systems_tsv` | `Path` | Tab-delimited summary of detected defense systems | +| `proteins` | `Path?` | Protein sequences of the detected defense genes | +| `proteins_index` | `Path?` | Index file for the protein sequences | +| `macsydata_raw` | `Path?` | Compressed tarball of raw MacSyFinder data | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### defense-finder Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--defensefinder_coverage` | number | `0.4` | Minimal percentage of coverage for each profile | +| `--defensefinder_dbtype` | string | `ordered_replicon` | The macsyfinder --db-type option (choices: `ordered_replicon`, `gembase`, `unordered`) | + +## Used By + +### Subworkflows + +- [defensefinder](/developers/subworkflows/defensefinder) - Systematically search for anti-phage defense systems. + +### Workflows + +- [defensefinder](/bactopia-tools/defensefinder) - Systematic identification of anti-phage defense systems. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [DefenseFinder](https://github.com/mdmparis/defense-finder) + Tesson F, Hervé A, Mordret E, Touchon M, d'Humières C, Cury J, Bernheim A [Systematic and quantitative view of the antiviral arsenal of prokaryotes.](https://doi.org/10.1038/s41467-022-30269-9) Nature Communications, 13(1), 2561. (2022) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/defensefinder/run) + +## Version + +```yaml +DEFENSEFINDER_RUN: + - defense-finder: 2.0.1 +``` diff --git a/developers/modules/defensefinder_update.mdx b/developers/modules/defensefinder_update.mdx new file mode 100644 index 00000000..f7632ac8 --- /dev/null +++ b/developers/modules/defensefinder_update.mdx @@ -0,0 +1,74 @@ +--- +title: defensefinder_update +description: "Download and package the DefenseFinder and CasFinder model databases."
+tags: + - bacteria + - defense + - database + - download + - hmm + - casfinder + - crispr + - run-scope +--- + +# defensefinder_update + +**Tags:** bacteria defense database download hmm casfinder crispr run-scope + +Download and package the DefenseFinder and CasFinder model databases. + +Fetches the latest HMM profiles from the [DefenseFinder](https://github.com/mdmparis/defense-finder-models) +and [CasFinder](https://github.com/macsy-models/CasFinder) repositories, then packages them +into a single tarball for use by the DefenseFinder module. + +:::note[Internet Required] +This process requires an active internet connection to fetch the models from GitHub. +::: + +## Outputs + +``` +record ( + db: Path, + logs: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `db` | `Path` | A compressed tarball containing both DefenseFinder and CasFinder models | +| `logs` | `Set` | Optional program specific log files | + +## Parameters + +## Used By + +### Subworkflows + +- [defensefinder](/developers/subworkflows/defensefinder) - Systematically search for anti-phage defense systems. + +### Workflows + +- [defensefinder](/bactopia-tools/defensefinder) - Systematic identification of anti-phage defense systems. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [DefenseFinder](https://github.com/mdmparis/defense-finder) + Tesson F, Hervé A, Mordret E, Touchon M, d'Humières C, Cury J, Bernheim A [Systematic and quantitative view of the antiviral arsenal of prokaryotes.](https://doi.org/10.1038/s41467-022-30269-9) Nature Communications, 13(1), 2561. 
(2022) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/defensefinder/update) + +## Version + +```yaml +DEFENSEFINDER_UPDATE: + - defense-finder: 2.0.1 +``` diff --git a/developers/modules/ectyper.mdx b/developers/modules/ectyper.mdx new file mode 100644 index 00000000..c372e3cb --- /dev/null +++ b/developers/modules/ectyper.mdx @@ -0,0 +1,102 @@ +--- +title: ectyper +description: "Predict *Escherichia coli* serotype (O and H antigens)." +tags: + - bacteria + - escherichia-coli + - serotype + - o-antigen + - h-antigen + - typing + - sample-scope +--- + +# ectyper + +**Tags:** bacteria escherichia-coli serotype o-antigen h-antigen typing sample-scope + +Predict *Escherichia coli* serotype (O and H antigens). + +Uses [ECTyper](https://github.com/phac-nml/ectyper) to identify the O-antigen (lipopolysaccharide) +and H-antigen (flagella) genes in *E. coli* genome assemblies via BLAST. It provides a +standardized serotype prediction (e.g., O157:H7). + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + txt: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited E. coli serotype predictions (O and H antigens) | +| `txt` | `Path` | BLAST allele details for serotype determination | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### ECTyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--ectyper_opid` | integer | `90` | Percent identity required for an O antigen allele match | +| `--ectyper_opcov` | integer | `90` | Minimum percent coverage required for an O antigen allele match | +| `--ectyper_hpid` | integer | `95` | Percent identity required for an H antigen allele match | +| `--ectyper_hpcov` | integer | `50` | Minimum percent coverage required for an H antigen allele match | + +## Used By + +### Subworkflows + +- [ectyper](/developers/subworkflows/ectyper) - In silico prediction of Escherichia coli serotype. + +### Workflows + +- [ectyper](/bactopia-tools/ectyper) - In silico prediction of Escherichia coli serotype. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ECTyper](https://github.com/phac-nml/ecoli_serotyping) + Laing C, Bessonov K, Sung S, La Rose C [ECTyper - In silico prediction of _Escherichia coli_ serotype](https://github.com/phac-nml/ecoli_serotyping) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/ectyper) + +## Version + +```yaml +ECTYPER: + - ectyper: 2.0.0 +``` diff --git a/developers/modules/eggnog_download.mdx b/developers/modules/eggnog_download.mdx new file mode 100644 index 00000000..6c1ce6e1 --- /dev/null +++ b/developers/modules/eggnog_download.mdx @@ -0,0 +1,89 @@ +--- +title: eggnog_download +description: "Download the eggNOG database for functional annotation." 
+tags: + - eggnog + - database + - download + - annotation + - functional + - orthology + - sample-scope +--- + +# eggnog_download + +**Tags:** eggnog database download annotation functional orthology sample-scope + +Download the eggNOG database for functional annotation. + +Fetches the pre-computed orthology data and Diamond database required by +[eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper). This includes the massive +protein database and taxonomic information needed for accurate ortholog assignment. + +:::note[Internet & Storage Required] +This process requires an active internet connection and significant disk space (often >50GB) +to store the uncompressed database files. +::: + +## Outputs + +``` +record ( + db: Path?, + db_tarball: Path?, + logs: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `db` | `Path?` | The eggNOG database directory (Diamond database and taxonomy info) | +| `db_tarball` | `Path?` | A compressed tarball of the database (if requested via parameters) | +| `logs` | `Set` | Optional program specific log files | + +## Parameters + +### eggNOG Downloader Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--eggnog_db` | string | | Tarball or path to eggNOG databases | +| `--download_eggnog` | boolean | `false` | Required if downloading latest eggNOG database, will overwrite existing databases. 
| +| `--eggnog_save_as_tarball` | string | | Save the eggNOG database as a single tarball | +| `--eggnog_skip_diamond` | boolean | `false` | Do not install the diamond database | +| `--eggnog_install_mmseq` | boolean | `false` | Install the MMseqs2 database | +| `--eggnog_install_pfam` | boolean | `false` | Install the Pfam database, required for de novo annotation or realignment | +| `--eggnog_install_hmm` | boolean | `false` | Install the HMMER database specified with `--eggnog_hmmer_taxid` | +| `--eggnog_hmmer_taxid` | integer | `2` | Tax ID of eggNOG HMM database to download | + +## Used By + +### Subworkflows + +- [eggnog](/developers/subworkflows/eggnog) - Functional annotation through orthology assignment. + +### Workflows + +- [eggnog](/bactopia-tools/eggnog) - Functional annotation of proteins using orthologous groups and phylogenies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) + Huerta-Cepas J, Forslund K, Coelho LP, Szklarczyk D, Jensen LJ, von Mering C, Bork P [Fast Genome-Wide Functional Annotation through Orthology Assignment by eggNOG-Mapper.](http://dx.doi.org/10.1093/molbev/msx148) _Mol. Biol. Evol._ 34, 2115-2122 (2017) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/eggnog/download) + +## Version + +```yaml +EGGNOG_DOWNLOAD: + - eggnog-mapper: 2.1.13 +``` diff --git a/developers/modules/eggnog_mapper.mdx b/developers/modules/eggnog_mapper.mdx new file mode 100644 index 00000000..ddb63e5a --- /dev/null +++ b/developers/modules/eggnog_mapper.mdx @@ -0,0 +1,130 @@ +--- +title: eggnog_mapper +description: "Functional annotation of proteins using eggNOG orthology data."
+tags: + - functional-annotation + - orthology + - cog + - kegg + - go + - proteins + - eggnog + - sample-scope +--- + +# eggnog_mapper + +**Tags:** functional-annotation orthology cog kegg go proteins eggnog sample-scope + +Functional annotation of proteins using eggNOG orthology data. + +Uses [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) to assign functional annotations +to protein sequences. It uses precomputed orthologous groups (OGs) to infer functions like +COG categories, KEGG pathways, GO terms, and CAZymes with high precision. + +:::note[Database Required] +Requires the eggNOG database (including the diamond database and taxonomic data) to be available. +::: + +## Inputs + +``` +record ( + meta: Record, + faa: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `faa` | `Path` | Protein sequences in FASTA format (amino acids) | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | Directory or compressed tarball containing the eggNOG database | + +## Outputs + +``` +record ( + meta: Record, + hits: Path, + seed_orthologs: Path, + annotations: Path, + xlsx: Path?, + orthologs: Path?, + genepred: Path?, + gff: Path?, + no_anno: Path?, + pfam: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `hits` | `Path` | Raw search hits (Diamond/MMseqs2) against the eggNOG database | +| `seed_orthologs` | `Path` | List of identified seed orthologs used for annotation transfer | +| `annotations` | `Path` | Main tab-delimited annotation file (COGs, KEGG, GO, etc.) 
| +| `xlsx` | `Path?` | Excel format of the annotations file | +| `orthologs` | `Path?` | List of fine-grained orthologs | +| `genepred` | `Path?` | Predicted gene sequences | +| `gff` | `Path?` | Annotations in GFF format | +| `no_anno` | `Path?` | FASTA file of sequences that failed to be annotated | +| `pfam` | `Path?` | Raw PFAM domain hits | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### eggNOG Mapper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--eggnog_genepred` | string | `search` | Method to use for gene prediction (choices: `search`, `prodigal`) | +| `--eggnog_mode` | string | `diamond` | Method to search against eggNOG sequences (choices: `diamond`, `hmmer`, `mmseqs`, `cache`, `no_search`) | + +## Used By + +### Subworkflows + +- [eggnog](/developers/subworkflows/eggnog) - Functional annotation through orthology assignment. + +### Workflows + +- [eggnog](/bactopia-tools/eggnog) - Functional annotation of proteins using orthologous groups and phylogenies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) + Huerta-Cepas J, Forslund K, Coelho LP, Szklarczyk D, Jensen LJ, von Mering C, Bork P [Fast Genome-Wide Functional Annotation through Orthology Assignment by eggNOG-Mapper.](http://dx.doi.org/10.1093/molbev/msx148) _Mol. Biol.
Evol._ 34, 2115-2122 (2017) + +- [DIAMOND](https://github.com/bbuchfink/diamond) + Buchfink B, Xie C, Huson DH [Fast and sensitive protein alignment using DIAMOND.](http://dx.doi.org/10.1038/nmeth.3176) _Nat. Methods._ 12, 59-60 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/eggnog/mapper) + +## Version + +```yaml +EGGNOG_MAPPER: + - eggnog-mapper: 2.1.13 +``` diff --git a/developers/modules/emmtyper.mdx b/developers/modules/emmtyper.mdx new file mode 100644 index 00000000..524b1f6c --- /dev/null +++ b/developers/modules/emmtyper.mdx @@ -0,0 +1,109 @@ +--- +title: emmtyper +description: "*emm*-typing of *Streptococcus pyogenes* (Group A Strep) assemblies." +tags: + - bacteria + - streptococcus-pyogenes + - gas + - typing + - emm + - virulence + - m-protein + - sample-scope +--- + +# emmtyper + +**Tags:** bacteria streptococcus-pyogenes gas typing emm virulence m-protein sample-scope + +*emm*-typing of *Streptococcus pyogenes* (Group A Strep) assemblies. + +Uses [emmtyper](https://github.com/MDU-PHL/emmtyper) to assign *emm* types to +*S. pyogenes* genomes by blasting the assembly against a database of specific +M protein gene (*emm*) subtypes. + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +``` +blastdb: Path? 
+``` + +| Name | Type | Description | +|------|------|-------------| +| `blastdb` | `Path?` | Path to a custom *emm* cluster BLAST database | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited summary of the assigned emm type and cluster | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### emmtyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--emmtyper_wf` | string | `blast` | Workflow for emmtyper to use. (choices: `blast`, `pcr`) | +| `--emmtyper_blastdb` | string | | Path to custom EMM BLAST DB. | +| `--emmtyper_cluster_distance` | integer | `500` | Distance between clusters of matches to consider them as different clusters | +| `--emmtyper_percid` | integer | `95` | Minimal percent identity of sequence | + +## Used By + +### Subworkflows + +- [emmtyper](/developers/subworkflows/emmtyper) - Predict emm types of Streptococcus pyogenes from genome assemblies. + +### Workflows + +- [emmtyper](/bactopia-tools/emmtyper) - emm-typing of Streptococcus pyogenes assemblies. + +## Citations + +If you use this in your analysis, please cite the following.
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [emmtyper](https://github.com/MDU-PHL/emmtyper) + Tan A, Seemann T, Lacey D, Davies M, Mcintyre L, Frost H, Williamson D, Gonçalves da Silva A [emmtyper - emm Automatic Isolate Labeller](https://github.com/MDU-PHL/emmtyper) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/emmtyper) + +## Version + +```yaml +EMMTYPER: + - emmtyper: 0.2.0 +``` diff --git a/developers/modules/fastani.mdx b/developers/modules/fastani.mdx new file mode 100644 index 00000000..df31f892 --- /dev/null +++ b/developers/modules/fastani.mdx @@ -0,0 +1,103 @@ +--- +title: fastani +description: "Compute whole-genome Average Nucleotide Identity (ANI)." +tags: + - fastani + - ani + - average-nucleotide-identity + - taxonomy + - genomic-distance + - comparison + - run-scope +--- + +# fastani + +**Tags:** fastani ani average-nucleotide-identity taxonomy genomic-distance comparison run-scope + +Compute whole-genome Average Nucleotide Identity (ANI). + +Uses [FastANI](https://github.com/ParBLiSS/FastANI) to perform alignment-free computation +of ANI between the input query genomes and a reference genome. This is the standard method +for species definition (typically >95% ANI) and is much faster than traditional BLAST-based approaches. 
+ +## Inputs + +``` +record ( + meta: Record, + query: Set, + reference: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `query` | `Set` | One or more assembled contigs in FASTA format (Query genomes) | +| `reference` | `Path` | The reference genome assembly in FASTA format to compare against | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited summary of ANI scores, matched fragments, and total fragments | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### fastANI Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--fastani_reference` | string | | Path to reference genome in FASTA format | +| `--fastani_kmer` | integer | `16` | kmer size (≤ 16) for ANI calculation | +| `--fastani_min_fraction` | number | `0.2` | Minimum fraction of genome that must be shared for trusting ANI. | +| `--fastani_frag_len` | integer | `3000` | Fragment length | +| `--fastani_skip_pairwise` | boolean | `false` | Only use RefSeq or local assemblies for ANI calculations | + +## Used By + +### Subworkflows + +- [fastani](/developers/subworkflows/fastani) - Calculate Average Nucleotide Identity (ANI) between genomes. + +### Workflows + +- [fastani](/bactopia-tools/fastani) - Fast alignment-free computation of whole-genome Average Nucleotide Identity. + +## Citations + +If you use this in your analysis, please cite the following.
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [FastANI](https://github.com/ParBLiSS/FastANI) + Jain C, Rodriguez-R LM, Phillippy AM, Konstantinidis KT, Aluru S [High throughput ANI analysis of 90K prokaryotic genomes reveals clear species boundaries.](http://dx.doi.org/10.1038/s41467-018-07641-9) _Nat. Commun._ 9, 5114 (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/fastani) + +## Version + +```yaml +FASTANI: + - fastani: 1.34 +``` diff --git a/developers/modules/gamma.mdx b/developers/modules/gamma.mdx new file mode 100644 index 00000000..0c61fe04 --- /dev/null +++ b/developers/modules/gamma.mdx @@ -0,0 +1,113 @@ +--- +title: gamma +description: "Identification, classification, and annotation of translated gene matches." +tags: + - gene-finding + - annotation + - homology + - alignment + - gamma + - psl + - sample-scope +--- + +# gamma + +**Tags:** gene-finding annotation homology alignment gamma psl sample-scope + +Identification, classification, and annotation of translated gene matches. + +Uses [GAMMA](https://github.com/rastanton/GAMMA) (Gene Allele Mutation Microbial Assessment) +to identify and annotate coding sequences in an assembly that match a specific gene database. +It is particularly useful for detecting specific targets like antimicrobial resistance genes +or virulence factors while accounting for potential mutations. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | The reference gene database in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + gamma: Path, + psl: Path, + gff: Path?, + fasta: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `gamma` | `Path` | Main GAMMA output file containing annotated gene matches | +| `psl` | `Path` | Raw alignment details in PSL format | +| `gff` | `Path?` | Gene matches in GFF3 format | +| `fasta` | `Path?` | Extracted nucleotide sequences of the matched genes | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### GAMMA Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--gamma_db` | string | | A gene database (FASTA) for GAMMA | +| `--gamma_percent_identity` | integer | `90` | The minimum nucleotide sequence identity % used by the Blat search | + +## Used By + +### Subworkflows + +- [gamma](/developers/subworkflows/gamma) - Gene Allele Mutation Microbial Assessment. + +### Workflows + +- [gamma](/bactopia-tools/gamma) - Identification, classification, and annotation of translated gene matches. + +## Citations + +If you use this in your analysis, please cite the following.
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [GAMMA](https://github.com/rastanton/GAMMA) + Stanton RA, Vlachos N, Halpin AL [GAMMA: a tool for the rapid identification, classification, and annotation of translated gene matches from sequencing data.](https://doi.org/10.1093/bioinformatics/btab607) _Bioinformatics_ (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/gamma) + +## Version + +```yaml +GAMMA: + - gamma: 2.2 +``` diff --git a/developers/modules/genotyphi_parse.mdx b/developers/modules/genotyphi_parse.mdx new file mode 100644 index 00000000..42353e14 --- /dev/null +++ b/developers/modules/genotyphi_parse.mdx @@ -0,0 +1,100 @@ +--- +title: genotyphi_parse +description: "Parse Mykrobe results to genotype *Salmonella* Typhi." +tags: + - bacteria + - salmonella + - typhi + - genotyping + - mykrobe + - parser + - sample-scope +--- + +# genotyphi_parse + +**Tags:** bacteria salmonella typhi genotyping mykrobe parser sample-scope + +Parse Mykrobe results to genotype *Salmonella* Typhi. + +Uses scripts from [GenoTyphi](https://github.com/katholt/genotyphi) to parse the JSON output +from Mykrobe. It assigns isolates to specific *S.* Typhi genotypes (e.g., 4.3.1) based on +the presence of specific SNPs defined in the GenoTyphi scheme. 
+ +## Inputs + +``` +record ( + meta: Record, + json: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `json` | `Path` | The JSON output file generated by Mykrobe | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited report containing the assigned GenoTyphi genotype | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### GenoTyphi Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--genotyphi_mykrobe_opts` | string | | Extra Mykrobe options in quotes | + +## Used By + +### Subworkflows + +- [genotyphi](/developers/subworkflows/genotyphi) - Assign genotypes to Salmonella Typhi genomes. + +### Workflows + +- [genotyphi](/bactopia-tools/genotyphi) - Salmonella Typhi genotyping with lineage assignment. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [GenoTyphi](https://github.com/katholt/genotyphi) + Wong VK, Baker S, Connor TR, Pickard D, Page AJ, Dave J, Murphy N, Holliman R, Sefton A, Millar M, Dyson ZA, Dougan G, Holt KE, & International Typhoid Consortium.
[An extended genotyping framework for Salmonella enterica serovar Typhi, the cause of human typhoid](https://doi.org/10.1038/ncomms12827) _Nature Communications_ 7, 12827. (2016) + +- [McCortex](https://github.com/mcveanlab/mccortex) + Turner I, Garimella KV, Iqbal Z, McVean G [Integrating long-range connectivity information into de Bruijn graphs.](http://dx.doi.org/10.1093/bioinformatics/bty157) _Bioinformatics_ 34, 2556-2565 (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/genotyphi/parse) + +## Version + +```yaml +GENOTYPHI_PARSE: + - mykrobe: 0.13.0 +``` diff --git a/developers/modules/gigatyper.mdx b/developers/modules/gigatyper.mdx new file mode 100644 index 00000000..20852d5c --- /dev/null +++ b/developers/modules/gigatyper.mdx @@ -0,0 +1,92 @@ +--- +title: gigatyper +description: "Run all available MLST schemes for a species against an assembly" +tags: + - mlst + - typing + - multi-scheme + - sample-scope +--- + +# gigatyper + +**Tags:** mlst typing multi-scheme sample-scope + +Run all available MLST schemes for a species against an assembly + +Uses [GigaTyper](https://github.com/rpetit3/gigatyper) to run all available MLST schemes for a species against an assembly. + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | MLST results across all schemes | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g.
`.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### GigaTyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--gigatyper_species` | string | | Force a specific species for scheme selection | + +## Used By + +### Subworkflows + +- [gigatyper](/developers/subworkflows/gigatyper) - Run all available MLST schemes for a species against an assembly + +### Workflows + +- [gigatyper](/bactopia-tools/gigatyper) - Run all available MLST schemes for a species against an assembly + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [GigaTyper](https://github.com/rpetit3/gigatyper) + Petit III RA, Fearing T, Groves E [GigaTyper: Why choose one scheme when you can flex them all?](https://github.com/rpetit3/gigatyper) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/gigatyper) + +## Version + +```yaml +GIGATYPER: + - gigatyper: 1.0.0 +``` diff --git a/developers/modules/gtdbtk_classifywf.mdx b/developers/modules/gtdbtk_classifywf.mdx new file mode 100644 index 00000000..2c67702b --- /dev/null +++ b/developers/modules/gtdbtk_classifywf.mdx @@ -0,0 +1,117 @@ +--- +title: gtdbtk_classifywf +description: "Taxonomic classification of bacterial and archaeal genomes using GTDB-Tk." +tags: + - taxonomy + - classification + - phylogeny + - gtdb + - bacteria + - archaea + - marker-genes + - sample-scope +--- + +# gtdbtk_classifywf + +**Tags:** taxonomy classification phylogeny gtdb bacteria archaea marker-genes sample-scope + +Taxonomic classification of bacterial and archaeal genomes using GTDB-Tk.
+ +Uses [GTDB-Tk](https://github.com/Ecogenomics/GTDBTk) to assign objective taxonomic +classifications to genome assemblies based on the [Genome Taxonomy Database](https://gtdb.ecogenomic.org/). +It identifies marker genes, aligns them, and places the genome into the reference tree to determine taxonomy. + +:::note[Database Required] +Requires the massive GTDB-Tk database (~60GB+) to be available. +::: + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | Path (or Set of paths) to the GTDB-Tk reference database | + +## Outputs + +``` +record ( + meta: Record, + bac_tsv: Path?, + ar_tsv: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `bac_tsv` | `Path?` | The bacterial classification summary file containing the taxonomic assignment | +| `ar_tsv` | `Path?` | The archaeal classification summary file containing the taxonomic assignment | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
`.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### GTDB Classify Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--gtdb_min_af` | number | `0.65` | Minimum alignment fraction to consider closest genome | +| `--gtdb_min_perc_aa` | integer | `10` | Filter genomes with an insufficient percentage of AA in the MSA | +| `--force_gtdb` | boolean | `false` | Continue processing if an error occurs on a single genome | + +## Used By + +### Subworkflows + +- [gtdb](/developers/subworkflows/gtdb) - Taxonomic classification with the Genome Taxonomy Database. + +### Workflows + +- [gtdb](/bactopia-tools/gtdb) - Identify marker genes and assign taxonomic classifications using GTDB. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [GTDB-Tk](https://github.com/Ecogenomics/GTDBTk) + Chaumeil PA, Mussig AJ, Hugenholtz P, Parks DH [GTDB-Tk: a toolkit to classify genomes with the Genome Taxonomy Database.](https://doi.org/10.1093/bioinformatics/btz848) _Bioinformatics_ (2019) + +- [pplacer](https://github.com/matsen/pplacer) + Matsen FA, Kodner RB, Armbrust EV [pplacer: linear time maximum-likelihood and Bayesian phylogenetic placement of sequences onto a fixed reference tree.](https://doi.org/10.1186/1471-2105-11-538) _BMC Bioinformatics_ 11, 538 (2010) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/gtdbtk/classifywf) + +## Version + +```yaml +GTDBTK_CLASSIFYWF: + - gtdbtk: 2.7.1 +``` diff --git a/developers/modules/gtdbtk_download.mdx b/developers/modules/gtdbtk_download.mdx new file mode 100644 index 00000000..ded02527 --- /dev/null +++
b/developers/modules/gtdbtk_download.mdx @@ -0,0 +1,85 @@ +--- +title: gtdbtk_download +description: "Download and configure the GTDB-Tk reference database." +tags: + - gtdb + - taxonomy + - database + - download + - setup + - bacteria + - archaea + - sample-scope +--- + +# gtdbtk_download + +**Tags:** gtdb taxonomy database download setup bacteria archaea sample-scope + +Download and configure the GTDB-Tk reference database. + +Uses the official `download-db.sh` script to fetch the latest Genome Taxonomy Database (GTDB) +files required by [GTDB-Tk](https://github.com/Ecogenomics/GTDBTk). It automatically uncompresses +the data and verifies the installation using `gtdbtk check_install`. + +:::note[Internet & Storage Required] +This process requires an active internet connection and significant disk space (~60GB+ uncompressed) +to store the database files. +::: + +## Outputs + +``` +record ( + db: Path?, + db_tarball: Path?, + logs: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `db` | `Path?` | The directory containing the uncompressed GTDB-Tk database files | +| `db_tarball` | `Path?` | A compressed tarball of the database (if requested via parameters) | +| `logs` | `Set` | Optional program specific log files | + +## Parameters + +### GTDB-Tk Setup Database Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--gtdb` | string | | Tarball or path of a GTDB database. If a database is not found, you must use '--download_gtdb' | +| `--download_gtdb` | boolean | `false` | Download the latest GTDB database, even if it exists | +| `--gtdb_save_as_tarball` | boolean | `false` | Download the latest GTDB database, and save it in a single tarball | + +## Used By + +### Subworkflows + +- [gtdb](/developers/subworkflows/gtdb) - Taxonomic classification with the Genome Taxonomy Database.
+ +### Workflows + +- [gtdb](/bactopia-tools/gtdb) - Identify marker genes and assign taxonomic classifications using GTDB. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [GTDB-Tk](https://github.com/Ecogenomics/GTDBTk) + Chaumeil PA, Mussig AJ, Hugenholtz P, Parks DH [GTDB-Tk: a toolkit to classify genomes with the Genome Taxonomy Database.](https://doi.org/10.1093/bioinformatics/btz848) _Bioinformatics_ (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/gtdbtk/download) + +## Version + +```yaml +GTDBTK_DOWNLOAD: + - gtdbtk: 2.7.1 +``` diff --git a/developers/modules/gubbins.mdx b/developers/modules/gubbins.mdx new file mode 100644 index 00000000..b6794163 --- /dev/null +++ b/developers/modules/gubbins.mdx @@ -0,0 +1,100 @@ +--- +title: gubbins +description: "Detect recombination and construct a recombination-free phylogeny." +tags: + - bacteria + - recombination + - phylogeny + - alignment + - msa + - evolution + - snp + - run-scope +--- + +# gubbins + +**Tags:** bacteria recombination phylogeny alignment msa evolution snp run-scope + +Detect recombination and construct a recombination-free phylogeny. + +Uses [Gubbins](https://github.com/nickjcroucher/gubbins) (Genealogies Unbiased By recomBinations In Nucleotide Sequences) +to iteratively identify and mask recombinant regions in a multiple sequence alignment. It generates +a phylogenetic tree based only on vertically inherited point mutations. 
+ +## Inputs + +``` +record ( + meta: Record, + aln: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `aln` | `Path` | Multiple sequence alignment in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + masked_aln: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `masked_aln` | `Path` | The input alignment with recombinant regions masked (*.masked.aln.gz) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Gubbins Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--gubbins_iterations` | integer | `5` | Maximum number of iterations | +| `--gubbins_opts` | string | | Extra Gubbins options in quotes | +| `--skip_recombination` | boolean | `false` | Skip Gubbins execution in subworkflows | + +## Used By + +### Subworkflows + +- [gubbins](/developers/subworkflows/gubbins) - Detect and filter recombination regions in bacterial alignments. + +### Workflows + +- [snippy](/bactopia-tools/snippy) - Rapid haplotype variant calling and core genome alignment. + +## Citations + +If you use this in your analysis, please cite the following.
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Gubbins](https://github.com/nickjcroucher/gubbins) + Croucher NJ, Page AJ, Connor TR, Delaney AJ, Keane JA, Bentley SD, Parkhill J, Harris SR [Rapid phylogenetic analysis of large samples of recombinant bacterial whole genome sequences using Gubbins.](https://doi.org/10.1093/nar/gku1196) _Nucleic Acids Research_ 43(3), e15. (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/gubbins) + +## Version + +```yaml +GUBBINS: + - gubbins: 3.4.3 +``` diff --git a/developers/modules/hicap.mdx b/developers/modules/hicap.mdx new file mode 100644 index 00000000..384da61e --- /dev/null +++ b/developers/modules/hicap.mdx @@ -0,0 +1,114 @@ +--- +title: hicap +description: "Predict *Haemophilus influenzae* capsule serotype." +tags: + - bacteria + - haemophilus-influenzae + - serotype + - capsule + - typing + - nthi + - sample-scope +--- + +# hicap + +**Tags:** bacteria haemophilus-influenzae serotype capsule typing nthi sample-scope + +Predict *Haemophilus influenzae* capsule serotype. + +Uses [hicap](https://github.com/scwatts/hicap) to identify the capsule locus in *H. influenzae* +genome assemblies. It predicts the serotype (a, b, c, d, e, f, or Non-Typeable/NTHi) and +can optionally generate visualizations of the locus structure. + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +``` +database_dir: Path? +model_fp: Path? 
+``` + +| Name | Type | Description | +|------|------|-------------| +| `database_dir` | `Path?` | Path to a custom hicap reference database directory | +| `model_fp` | `Path?` | Path to a custom Prodigal training model file | + +## Outputs + +``` +record ( + meta: Record, + gbff: Path?, + svg: Path?, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `gbff` | `Path?` | GenBank file containing the annotated capsule locus region | +| `svg` | `Path?` | SVG visualization of the capsule locus gene arrangement | +| `tsv` | `Path` | Tab-delimited summary of the predicted serotype and locus coverage | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### hicap Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--hicap_gene_coverage` | number | `0.8` | Minimum percentage coverage to consider a single gene complete | +| `--hicap_gene_identity` | number | `0.7` | Minimum percentage identity to consider a single gene complete | +| `--hicap_broken_gene_length` | integer | `60` | Minimum length to consider a broken gene | +| `--hicap_broken_gene_identity` | number | `0.8` | Minimum percentage identity to consider a broken gene | + +## Used By + +### Subworkflows + +- [hicap](/developers/subworkflows/hicap) - In silico serotyping of the Haemophilus influenzae capsule locus. + +### Workflows + +- [hicap](/bactopia-tools/hicap) - Identify cap locus serotype and structure in Haemophilus influenzae assemblies. + +## Citations + +If you use this in your analysis, please cite the following.
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [hicap](https://github.com/scwatts/hicap) + Watts SC, Holt KE [hicap: in silico serotyping of the Haemophilus influenzae capsule locus.](https://doi.org/10.1128/JCM.00190-19) _Journal of Clinical Microbiology_ JCM.00190-19 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/hicap) + +## Version + +```yaml +HICAP: + - hicap: 1.0.4 +``` diff --git a/developers/modules/hpsuissero.mdx b/developers/modules/hpsuissero.mdx new file mode 100644 index 00000000..9b136d94 --- /dev/null +++ b/developers/modules/hpsuissero.mdx @@ -0,0 +1,91 @@ +--- +title: hpsuissero +description: "Predict *Haemophilus parasuis* serotype." +tags: + - bacteria + - haemophilus-parasuis + - glaesserella-parasuis + - serotype + - typing + - capsule + - sample-scope +--- + +# hpsuissero + +**Tags:** bacteria haemophilus-parasuis glaesserella-parasuis serotype typing capsule sample-scope + +Predict *Haemophilus parasuis* serotype. + +Uses [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero) to predict the serotype of +*Haemophilus parasuis* (syn. *Glaesserella parasuis*) assemblies. It detects specific +capsule loci markers to assign one of the known serovars.
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited Haemophilus parasuis serotype prediction results | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [hpsuissero](/developers/subworkflows/hpsuissero) - Rapid Haemophilus parasuis serotyping. + +### Workflows + +- [hpsuissero](/bactopia-tools/hpsuissero) - Serotype prediction of Haemophilus parasuis assemblies. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero) + Lui J [HpsuisSero: Rapid _Haemophilus parasuis_ serotyping](https://github.com/jimmyliu1326/HpsuisSero) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/hpsuissero) + +## Version + +```yaml +HPSUISSERO: + - hpsuissero: 1.0.1 +``` diff --git a/developers/modules/index.mdx b/developers/modules/index.mdx new file mode 100644 index 00000000..1c622e94 --- /dev/null +++ b/developers/modules/index.mdx @@ -0,0 +1,110 @@ +--- +title: Modules +description: All available Bactopia modules +slug: /modules +--- + +# Modules + +Bactopia includes 98 modules -- individual processes that perform specific analysis tasks. You can also [browse by tag](/developers/tags). + +| Module | Description | +|--------|-------------| +| [abricate_run](/developers/modules/abricate_run) | Mass screening of contigs for antimicrobial and virulence genes. | +| [abricate_summary](/developers/modules/abricate_summary) | Summarize Abricate screening results. | +| [abritamr_run](/developers/modules/abritamr_run) | Detect antimicrobial resistance and virulence genes. | +| [agrvate](/developers/modules/agrvate) | Determine the agr locus type and operon variants in Staphylococcus aureus. | +| [amrfinderplus_run](/developers/modules/amrfinderplus_run) | Identify antimicrobial resistance and virulence genes in gene or protein sequences. | +| [amrfinderplus_update](/developers/modules/amrfinderplus_update) | Download and index the latest AMRFinder+ database. | +| [ariba_getref](/developers/modules/ariba_getref) | Download and prepare reference databases for ARIBA analysis. | +| [ariba_run](/developers/modules/ariba_run) | Identify genes by local assembly of reads. 
| +| [bactopia_assembler](/developers/modules/bactopia_assembler) | Assemble bacterial genomes using short read, long read, or hybrid strategies. | +| [bactopia_datasets](/developers/modules/bactopia_datasets) | Download pre-compiled datasets required by Bactopia. | +| [bactopia_gather](/developers/modules/bactopia_gather) | Search, validate, gather, or simulate input samples. | +| [bactopia_qc](/developers/modules/bactopia_qc) | Automated quality control, error correction, and read subsampling. | +| [bactopia_sketcher](/developers/modules/bactopia_sketcher) | Create genomic sketches and perform rapid taxonomic classification. | +| [bactopia_teton](/developers/modules/bactopia_teton) | Predict genome size and route samples based on taxonomic classification. | +| [bakta_download](/developers/modules/bakta_download) | Download the Bakta annotation database. | +| [bakta_run](/developers/modules/bakta_run) | Rapid and standardized annotation of bacterial genomes and plasmids. | +| [blast_blastn](/developers/modules/blast_blastn) | Search a nucleotide database using a nucleotide query. | +| [blast_blastp](/developers/modules/blast_blastp) | Search a protein database using a protein query. | +| [blast_blastx](/developers/modules/blast_blastx) | Search a protein database using a translated nucleotide query. | +| [blast_tblastn](/developers/modules/blast_tblastn) | Search a translated nucleotide database using a protein query. | +| [blast_tblastx](/developers/modules/blast_tblastx) | Search a translated nucleotide database using a translated nucleotide query. | +| [bracken](/developers/modules/bracken) | Taxonomic classification and abundance estimation. | +| [btyper3](/developers/modules/btyper3) | In silico typing and characterization of *Bacillus cereus* group genomes. | +| [busco](/developers/modules/busco) | Assess genome assembly completeness using single-copy orthologs. 
| +| [checkm2_download](/developers/modules/checkm2_download) | Download the pre-trained CheckM2 database. | +| [checkm2_predict](/developers/modules/checkm2_predict) | Assess genome quality using machine learning. | +| [checkm_lineagewf](/developers/modules/checkm_lineagewf) | Assess genome quality using lineage-specific marker sets. | +| [clermontyping](/developers/modules/clermontyping) | Determine the phylogroup of Escherichia coli isolates. | +| [clonalframeml](/developers/modules/clonalframeml) | Inference of recombination in bacterial genomes. | +| [csvtk_concat](/developers/modules/csvtk_concat) | Concatenate multiple CSV or TSV files into a single table. | +| [csvtk_join](/developers/modules/csvtk_join) | Join two CSV or TSV files based on common fields. | +| [defensefinder_run](/developers/modules/defensefinder_run) | Detect anti-phage defense systems using HMM profiles. | +| [defensefinder_update](/developers/modules/defensefinder_update) | Download and package the DefenseFinder and CasFinder model databases. | +| [ectyper](/developers/modules/ectyper) | Predict *Escherichia coli* serotype (O and H antigens). | +| [eggnog_download](/developers/modules/eggnog_download) | Download the eggNOG database for functional annotation. | +| [eggnog_mapper](/developers/modules/eggnog_mapper) | Functional annotation of proteins using eggNOG orthology data. | +| [emmtyper](/developers/modules/emmtyper) | *emm*-typing of *Streptococcus pyogenes* (Group A Strep) assemblies. | +| [fastani](/developers/modules/fastani) | Compute whole-genome Average Nucleotide Identity (ANI). | +| [gamma](/developers/modules/gamma) | Identification, classification, and annotation of translated gene matches. | +| [genotyphi_parse](/developers/modules/genotyphi_parse) | Parse Mykrobe results to genotype *Salmonella* Typhi. 
| +| [gigatyper](/developers/modules/gigatyper) | Run all available MLST schemes for a species against an assembly | +| [gtdbtk_classifywf](/developers/modules/gtdbtk_classifywf) | Taxonomic classification of bacterial and archaeal genomes using GTDB-Tk. | +| [gtdbtk_download](/developers/modules/gtdbtk_download) | Download and configure the GTDB-Tk reference database. | +| [gubbins](/developers/modules/gubbins) | Detect recombination and construct a recombination-free phylogeny. | +| [hicap](/developers/modules/hicap) | Predict *Haemophilus influenzae* capsule serotype. | +| [hpsuissero](/developers/modules/hpsuissero) | Predict *Haemophilus parasuis* serotype. | +| [iqtree](/developers/modules/iqtree) | Efficient phylogenomic inference using Maximum Likelihood. | +| [ismapper](/developers/modules/ismapper) | Identify insertion sites and orientation of mobile genetic elements. | +| [kleborate](/developers/modules/kleborate) | Genotyping and screening of *Klebsiella* genome assemblies. | +| [kraken2](/developers/modules/kraken2) | Taxonomic classification and host filtering of sequence reads. | +| [legsta](/developers/modules/legsta) | In silico Sequence Based Typing (SBT) of *Legionella pneumophila*. | +| [lissero](/developers/modules/lissero) | Predict *Listeria monocytogenes* serogroup. | +| [mash_dist](/developers/modules/mash_dist) | Calculate genomic distances using MinHash sketches. | +| [mashtree](/developers/modules/mashtree) | Rapid alignment-free phylogenomic tree construction. | +| [mcroni](/developers/modules/mcroni) | Detect sequence variations in the *mcr-1* colistin resistance gene. | +| [meningotype](/developers/modules/meningotype) | Serotyping and finetyping of *Neisseria meningitidis*. | +| [merlin_dist](/developers/modules/merlin_dist) | Identify species to trigger genus-specific downstream analyses (Merlin). | +| [midas_download](/developers/modules/midas_download) | Download the MIDAS reference database. 
| +| [midas_species](/developers/modules/midas_species) | Estimate bacterial species abundance from metagenomic reads. | +| [mlst](/developers/modules/mlst) | Automatic Multi-Locus Sequence Typing (MLST) of genome assemblies. | +| [mobsuite_recon](/developers/modules/mobsuite_recon) | Reconstruct and type plasmids from a bacterial genome assembly. | +| [mykrobe_predict](/developers/modules/mykrobe_predict) | Predict Antimicrobial Resistance (AMR) for supported bacterial species. | +| [ncbigenomedownload](/developers/modules/ncbigenomedownload) | Download assemblies and annotation files from NCBI's Assembly database. | +| [ngmaster](/developers/modules/ngmaster) | Serotyping and Multi-Antigen Sequence Typing (MAST) of *Neisseria gonorrhoeae*. | +| [nohuman_download](/developers/modules/nohuman_download) | Download the nohuman database for human read removal. | +| [nohuman_run](/developers/modules/nohuman_run) | Remove human reads from sequencing data. | +| [panaroo_run](/developers/modules/panaroo_run) | Fast and scalable bacterial pangenome analysis using a graph-based approach. | +| [pasty](/developers/modules/pasty) | Predict O-antigen serogroup of Pseudomonas aeruginosa isolates. | +| [pbptyper](/developers/modules/pbptyper) | Predict Penicillin Binding Protein (PBP) type of *Streptococcus pneumoniae* assemblies. | +| [phispy](/developers/modules/phispy) | Predict prophage regions integrated into bacterial genomes. | +| [pirate](/developers/modules/pirate) | Pangenome Identification and Reconciliation Analysis Tool for Epidemiology (PIRATE). | +| [plasmidfinder](/developers/modules/plasmidfinder) | Identify plasmid replicon types in bacterial sequences and assemblies. | +| [pneumocat](/developers/modules/pneumocat) | Capsular typing of Streptococcus pneumoniae from Illumina reads. | +| [prokka](/developers/modules/prokka) | Annotate prokaryotic genomes. | +| [quast](/developers/modules/quast) | Quality Assessment Tool for Genome Assemblies. 
| +| [rgi_heatmap](/developers/modules/rgi_heatmap) | Create heatmaps of resistance gene presence/absence. | +| [rgi_main](/developers/modules/rgi_main) | Predict antibiotic resistance from assemblies. | +| [roary](/developers/modules/roary) | Rapid large-scale prokaryote pan genome analysis. | +| [sccmec](/developers/modules/sccmec) | Identify SCCmec elements in Staphylococcus aureus genomes. | +| [scoary](/developers/modules/scoary) | Pan-genome wide association studies. | +| [seqsero2](/developers/modules/seqsero2) | Salmonella serotype prediction from genome sequencing data. | +| [seroba_run](/developers/modules/seroba_run) | k-mer based Streptococcus pneumoniae serotyping. | +| [shigapass](/developers/modules/shigapass) | Predict Shigella serotypes and differentiate Shigella/EIEC. | +| [shigatyper](/developers/modules/shigatyper) | Shigella serotype from Illumina or Oxford Nanopore reads. | +| [shigeifinder](/developers/modules/shigeifinder) | Shigella and EIEC serotyping from assemblies. | +| [sistr](/developers/modules/sistr) | Serovar prediction of Salmonella assemblies. | +| [snippy_core](/developers/modules/snippy_core) | Core-SNP alignment from Snippy outputs. | +| [snippy_run](/developers/modules/snippy_run) | Rapid haploid variant calling and core genome alignment. | +| [snpdists](/developers/modules/snpdists) | Create a SNP distance matrix from a multiple sequence alignment. | +| [spatyper](/developers/modules/spatyper) | Finding spa types in Staphylococcus aureus. | +| [srahumanscrubber_initdb](/developers/modules/srahumanscrubber_initdb) | Initialize human read removal database for SRA Human Scrubber. | +| [srahumanscrubber_scrub](/developers/modules/srahumanscrubber_scrub) | Scrub human reads from FASTQ files. | +| [ssuissero](/developers/modules/ssuissero) | Serotype prediction of Streptococcus suis assemblies. | +| [staphopiasccmec](/developers/modules/staphopiasccmec) | Primer based SCCmec typing of S. aureus genomes. 
| +| [stecfinder](/developers/modules/stecfinder) | Serotype of Shigatoxin producing E. coli using reads/assemblies. | +| [sylph_profile](/developers/modules/sylph_profile) | Profile metagenome samples against a database using Sylph. | +| [tbprofiler_collate](/developers/modules/tbprofiler_collate) | Collate TB-Profiler results from multiple samples. | +| [tbprofiler_profile](/developers/modules/tbprofiler_profile) | Detect resistance and lineages of Mycobacterium tuberculosis genomes. | diff --git a/developers/modules/iqtree.mdx b/developers/modules/iqtree.mdx new file mode 100644 index 00000000..b1dcb624 --- /dev/null +++ b/developers/modules/iqtree.mdx @@ -0,0 +1,104 @@ +--- +title: iqtree +description: "Efficient phylogenomic inference using Maximum Likelihood." +tags: + - phylogeny + - tree + - maximum-likelihood + - bootstrap + - model-selection + - iqtree + - run-scope +--- + +# iqtree + +**Tags:** phylogeny tree maximum-likelihood bootstrap model-selection iqtree run-scope + +Efficient phylogenomic inference using Maximum Likelihood. + +Uses [IQ-TREE](https://iqtree.github.io/) to construct a maximum-likelihood phylogenetic tree +from a multiple sequence alignment. It automatically determines the best-fit substitution model +(via ModelFinder) and assesses branch support using the Ultrafast Bootstrap approximation. 
+ +## Inputs + +``` +record ( + meta: Record, + aln: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `aln` | `Path` | Multiple sequence alignment in FASTA, PHYLIP, or NEXUS format | + +## Outputs + +``` +record ( + meta: Record, + aln: Path, + nwk: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `aln` | `Path` | The input alignment (passed through) | +| `nwk` | `Path` | The final maximum-likelihood phylogenetic tree (Newick format) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### IQ-TREE Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--iqtree_model` | string | `HKY` | Substitution model name | +| `--iqtree_bb` | integer | `1000` | Ultrafast bootstrap replicates | +| `--iqtree_alrt` | integer | `1000` | SH-like approximate likelihood ratio test replicates | +| `--iqtree_asr` | boolean | `false` | Ancestral state reconstruction by empirical Bayes | +| `--skip_phylogeny` | boolean | `false` | Skip IQ-TREE execution in subworkflows | + +## Used By + +### Subworkflows + +- [iqtree](/developers/subworkflows/iqtree) - Construct maximum likelihood phylogenetic trees from alignments. + +### Workflows + +- [pangenome](/bactopia-tools/pangenome) - Pangenome analysis with optional core-genome phylogeny. +- [snippy](/bactopia-tools/snippy) - Rapid haploid variant calling and core genome alignment. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [IQ-TREE](https://github.com/Cibiv/IQ-TREE) + Nguyen L-T, Schmidt HA, von Haeseler A, Minh BQ [IQ-TREE: A fast and effective stochastic algorithm for estimating maximum likelihood phylogenies.](https://doi.org/10.1093/molbev/msu300) _Mol. Biol. Evol._ 32:268-274 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/iqtree) + +## Version + +```yaml +IQTREE: + - iqtree: 3.1.1 +``` diff --git a/developers/modules/ismapper.mdx b/developers/modules/ismapper.mdx new file mode 100644 index 00000000..fa066156 --- /dev/null +++ b/developers/modules/ismapper.mdx @@ -0,0 +1,112 @@ +--- +title: ismapper +description: "Identify insertion sites and orientation of mobile genetic elements." +tags: + - bacteria + - mobile-elements + - insertion-sequences + - mapping + - structural-variation + - ismapper + - sample-scope +--- + +# ismapper + +**Tags:** bacteria mobile-elements insertion-sequences mapping structural-variation ismapper sample-scope + +Identify insertion sites and orientation of mobile genetic elements. + +Uses [ISMapper](https://github.com/jhawkey/IS_mapper) to identify the position and orientation +of specific insertion sequences (IS) in a bacterial genome. It works by mapping paired-end reads +to a library of IS queries and a reference genome to determine where the IS elements are located +relative to the reference coordinates. 
+ +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2) where each read slot is Path + +## Inputs + +``` +record ( + meta: Record, + r1: Path, + r2: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path` | Illumina R1 reads (paired-end) | +| `r2` | `Path` | Illumina R2 reads (paired-end) | + +``` +reference: Path +query: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `reference` | `Path` | Reference genome in GenBank format (*.gbk) to map insertion sites against | +| `query` | `Path` | FASTA file containing the insertion sequences to search for | + +## Outputs + +``` +record ( + meta: Record, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### ISMapper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--reference` | string | | Reference genome for typing against in GenBank format | +| `--insertions` | string | | Multifasta file with insertion sequence(s) to be mapped to | + +## Used By + +### Subworkflows + +- [ismapper](/developers/subworkflows/ismapper) - Identify transposase insertion sites in bacterial genomes. + +### Workflows + +- [ismapper](/bactopia-tools/ismapper) - Identify insertion sequence positions in bacterial genomes. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ISMapper](https://github.com/jhawkey/IS_mapper) + Hawkey J, Hamidian M, Wick RR, Edwards DJ, Billman-Jacobe H, Hall RM, Holt KE [ISMapper: identifying transposase insertion sites in bacterial genomes from short read sequence data](http://dx.doi.org/10.1186/s12864-015-1860-2). _BMC Genomics_ 16, 667 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/ismapper) + +## Version + +```yaml +ISMAPPER: + - ismapper: 2.0.2 +``` diff --git a/developers/modules/kleborate.mdx b/developers/modules/kleborate.mdx new file mode 100644 index 00000000..6c35b62b --- /dev/null +++ b/developers/modules/kleborate.mdx @@ -0,0 +1,109 @@ +--- +title: kleborate +description: "Genotyping and screening of *Klebsiella* genome assemblies." +tags: + - bacteria + - klebsiella + - amr + - virulence + - typing + - mlst + - serotype + - k-locus + - o-locus + - sample-scope +--- + +# kleborate + +**Tags:** bacteria klebsiella amr virulence typing mlst serotype k-locus o-locus sample-scope + +Genotyping and screening of *Klebsiella* genome assemblies. + +Uses [Kleborate](https://github.com/katholt/Kleborate) to screen *Klebsiella* assemblies +for Multi-Locus Sequence Type (MLST), species identity, antimicrobial resistance determinants, +virulence plasmids (e.g., *ybt*, *iuc*, *iro*), and capsular serotype prediction (K and O loci). + +:::note[Database Bundled] +Kleborate bundles the required databases for species identification, MLST, +and virulence/resistance gene detection. 
+::: + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited Kleborate results with species, MLST, virulence, and resistance predictions | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Kleborate Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--kleborate_preset` | string | `kpsc` | Preset module to use for Kleborate (choices: `kpsc`, `kosc`, `escherichia`) | +| `--kleborate_opts` | string | | Extra options in quotes for Kleborate | + +## Used By + +### Subworkflows + +- [kleborate](/developers/subworkflows/kleborate) - Genotyping tool for Klebsiella pneumoniae and its related species complex. + +### Workflows + +- [kleborate](/bactopia-tools/kleborate) - Comprehensive screening of Klebsiella genomes for virulence and resistance determinants. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kleborate](https://github.com/katholt/Kleborate) + Lam MMC, Wick RR, Watts SC, Cerdeira LT, Wyres KL, Holt KE [A genomic surveillance framework and genotyping tool for Klebsiella pneumoniae and its related species complex.](https://doi.org/10.1038/s41467-021-24448-3) _Nat Commun_ 12, 4188 (2021) + +- [Kaptive](https://github.com/katholt/Kaptive) + Wyres KL, Wick RR, Gorrie C, Jenney A, Follador R, Thomson NR, Holt KE [Identification of Klebsiella capsule synthesis loci from whole genome data.](https://doi.org/10.1099/mgen.0.000102) _Microbial genomics_ 2(12) (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/kleborate) + +## Version + +```yaml +KLEBORATE: + - kleborate: 3.2.4 +``` diff --git a/developers/modules/kraken2.mdx b/developers/modules/kraken2.mdx new file mode 100644 index 00000000..d9594ef1 --- /dev/null +++ b/developers/modules/kraken2.mdx @@ -0,0 +1,131 @@ +--- +title: kraken2 +description: "Taxonomic classification and host filtering of sequence reads." +tags: + - metagenomics + - taxonomy + - classification + - contamination + - scrubbing + - k-mer + - lca + - sample-scope +--- + +# kraken2 + +**Tags:** metagenomics taxonomy classification contamination scrubbing k-mer lca sample-scope + +Taxonomic classification and host filtering of sequence reads. + +Uses [Kraken2](https://github.com/DerrickWood/kraken2) to assign taxonomic labels to short +DNA reads by examining exact k-mer matches against a large reference database. It uses the +Lowest Common Ancestor (LCA) algorithm to provide high-precision classification, making it +ideal for metagenomics or removing host contamination (scrubbing). 
+ +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? + +:::note[Database Required] +Requires a standard Kraken2 database (directory or tarball). Memory usage depends on database size (Standard ~50GB). +::: + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path? +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT/PacBio) - not typically used by Kraken2 | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | Kraken2 database (Directory or compressed tarball) | + +## Outputs + +``` +record ( + meta: Record, + special_meta: Record, + kraken2_report: Path, + scrub_report: Path?, + classified: Set, + unclassified: Set, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `special_meta` | `Record` | A simplified metadata record for internal use | +| `kraken2_report` | `Path` | Standard Kraken2 report containing taxonomic abundance counts | +| `scrub_report` | `Path?` | Summary report of reads removed during host scrubbing | +| `classified` | `Set` | Reads assigned to a taxon in the database (FASTQ) | +| `unclassified` | `Set` | Reads NOT assigned to any taxon (FASTQ) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Kraken2 Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--kraken2_db` | string | | A single tarball or path to a Kraken2 formatted database | +| `--kraken2_confidence` | number | `0.0` | Confidence score threshold between 0 and 1 | +| `--kraken2_use_mpa_style` | boolean | `false` | Format report output like Kraken 1's kraken-mpa-report | +| `--kraken2_report_zero_counts` | boolean | `false` | Report counts for ALL taxa, even if counts are zero | + +## Used By + +### Subworkflows + +- [kraken2](/developers/subworkflows/kraken2) - Classify metagenomic reads using Kraken2. + +### Workflows + +- [kraken2](/bactopia-tools/kraken2) - Taxonomic classification of metagenomic sequence reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/kraken2) + +## Version + +```yaml +KRAKEN2: + - bactopia-teton: 1.1.3 +``` diff --git a/developers/modules/legsta.mdx b/developers/modules/legsta.mdx new file mode 100644 index 00000000..59fce185 --- /dev/null +++ b/developers/modules/legsta.mdx @@ -0,0 +1,98 @@ +--- +title: legsta +description: "In silico Sequence Based Typing (SBT) of *Legionella pneumophila*." 
+tags: + - bacteria + - legionella + - pneumophila + - typing + - sbt + - mlst + - serogroup + - sample-scope +--- + +# legsta + +**Tags:** bacteria legionella pneumophila typing sbt mlst serogroup sample-scope + +In silico Sequence Based Typing (SBT) of *Legionella pneumophila*. + +Uses [Legsta](https://github.com/tseemann/legsta) to determine the Sequence Based Type (SBT) +of *L. pneumophila* isolates. It aligns the assembly against the standard 7-gene schema +(flaA, pilE, asd, mip, mompS, proA, neuA) to assign allele numbers and the resulting Sequence Type. + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited Legionella pneumophila SBT results with allele numbers and sequence type | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### legsta Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--legsta_noheader` | boolean | `false` | Don't print header row | + +## Used By + +### Subworkflows + +- [legsta](/developers/subworkflows/legsta) - In silico Legionella pneumophila Sequence Based Typing. + +### Workflows + +- [legsta](/bactopia-tools/legsta) - Sequence Based Typing (SBT) of Legionella pneumophila. 
+ +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [legsta](https://github.com/tseemann/legsta) + Seemann T [legsta: In silico Legionella pneumophila Sequence Based Typing](https://github.com/tseemann/legsta) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/legsta) + +## Version + +```yaml +LEGSTA: + - legsta: 0.5.2 +``` diff --git a/developers/modules/lissero.mdx b/developers/modules/lissero.mdx new file mode 100644 index 00000000..7aadd9a3 --- /dev/null +++ b/developers/modules/lissero.mdx @@ -0,0 +1,100 @@ +--- +title: lissero +description: "Predict *Listeria monocytogenes* serogroup." +tags: + - bacteria + - listeria + - monocytogenes + - serotype + - serogroup + - typing + - pcr + - sample-scope +--- + +# lissero + +**Tags:** bacteria listeria monocytogenes serotype serogroup typing pcr sample-scope + +Predict *Listeria monocytogenes* serogroup. + +Uses [LisSero](https://github.com/MDU-PHL/LisSero) to predict the serogroup of +*L. monocytogenes* isolates. It simulates a PCR assay by detecting specific marker genes +(lmo1118, lmo0737, ORF2110, ORF2819, prs) to assign the isolate to one of the major +molecular serogroups (IIa, IIb, IIc, IVb). 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited LisSero results with predicted serogroup and marker gene detection | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### LisSero Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--lissero_min_id` | number | `95.0` | Minimum percent identity to accept a match | +| `--lissero_min_cov` | number | `95.0` | Minimum coverage of the gene to accept a match | + +## Used By + +### Subworkflows + +- [lissero](/developers/subworkflows/lissero) - In silico serotype prediction for Listeria monocytogenes. + +### Workflows + +- [lissero](/bactopia-tools/lissero) - Serogroup typing prediction for Listeria monocytogenes. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [LisSero](https://github.com/MDU-PHL/LisSero) + Kwong J, Zhang J, Seemann T, Horan K, Gonçalves da Silva A [LisSero - _In silico_ serotype prediction for _Listeria monocytogenes_](https://github.com/MDU-PHL/LisSero) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/lissero) + +## Version + +```yaml +LISSERO: + - lissero: 0.4.10 +``` diff --git a/developers/modules/mash_dist.mdx b/developers/modules/mash_dist.mdx new file mode 100644 index 00000000..f91630ac --- /dev/null +++ b/developers/modules/mash_dist.mdx @@ -0,0 +1,106 @@ +--- +title: mash_dist +description: "Calculate genomic distances using MinHash sketches." +tags: + - mash + - distance + - minhash + - ani + - comparison + - taxonomy + - sample-scope +--- + +# mash_dist + +**Tags:** mash distance minhash ani comparison taxonomy sample-scope + +Calculate genomic distances using MinHash sketches. + +Uses [Mash](https://github.com/marbl/Mash) to compute the distance between query sequences +and a reference database. It uses MinHash sketches to rapidly estimate the Jaccard index, +providing a fast approximation of Average Nucleotide Identity (ANI). 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | FASTA, FASTQ, or Mash sketch file to be queried | + +``` +reference: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `reference` | `Path` | The reference file (FASTA, FASTQ, or Mash sketch) to compare against | + +## Outputs + +``` +record ( + meta: Record, + dist: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `dist` | `Path` | A tab-delimited summary of the Mash distances and p-values | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### mashdist Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mash_sketch` | string | | The reference sequence as a Mash Sketch (.msh file) | +| `--full_merlin` | boolean | `false` | Go full Merlin and run all species-specific tools, no matter the Mash distance | + +## Used By + +### Subworkflows + +- [mashdist](/developers/subworkflows/mashdist) - Calculate Mash distances between sequences and a reference. + +### Workflows + +- [mashdist](/bactopia-tools/mashdist) - Calculate Mash distances between sequences and reference genomes. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mash](https://github.com/marbl/Mash) + Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). _Genome Biol_ 17, 132 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/mash/dist) + +## Version + +```yaml +MASH_DIST: + - mash: 2.3 +``` diff --git a/developers/modules/mashtree.mdx b/developers/modules/mashtree.mdx new file mode 100644 index 00000000..5ca800d2 --- /dev/null +++ b/developers/modules/mashtree.mdx @@ -0,0 +1,107 @@ +--- +title: mashtree +description: "Rapid alignment-free phylogenomic tree construction." +tags: + - phylogeny + - tree + - mash + - minhash + - alignment-free + - distance + - clustering + - neighbor-joining + - run-scope +--- + +# mashtree + +**Tags:** phylogeny tree mash minhash alignment-free distance clustering neighbor-joining run-scope + +Rapid alignment-free phylogenomic tree construction. + +Uses [Mashtree](https://github.com/lskatz/mashtree) to create a phylogenetic tree +from genome sequences (FASTA, FASTQ, or GenBank) using MinHash distances. It computes +pairwise distances between all inputs and uses the Neighbor-Joining algorithm to +cluster genomes, effectively creating a "distance-based" tree without full alignment. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Set` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + nwk: Path, + tsv: Path, + sketches: Set, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `nwk` | `Path` | The final phylogenetic tree in Newick format (*.dnd) | +| `tsv` | `Path` | The pairwise distance matrix used to build the tree (*.tsv) | +| `sketches` | `Set` | Directory containing the individual Mash sketches | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Mashtree Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mashtree_sortorder` | string | `ABC` | For neighbor-joining, the sort order can make a difference. (choices: `ABC`, `random`, `input-order`) | +| `--mashtree_genomesize` | integer | `5000000` | Genome size of the input samples | +| `--mashtree_mindepth` | integer | `5` | If mindepth is zero, then it will be chosen in a smart but slower method, to discard lower-abundance kmers. | +| `--mashtree_kmerlength` | integer | `21` | Hashes will be based on strings of this many nucleotides | + +## Used By + +### Subworkflows + +- [mashtree](/developers/subworkflows/mashtree) - Create phylogenetic trees using Mash distances. + +### Workflows + +- [mashtree](/bactopia-tools/mashtree) - Rapid phylogenetic tree construction using Mash distances. 
+ +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mashtree](https://github.com/lskatz/mashtree) + Katz LS, Griswold T, Morrison S, Caravas J, Zhang S, den Bakker HC, Deng X, Carleton HA [Mashtree: a rapid comparison of whole genome sequence files.](https://doi.org/10.21105/joss.01762) _Journal of Open Source Software_, 4(44), 1762 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/mashtree) + +## Version + +```yaml +MASHTREE: + - mashtree: 1.4.6 +``` diff --git a/developers/modules/mcroni.mdx b/developers/modules/mcroni.mdx new file mode 100644 index 00000000..a5a5b4ac --- /dev/null +++ b/developers/modules/mcroni.mdx @@ -0,0 +1,95 @@ +--- +title: mcroni +description: "Detect sequence variations in the *mcr-1* colistin resistance gene." +tags: + - bacteria + - amr + - resistance + - colistin + - mcr-1 + - plasmid + - variation + - sample-scope +--- + +# mcroni + +**Tags:** bacteria amr resistance colistin mcr-1 plasmid variation sample-scope + +Detect sequence variations in the *mcr-1* colistin resistance gene. + +Uses [Mcroni](https://github.com/liampshaw/mcroni) to screen genome assemblies for the +*mcr-1* gene (Mobilized Colistin Resistance). It extracts the gene sequence and reports +any variations (mutations) relative to the reference, which is critical for tracking +resistance to colistin, a last-resort antibiotic. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + fa: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited mcr-1 gene variation results | +| `fa` | `Path?` | Extracted mcr-1 gene sequence in FASTA format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [mcroni](/developers/subworkflows/mcroni) - Scripts for finding and processing promoter variants upstream of mcr-1. + +### Workflows + +- [mcroni](/bactopia-tools/mcroni) - Sequence variation analysis of mcr-1 genes (mobilized colistin resistance). + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [mcroni](https://github.com/liampshaw/mcroni) + Shaw L [mcroni: Scripts for finding and processing promoter variants upstream of mcr-1](https://github.com/liampshaw/mcroni) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/mcroni) + +## Version + +```yaml +MCRONI: + - mcroni: 1.0.4 +``` diff --git a/developers/modules/meningotype.mdx b/developers/modules/meningotype.mdx new file mode 100644 index 00000000..6687fa6a --- /dev/null +++ b/developers/modules/meningotype.mdx @@ -0,0 +1,105 @@ +--- +title: meningotype +description: "Serotyping and finetyping of *Neisseria meningitidis*." +tags: + - bacteria + - neisseria-meningitidis + - serotype + - serogroup + - finetyping + - pora + - feta + - capsule + - sample-scope +--- + +# meningotype + +**Tags:** bacteria neisseria-meningitidis serotype serogroup finetyping pora feta capsule sample-scope + +Serotyping and finetyping of *Neisseria meningitidis*. + +Uses [Meningotype](https://github.com/MDU-PHL/meningotype) to predict the serogroup (capsule), +PorA variable regions, and FetA variable regions of *N. meningitidis* assemblies. This provides +a comprehensive molecular typing profile (e.g., B:P1.7-2,4:F1-5) used in surveillance. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited meningotype results with serogroup, PorA, and FetA predictions | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### meningotype Parameters + +You can use these parameters to fine-tune your meningotype analysis. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--meningotype_finetype` | boolean | `false` | Perform porA and fetA fine typing | +| `--meningotype_porB` | boolean | `false` | Perform porB sequence typing (NEIS2020) | +| `--meningotype_bast` | boolean | `false` | Perform Bexsero antigen sequence typing (BAST) | +| `--meningotype_mlst` | boolean | `false` | Perform MLST | +| `--meningotype_all` | boolean | `false` | Perform MLST, porA, fetA, porB, BAST typing | + +## Used By + +### Subworkflows + +- [meningotype](/developers/subworkflows/meningotype) - Predict serotypes of Neisseria meningitidis from genome assemblies. + +### Workflows + +- [meningotype](/bactopia-tools/meningotype) - Comprehensive typing of Neisseria meningitidis. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [meningotype](https://github.com/MDU-PHL/meningotype) + Kwong JC, Gonçalves da Silva A, Stinear TP, Howden BP, & Seemann T [meningotype: in silico typing for _Neisseria meningitidis_.](https://github.com/MDU-PHL/meningotype) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/meningotype) + +## Version + +```yaml +MENINGOTYPE: + - meningotype: 0.8.6b +``` diff --git a/developers/modules/merlin_dist.mdx b/developers/modules/merlin_dist.mdx new file mode 100644 index 00000000..c05e0315 --- /dev/null +++ b/developers/modules/merlin_dist.mdx @@ -0,0 +1,148 @@ +--- +title: merlin_dist +description: "Identify species to trigger genus-specific downstream analyses (Merlin)." +tags: + - merlin + - mash + - routing + - logic + - genus-specific + - automation + - sample-scope +--- + +# merlin_dist + +**Tags:** merlin mash routing logic genus-specific automation sample-scope + +Identify species to trigger genus-specific downstream analyses (Merlin). + +This is a specialized process for the [Merlin](https://bactopia.github.io/latest/bactopia-tools/merlin/) +workflow. It runs `mash dist` against a reference database and parses the results to detect +specific genera (e.g., *Salmonella*, *Staphylococcus*). Based on the detected genus, it +outputs data into specific channels to trigger targeted tools (e.g., finding *Salmonella* triggers Sistr). + +## Inputs + +``` +record ( + meta: Record, + fna: Path, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path? 
+) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT/PacBio) | + +``` +reference: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `reference` | `Path` | The reference Mash database to screen against | + +## Outputs + +``` +record ( + meta: Record, + fna: Path, + r1: Path, + r2: Path, + se: Path, + lr: Path, + escherichia: Path?, + haemophilus: Path?, + klebsiella: Path?, + legionella: Path?, + listeria: Path?, + mycobacterium: Path?, + neisseria: Path?, + pseudomonas: Path?, + salmonella: Path?, + staphylococcus: Path?, + streptococcus: Path?, + genus: Set, + dist: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `fna` | `Path` | Passthrough of assembled contigs | +| `r1` | `Path` | Passthrough of Illumina R1 reads | +| `r2` | `Path` | Passthrough of Illumina R2 reads | +| `se` | `Path` | Passthrough of single-end reads | +| `lr` | `Path` | Passthrough of long reads | +| `escherichia` | `Path?` | Conditional marker file triggering Escherichia analysis tools | +| `haemophilus` | `Path?` | Conditional marker file triggering Haemophilus analysis tools | +| `klebsiella` | `Path?` | Conditional marker file triggering Klebsiella analysis tools | +| `legionella` | `Path?` | Conditional marker file triggering Legionella analysis tools | +| `listeria` | `Path?` | Conditional marker file triggering Listeria analysis tools | +| `mycobacterium` | `Path?` | Conditional marker file triggering Mycobacterium analysis tools | +| `neisseria` | `Path?` | Conditional marker file 
triggering Neisseria analysis tools | +| `pseudomonas` | `Path?` | Conditional marker file triggering Pseudomonas analysis tools | +| `salmonella` | `Path?` | Conditional marker file triggering Salmonella analysis tools | +| `staphylococcus` | `Path?` | Conditional marker file triggering Staphylococcus analysis tools | +| `streptococcus` | `Path?` | Conditional marker file triggering Streptococcus analysis tools | +| `genus` | `Set` | Marker file indicating the detected genus | +| `dist` | `Path` | Raw Mash distance results | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### mashdist Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mash_sketch` | string | | The reference sequence as a Mash Sketch (.msh file) | +| `--full_merlin` | boolean | `false` | Go full Merlin and run all species-specific tools, no matter the Mash distance | + +## Used By + +### Subworkflows + +- [merlindist](/developers/subworkflows/merlindist) - Identify species from assembly and read data using Mash distances. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mash](https://github.com/marbl/Mash) + Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). 
_Genome Biol_ 17, 132 (2016) + +- [NCBI RefSeq Database](https://www.ncbi.nlm.nih.gov/refseq/) + O'Leary NA, Wright MW, Brister JR, Ciufo S, Haddad D, McVeigh R, Rajput B, Robbertse B, Smith-White B, Ako-Adjei D, Astashyn A, Badretdin A, Bao Y, Blinkova O, Brover V, Chetvernin V, Choi J, Cox E, Ermolaeva O, Farrell CM, Goldfarb T, Gupta T, Haft D, Hatcher E, Hlavina W, Joardar VS, Kodali VK, Li W, Maglott D, Masterson P, McGarvey KM, Murphy MR, O'Neill K, Pujar S, Rangwala SH, Rausch D, Riddick LD, Schoch C, Shkeda A, Storz SS, Sun H, Thibaud-Nissen F, Tolstoy I, Tully RE, Vatsan AR, Wallin C, Webb D, Wu W, Landrum MJ, Kimchi A, Tatusova T, DiCuccio M, Kitts P, Murphy TD, Pruitt KD [Reference sequence (RefSeq) database at NCBI: current status, taxonomic expansion, and functional annotation.](https://doi.org/10.1093/nar/gkv1189) _Nucleic Acids Res._ 44, D733-45 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/merlin/dist) + +## Version + +```yaml +MERLIN_DIST: + - mash: 2.3 +``` diff --git a/developers/modules/midas_download.mdx b/developers/modules/midas_download.mdx new file mode 100644 index 00000000..17d47df9 --- /dev/null +++ b/developers/modules/midas_download.mdx @@ -0,0 +1,83 @@ +--- +title: midas_download +description: "Download the MIDAS reference database." +tags: + - midas + - download + - database + - metagenomics + - species + - run-scope +--- + +# midas_download + +**Tags:** midas download database metagenomics species run-scope + +Download the MIDAS reference database. + +Fetches the pre-compiled database required by [MIDAS](https://github.com/snayfach/MIDAS) +for metagenomic species profiling. The database contains reference genomes from the +UHGG collection used for species identification and abundance estimation. 
+ +:::note[Internet & Storage Required] +This process requires an active internet connection and significant disk space +to store the database files (~3.3GB compressed, ~7GB uncompressed). +::: + +## Outputs + +``` +record ( + db: Path?, + db_tarball: Path?, + logs: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `db` | `Path?` | The MIDAS database directory containing reference genome data | +| `db_tarball` | `Path?` | A compressed tarball of the database (if requested via parameters) | +| `logs` | `Set` | Optional program specific log files | + +## Parameters + +### MIDAS Database Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--midas_db` | string | | A single tarball or path to a MIDAS formatted database | +| `--midas_save_as_tarball` | boolean | `false` | Save the MIDAS database as a tarball | +| `--download_midas` | boolean | `false` | Download the MIDAS database to the path given by --midas_db | + +## Used By + +### Subworkflows + +- [midas](/developers/subworkflows/midas) - Species-level profiling from metagenomic data. + +### Workflows + +- [midas](/bactopia-tools/midas) - Estimate species abundances from metagenomic samples. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [MIDAS](https://github.com/snayfach/MIDAS) + Nayfach S, Rodriguez-Mueller B, Garud N, and Pollard KS [An integrated metagenomics pipeline for strain profiling reveals novel patterns of bacterial transmission and biogeography.](https://doi.org/10.1101/gr.201863.115) _Genome Research_, 26(11), 1612-1625. 
(2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/midas/download) + +## Version + +```yaml +MIDAS_DOWNLOAD: + - gnu-wget: 1.18 +``` diff --git a/developers/modules/midas_species.mdx b/developers/modules/midas_species.mdx new file mode 100644 index 00000000..a9a6a304 --- /dev/null +++ b/developers/modules/midas_species.mdx @@ -0,0 +1,124 @@ +--- +title: midas_species +description: "Estimate bacterial species abundance from metagenomic reads." +tags: + - metagenomics + - abundance + - species + - midas + - marker-genes + - diversity + - sample-scope +--- + +# midas_species + +**Tags:** metagenomics abundance species midas marker-genes diversity sample-scope + +Estimate bacterial species abundance from metagenomic reads. + +Uses [MIDAS](https://github.com/snayfach/MIDAS) (Metagenomic Intra-Species Diversity Analysis System) +to estimate the abundance of bacterial species in metagenomic data. It maps reads to a database +of universal single-copy marker genes (15 genes) to provide accurate coverage and relative +abundance estimates. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se) where each read slot is Path? + +:::note[Database Required] +Requires a compatible MIDAS database (containing marker gene sequences and taxonomy). +::: + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path? 
+) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | Directory containing the MIDAS database | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + abundances: Path, + adjusted_abundances: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | A tab-delimited summary of species abundance and coverage | +| `abundances` | `Path` | Detailed species abundance profile (*.abundances.txt) | +| `adjusted_abundances` | `Path` | Relative abundance estimates adjusted for genome size (*.adjusted.abundances.txt) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### MIDAS Species Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--midas_word_size` | integer | `28` | Word size for BLAST search | +| `--midas_aln_cov` | number | `0.75` | Discard reads with alignment coverage < ALN_COV | +| `--midas_opts` | string | | Extra MIDAS options | +| `--midas_debug` | boolean | `false` | Keep all temporary files created by MIDAS | + +## Used By + +### Subworkflows + +- [midas](/developers/subworkflows/midas) - Species-level profiling from metagenomic data. + +### Workflows + +- [midas](/bactopia-tools/midas) - Estimate species abundances from metagenomic samples. 
+ +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [MIDAS](https://github.com/snayfach/MIDAS) + Nayfach S, Rodriguez-Mueller B, Garud N, and Pollard KS [An integrated metagenomics pipeline for strain profiling reveals novel patterns of bacterial transmission and biogeography.](https://doi.org/10.1101/gr.201863.115) _Genome Research_, 26(11), 1612-1625. (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/midas/species) + +## Version + +```yaml +MIDAS_SPECIES: + - midas: 1.3.2 +``` diff --git a/developers/modules/mlst.mdx b/developers/modules/mlst.mdx new file mode 100644 index 00000000..782d7dbf --- /dev/null +++ b/developers/modules/mlst.mdx @@ -0,0 +1,116 @@ +--- +title: mlst +description: "Automatic Multi-Locus Sequence Typing (MLST) of genome assemblies." +tags: + - bacteria + - typing + - mlst + - sequence-type + - pubmlst + - alleles + - sample-scope +--- + +# mlst + +**Tags:** bacteria typing mlst sequence-type pubmlst alleles sample-scope + +Automatic Multi-Locus Sequence Typing (MLST) of genome assemblies. + +Uses [mlst](https://github.com/tseemann/mlst) to scan genome assemblies against traditional +PubMLST schemes. It automatically detects the likely species scheme, identifies the alleles +for the 7 housekeeping genes, and assigns a Sequence Type (ST). + +:::note[Database Required] +Requires the MLST database (derived from PubMLST) to be available. 
+::: + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | Directory or compressed tarball containing the MLST database schemes | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | A tab-delimited summary containing the Sample, Scheme, ST, and Allele IDs | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### MLST Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mlst_scheme` | string | | Don't autodetect, force this scheme on all inputs | +| `--mlst_minid` | integer | `95` | Minimum DNA percent identity of full allele to consider 'similar' | +| `--mlst_mincov` | integer | `10` | Minimum DNA percent coverage to report partial allele at all | +| `--mlst_minscore` | integer | `50` | Minimum score out of 100 to match a scheme | +| `--mlst_nopath` | boolean | `false` | Strip filename paths from FILE column | +| `--mlst_db` | string | | A custom MLST database to use, either a tarball or a directory | + +## Used By + +### Subworkflows + +- [mlst](/developers/subworkflows/mlst) - Determine multilocus sequence types (MLST) from bacterial assemblies. 
+ +### Workflows + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [mlst](/bactopia-tools/mlst) - Automatic Multi-Locus Sequence Type (MLST) calling from assembled contigs. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [mlst](https://github.com/tseemann/mlst) + Seemann T [mlst: scan contig files against PubMLST typing schemes](https://github.com/tseemann/mlst) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/mlst) + +## Version + +```yaml +MLST: + - mlst: 2.33.1 +``` diff --git a/developers/modules/mobsuite_recon.mdx b/developers/modules/mobsuite_recon.mdx new file mode 100644 index 00000000..50bcb763 --- /dev/null +++ b/developers/modules/mobsuite_recon.mdx @@ -0,0 +1,106 @@ +--- +title: mobsuite_recon +description: "Reconstruct and type plasmids from a bacterial genome assembly." +tags: + - bacteria + - plasmid + - reconstruction + - mobtyper + - replicon + - contigs + - assembly + - sample-scope +--- + +# mobsuite_recon + +**Tags:** bacteria plasmid reconstruction mobtyper replicon contigs assembly sample-scope + +Reconstruct and type plasmids from a bacterial genome assembly. + +Uses [MOB-suite's mob_recon](https://github.com/phac-nml/mob-suite) to reconstruct plasmids +by grouping relevant contigs. It then uses the MOB-typer component to classify the plasmids +based on replicon type, incompatibility group (Inc type), and predicted mobility. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + chromosome: Path, + contig_report: Path, + txt: Path?, + plasmids: Set, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `chromosome` | `Path` | Chromosomal sequences separated from plasmid contigs (gzipped FASTA) | +| `contig_report` | `Path` | Tab-delimited report assigning each contig to chromosome or plasmid | +| `txt` | `Path?` | MOB-typer results with replicon type, mobility, and incompatibility group | +| `plasmids` | `Set` | Reconstructed plasmid sequences in gzipped FASTA format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### MOB-suite Recon Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mobsuite_max_contig_size` | integer | `310000` | Maximum size of a contig to be considered a plasmid | +| `--mobsuite_min_contig_size` | integer | `1000` | Minimum length of contigs to classify | +| `--mobsuite_max_plasmid_size` | integer | `350000` | Maximum size of a reconstructed plasmid | + +## Used By + +### Subworkflows + +- [mobsuite](/developers/subworkflows/mobsuite) - Reconstruct and type plasmids from bacterial genome assemblies. + +### Workflows + +- [mobsuite](/bactopia-tools/mobsuite) - Reconstruction and annotation of plasmids from bacterial genome assemblies. 
+ +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [MOB-suite](https://github.com/phac-nml/mob-suite) + Robertson J, Nash JHE [MOB-suite: software tools for clustering, reconstruction and typing of plasmids from draft assemblies.](https://doi.org/10.1099/mgen.0.000206) _Microbial Genomics_ 4(8). (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/mobsuite/recon) + +## Version + +```yaml +MOBSUITE_RECON: + - mob_suite: 3.1.9 +``` diff --git a/developers/modules/mykrobe_predict.mdx b/developers/modules/mykrobe_predict.mdx new file mode 100644 index 00000000..8e3b9402 --- /dev/null +++ b/developers/modules/mykrobe_predict.mdx @@ -0,0 +1,123 @@ +--- +title: mykrobe_predict +description: "Predict Antimicrobial Resistance (AMR) for supported bacterial species." +tags: + - amr + - resistance + - susceptibility + - k-mer + - fastq + - bam + - mykrobe + - sample-scope +--- + +# mykrobe_predict + +**Tags:** amr resistance susceptibility k-mer fastq bam mykrobe sample-scope + +Predict Antimicrobial Resistance (AMR) for supported bacterial species. + +Uses [Mykrobe](https://github.com/mykrobe/mykrobe) to quickly predict resistance and susceptibility +based on short reads (FASTQ) or aligned reads (BAM). It maps k-mers from the input sequences +against a curated database of resistance markers for species like *M. tuberculosis* and *S. aureus*. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path? 
+) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT/PacBio) | + +``` +species: String +``` + +| Name | Type | Description | +|------|------|-------------| +| `species` | `String` | The target species for which to make the AMR prediction (e.g., "tb" or "staph") | + +## Outputs + +``` +record ( + meta: Record, + csv: Path, + json: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `csv` | `Path` | AMR predictions in machine-readable CSV format | +| `json` | `Path` | Detailed AMR prediction results in JSON format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Mykrobe Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--mykrobe_species` | string | | Species panel to use (choices: `sonnei`, `staph`, `tb`, `typhi`) | +| `--mykrobe_opts` | string | | Extra Mykrobe options in quotes | + +## Used By + +### Subworkflows + +- [genotyphi](/developers/subworkflows/genotyphi) - Assign genotypes to Salmonella Typhi genomes. +- [mykrobe](/developers/subworkflows/mykrobe) - Predict antibiotic resistance from sequence reads. + +### Workflows + +- [genotyphi](/bactopia-tools/genotyphi) - Salmonella Typhi genotyping with lineage assignment. +- [mykrobe](/bactopia-tools/mykrobe) - Antimicrobial resistance detection for specific bacterial species. 
+ +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) + Hunt M, Bradley P, Lapierre SG, Heys S, Thomsit M, Hall MB, Malone KM, Wintringer P, Walker TM, Cirillo DM, Comas I, Farhat MR, Fowler P, Gardy J, Ismail N, Kohl TA, Mathys V, Merker M, Niemann S, Omar SV, Sintchenko V, Smith G, Supply P, Tahseen S, Wilcox M, Arandjelovic I, Peto TEA, Crook DW, Iqbal Z [Antibiotic resistance prediction for Mycobacterium tuberculosis from genome sequence data with Mykrobe](https://doi.org/10.12688/wellcomeopenres.15603.1) _Wellcome Open Research_ 4, 191. (2019) + +- [McCortex](https://github.com/mcveanlab/mccortex) + Turner I, Garimella KV, Iqbal Z, McVean G [Integrating long-range connectivity information into de Bruijn graphs.](http://dx.doi.org/10.1093/bioinformatics/bty157) _Bioinformatics_ 34, 2556-2565 (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/mykrobe/predict) + +## Version + +```yaml +MYKROBE_PREDICT: + - mykrobe: 0.13.0 +``` diff --git a/developers/modules/ncbigenomedownload.mdx b/developers/modules/ncbigenomedownload.mdx new file mode 100644 index 00000000..fa9e4171 --- /dev/null +++ b/developers/modules/ncbigenomedownload.mdx @@ -0,0 +1,128 @@ +--- +title: ncbigenomedownload +description: "Download assemblies and annotation files from NCBI's Assembly database." +tags: + - ncbi + - download + - genome + - assembly + - fasta + - genbank + - utility + - run-scope +--- + +# ncbigenomedownload + +**Tags:** ncbi download genome assembly fasta genbank utility run-scope + +Download assemblies and annotation files from NCBI's Assembly database. 
+ +Uses [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) to efficiently fetch +one or more complete genome assemblies and their associated annotation and report files from +the NCBI FTP site based on accession numbers, species name, or assembly ID. + +## Inputs + +``` +accessions: Path? +``` + +| Name | Type | Description | +|------|------|-------------| +| `accessions` | `Path?` | A path to a text file containing a list of NCBI Assembly accession numbers (one per line) | + +## Outputs + +``` +record ( + meta: Record, + gbff: Set, + fna: Set, + rm: Set, + features: Set, + gff: Set, + faa: Set, + gpff: Set, + wgs_gbk: Set, + cds: Set, + rna: Set, + rna_fna: Set, + report: Set, + stats: Set, + accessions: Set, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `gbff` | `Set` | GenBank format of the genomic sequence(s) (*_genomic.gbff.gz) | +| `fna` | `Set` | FASTA format of the genomic nucleotide sequence(s) (*_genomic.fna.gz) | +| `rm` | `Set` | RepeatMasker output for eukaryotes | +| `features` | `Set` | Tab-delimited text file reporting locations and attributes for a subset of features | +| `gff` | `Set` | Annotation of the genomic sequence(s) in GFF3 format (*_genomic.gff.gz) | +| `faa` | `Set` | FASTA format of the accessioned protein products (*_protein.faa.gz) | +| `gpff` | `Set` | GenPept format of the accessioned protein products | +| `wgs_gbk` | `Set` | GenBank flat file format of the WGS master | +| `cds` | `Set` | FASTA format of the nucleotide sequences corresponding to all CDS features | +| `rna` | `Set` | FASTA format of accessioned RNA products | +| `rna_fna` | `Set` | FASTA format of the nucleotide sequences corresponding to all RNA features | +| `report` | `Set` | Tab-delimited text file reporting assembly unit names, roles, and relationships | +| `stats` | `Set` | Tab-delimited text file 
reporting assembly statistics | +| `accessions` | `Set` | The generated accession list files | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### NCBI Genome Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--species` | string | | Name of the species to download assemblies | +| `--accession` | string | | An NCBI Assembly accession to be downloaded | +| `--accessions` | string | | A file of NCBI Assembly accessions (one per line) to be downloaded | +| `--format` | string | `fasta` | Comma separated list of formats to download | +| `--limit` | string | | Limit the number of assemblies to download | +| `--keep_downloads` | boolean | `false` | Save downloaded files into the bactopia-runs folder | + +## Used By + +### Subworkflows + +- [ncbigenomedownload](/developers/subworkflows/ncbigenomedownload) - Download bacterial genomes from NCBI's RefSeq database. + +### Workflows + +- [fastani](/bactopia-tools/fastani) - Fast alignment-free computation of whole-genome Average Nucleotide Identity. +- [mashtree](/bactopia-tools/mashtree) - Rapid phylogenetic tree construction using Mash distances. +- [pangenome](/bactopia-tools/pangenome) - Pangenome analysis with optional core-genome phylogeny. +- [snippy](/bactopia-tools/snippy) - Rapid haplotype variant calling and core genome alignment. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) + Blin K [ncbi-genome-download: Scripts to download genomes from the NCBI FTP servers](https://github.com/kblin/ncbi-genome-download) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/ncbigenomedownload) + +## Version + +```yaml +NCBIGENOMEDOWNLOAD: + - ncbi-genome-download: 0.3.3 +``` diff --git a/developers/modules/ngmaster.mdx b/developers/modules/ngmaster.mdx new file mode 100644 index 00000000..f498be25 --- /dev/null +++ b/developers/modules/ngmaster.mdx @@ -0,0 +1,98 @@ +--- +title: ngmaster +description: "Serotyping and Multi-Antigen Sequence Typing (MAST) of *Neisseria gonorrhoeae*." +tags: + - bacteria + - neisseria-gonorrhoeae + - serotype + - typing + - mast + - porb + - tbpb + - sample-scope +--- + +# ngmaster + +**Tags:** bacteria neisseria-gonorrhoeae serotype typing mast porb tbpb sample-scope + +Serotyping and Multi-Antigen Sequence Typing (MAST) of *Neisseria gonorrhoeae*. + +Uses [NG-MASTER](https://github.com/MDU-PHL/ngmaster) to identify the alleles of the +*porB* and *tbpB* genes in *N. gonorrhoeae* assemblies, which is the basis for the +internationally recognized MAST genotyping scheme. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Tab-delimited NG-MASTER results with porB and tbpB alleles and sequence type | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### ngmaster Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--ngmaster_csv` | boolean | `false` | output comma-separated format (CSV) rather than tab-separated | + +## Used By + +### Subworkflows + +- [ngmaster](/developers/subworkflows/ngmaster) - Perform multi-antigen sequence typing of Neisseria gonorrhoeae from genome assemblies. + +### Workflows + +- [ngmaster](/bactopia-tools/ngmaster) - Multi-antigen sequence typing of Neisseria gonorrhoeae. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ngmaster](https://github.com/MDU-PHL/ngmaster) + Kwong J, Gonçalves da Silva A, Schultz M, Seemann T [ngmaster - _In silico_ multi-antigen sequence typing for _Neisseria gonorrhoeae_ (NG-MAST)](https://github.com/MDU-PHL/ngmaster) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/ngmaster) + +## Version + +```yaml +NGMASTER: + - ngmaster: 2.0.0 +``` diff --git a/developers/modules/nohuman_download.mdx b/developers/modules/nohuman_download.mdx new file mode 100644 index 00000000..5b9b48cf --- /dev/null +++ b/developers/modules/nohuman_download.mdx @@ -0,0 +1,82 @@ +--- +title: nohuman_download +description: "Download the nohuman database for human read removal." +tags: + - bacteria + - database + - download + - human + - decontamination + - kraken2 + - nohuman + - run-scope +--- + +# nohuman_download + +**Tags:** bacteria database download human decontamination kraken2 nohuman run-scope + +Download the nohuman database for human read removal. + +Fetches the Kraken2-based database used by [nohuman](https://github.com/mbhall88/nohuman) +to classify and remove human reads from sequencing datasets. The database is built from +Human Pangenome Reference Consortium (HPRC) genomes. + +:::note[Internet & Storage Required] +This process requires an active internet connection and sufficient disk space +to store the Kraken2 database files. 
+::: + +## Outputs + +``` +record ( + db: Path?, + db_tarball: Path?, + logs: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `db` | `Path?` | The nohuman Kraken2 database directory | +| `db_tarball` | `Path?` | A compressed tarball of the database (if requested via parameters) | +| `logs` | `Set` | Optional program specific log files | + +## Parameters + +### Nohuman Download Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--nohuman_db` | string | | Path to the nohuman database or directory to download it to | +| `--nohuman_db_version` | string | | Database version to download (default: latest HPRC release) | +| `--nohuman_save_as_tarball` | boolean | `false` | Save the nohuman database as a tarball | +| `--download_nohuman` | boolean | `false` | Download the nohuman database to the path given by --nohuman_db | + +## Used By + +### Subworkflows + +- [nohuman](/developers/subworkflows/nohuman) - Remove human reads from sequencing data using nohuman. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. 
(2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/nohuman/download) + +## Version + +```yaml +NOHUMAN_DOWNLOAD: + - bactopia-teton: 1.1.3 +``` diff --git a/developers/modules/nohuman_run.mdx b/developers/modules/nohuman_run.mdx new file mode 100644 index 00000000..2957398d --- /dev/null +++ b/developers/modules/nohuman_run.mdx @@ -0,0 +1,126 @@ +--- +title: nohuman_run +description: "Remove human reads from sequencing data." +tags: + - human + - contamination + - decontamination + - scrubbing + - reads + - kraken2 + - nohuman + - sample-scope +--- + +# nohuman_run + +**Tags:** human contamination decontamination scrubbing reads kraken2 nohuman sample-scope + +Remove human reads from sequencing data. + +Uses [nohuman](https://github.com/mbhall88/nohuman) to classify and remove human reads +from FASTQ files using a Kraken2 database built from Human Pangenome Reference Consortium +(HPRC) genomes. Supports paired-end and single-end Illumina reads. + +:::note[Database Required] +Requires the nohuman Kraken2 database. Use the nohuman/download module or +provide a pre-existing database via --nohuman_db. +::: + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path? 
+) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path?` | Illumina R1 reads (paired-end forward) | +| `r2` | `Path?` | Illumina R2 reads (paired-end reverse) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT/PacBio) | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | Directory or compressed tarball containing the nohuman Kraken2 database | + +## Outputs + +``` +record ( + meta: Record, + special_meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path?, + scrub_report: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `special_meta` | `Record` | A simplified metadata record for downstream report joining | +| `r1` | `Path?` | Scrubbed paired-end forward reads | +| `r2` | `Path?` | Scrubbed paired-end reverse reads | +| `se` | `Path?` | Scrubbed single-end reads | +| `lr` | `Path?` | Scrubbed long reads | +| `scrub_report` | `Path` | Summary report of reads removed during scrubbing | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Nohuman Run Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--nohuman_db` | string | | Path to the nohuman database directory or tarball | +| `--nohuman_confidence` | number | `0.0` | Kraken2 minimum confidence score for classification (0.0-1.0) | +| `--nohuman_human` | boolean | `false` | Invert output to keep only human reads instead of removing them | +| `--nohuman_save_report` | boolean | `false` | Save the Kraken2 classification report | + +## Used By + +### Subworkflows + +- [nohuman](/developers/subworkflows/nohuman) - Remove human reads from sequencing data using nohuman. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/nohuman/run) + +## Version + +```yaml +NOHUMAN_RUN: + - bactopia-teton: 1.1.3 +``` diff --git a/developers/modules/panaroo_run.mdx b/developers/modules/panaroo_run.mdx new file mode 100644 index 00000000..eb1c58d6 --- /dev/null +++ b/developers/modules/panaroo_run.mdx @@ -0,0 +1,104 @@ +--- +title: panaroo_run +description: "Fast and scalable bacterial pangenome analysis using a graph-based approach." 
+tags: + - pan-genome + - orthologs + - core-genome + - gene-presence-absence + - graph-based + - annotation + - run-scope +--- + +# panaroo_run + +**Tags:** pan-genome orthologs core-genome gene-presence-absence graph-based annotation run-scope + +Fast and scalable bacterial pangenome analysis using a graph-based approach. + +Uses [Panaroo](https://gtonkinhill.github.io/panaroo/) to cluster genes from multiple +annotated bacterial genomes into orthologous groups, correcting for gene splitting and +merges. The primary outputs are the gene presence/absence matrix (the pan-genome) and +a core-genome alignment (for phylogenetics). + +## Inputs + +``` +record ( + meta: Record, + gff: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `gff` | `Set` | A list of annotated genome files in GFF3 format (required input) | + +## Outputs + +``` +record ( + meta: Record, + aln: Path?, + filtered_aln: Path?, + csv: Path?, + panaroo_csv: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `aln` | `Path?` | The core-genome alignment (*core-genome.aln.gz), suitable for phylogenetic tree building | +| `filtered_aln` | `Path?` | The core-genome alignment with highly recombinant regions filtered out | +| `csv` | `Path?` | Gene presence/absence matrix in Roary-compatible CSV format | +| `panaroo_csv` | `Path?` | Gene presence/absence matrix in Panaroo's native CSV format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Panaroo Run Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--panaroo_merge_paralogs` | boolean | `false` | Do not split paralogs | +| `--panaroo_opts` | string | | Additional options to pass to panaroo | + +## Used By + +### Subworkflows + +- [panaroo](/developers/subworkflows/panaroo) - Build a pangenome from GFF3 annotations using Panaroo. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Panaroo](https://github.com/gtonkinhill/panaroo) + Tonkin-Hill G, MacAlasdair N, Ruis C, Weimann A, Horesh G, Lees JA, Gladstone RA, Lo S, Beaudoin C, Floto RA, Frost SDW, Corander J, Bentley SD, Parkhill J [Producing polished prokaryotic pangenomes with the Panaroo pipeline.](https://doi.org/10.1186/s13059-020-02090-4) _Genome Biology_ 21(1), 180. (2020) + +- [MAFFT](https://mafft.cbrc.jp/alignment/software/) + Katoh K, Standley DM [MAFFT multiple sequence alignment software version 7: improvements in performance and usability.](https://doi.org/10.1093/molbev/mst010) _Mol. Biol. Evol._ 30, 772-780 (2013) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/panaroo/run) + +## Version + +```yaml +PANAROO_RUN: + - panaroo: 1.6.0 +``` diff --git a/developers/modules/pasty.mdx b/developers/modules/pasty.mdx new file mode 100644 index 00000000..58365e1b --- /dev/null +++ b/developers/modules/pasty.mdx @@ -0,0 +1,102 @@ +--- +title: pasty +description: "Predict O-antigen serogroup of Pseudomonas aeruginosa isolates." 
+tags: + - bacteria + - pseudomonas-aeruginosa + - serogroup + - o-antigen + - typing + - blast + - sample-scope +--- + +# pasty + +**Tags:** bacteria pseudomonas-aeruginosa serogroup o-antigen typing blast sample-scope + +Predict O-antigen serogroup of Pseudomonas aeruginosa isolates. + +Uses [Pasty](https://github.com/rpetit3/pasty) (in silico serogrouping of *Pseudomonas aeruginosa* isolates) +to predict the O-antigen serogroup by searching the genome assembly for specific serogroup-associated +genes within the O-antigen locus. + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + blast: Path, + details: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | A tab-delimited summary file with the predicted O-antigen serogroup | +| `blast` | `Path` | A tab-delimited file of all raw BLAST hits used for the prediction | +| `details` | `Path` | A tab-delimited file with detailed gene hits for each serogroup tested | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### pasty Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--pasty_min_pident` | integer | `95` | Minimum percent identity to count a hit | +| `--pasty_min_coverage` | integer | `95` | Minimum percent coverage to count a hit | + +## Used By + +### Subworkflows + +- [pasty](/developers/subworkflows/pasty) - Predict serogroups of Pseudomonas aeruginosa from assemblies. + +### Workflows + +- [pasty](/bactopia-tools/pasty) - In silico serogrouping of Pseudomonas aeruginosa isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [pasty](https://github.com/rpetit3/pasty) + Petit III RA [pasty: in silico serogrouping of _Pseudomonas aeruginosa_ isolates](https://github.com/rpetit3/pasty) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/pasty) + +## Version + +```yaml +PASTY: + - pasty: 2.2.1 +``` diff --git a/developers/modules/pbptyper.mdx b/developers/modules/pbptyper.mdx new file mode 100644 index 00000000..b25246a3 --- /dev/null +++ b/developers/modules/pbptyper.mdx @@ -0,0 +1,102 @@ +--- +title: pbptyper +description: "Predict Penicillin Binding Protein (PBP) type of *Streptococcus pneumoniae* assemblies." +tags: + - bacteria + - streptococcus-pneumoniae + - penicillin + - amr + - resistance + - pbp + - typing + - sample-scope +--- + +# pbptyper + +**Tags:** bacteria streptococcus-pneumoniae penicillin amr resistance pbp typing sample-scope + +Predict Penicillin Binding Protein (PBP) type of *Streptococcus pneumoniae* assemblies. 
+ +Uses [PBPtyper](https://github.com/rpetit3/pbptyper) to detect variations in the three +key PBP genes (*pbp1a*, *pbp2b*, and *pbp2x*) in *S. pneumoniae*. Typing these genes is +essential for predicting reduced susceptibility or full resistance to penicillin and other +β-lactam antibiotics. + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + blast: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | A tab-delimited summary file with the predicted PBP type for each gene | +| `blast` | `Path` | A tab-delimited file of the raw TBLASTN hits used for gene identification | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### pbptyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--pbptyper_min_pident` | integer | `95` | Minimum percent identity to count a hit | +| `--pbptyper_min_coverage` | integer | `95` | Minimum percent coverage to count a hit | + +## Used By + +### Subworkflows + +- [pbptyper](/developers/subworkflows/pbptyper) - Predict penicillin binding protein (PBP) types of Streptococcus pneumoniae from genome assemblies. + +### Workflows + +- [pbptyper](/bactopia-tools/pbptyper) - Penicillin Binding Protein (PBP) typing for Streptococcus pneumoniae. 
+ +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [pbptyper](https://github.com/rpetit3/pbptyper) + Petit III RA [pbptyper: In silico Penicillin Binding Protein (PBP) typer for _Streptococcus pneumoniae_ assemblies](https://github.com/rpetit3/pbptyper) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/pbptyper) + +## Version + +```yaml +PBPTYPER: + - pbptyper: 2.0.0 +``` diff --git a/developers/modules/phispy.mdx b/developers/modules/phispy.mdx new file mode 100644 index 00000000..f0cfe0a6 --- /dev/null +++ b/developers/modules/phispy.mdx @@ -0,0 +1,100 @@ +--- +title: phispy +description: "Predict prophage regions integrated into bacterial genomes." +tags: + - genomics + - virus + - phage + - prophage + - bacteriophage + - identification + - annotation + - sample-scope +--- + +# phispy + +**Tags:** genomics virus phage prophage bacteriophage identification annotation sample-scope + +Predict prophage regions integrated into bacterial genomes. + +Uses [PhiSpy](https://github.com/linsalrob/PhiSpy) to identify integrated bacteriophage +(prophage) regions in a fully annotated bacterial genome. The prediction relies on scoring +features like strand-switch, AT-skew, unique phage-like proteins, and short coding regions. 
+ +## Inputs + +``` +record ( + meta: Record, + gbff: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `gbff` | `Path` | Annotated genome file in GenBank format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Coordinates (start/end) of each predicted prophage region in the genome | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### PhiSpy Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--phispy_number` | integer | `5` | Number of consecutive genes in a region of window size that must be prophage genes to be called | +| `--phispy_mincontigsize` | integer | `5000` | Minimum contig size (in bp) to be included in the analysis. Smaller contigs will be dropped. | +| `--phispy_opts` | string | | Extra options in quotes for PhiSpy | + +## Used By + +### Subworkflows + +- [phispy](/developers/subworkflows/phispy) - Prediction of prophages from bacterial genomes. + +### Workflows + +- [phispy](/bactopia-tools/phispy) - Prediction of prophages in bacterial and archaeal genomes. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PhiSpy](https://github.com/linsalrob/PhiSpy) + Akhter S, Aziz RK, and Edwards RA [PhiSpy: a novel algorithm for finding prophages in bacterial genomes that combines similarity- and composition-based strategies.](https://doi.org/10.1093/nar/gks406) _Nucleic Acids Research_, 40(16), e126. (2012) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/phispy) + +## Version + +```yaml +PHISPY: + - phispy: 5.0.6 +``` diff --git a/developers/modules/pirate.mdx b/developers/modules/pirate.mdx new file mode 100644 index 00000000..52e3809f --- /dev/null +++ b/developers/modules/pirate.mdx @@ -0,0 +1,99 @@ +--- +title: pirate +description: "Pangenome Identification and Reconciliation Analysis Tool for Epidemiology (PIRATE)." +tags: + - pan-genome + - orthologs + - core-genome + - gene-presence-absence + - epidemiology + - annotation + - run-scope +--- + +# pirate + +**Tags:** pan-genome orthologs core-genome gene-presence-absence epidemiology annotation run-scope + +Pangenome Identification and Reconciliation Analysis Tool for Epidemiology (PIRATE). + +Uses [PIRATE](https://github.com/SionBayliss/PIRATE) to construct the pangenome of a +collection of bacterial isolates. It clusters orthologous genes and generates the core +genome alignment and a gene presence/absence matrix, which is compatible with downstream +analysis tools like Scoary for association testing. 
+ +## Inputs + +``` +record ( + meta: Record, + gff: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `gff` | `Set` | A list of annotated genome files in GFF3 format | + +## Outputs + +``` +record ( + meta: Record, + aln: Path?, + csv: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `aln` | `Path?` | The core-genome alignment (*core-genome.aln.gz), suitable for phylogenetic tree building | +| `csv` | `Path?` | Gene presence/absence matrix in CSV format, compatible with Scoary | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### PIRATE Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--use_pirate` | boolean | `false` | Use PIRATE instead of panaroo in the 'pangenome' subworkflow | +| `--pirate_steps` | string | `50,60,70,80,90,95,98` | Percent identity thresholds to use for pangenome construction | +| `--pirate_features` | string | `CDS` | Comma-delimited features to use for pangenome construction | +| `--pirate_para_off` | boolean | `false` | Switch off paralog identification | + +## Used By + +### Subworkflows + +- [pirate](/developers/subworkflows/pirate) - Build a pangenome from GFF3 annotations using PIRATE. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PIRATE](https://github.com/SionBayliss/PIRATE) + Bayliss SC, Thorpe HA, Coyle NM, Sheppard SK, Feil EJ [PIRATE: A fast and scalable pangenomics toolbox for clustering diverged orthologues in bacteria.](https://doi.org/10.1093/gigascience/giz119) _GigaScience_ 8 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/pirate) + +## Version + +```yaml +PIRATE: + - pirate: 1.0.5 +``` diff --git a/developers/modules/plasmidfinder.mdx b/developers/modules/plasmidfinder.mdx new file mode 100644 index 00000000..cc3420fc --- /dev/null +++ b/developers/modules/plasmidfinder.mdx @@ -0,0 +1,106 @@ +--- +title: plasmidfinder +description: "Identify plasmid replicon types in bacterial sequences and assemblies." +tags: + - plasmid + - replicon + - typing + - identification + - mobility + - amr + - sample-scope +--- + +# plasmidfinder + +**Tags:** plasmid replicon typing identification mobility amr sample-scope + +Identify plasmid replicon types in bacterial sequences and assemblies. + +Uses [PlasmidFinder](https://bitbucket.org/genomicepidemiology/plasmidfinder/src/master/) +to identify plasmid types (replicon typing) by querying the genome assembly against a database +of plasmid sequences. This is a crucial step for understanding the mobility of resistance and virulence genes.
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + json: Path, + txt: Path, + tsv: Path, + genome_seq: Path, + plasmid_seq: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `json` | `Path` | PlasmidFinder results in JSON format | +| `txt` | `Path` | PlasmidFinder results in text format | +| `tsv` | `Path` | Tab-delimited PlasmidFinder results with replicon typing information | +| `genome_seq` | `Path` | FASTA sequences of plasmid hits found in the genome (gzipped) | +| `plasmid_seq` | `Path` | Reference plasmid sequences matched (gzipped) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### PlasmidFinder Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--plasmidfinder_mincov` | number | `0.6` | Minimum percent coverage to be considered a hit | +| `--plasmidfinder_threshold` | number | `0.9` | Minimum threshold for identity | + +## Used By + +### Subworkflows + +- [plasmidfinder](/developers/subworkflows/plasmidfinder) - Identify plasmid replicons in bacterial genome assemblies. + +### Workflows + +- [plasmidfinder](/bactopia-tools/plasmidfinder) - Bactopia Tool: Plasmidfinder. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PlasmidFinder](https://bitbucket.org/genomicepidemiology/plasmidfinder) + Carattoli A, Zankari E, García-Fernández A, Voldby Larsen M, Lund O, Villa L, Møller Aarestrup F, Hasman H [In silico detection and typing of plasmids using PlasmidFinder and plasmid multilocus sequence typing.](https://doi.org/10.1128/AAC.02412-14) _Antimicrobial Agents and Chemotherapy_ 58(7), 3895-3903. (2014) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/plasmidfinder) + +## Version + +```yaml +PLASMIDFINDER: + - plasmidfinder: 2.1.6 +``` diff --git a/developers/modules/pneumocat.mdx b/developers/modules/pneumocat.mdx new file mode 100644 index 00000000..99f973fd --- /dev/null +++ b/developers/modules/pneumocat.mdx @@ -0,0 +1,102 @@ +--- +title: pneumocat +description: "Capsular typing of Streptococcus pneumoniae from Illumina reads." +tags: + - pneumocat + - streptococcus-pneumoniae + - capsular-typing + - serotyping + - sample-scope +--- + +# pneumocat + +**Tags:** pneumocat streptococcus-pneumoniae capsular-typing serotyping sample-scope + +Capsular typing of Streptococcus pneumoniae from Illumina reads. + +Uses [PneumoCaT](https://github.com/ukhsa-collaboration/PneumoCaT) (Pneumococcal Capsular Typing) +to assign capsular types to *Streptococcus pneumoniae* using a two-step approach: first matching +reads to a global database, then using a mapping-based approach for specific serogroup differentiation.
+ +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2) where each read slot is Path + +:::note[Negative results will cause non-0 exit codes from PneumoCaT] +::: + +## Inputs + +``` +record ( + meta: Record, + r1: Path, + r2: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path` | Illumina R1 reads (paired-end) | +| `r2` | `Path` | Illumina R2 reads (paired-end) | + +## Outputs + +``` +record ( + meta: Record, + xml: Path?, + txt: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `xml` | `Path?` | The PneumoCaT result files in XML format | +| `txt` | `Path?` | A file containing the coverage information across the genes | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [pneumocat](/developers/subworkflows/pneumocat) - Perform capsular typing of Streptococcus pneumoniae from NGS data. + +### Workflows + +- [pneumocat](/bactopia-tools/pneumocat) - Capsular type assignment to Streptococcus pneumoniae from sequence reads. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PneumoCaT](https://github.com/ukhsa-collaboration/PneumoCaT) + Kapatai G, Sheppard CL, Al-Shahib A, Litt DJ, Underwood AP, Harrison TG, and Fry NK [Whole genome sequencing of Streptococcus pneumoniae: development, evaluation and verification of targets for serogroup and serotype prediction using an automated pipeline.](https://doi.org/10.7717/peerj.2477) _PeerJ_ 4, e2477 (2016) + +- [Bowtie2](https://github.com/BenLangmead/bowtie2) + Langmead B, Salzberg SL [Fast gapped-read alignment with Bowtie 2.](http://dx.doi.org/10.1038/nmeth.1923) _Nat. Methods._ 9, 357-359 (2012) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/pneumocat) + +## Version + +```yaml +PNEUMOCAT: + - pneumocat: 1.2.1 +``` diff --git a/developers/modules/prokka.mdx b/developers/modules/prokka.mdx new file mode 100644 index 00000000..e9bdced0 --- /dev/null +++ b/developers/modules/prokka.mdx @@ -0,0 +1,161 @@ +--- +title: prokka +description: "Annotate prokaryotic genomes." +tags: + - prokka + - annotation + - prokaryotic + - bacteria + - genbank + - gff + - sample-scope +--- + +# prokka + +**Tags:** prokka annotation prokaryotic bacteria genbank gff sample-scope + +Annotate prokaryotic genomes. + +Uses [Prokka](https://github.com/tseemann/prokka) to rapidly annotate bacterial, archaeal, +and viral genomes, producing standards-compliant output files including GFF3, GenBank, and Sequin. + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +``` +proteins: Path? +prodigal_tf: Path?
+``` + +| Name | Type | Description | +|------|------|-------------| +| `proteins` | `Path?` | FASTA file of trusted proteins to first annotate from | +| `prodigal_tf` | `Path?` | Training file to use for gene prediction | + +## Outputs + +``` +record ( + meta: Record, + gff: Path, + gbff: Path, + fna: Path, + faa: Path, + ffn: Path, + sqn: Path, + fsa: Path, + tbl: Path, + txt: Path, + tsv: Path, + blastdb: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `gff` | `Path` | Annotation in GFF3 format, containing both sequences and annotations | +| `gbff` | `Path` | Annotation in GenBank format, containing both sequences and annotations | +| `fna` | `Path` | Nucleotide FASTA file of the input contig sequences | +| `faa` | `Path` | Protein FASTA file of the translated CDS sequences | +| `ffn` | `Path` | Nucleotide FASTA file of all predicted transcripts (CDS, rRNA, tRNA, tmRNA, misc_RNA) | +| `sqn` | `Path` | An ASN1 format "Sequin" file for submission to GenBank | +| `fsa` | `Path` | Nucleotide FASTA file of the input contig sequences, used by tbl2asn | +| `tbl` | `Path` | Feature Table file for NCBI submission | +| `txt` | `Path` | Summary statistics relating to the annotated features found | +| `tsv` | `Path` | Tab-separated file of all features (locus_tag, ftype, len_bp, gene, EC_number, COG, product) | +| `blastdb` | `Path` | A compressed tar.gz archive of BLAST+ databases of the contigs, genes, and proteins | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g.
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Prokka Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--prokka_proteins` | string | `${projectDir}/data/proteins.faa` | FASTA file of trusted proteins to first annotate from | +| `--prokka_prodigal_tf` | string | | Training file to use for Prodigal | +| `--prokka_coverage` | integer | `80` | Minimum coverage on query protein | + +## Used By + +### Subworkflows + +- [prokka](/developers/subworkflows/prokka) - Annotate bacterial genomes with functional information. + +### Workflows + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [pangenome](/bactopia-tools/pangenome) - Pangenome analysis with optional core-genome phylogeny. +- [prokka](/bactopia-tools/prokka) - Rapid whole genome annotation of bacterial, archaeal, and viral genomes. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Prokka](https://github.com/tseemann/prokka) + Seemann T [Prokka: rapid prokaryotic genome annotation](http://dx.doi.org/10.1093/bioinformatics/btu153) _Bioinformatics_ 30, 2068-2069 (2014) + +- [Aragorn](http://130.235.244.92/ARAGORN/Downloads/) + Laslett D, Canback B [ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide sequences.](https://doi.org/10.1093/nar/gkh152) _Nucleic Acids Res_. 
32(1):11-6 (2004) + +- [Barrnap](https://github.com/tseemann/barrnap) + Seemann T [Barrnap: Bacterial ribosomal RNA predictor](https://github.com/tseemann/barrnap) (GitHub) + +- [CD-HIT](https://github.com/weizhongli/cdhit) + Li W, Godzik A [Cd-hit: a fast program for clustering and comparing large sets of protein or nucleotide sequences](http://dx.doi.org/10.1093/bioinformatics/btl158). _Bioinformatics_ 22, 1658-1659 (2006) + +- [HMMER](http://hmmer.org/) + Eddy SR [Accelerated Profile HMM Searches.](https://doi.org/10.1371/journal.pcbi.1002195) _PLoS Comput. Biol._ 7, e1002195 (2011) + +- [Infernal](http://eddylab.org/infernal/) + Nawrocki EP, Eddy SR [Infernal 1.1: 100-fold faster RNA homology searches.](https://doi.org/10.1093/bioinformatics/btt509) _Bioinformatics_ 29(22), 2933-2935 (2013) + +- [MinCED](https://github.com/ctSkennerton/minced) + Skennerton C [MinCED: Mining CRISPRs in Environmental Datasets](https://github.com/ctSkennerton/minced) (GitHub) + +- [nhmmer](http://hmmer.org/) + Wheeler TJ, Eddy SR [nhmmer: DNA homology search with profile HMMs.](https://doi.org/10.1093/bioinformatics/btt403) _Bioinformatics_ 29, 2487-2489 (2013) + +- [Prodigal](https://github.com/hyattpd/Prodigal) + Hyatt D, Chen G-L, LoCascio PF, Land ML, Larimer FW, Hauser LJ [Prodigal: prokaryotic gene recognition and translation initiation site identification.](https://doi.org/10.1186/1471-2105-11-119) _BMC Bioinformatics_ 11.1 119 (2010) + +- [RNAmmer](http://www.cbs.dtu.dk/services/RNAmmer/) + Lagesen K, Hallin P, Rødland EA, Stærfeldt H-H, Rognes T, Ussery DW [RNAmmer: consistent annotation of rRNA genes in genomic sequences.](https://dx.doi.org/10.1093%2Fnar%2Fgkm160) _Nucleic Acids Res_ 35.9: 3100-3108 (2007) + +- [SignalP](http://www.cbs.dtu.dk/services/SignalP-4.0/) + Petersen TN, Brunak S, von Heijne G, Nielsen H [SignalP 4.0: discriminating signal peptides from transmembrane regions.](https://doi.org/10.1038/nmeth.1701) _Nature methods_ 8.10: 785 (2011) + +## Source + 
+[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/prokka) + +## Version + +```yaml +PROKKA: + - prokka: 1.15.6 +``` diff --git a/developers/modules/quast.mdx b/developers/modules/quast.mdx new file mode 100644 index 00000000..19534e39 --- /dev/null +++ b/developers/modules/quast.mdx @@ -0,0 +1,97 @@ +--- +title: quast +description: "Quality Assessment Tool for Genome Assemblies." +tags: + - quast + - assembly + - quality-control + - n50 + - metrics + - sample-scope +--- + +# quast + +**Tags:** quast assembly quality-control n50 metrics sample-scope + +Quality Assessment Tool for Genome Assemblies. + +Uses [QUAST](https://github.com/ablab/quast) to evaluate genome assemblies by computing various +metrics such as N50, gene counts, and assembly length. + +## Inputs + +``` +record ( + meta: Record, + fna: Path, + tsv_meta: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | +| `tsv_meta` | `Path` | Meta file containing reference size information | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Transposed report in TSV format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Quast Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--quast_contig_thresholds` | string | `0,1000,10000,100000,250000,1000000` | Comma-separated list of contig length thresholds | + +## Used By + +### Subworkflows + +- [quast](/developers/subworkflows/quast) - Evaluate assembly quality using QUAST. + +### Workflows + +- [quast](/bactopia-tools/quast) - Quality assessment of assembled contigs using QUAST. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [QUAST](http://quast.sourceforge.net/) + Gurevich A, Saveliev V, Vyahhi N, Tesler G [QUAST: quality assessment tool for genome assemblies.](http://dx.doi.org/10.1093/bioinformatics/btt086) _Bioinformatics_ 29, 1072-1075 (2013) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/quast) + +## Version + +```yaml +QUAST: + - quast: 5.3.0 +``` diff --git a/developers/modules/rgi_heatmap.mdx b/developers/modules/rgi_heatmap.mdx new file mode 100644 index 00000000..3cb9bc08 --- /dev/null +++ b/developers/modules/rgi_heatmap.mdx @@ -0,0 +1,91 @@ +--- +title: rgi_heatmap +description: "Create heatmaps of resistance gene presence/absence." +tags: + - resistance + - antimicrobial-resistance + - card + - rgi + - heatmap + - visualization + - run-scope +--- + +# rgi_heatmap + +**Tags:** resistance antimicrobial-resistance card rgi heatmap visualization run-scope + +Create heatmaps of resistance gene presence/absence. 
+ +Uses [RGI](https://github.com/arpcard/rgi) (Resistance Gene Identifier) to generate +heatmaps visualizing the presence or absence of antimicrobial resistance genes across +multiple samples based on RGI JSON results. + +## Inputs + +``` +record ( + meta: Record, + json: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `json` | `Set` | List of RGI results in JSON format | + +## Outputs + +``` +record ( + meta: Record, + heatmap: Set, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `heatmap` | `Set` | Heatmap files in various formats (CSV, EPS, PNG) | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [rgi](/developers/subworkflows/rgi) - Predict antimicrobial resistance from protein or nucleotide data. + +### Workflows + +- [rgi](/bactopia-tools/rgi) - Prediction of antibiotic resistance genes using RGI. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Resistance Gene Identifier (RGI)](https://github.com/arpcard/rgi) + Alcock BP, Raphenya AR, Lau TTY, Tsang KK, Bouchard M, Edalatmand A, Huynh W, Nguyen A-L V, Cheng AA, Liu S, Min SY, Miroshnichenko A, Tran H-K, Werfalli RE, Nasir JA, Oloni M, Speicher DJ, Florescu A, Singh B, Faltyn M, Hernandez-Koutoucheva A, Sharma AN, Bordeleau E, Pawlowski AC, Zubyk HL, Dooley D, Griffiths E, Maguire F, Winsor GL, Beiko RG, Brinkman FSL, Hsiao WWL, Domselaar GV, McArthur AG [CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database.](https://doi.org/10.1093/nar/gkz935) _Nucleic acids research_ 48.D1, D517-D525 (2020) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/rgi/heatmap) + +## Version + +```yaml +RGI_HEATMAP: + - rgi: 6.0.5 +``` diff --git a/developers/modules/rgi_main.mdx b/developers/modules/rgi_main.mdx new file mode 100644 index 00000000..b7201a16 --- /dev/null +++ b/developers/modules/rgi_main.mdx @@ -0,0 +1,101 @@ +--- +title: rgi_main +description: "Predict antibiotic resistance from assemblies." +tags: + - resistance + - antimicrobial-resistance + - card + - rgi + - amr + - sample-scope +--- + +# rgi_main + +**Tags:** resistance antimicrobial-resistance card rgi amr sample-scope + +Predict antibiotic resistance from assemblies. + +Uses [RGI](https://github.com/arpcard/rgi) (Resistance Gene Identifier) to predict +resistomes from protein or nucleotide data based on homology and SNP models using +the Comprehensive Antibiotic Resistance Database (CARD). 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + json: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | RGI results in tab-separated format | +| `json` | `Path?` | RGI results in JSON format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### RGI Main Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--rgi_use_diamond` | boolean | `false` | Use DIAMOND for alignments instead of BLAST | + +## Used By + +### Subworkflows + +- [rgi](/developers/subworkflows/rgi) - Predict antimicrobial resistance from protein or nucleotide data. + +### Workflows + +- [rgi](/bactopia-tools/rgi) - Prediction of antibiotic resistance genes using RGI. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Resistance Gene Identifier (RGI)](https://github.com/arpcard/rgi) + Alcock BP, Raphenya AR, Lau TTY, Tsang KK, Bouchard M, Edalatmand A, Huynh W, Nguyen A-L V, Cheng AA, Liu S, Min SY, Miroshnichenko A, Tran H-K, Werfalli RE, Nasir JA, Oloni M, Speicher DJ, Florescu A, Singh B, Faltyn M, Hernandez-Koutoucheva A, Sharma AN, Bordeleau E, Pawlowski AC, Zubyk HL, Dooley D, Griffiths E, Maguire F, Winsor GL, Beiko RG, Brinkman FSL, Hsiao WWL, Domselaar GV, McArthur AG [CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database.](https://doi.org/10.1093/nar/gkz935) _Nucleic acids research_ 48.D1, D517-D525 (2020) + +- [DIAMOND](https://github.com/bbuchfink/diamond) + Buchfink B, Xie C, Huson DH [Fast and sensitive protein alignment using DIAMOND.](http://dx.doi.org/10.1038/nmeth.3176) _Nat. Methods._ 12, 59-60 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/rgi/main) + +## Version + +```yaml +RGI_MAIN: + - rgi: 6.0.5 +``` diff --git a/developers/modules/roary.mdx b/developers/modules/roary.mdx new file mode 100644 index 00000000..fc53afd1 --- /dev/null +++ b/developers/modules/roary.mdx @@ -0,0 +1,97 @@ +--- +title: roary +description: "Rapid large-scale prokaryote pan genome analysis." +tags: + - pangenome + - orthology + - core-genome + - alignment + - bacteria + - run-scope +--- + +# roary + +**Tags:** pangenome orthology core-genome alignment bacteria run-scope + +Rapid large-scale prokaryote pan genome analysis. + +Uses [Roary](https://github.com/sanger-pathogens/Roary) to calculate the pan genome of a +collection of prokaryotic annotated assemblies. It outputs a core gene alignment and a +gene presence/absence table. 
+ +## Inputs + +``` +record ( + meta: Record, + gff: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `gff` | `Set` | List of GFF3 files to be analyzed (typically from Prokka) | + +## Outputs + +``` +record ( + meta: Record, + aln: Path?, + csv: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `aln` | `Path?` | Core genome alignment in FASTA format | +| `csv` | `Path?` | Gene presence/absence table | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Roary Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--use_roary` | boolean | `false` | Use Roary instead of PIRATE in the 'pangenome' subworkflow | +| `--roary_cd` | integer | `99` | Percentage of isolates a gene must be in to be core | +| `--roary_s` | boolean | `false` | Do not split paralogs | +| `--roary_ap` | boolean | `false` | Allow paralogs in core alignment | + +## Used By + +### Subworkflows + +- [roary](/developers/subworkflows/roary) - Build a pangenome from GFF3 annotations using Roary. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Roary](https://github.com/sanger-pathogens/Roary) + Page AJ, Cummins CA, Hunt M, Wong VK, Reuter S, Holden MTG, Fookes M, Falush D, Keane JA, Parkhill J [Roary: rapid large-scale prokaryote pan genome analysis.](https://doi.org/10.1093/bioinformatics/btv421) _Bioinformatics_ 31, 3691-3693 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/roary) + +## Version + +```yaml +ROARY: + - roary: 3.13.0 +``` diff --git a/developers/modules/sccmec.mdx b/developers/modules/sccmec.mdx new file mode 100644 index 00000000..545fa8b1 --- /dev/null +++ b/developers/modules/sccmec.mdx @@ -0,0 +1,107 @@ +--- +title: sccmec +description: "Identify SCCmec elements in Staphylococcus aureus genomes." +tags: + - sccmec + - staphylococcus-aureus + - mrsa + - antimicrobial-resistance + - typing + - sample-scope +--- + +# sccmec + +**Tags:** sccmec staphylococcus-aureus mrsa antimicrobial-resistance typing sample-scope + +Identify SCCmec elements in Staphylococcus aureus genomes. + +Uses [SCCmec](https://github.com/rpetit3/sccmec) to identify the Staphylococcal Cassette +Chromosome mec (SCCmec) element in *Staphylococcus aureus* assemblies. It predicts the type +based on the presence of specific *mec* and *ccr* gene complexes. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + targets: Path, + target_details: Path, + regions: Path, + regions_details: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Main results file with SCCmec typing | +| `targets` | `Path` | BLAST results for target sequences | +| `target_details` | `Path` | Detailed results for target matches | +| `regions` | `Path` | BLAST results for SCCmec regions | +| `regions_details` | `Path` | Detailed results for SCCmec region matches | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### sccmec Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--sccmec_min_targets_pident` | integer | `90` | Minimum percent identity to count a target hit | +| `--sccmec_min_targets_coverage` | integer | `80` | Minimum percent coverage to count a target hit | +| `--sccmec_min_regions_pident` | integer | `85` | Minimum percent identity to count a region hit | +| `--sccmec_min_regions_coverage` | integer | `93` | Minimum percent coverage to count a region hit | + +## Used By + +### Subworkflows + +- [sccmec](/developers/subworkflows/sccmec) - Identify SCCmec elements in Staphylococcus aureus genomes.
+ +### Workflows + +- [sccmec](/bactopia-tools/sccmec) - Typing of SCCmec cassettes in Staphylococcus aureus assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [sccmec](https://github.com/rpetit3/sccmec) + Petit III RA, Read TD [sccmec: A tool for typing SCCmec cassettes in assemblies](https://github.com/rpetit3/sccmec) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/sccmec) + +## Version + +```yaml +SCCMEC: + - sccmec: 1.2.0 +``` diff --git a/developers/modules/scoary.mdx b/developers/modules/scoary.mdx new file mode 100644 index 00000000..3181d5d7 --- /dev/null +++ b/developers/modules/scoary.mdx @@ -0,0 +1,104 @@ +--- +title: scoary +description: "Pan-genome wide association studies." +tags: + - scoary + - pangenome + - gwas + - association + - bacteria + - roary + - run-scope +--- + +# scoary + +**Tags:** scoary pangenome gwas association bacteria roary run-scope + +Pan-genome wide association studies. + +Uses [Scoary](https://github.com/AdmiralenOla/Scoary) to score the components of the pan-genome +for associations to specified traits (phenotypes). It is designed to work with the gene +presence/absence output from Roary. 
+ +## Inputs + +``` +record ( + meta: Record, + csv: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `csv` | `Path` | Gene presence/absence CSV file (typically from Roary) | + +``` +traits: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `traits` | `Path` | CSV file containing trait information for the samples | + +## Outputs + +``` +record ( + meta: Record, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Scoary Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--scoary_traits` | string | | Input trait table (CSV) to test for associations | +| `--scoary_permute` | integer | `0` | Perform N number of permutations of the significant results post-analysis | + +## Used By + +### Subworkflows + +- [scoary](/developers/subworkflows/scoary) - Pan-genome wide association studies. + +### Workflows + +- [pangenome](/bactopia-tools/pangenome) - Pangenome analysis with optional core-genome phylogeny. + +## Citations + +If you use this in your analysis, please cite the following.
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Scoary](https://github.com/AdmiralenOla/Scoary) + Brynildsrud O, Bohlin J, Scheffer L, Eldholm V [Rapid scoring of genes in microbial pan-genome-wide association studies with Scoary.](https://doi.org/10.1186/s13059-016-1108-8) _Genome Biol._ 17:238 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/scoary) + +## Version + +```yaml +SCOARY: + - scoary: 1.6.16 +``` diff --git a/developers/modules/seqsero2.mdx b/developers/modules/seqsero2.mdx new file mode 100644 index 00000000..ee3c6170 --- /dev/null +++ b/developers/modules/seqsero2.mdx @@ -0,0 +1,98 @@ +--- +title: seqsero2 +description: "Salmonella serotype prediction from genome sequencing data." +tags: + - salmonella + - serotype + - prediction + - seqsero2 + - antigen + - sample-scope +--- + +# seqsero2 + +**Tags:** salmonella serotype prediction seqsero2 antigen sample-scope + +Salmonella serotype prediction from genome sequencing data. + +Uses [SeqSero2](https://github.com/denglab/SeqSero2) to predict *Salmonella* serotypes from +raw sequencing reads or genome assemblies using specific O-antigen and H-antigen markers. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | FASTQ reads or assembled contigs | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + txt: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | SeqSero2 serotype prediction results in TSV format | +| `txt` | `Path` | SeqSero2 serotype prediction results in text format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### SeqSero2 Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--seqsero2_run_mode` | string | `k` | Workflow to run. 'a' allele mode, or 'k' k-mer mode (choices: `a`, `k`) | +| `--seqsero2_input_type` | string | `assembly` | Input format to analyze. 'assembly' or 'fastq' (choices: `assembly`, `fastq`) | + +## Used By + +### Subworkflows + +- [seqsero2](/developers/subworkflows/seqsero2) - Predict Salmonella serotypes from genome assemblies. + +### Workflows + +- [seqsero2](/bactopia-tools/seqsero2) - Salmonella serotype prediction from sequencing reads or assemblies. + +## Citations + +If you use this in your analysis, please cite the following.
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SeqSero2](https://github.com/denglab/SeqSero2) + Zhang S, Den-Bakker HC, Li S, Dinsmore BA, Lane C, Lauer AC, Fields PI, Deng X. [SeqSero2: rapid and improved Salmonella serotype determination using whole genome sequencing data.](https://doi.org/10.1128/AEM.01746-19) _Appl Environ Microbiology_ 85(23):e01746-19 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/seqsero2) + +## Version + +```yaml +SEQSERO2: + - seqsero2: 1.3.2 +``` diff --git a/developers/modules/seroba_run.mdx b/developers/modules/seroba_run.mdx new file mode 100644 index 00000000..4e63f541 --- /dev/null +++ b/developers/modules/seroba_run.mdx @@ -0,0 +1,101 @@ +--- +title: seroba_run +description: "k-mer based Streptococcus pneumoniae serotyping." +tags: + - streptococcus-pneumoniae + - serotype + - k-mer + - prediction + - seroba + - sample-scope +--- + +# seroba_run + +**Tags:** streptococcus-pneumoniae serotype k-mer prediction seroba sample-scope + +k-mer based Streptococcus pneumoniae serotyping. + +Uses [SeroBA](https://github.com/sanger-pathogens/seroba) to identify the serotype of +*Streptococcus pneumoniae* from Illumina paired-end reads using a k-mer based approach. + +:::note[Database Required] +Requires the SeroBA database to be set up using `seroba createDBs` before running. 
+::: + +## Inputs + +``` +record ( + meta: Record, + r1: Path, + r2: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path` | Illumina R1 reads (paired-end) | +| `r2` | `Path` | Illumina R2 reads (paired-end) | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Serotype prediction results with predicted serotype and confidence in TSV format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### SeroBA Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--seroba_coverage` | integer | `20` | Threshold for k-mer coverage of the reference sequence | + +## Used By + +### Subworkflows + +- [seroba](/developers/subworkflows/seroba) - k-mer based pipeline to identify the serotype of Streptococcus pneumoniae. + +### Workflows + +- [seroba](/bactopia-tools/seroba) - Serotyping of Streptococcus pneumoniae from Illumina paired-end reads. + +## Citations + +If you use this in your analysis, please cite the following.
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SeroBA](https://github.com/sanger-pathogens/seroba) + Epping L, van Tonder AJ, Gladstone RA, The Global Pneumococcal Sequencing Consortium, Bentley SD, Page AJ, Keane JA [SeroBA: rapid high-throughput serotyping of Streptococcus pneumoniae from whole genome sequence data.](https://doi.org/10.1099/mgen.0.000186) _Microbial Genomics_, 4(7) (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/seroba/run) + +## Version + +```yaml +SEROBA_RUN: + - seroba: 1.0.2 +``` diff --git a/developers/modules/shigapass.mdx b/developers/modules/shigapass.mdx new file mode 100644 index 00000000..903f4040 --- /dev/null +++ b/developers/modules/shigapass.mdx @@ -0,0 +1,92 @@ +--- +title: shigapass +description: "Predict Shigella serotypes and differentiate Shigella/EIEC." +tags: + - shigella + - eiec + - serotype + - virulence + - prediction + - sample-scope +--- + +# shigapass + +**Tags:** shigella eiec serotype virulence prediction sample-scope + +Predict Shigella serotypes and differentiate Shigella/EIEC. + +Uses [ShigaPass](https://github.com/imanyass/ShigaPass) to identify *Shigella* serotypes and +distinguish *Shigella* species from Enteroinvasive *Escherichia coli* (EIEC) using specific +genomic markers from assembled contigs.
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + flex_tsv: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | ShigaPass summary results in TSV format | +| `flex_tsv` | `Path?` | ShigaPass Flex summary results in TSV format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [shigapass](/developers/subworkflows/shigapass) - Predict serotypes of Shigella from assemblies. + +### Workflows + +- [shigapass](/bactopia-tools/shigapass) - Prediction of Shigella serotypes and differentiation from EIEC. + +## Citations + +If you use this in your analysis, please cite the following.
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ShigaPass](https://github.com/imanyass/ShigaPass) + Yassine I, Hansen EE, Lefèvre S, Ruckly C, Carle I, Lejay-Collin M, Fabre L, Rafei R, Pardos de la Gandara M, Daboussi F, Shahin A, Weill FX [ShigaPass: an in silico tool predicting Shigella serotypes from whole-genome sequencing assemblies.](https://doi.org/10.1099/mgen.0.000961) _Microb Genomics_ 9(3) (2023) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/shigapass) + +## Version + +```yaml +SHIGAPASS: + - shigapass: 1.5.0 +``` diff --git a/developers/modules/shigatyper.mdx b/developers/modules/shigatyper.mdx new file mode 100644 index 00000000..297c4983 --- /dev/null +++ b/developers/modules/shigatyper.mdx @@ -0,0 +1,102 @@ +--- +title: shigatyper +description: "Shigella serotype from Illumina or Oxford Nanopore reads." +tags: + - shigella + - serotype + - typing + - illumina + - nanopore + - reads + - sample-scope +--- + +# shigatyper + +**Tags:** shigella serotype typing illumina nanopore reads sample-scope + +Shigella serotype from Illumina or Oxford Nanopore reads. + +Uses [ShigaTyper](https://github.com/CFSAN-Biostatistics/shigatyper) to determine the serotype +of *Shigella* isolates using Illumina paired-end reads or Oxford Nanopore long reads. It detects +serotype-specific genes and markers to provide a predicted serotype. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path?
+) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT/PacBio) | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + hits: Set, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | ShigaTyper serotype prediction results in TSV format | +| `hits` | `Set` | Detailed gene hits from ShigaTyper analysis | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [shigatyper](/developers/subworkflows/shigatyper) - Predict serotypes of Shigella from reads or assemblies. + +### Workflows + +- [shigatyper](/bactopia-tools/shigatyper) - Rapid determination of Shigella serotypes from sequencing reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ShigaTyper](https://github.com/CFSAN-Biostatistics/shigatyper) + Wu Y, Lau HK, Lee T, Lau DK, Payne J [In Silico Serotyping Based on Whole-Genome Sequencing Improves the Accuracy of Shigella Identification.](https://doi.org/10.1128/AEM.00165-19) *Applied and Environmental Microbiology*, 85(7).
(2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/shigatyper) + +## Version + +```yaml +SHIGATYPER: + - shigatyper: 2.0.5 +``` diff --git a/developers/modules/shigeifinder.mdx b/developers/modules/shigeifinder.mdx new file mode 100644 index 00000000..6be79f52 --- /dev/null +++ b/developers/modules/shigeifinder.mdx @@ -0,0 +1,91 @@ +--- +title: shigeifinder +description: "Shigella and EIEC serotyping from assemblies." +tags: + - shigella + - eiec + - serotype + - identification + - cluster + - virulence + - sample-scope +--- + +# shigeifinder + +**Tags:** shigella eiec serotype identification cluster virulence sample-scope + +Shigella and EIEC serotyping from assemblies. + +Uses [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) to differentiate *Shigella* and +Enteroinvasive *E. coli* (EIEC) and predict their serotypes from genome assemblies. It utilizes +cluster-specific marker genes to distinguish these closely related pathovars. + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | ShigEiFinder results in TSV format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
`.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [shigeifinder](/developers/subworkflows/shigeifinder) - Predict serotypes of Shigella and EIEC from assemblies. + +### Workflows + +- [shigeifinder](/bactopia-tools/shigeifinder) - In silico serotype prediction for Shigella and Enteroinvasive E. coli (EIEC). + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) + Zhang X, Payne M, Nguyen T, Kaur S, Lan R [Cluster-specific gene markers enhance Shigella and enteroinvasive Escherichia coli in silico serotyping.](https://doi.org/10.1099/mgen.0.000704) _Microbial Genomics_, 7(12). (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/shigeifinder) + +## Version + +```yaml +SHIGEIFINDER: + - shigeifinder: 1.3.5 +``` diff --git a/developers/modules/sistr.mdx b/developers/modules/sistr.mdx new file mode 100644 index 00000000..c393a34e --- /dev/null +++ b/developers/modules/sistr.mdx @@ -0,0 +1,102 @@ +--- +title: sistr +description: "Serovar prediction of Salmonella assemblies." +tags: + - salmonella + - serotype + - cgmlst + - typing + - prediction + - sample-scope +--- + +# sistr + +**Tags:** salmonella serotype cgmlst typing prediction sample-scope + +Serovar prediction of Salmonella assemblies. + +Uses [SISTR](https://github.com/phac-nml/sistr_cmd) (Salmonella In Silico Typing Resource) to +predict serovars of *Salmonella* from draft genome assemblies using core genome Multi-Locus +Sequence Typing (cgMLST).
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + allele_fasta: Path, + allele_json: Path, + cgmlst_csv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | SISTR prediction results in TSV format | +| `allele_fasta` | `Path` | Novel alleles in FASTA format | +| `allele_json` | `Path` | Alleles in JSON format | +| `cgmlst_csv` | `Path` | cgMLST profile in CSV format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### SISTR Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--sistr_full_cgmlst` | boolean | `false` | Use the full set of cgMLST alleles which can include highly similar alleles | + +## Used By + +### Subworkflows + +- [sistr](/developers/subworkflows/sistr) - Salmonella In Silico Typing Resource command-line tool. + +### Workflows + +- [sistr](/bactopia-tools/sistr) - Serovar prediction of Salmonella enterica from assemblies. + +## Citations + +If you use this in your analysis, please cite the following.
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SISTR](https://github.com/phac-nml/sistr_cmd) + Yoshida CE, Kruczkiewicz P, Laing CR, Lingohr EJ, Gannon VPJ, Nash JHE, Taboada EN [The Salmonella In Silico Typing Resource (SISTR): An Open Web-Accessible Tool for Rapidly Typing and Subtyping Draft Salmonella Genome Assemblies.](https://doi.org/10.1371/journal.pone.0147101) _PloS One_, 11(1), e0147101. (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/sistr) + +## Version + +```yaml +SISTR: + - sistr_cmd: 1.1.3 +``` diff --git a/developers/modules/snippy_core.mdx b/developers/modules/snippy_core.mdx new file mode 100644 index 00000000..bf9a3825 --- /dev/null +++ b/developers/modules/snippy_core.mdx @@ -0,0 +1,154 @@ +--- +title: snippy_core +description: "Core-SNP alignment from Snippy outputs." +tags: + - snippy + - core-genome + - alignment + - phylogeny + - snp + - bacteria + - run-scope +--- + +# snippy_core + +**Tags:** snippy core-genome alignment phylogeny snp bacteria run-scope + +Core-SNP alignment from Snippy outputs. + +Uses [Snippy](https://github.com/tseemann/snippy) to generate a core genome alignment +from multiple Snippy outputs. It combines variant calls (VCF) and alignments to produce +a core SNP alignment, which can be used for phylogenetic analysis. 
+ +## Inputs + +``` +record ( + meta: Record, + _vcf: Set, + _aligned_fa: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `_vcf` | `Set` | List of VCF files from Snippy | +| `_aligned_fa` | `Set` | List of aligned FASTA files from Snippy | + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing reference information | +| `reference` | `Path` | Reference genome (FASTA or GenBank format) | + +``` +reference: Path +mask: Path? +``` + +| Name | Type | Description | +|------|------|-------------| +| `mask` | `Path?` | BED file of regions to mask in the alignment | + +## Outputs + +``` +record ( + meta: Record, + supplemental: Set, + aln: Path, + full_aln: Path, + clean_full_aln: Path, + tab: Path, + vcf: Path, + txt: Path, + samples: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `supplemental` | `Set` | Supplemental files including individual sample alignments | +| `aln` | `Path` | A core SNP alignment in FASTA format | +| `full_aln` | `Path` | A whole genome SNP alignment (includes invariant sites) | +| `clean_full_aln` | `Path` | A whole genome SNP alignment (includes invariant sites) with Ns | +| `tab` | `Path` | Tab-separated list of core SNP sites with alleles (no annotations) | +| `vcf` | `Path` | Multi-sample VCF file with genotype GT tags for all discovered alleles | +| `txt` | `Path` | Tab-separated list of alignment and core-size statistics | +| `samples` | `Path` | List of samples included in the core alignment | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
`.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Snippy-Core Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--snippy_core_maxhap` | integer | `100` | Largest haplotype to decompose | +| `--snippy_core_mask` | string | | BED file of sites to mask | +| `--snippy_core_mask_char` | string | `X` | Masking character | +| `--snippy_core_opts` | string | | Extra options in quotes for snippy-core | + +## Used By + +### Subworkflows + +- [snippy_core](/developers/subworkflows/snippy_core) - Generate core-genome SNP alignment from per-sample Snippy outputs. + +### Workflows + +- [snippy](/bactopia-tools/snippy) - Rapid haploid variant calling and core genome alignment. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Snippy](https://github.com/tseemann/snippy) + Seemann T [Snippy: fast bacterial variant calling from NGS reads](https://github.com/tseemann/snippy) (GitHub) + +- [BCFtools](https://github.com/samtools/bcftools) + Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H [Twelve years of SAMtools and BCFtools](https://doi.org/10.1093/gigascience/giab008) _GigaScience_ Volume 10, Issue 2 (2021) + +- [Bedtools](https://github.com/arq5x/bedtools2) + Quinlan AR, Hall IM [BEDTools: a flexible suite of utilities for comparing genomic features](http://dx.doi.org/10.1093/bioinformatics/btq033).
_Bioinformatics_ 26, 841-842 (2010) + +- [freebayes](https://github.com/ekg/freebayes) + Garrison E, Marth G [Haplotype-based variant detection from short-read sequencing.](https://arxiv.org/abs/1207.3907) arXiv preprint arXiv:1207.3907 [q-bio.GN] (2012) + +- [Seqtk](https://github.com/lh3/seqtk) + Li H [Toolkit for processing sequences in FASTA/Q formats](https://github.com/lh3/seqtk) (GitHub) + +- [SnpEff](http://snpeff.sourceforge.net/) + Cingolani P, Platts A, Wang LL, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Douglas M [A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.](https://doi.org/10.4161/fly.19695) _Fly_ 6(2), 80-92 (2012) + +- [VCF-Annotator](https://github.com/rpetit3/vcf-annotator) + Petit III RA [VCF-Annotator: Add biological annotations to variants in a VCF file.](https://github.com/rpetit3/vcf-annotator) (GitHub) + +- [Vcflib](https://github.com/vcflib/vcflib) + Garrison E [Vcflib: A C++ library for parsing and manipulating VCF files](https://github.com/vcflib/vcflib) (GitHub) + +- [vt](https://github.com/atks/vt) + Tan A, Abecasis GR, Kang HM [Unified representation of genetic variants.](https://doi.org/10.1093/bioinformatics/btv112) _Bioinformatics_ 31(13), 2202-2204 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/snippy/core) + +## Version + +```yaml +SNIPPY_CORE: + - bactopia-variants: 1.0.4 +``` diff --git a/developers/modules/snippy_run.mdx b/developers/modules/snippy_run.mdx new file mode 100644 index 00000000..b828c93d --- /dev/null +++ b/developers/modules/snippy_run.mdx @@ -0,0 +1,178 @@ +--- +title: snippy_run +description: "Rapid haploid variant calling and core genome alignment." 
+tags: + - snippy + - variant-calling + - snp + - indel + - alignment + - bacteria + - sample-scope +--- + +# snippy_run + +**Tags:** snippy variant-calling snp indel alignment bacteria sample-scope + +Rapid haploid variant calling and core genome alignment. + +Uses [Snippy](https://github.com/tseemann/snippy) to find SNPs and indels between a haploid +reference genome and your Next-Generation Sequencing (NGS) sequence reads. It maps reads to +the reference, calls variants, and generates a consensus sequence. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se) where each read slot is Path? + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path? +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing reference information | +| `reference` | `Path` | Reference genome (FASTA or GenBank format) | + +## Outputs + +``` +record ( + meta: Record, + aligned_fa: Path?, + vcf: Path?, + aligned_fa_error: Path?, + vcf_error: Path?, + error: Path?, + annotated_vcf: Path, + bam: Path?, + bai: Path?, + bed: Path, + consensus_fa: Path, + consensus_subs_fa: Path, + consensus_subs_masked_fa: Path, + coverage: Path, + csv: Path, + filt_vcf: Path, + gff: Path, + html: Path, + raw_vcf: Path, + subs_vcf: Path, + tab: Path, + txt: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `aligned_fa` | `Path?` | A version of the reference with - at zero coverage positions | +| `vcf` | `Path?` | The final annotated variants in VCF format | +| 
`aligned_fa_error` | `Path?` | Aligned FASTA file generated during error state | +| `vcf_error` | `Path?` | VCF file generated during error state | +| `error` | `Path?` | Error log text file | +| `annotated_vcf` | `Path` | Annotated VCF file | +| `bam` | `Path?` | The alignments in BAM format (includes unmapped/multimapping) | +| `bai` | `Path?` | Index for the BAM file | +| `bed` | `Path` | The variants in BED format | +| `consensus_fa` | `Path` | Reference genome with all variants instantiated | +| `consensus_subs_fa` | `Path` | Reference genome with only substitution variants instantiated | +| `consensus_subs_masked_fa` | `Path` | Reference genome with substitutions instantiated and low coverage masked | +| `coverage` | `Path` | Per-base coverage depth information | +| `csv` | `Path` | A comma-separated summary of variants | +| `filt_vcf` | `Path` | The filtered variant calls from Freebayes | +| `gff` | `Path` | The variants in GFF3 format | +| `html` | `Path` | A HTML summary of the variants | +| `raw_vcf` | `Path` | The unfiltered variant calls from Freebayes | +| `subs_vcf` | `Path` | VCF containing only substitution variants | +| `tab` | `Path` | A simple tab-separated summary of all variants | +| `txt` | `Path` | Tab-separated columnar list of alignment statistics | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. 
.command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Snippy Run Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--reference` | string | | Reference genome in GenBank format | +| `--snippy_mapqual` | integer | `60` | Minimum read mapping quality to consider | +| `--snippy_basequal` | integer | `13` | Minimum base quality to consider | +| `--snippy_bwaopt` | string | | Extra BWA MEM options, e.g. -x pacbio | +| `--snippy_fbopt` | string | | Extra Freebayes options, e.g. --theta 1E-6 --read-snp-limit 2 | +| `--snippy_opts` | string | | Extra options in quotes for Snippy | + +## Used By + +### Subworkflows + +- [snippy_run](/developers/subworkflows/snippy_run) - Call variants against a reference genome using Snippy. + +### Workflows + +- [snippy](/bactopia-tools/snippy) - Rapid haploid variant calling and core genome alignment. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Snippy](https://github.com/tseemann/snippy) + Seemann T [Snippy: fast bacterial variant calling from NGS reads](https://github.com/tseemann/snippy) (GitHub) + +- [BCFtools](https://github.com/samtools/bcftools) + Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H [Twelve years of SAMtools and BCFtools](https://doi.org/10.1093/gigascience/giab008) _GigaScience_ Volume 10, Issue 2 (2021) + +- [Bedtools](https://github.com/arq5x/bedtools2) + Quinlan AR, Hall IM [BEDTools: a flexible suite of utilities for comparing genomic features](http://dx.doi.org/10.1093/bioinformatics/btq033). 
_Bioinformatics_ 26, 841-842 (2010) + +- [freebayes](https://github.com/ekg/freebayes) + Garrison E, Marth G [Haplotype-based variant detection from short-read sequencing.](https://arxiv.org/abs/1207.3907) arXiv preprint arXiv:1207.3907 [q-bio.GN] (2012) + +- [Seqtk](https://github.com/lh3/seqtk) + Li H [Toolkit for processing sequences in FASTA/Q formats](https://github.com/lh3/seqtk) (GitHub) + +- [SnpEff](http://snpeff.sourceforge.net/) + Cingolani P, Platts A, Wang LL, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Douglas M [A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.](https://doi.org/10.4161/fly.19695) _Fly_ 6(2), 80-92 (2012) + +- [VCF-Annotator](https://github.com/rpetit3/vcf-annotator) + Petit III RA [VCF-Annotator: Add biological annotations to variants in a VCF file.](https://github.com/rpetit3/vcf-annotator) (GitHub) + +- [Vcflib](https://github.com/vcflib/vcflib) + Garrison E [Vcflib: A C++ library for parsing and manipulating VCF files](https://github.com/vcflib/vcflib) (GitHub) + +- [vt](https://github.com/atks/vt) + Tan A, Abecasis GR, Kang HM [Unified representation of genetic variants.](https://doi.org/10.1093/bioinformatics/btv112) _Bioinformatics_ 31(13), 2202-2204 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/snippy/run) + +## Version + +```yaml +SNIPPY_RUN: + - bactopia-variants: 1.0.4 +``` diff --git a/developers/modules/snpdists.mdx b/developers/modules/snpdists.mdx new file mode 100644 index 00000000..0a313d03 --- /dev/null +++ b/developers/modules/snpdists.mdx @@ -0,0 +1,91 @@ +--- +title: snpdists +description: "Create a SNP distance matrix from a multiple sequence alignment." 
+tags: + - snp + - distance + - matrix + - alignment + - phylogeny + - run-scope +--- + +# snpdists + +**Tags:** snp distance matrix alignment phylogeny run-scope + +Create a SNP distance matrix from a multiple sequence alignment. + +Uses [snp-dists](https://github.com/tseemann/snp-dists) to read a FASTA alignment and +compute a pairwise SNP distance matrix between all sequences. + +## Inputs + +``` +record ( + meta: Record, + aln: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `aln` | `Path` | Multiple sequence alignment in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | Pairwise SNP distance matrix in TSV format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### SNP-Dists Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--snpdists_a` | boolean | `false` | Count all differences not just [AGTC] | + +## Used By + +### Subworkflows + +- [snpdists](/developers/subworkflows/snpdists) - Calculate pairwise SNP distances from sequence alignments. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [snp-dists](https://github.com/tseemann/snp-dists) + Seemann T [snp-dists - Pairwise SNP distance matrix from a FASTA sequence alignment.](https://github.com/tseemann/snp-dists) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/snpdists) + +## Version + +```yaml +SNPDISTS: + - snp-dists: 1.2.0 +``` diff --git a/developers/modules/spatyper.mdx b/developers/modules/spatyper.mdx new file mode 100644 index 00000000..75c0a2ca --- /dev/null +++ b/developers/modules/spatyper.mdx @@ -0,0 +1,109 @@ +--- +title: spatyper +description: "Finding spa types in Staphylococcus aureus." +tags: + - staphylococcus-aureus + - spa-typing + - repeat + - mrsa + - typing + - sample-scope +--- + +# spatyper + +**Tags:** staphylococcus-aureus spa-typing repeat mrsa typing sample-scope + +Finding spa types in Staphylococcus aureus. + +Uses [spaTyper](https://github.com/HCGB-IGTP/spaTyper) to determine the *spa* type of +*Staphylococcus aureus* genomes by identifying the repeats in the polymorphic X region +of the protein A gene (*spa*). + +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +``` +repeats: Path? +repeat_order: Path? 
+``` + +| Name | Type | Description | +|------|------|-------------| +| `repeats` | `Path?` | Custom repeat sequences file | +| `repeat_order` | `Path?` | Custom repeat order file | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | spa typing results in TSV format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### spaTyper Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--spatyper_do_enrich` | boolean | `false` | Do PCR product enrichment | + +## Used By + +### Subworkflows + +- [spatyper](/developers/subworkflows/spatyper) - Predict spa types of Staphylococcus aureus from genome assemblies. + +### Workflows + +- [spatyper](/bactopia-tools/spatyper) - spa typing of Staphylococcus aureus assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [spaTyper](https://github.com/HCGB-IGTP/spaTyper) + Sanchez-Herrero JF, and Sullivan M [spaTyper: Staphylococcal protein A (spa) characterization pipeline](http://doi.org/10.5281/zenodo.4063625). Zenodo. 
(2020) + +- [spaTyper Database](https://cge.food.dtu.dk/services/spatyper/) + Harmsen D, Claus H, Witte W, Rothgänger J, Claus H, Turnwald D, and Vogel U [Typing of methicillin-resistant _Staphylococcus aureus_ in a university hospital setting using a novel software for spa-repeat determination and database management.](https://doi.org/10.1128/jcm.41.12.5442-5448.2003) _J. Clin. Microbiol._ 41:5442-5448 (2003) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/spatyper) + +## Version + +```yaml +SPATYPER: + - spatyper: 0.3.3 +``` diff --git a/developers/modules/srahumanscrubber_initdb.mdx b/developers/modules/srahumanscrubber_initdb.mdx new file mode 100644 index 00000000..50541bc2 --- /dev/null +++ b/developers/modules/srahumanscrubber_initdb.mdx @@ -0,0 +1,68 @@ +--- +title: srahumanscrubber_initdb +description: "Initialize human read removal database for SRA Human Scrubber." +tags: + - human + - database + - scrubber + - ncbi + - download + - sample-scope +--- + +# srahumanscrubber_initdb + +**Tags:** human database scrubber ncbi download sample-scope + +Initialize human read removal database for SRA Human Scrubber. + +Uses [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) to download and +initialize the necessary k-mer database required for scrubbing human reads from +sequencing data. + +:::note[Internet Required] +This process requires an active internet connection to fetch the database from NCBI FTP. +::: + +## Outputs + +``` +record ( + db: Set, + logs: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `db` | `Set` | The initialized SRA Human Scrubber database files | +| `logs` | `Set` | Optional program specific log files | + +## Parameters + +## Used By + +### Subworkflows + +- [srahumanscrubber](/developers/subworkflows/srahumanscrubber) - Remove human contamination from sequencing reads for SRA submission. 
+ +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) + Katz KS, Shutov O, Lapoint R, Kimelman M, Brister JR, and O'Sullivan C [STAT: a fast, scalable, MinHash-based k-mer tool to assess Sequence Read Archive next-generation sequence submissions.](https://doi.org/10.1186/s13059-021-02490-0) _Genome Biology_, 22(1), 270 (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/srahumanscrubber/initdb) + +## Version + +```yaml +SRAHUMANSCRUBBER_INITDB: + - sra-human-scrubber: 2.2.1 +``` diff --git a/developers/modules/srahumanscrubber_scrub.mdx b/developers/modules/srahumanscrubber_scrub.mdx new file mode 100644 index 00000000..7ef77b94 --- /dev/null +++ b/developers/modules/srahumanscrubber_scrub.mdx @@ -0,0 +1,120 @@ +--- +title: srahumanscrubber_scrub +description: "Scrub human reads from FASTQ files." +tags: + - human + - contamination + - scrubber + - decontamination + - ncbi + - sra + - sample-scope +--- + +# srahumanscrubber_scrub + +**Tags:** human contamination scrubber decontamination ncbi sra sample-scope + +Scrub human reads from FASTQ files. + +Uses [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) to identify and remove +human reads from sequencing data. It relies on a specific k-mer database to mask or remove +sequences that align to human references. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path? 
+) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT/PacBio) | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | SRA Human Scrubber database directory | + +## Outputs + +``` +record ( + meta: Record, + special_meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path?, + scrub_report: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `special_meta` | `Record` | A simplified metadata record for downstream report joining | +| `r1` | `Path?` | Scrubbed paired-end forward reads | +| `r2` | `Path?` | Scrubbed paired-end reverse reads | +| `se` | `Path?` | Scrubbed single-end reads | +| `lr` | `Path?` | Scrubbed long reads | +| `scrub_report` | `Path?` | Report of scrubbing statistics | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### SRA Human Scrubber Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--use_srascrubber` | boolean | `false` | Use SRAHumanScrubber for scrubbing human reads | + +## Used By + +### Subworkflows + +- [srahumanscrubber](/developers/subworkflows/srahumanscrubber) - Remove human contamination from sequencing reads for SRA submission. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) + Katz KS, Shutov O, Lapoint R, Kimelman M, Brister JR, and O'Sullivan C [STAT: a fast, scalable, MinHash-based k-mer tool to assess Sequence Read Archive next-generation sequence submissions.](https://doi.org/10.1186/s13059-021-02490-0) _Genome Biology_, 22(1), 270 (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/srahumanscrubber/scrub) + +## Version + +```yaml +SRAHUMANSCRUBBER_SCRUB: + - bactopia-teton: 1.1.3 +``` diff --git a/developers/modules/ssuissero.mdx b/developers/modules/ssuissero.mdx new file mode 100644 index 00000000..edb4a2ba --- /dev/null +++ b/developers/modules/ssuissero.mdx @@ -0,0 +1,89 @@ +--- +title: ssuissero +description: "Serotype prediction of Streptococcus suis assemblies." +tags: + - streptococcus-suis + - serotype + - typing + - prediction + - sample-scope +--- + +# ssuissero + +**Tags:** streptococcus-suis serotype typing prediction sample-scope + +Serotype prediction of Streptococcus suis assemblies. + +Uses [SsuisSero](https://github.com/idmc-cnr/SsuisSero) to predict the serotype of +*Streptococcus suis* strains from genome assemblies based on the presence of specific +capsular genes. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | SsuisSero results in TSV format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +## Used By + +### Subworkflows + +- [ssuissero](/developers/subworkflows/ssuissero) - Predict serotypes of Streptococcus suis from genome assemblies. + +### Workflows + +- [ssuissero](/bactopia-tools/ssuissero) - Serotype prediction of Streptococcus suis assemblies. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SsuisSero](https://github.com/jimmyliu1326/SsuisSero) + Lui J [SsuisSero: Rapid _Streptococcus suis_ serotyping](https://github.com/jimmyliu1326/SsuisSero) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/ssuissero) + +## Version + +```yaml +SSUISSERO: + - ssuissero: 1.0.1 +``` diff --git a/developers/modules/staphopiasccmec.mdx b/developers/modules/staphopiasccmec.mdx new file mode 100644 index 00000000..4b6b2851 --- /dev/null +++ b/developers/modules/staphopiasccmec.mdx @@ -0,0 +1,92 @@ +--- +title: staphopiasccmec +description: "Primer based SCCmec typing of S. aureus genomes." +tags: + - staphylococcus-aureus + - sccmec + - typing + - mrsa + - primers + - sample-scope +--- + +# staphopiasccmec + +**Tags:** staphylococcus-aureus sccmec typing mrsa primers sample-scope + +Primer based SCCmec typing of S. aureus genomes. + +Uses [Staphopia SCCmec](https://github.com/staphopia/staphopia-sccmec) to determine the +SCCmec type of *Staphylococcus aureus* assemblies. It includes a primer-based approach +to identify SCCmec types I-XI. 
+ +## Inputs + +``` +record ( + meta: Record, + fna: Path +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | TSV file with SCCmec typing results | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Staphopia SCCmec Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--staphopiasccmec_hamming` | boolean | `false` | Report the results as hamming distances | + +## Used By + +### Subworkflows + +- [staphopiasccmec](/developers/subworkflows/staphopiasccmec) - Identify SCCmec elements in Staphylococcus aureus genomes using Staphopia method. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [staphopia-sccmec](https://github.com/staphopia/staphopia-sccmec) + Petit III RA, Read TD [_Staphylococcus aureus_ viewed from the perspective of 40,000+ genomes.](http://dx.doi.org/10.7717/peerj.5261) _PeerJ_ 6, e5261 (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/staphopiasccmec) + +## Version + +```yaml +STAPHOPIASCCMEC: + - staphopia-sccmec: 1.0.0 +``` diff --git a/developers/modules/stecfinder.mdx b/developers/modules/stecfinder.mdx new file mode 100644 index 00000000..206a79cc --- /dev/null +++ b/developers/modules/stecfinder.mdx @@ -0,0 +1,106 @@ +--- +title: stecfinder +description: "Serotype of Shigatoxin producing E. coli using reads/assemblies." +tags: + - stec + - e.-coli + - virulence + - serotype + - typing + - sample-scope +--- + +# stecfinder + +**Tags:** stec e.-coli virulence serotype typing sample-scope + +Serotype of Shigatoxin producing E. coli using reads/assemblies. + +Uses [STECFinder](https://github.com/LanLab/STECFinder) to identify Shiga toxin-producing +*Escherichia coli* (STEC) serotypes and virulence factors from genome assemblies or sequencing reads. + +## Inputs + +``` +record ( + meta: Record, + fna: Path, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path? 
+) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `fna` | `Path` | Assembled contigs in FASTA format | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT/PacBio) | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | TSV file with STEC gene markers results | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### STECFinder Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--stecfinder_use_reads` | boolean | `false` | Paired-end Illumina reads will be used instead of assemblies | +| `--stecfinder_hits` | boolean | `false` | Show detailed gene search results | +| `--stecfinder_cutoff` | number | `10.0` | Minimum read coverage for gene to be called | +| `--stecfinder_length` | number | `50.0` | Percentage of gene length needed for positive call | + +## Used By + +### Subworkflows + +- [stecfinder](/developers/subworkflows/stecfinder) - Identify and serotype Shiga toxin-producing E. coli (STEC) from assemblies. + +### Workflows + +- [stecfinder](/bactopia-tools/stecfinder) - Serotype identification of Shiga toxin-producing E. coli. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [STECFinder](https://github.com/LanLab/STECFinder) + Zhang X, Payne M, Kaur S, and Lan R [Improved Genomic Identification, Clustering, and Serotyping of Shiga Toxin-Producing Escherichia coli Using Cluster/Serotype-Specific Gene Markers.](https://doi.org/10.3389/fcimb.2021.772574) _Frontiers in Cellular and Infection Microbiology_, 11, 772574. (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/stecfinder) + +## Version + +```yaml +STECFINDER: + - stecfinder: 1.1.2 +``` diff --git a/developers/modules/sylph_profile.mdx b/developers/modules/sylph_profile.mdx new file mode 100644 index 00000000..f0a30fd8 --- /dev/null +++ b/developers/modules/sylph_profile.mdx @@ -0,0 +1,116 @@ +--- +title: sylph_profile +description: "Profile metagenome samples against a database using Sylph." +tags: + - metagenomics + - profiling + - taxonomy + - abundance + - ani + - sylph + - sample-scope +--- + +# sylph_profile + +**Tags:** metagenomics profiling taxonomy abundance ani sylph sample-scope + +Profile metagenome samples against a database using Sylph. + +Uses [Sylph](https://github.com/bluenote-1/sylph) to profile metagenomic samples for taxonomic +abundance and containment ANI against a provided database. It is designed to be extremely fast +and memory-efficient. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path? 
+) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT/PacBio) | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | Path to the Sylph database file (*.syldb) | + +## Outputs + +``` +record ( + meta: Record, + tsv: Path, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `tsv` | `Path` | TSV file with profiling results | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### Sylph Profile Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--sylph_db` | string | | The path to a sylph formatted database | +| `--sylph_min_ani` | integer | `95` | Minimum adjusted ANI to consider. Smaller than 95 for profile will give inaccurate results. | +| `--sylph_opts` | string | | Extra options in quotes for Sylph | + +## Used By + +### Subworkflows + +- [sylph](/developers/subworkflows/sylph) - Profile microbial composition using Sylph. + +### Workflows + +- [sylph](/bactopia-tools/sylph) - Taxonomic profiling by abundance-corrected MinHash. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Sylph](https://github.com/bluenote-1/sylph) + Shaw J, and Yu YW [Rapid species-level metagenome profiling and containment estimation with sylph.](https://doi.org/10.1038/s41587-024-02412-y) _Nature Biotechnology_ (2024) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/sylph/profile) + +## Version + +```yaml +SYLPH_PROFILE: + - sylph: 0.9.0 +``` diff --git a/developers/modules/tbprofiler_collate.mdx b/developers/modules/tbprofiler_collate.mdx new file mode 100644 index 00000000..225be9c8 --- /dev/null +++ b/developers/modules/tbprofiler_collate.mdx @@ -0,0 +1,104 @@ +--- +title: tbprofiler_collate +description: "Collate TB-Profiler results from multiple samples." +tags: + - tuberculosis + - mycobacterium + - drug-resistance + - collate + - summary + - run-scope +--- + +# tbprofiler_collate + +**Tags:** tuberculosis mycobacterium drug-resistance collate summary run-scope + +Collate TB-Profiler results from multiple samples. + +Uses [TBProfiler](https://github.com/jodyphelan/TBProfiler) to aggregate profiling results +from multiple samples into summary tables and files suitable for phylogenetic visualization. 
+ +## Inputs + +``` +record ( + meta: Record, + json: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `json` | `Set` | List of TB-Profiler JSON output files | + +## Outputs + +``` +record ( + meta: Record, + csv: Path, + variants_csv: Path, + variants_txt: Path, + itol: Set, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `csv` | `Path` | Main collated results in CSV format | +| `variants_csv` | `Path` | Collated variants in CSV format | +| `variants_txt` | `Path` | Collated variants in text format | +| `itol` | `Set` | iTOL formatted files for visualization | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. .command.{begin|err|log|out|run|sh|trace}) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### TB-Profiler Collate Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--tbprofiler_itol` | boolean | `false` | Generate itol config files | +| `--tbprofiler_full` | boolean | `false` | Output mutations in main result file | +| `--tbprofiler_all_variants` | boolean | `false` | Output all variants in variant matrix | +| `--tbprofiler_mark_missing` | boolean | `false` | An asterisk will be used to mark predictions which are affected by missing data at a drug resistance position | + +## Used By + +### Subworkflows + +- [tbprofiler](/developers/subworkflows/tbprofiler) - Profiling tool for Mycobacterium tuberculosis to detect resistance and strain type. + +### Workflows + +- [tbprofiler](/bactopia-tools/tbprofiler) - Detection of antimicrobial resistance and lineage typing of Mycobacterium tuberculosis. 
+ +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [TBProfiler](https://github.com/jodyphelan/TBProfiler) + Phelan JE, O'Sullivan DM, Machado D, Ramos J, Oppong YEA, Campino S, O'Grady J, McNerney R, Hibberd ML, Viveiros M, Huggett JF, Clark TG [Integrating informatics tools and portable sequencing technology for rapid detection of resistance to anti-tuberculous drugs.](https://doi.org/10.1186/s13073-019-0650-x) _Genome Med_ 11, 41 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/tbprofiler/collate) + +## Version + +```yaml +TBPROFILER_COLLATE: + - tb-profiler: 6.7.0 +``` diff --git a/developers/modules/tbprofiler_profile.mdx b/developers/modules/tbprofiler_profile.mdx new file mode 100644 index 00000000..63fcbd60 --- /dev/null +++ b/developers/modules/tbprofiler_profile.mdx @@ -0,0 +1,116 @@ +--- +title: tbprofiler_profile +description: "Detect resistance and lineages of Mycobacterium tuberculosis genomes." +tags: + - tuberculosis + - mycobacterium + - drug-resistance + - amr + - typing + - variant-calling + - sample-scope +--- + +# tbprofiler_profile + +**Tags:** tuberculosis mycobacterium drug-resistance amr typing variant-calling sample-scope + +Detect resistance and lineages of Mycobacterium tuberculosis genomes. + +Uses [TBProfiler](https://github.com/jodyphelan/TBProfiler) to profile *Mycobacterium tuberculosis* +data for drug resistance and lineage information by aligning reads to a reference genome and identifying +specific variants. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? + +## Inputs + +``` +record ( + meta: Record, + r1: Path?, + r2: Path?, + se: Path?, + lr: Path? 
+) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Groovy Record containing sample information | +| `r1` | `Path?` | Illumina R1 reads (paired-end) | +| `r2` | `Path?` | Illumina R2 reads (paired-end) | +| `se` | `Path?` | Single-end Illumina reads | +| `lr` | `Path?` | Long reads (ONT/PacBio) | + +## Outputs + +``` +record ( + meta: Record, + csv: Path?, + json: Path, + txt: Path?, + results: Set, + logs: Set, + nf_logs: Set, + versions: Set +) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `meta` | `Record` | Sample information record | +| `csv` | `Path?` | Results in CSV format | +| `json` | `Path` | Compressed JSON results file | +| `txt` | `Path?` | Results in text format | +| `results` | `Set` | All output files to be published | +| `logs` | `Set` | Optional program specific log files | +| `nf_logs` | `Set` | Nextflow-specific log files (e.g. `.command.{begin\|err\|log\|out\|run\|sh\|trace}`) | +| `versions` | `Set` | A YAML formatted file with program versions | + +## Parameters + +### TB-Profiler Profile Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `--tbprofiler_call_whole_genome` | boolean | `false` | Call whole genome | +| `--tbprofiler_mapper` | string | `bwa` | Mapping tool to use. If you are using nanopore data it will default to minimap2 (choices: `bwa`, `minimap2`, `bowtie2`, `bwa-mem2`) | +| `--tbprofiler_caller` | string | `freebayes` | Variant calling tool to use (choices: `bcftools`, `gatk`, `freebayes`) | +| `--tbprofiler_opts` | string | | Extra options in quotes for TBProfiler | + +## Used By + +### Subworkflows + +- [tbprofiler](/developers/subworkflows/tbprofiler) - Profiling tool for Mycobacterium tuberculosis to detect resistance and strain type. + +### Workflows + +- [tbprofiler](/bactopia-tools/tbprofiler) - Detection of antimicrobial resistance and lineage typing of Mycobacterium tuberculosis. 
+ +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [TBProfiler](https://github.com/jodyphelan/TBProfiler) + Phelan JE, O'Sullivan DM, Machado D, Ramos J, Oppong YEA, Campino S, O'Grady J, McNerney R, Hibberd ML, Viveiros M, Huggett JF, Clark TG [Integrating informatics tools and portable sequencing technology for rapid detection of resistance to anti-tuberculous drugs.](https://doi.org/10.1186/s13073-019-0650-x) _Genome Med_ 11, 41 (2019) + +- [freebayes](https://github.com/ekg/freebayes) + Garrison E, Marth G [Haplotype-based variant detection from short-read sequencing.](https://arxiv.org/abs/1207.3907) arXiv preprint arXiv:1207.3907 [q-bio.GN] (2012) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/modules/tbprofiler/profile) + +## Version + +```yaml +TBPROFILER_PROFILE: + - tb-profiler: 6.7.0 +``` diff --git a/developers/nf-bactopia/index.mdx b/developers/nf-bactopia/index.mdx new file mode 100644 index 00000000..fe4f1a4b --- /dev/null +++ b/developers/nf-bactopia/index.mdx @@ -0,0 +1,230 @@ +--- +title: nf-bactopia Plugin +description: Developer reference for the nf-bactopia Nextflow plugin -- utility functions used across Bactopia workflows +--- + +# nf-bactopia Plugin + +The [nf-bactopia](https://github.com/bactopia/nf-bactopia) Nextflow plugin provides +utility functions that handle input collection, parameter validation, output gathering, +and channel operations across all Bactopia workflows. Rather than duplicating this logic +in every workflow file, the plugin centralizes it so that subworkflows and modules can +focus on their analysis tasks. 
+ +## Installation & Setup + +Declare the plugin in your `nextflow.config`: + +```groovy +plugins { + id 'nf-bactopia@2.0.3' +} +``` + +Then import functions in your Nextflow scripts: + +```groovy +include { gather } from 'plugin/nf-bactopia' +include { gatherCsvtk } from 'plugin/nf-bactopia' +include { filterWithData } from 'plugin/nf-bactopia' +``` + +**Requires:** Nextflow >= 26.03.1-edge + +## Input Handling + +These functions run at pipeline startup to validate parameters and collect sample inputs +into standardized channel structures. + +### validateParameters + +Checks all pipeline parameters against the JSON schema, catching type mismatches, missing +required values, and invalid combinations before any processes run. Pass `false` for the +main Bactopia pipeline, `true` for standalone Bactopia Tools. + +```groovy +include { validateParameters } from 'plugin/nf-bactopia' + +// In the BACTOPIA_INIT subworkflow +def validation = validateParameters(false) +if (validation.hasErrors) { + log.info(validation.error) + error(" ") +} else { + log.info(validation.logs) +} +``` + +Returns a map with `hasErrors`, `error`, `logs`, and `data` fields. + +### bactopiaInputs + +Collects and organizes sample inputs (FASTQs from SRA/ENA, local files, assemblies) +into a standardized list of sample records. Called after `validateParameters` in the +main pipeline, using the validation result to determine the run type. 
+ +```groovy +include { bactopiaInputs } from 'plugin/nf-bactopia' + +def collectedInputs = bactopiaInputs(validation.data) +if (collectedInputs.hasErrors) { + log.info(collectedInputs.error) + error(" ") +} else { + log.info(collectedInputs.logs) +} + +// Convert to channel of records +def ch_samples = channel.fromList(collectedInputs.samples.collect { sample -> + record( + meta: sample.meta, + r1_files: sample.r1.collect { fastq -> file(fastq) }.toSet(), + r2_files: sample.r2.collect { fastq -> file(fastq) }.toSet(), + se_files: sample.se.collect { fastq -> file(fastq) }.toSet(), + lr_files: sample.lr.collect { fastq -> file(fastq) }.toSet(), + fna_files: sample.assembly.collect { fna -> file(fna) }.toSet() + ) +}) +``` + +### bactopiaToolInputs + +The equivalent of `bactopiaInputs` for standalone Bactopia Tools. Instead of collecting +raw inputs, it reads from a previous Bactopia run directory and builds channels for +assemblies, proteins, GFFs, BLAST databases, and other outputs that tools need. + +```groovy +include { bactopiaToolInputs } from 'plugin/nf-bactopia' + +def collectedInputs = bactopiaToolInputs() +// Returns samples with: meta, fna, faa, gff, r1, r2, se, lr, blastdb, etc. +``` + +## Gathering Outputs + +These functions collect per-sample outputs into aggregated structures for merging or +downstream analysis. They are the most commonly used plugin functions -- nearly every +subworkflow uses at least one. + +### gather + +Collects a single field from all sample records into a Set, keeping the original field name. +Used when a downstream process needs all samples' outputs together (e.g., building a heatmap +from individual JSON results). + +```groovy +include { gather } from 'plugin/nf-bactopia' + +// Collect all RGI JSON outputs for heatmap generation +ch_rgi_heatmap = RGI_HEATMAP(gather(ch_rgi_main, 'json', [name: 'rgi'])) +``` + +The `meta` map must contain a `name` key, and all keys pass through to the output. 
+ +### gatherCsvtk + +Gathers a single field and renames it to `csv`, preparing it for `CSVTK_CONCAT` input. +This is the most common gathering pattern -- used whenever per-sample TSV/CSV results +need to be concatenated into a single merged report. + +```groovy +include { gatherCsvtk } from 'plugin/nf-bactopia' + +// Merge all per-sample AMR reports into one file +ch_csvtk_concat = CSVTK_CONCAT( + gatherCsvtk(ch_amrfinderplus_run, 'report', [name: 'amrfinderplus']), + 'tsv', + 'tsv' +) +``` + +You can pass extra args through the meta map: + +```groovy +// For tools that don't include headers in their output +gatherCsvtk(ch_emmtyper, 'tsv', [name: 'emmtyper', args: '--no-header-row']) +``` + +### gatherFields + +Gathers multiple fields with explicit rename mapping. Used when a process needs +gathered inputs under different names than the originals, such as renaming `fna` to +`query` for a comparison tool. + +```groovy +include { gatherFields } from 'plugin/nf-bactopia' + +// Gather assemblies and rename 'fna' to 'query' for FastANI +gatherFields(query, [fna: 'query'], [name: 'fastani']) +``` + +## Channel Operations + +### filterWithData + +Filters out records where all specified fields are null. Necessary because some samples +may lack certain data types (e.g., a sample with only long reads has no `r1`/`r2` files), +and passing null paths to a process would cause it to fail. + +```groovy +include { filterWithData } from 'plugin/nf-bactopia' + +// Only run Seroba on samples that have paired-end reads +ch_seroba_run = SEROBA_RUN(filterWithData(reads, ['r1', 'r2'])) + +// Filter for samples with any read type available +scrubbed = filterWithData(ch_sample_outputs, ['r1', 'r2', 'se', 'lr']) +``` + +### combineWith + +Creates a Cartesian product between a gathered channel and a multi-item channel, merging +each item into the gathered map under a specified field name. Replaces the deprecated +Nextflow `each` input qualifier. 
+ +```groovy +include { combineWith } from 'plugin/nf-bactopia' +include { gatherFields } from 'plugin/nf-bactopia' + +// Combine gathered query assemblies with each reference genome +ch_fastani = FASTANI_MODULE( + combineWith( + gatherFields(query, [fna: 'query'], [name: 'fastani']), + ch_ref, + 'reference' + ) +) +``` + +### formatSamples + +Adapts tuple sizes based on data availability. Takes a channel of 4-element tuples +and trims them to 1, 2, or 3 elements depending on the `dataTypes` parameter. + +```groovy +include { formatSamples } from 'plugin/nf-bactopia' + +// Trim to just [meta, inputs] (dataTypes=1) +ch_trimmed = formatSamples(ch_samples, 1) +``` + +## Logging + +### collectNextflowLogs + +Expands each record's `nf_logs` field into individual `[meta, file]` tuples suitable +for publishing. Used in workflow `publish` blocks to write Nextflow execution logs +alongside sample outputs. + +```groovy +include { collectNextflowLogs } from 'plugin/nf-bactopia' + +publish: +sample_nf_logs = collectNextflowLogs(ch_amrfinderplus.sample_outputs) +run_nf_logs = collectNextflowLogs(ch_amrfinderplus.run_outputs) +``` + +## Links + +- [Nextflow Plugin Registry](https://registry.nextflow.io/plugins/nf-bactopia) +- [Source on GitHub](https://github.com/bactopia/nf-bactopia) diff --git a/developers/subworkflows/abricate.mdx b/developers/subworkflows/abricate.mdx new file mode 100644 index 00000000..430f1714 --- /dev/null +++ b/developers/subworkflows/abricate.mdx @@ -0,0 +1,77 @@ +--- +title: abricate +description: "Mass screening of contigs for antimicrobial and virulence genes." +tags: + - bacteria + - assembly + - antimicrobial-resistance + - virulence + - workflow + - sample-scope +--- + +# abricate + +**Tags:** bacteria assembly antimicrobial-resistance virulence workflow sample-scope + +Mass screening of contigs for antimicrobial and virulence genes. 
+ +This subworkflow orchestrates the execution of [Abricate](https://github.com/tseemann/abricate) +to screen genome assemblies for antimicrobial resistance and virulence genes, followed by +aggregating the results into a single summary report. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `report` | A tab-delimited report of hits, for full details please see [Abricate - Output](https://github.com/tseemann/abricate#output) | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `report` | Aggregated tab-delimited summary of Abricate results from all samples | + +## Module Composition + +This subworkflow calls the following modules: + +- [abricate_run](/developers/modules/abricate_run) - Mass screening of contigs for antimicrobial and virulence genes. +- [abricate_summary](/developers/modules/abricate_summary) - Summarize Abricate screening results. + +## Used By + +This subworkflow is used by the following workflows: + +- [abricate](/bactopia-tools/abricate) - Mass screening of contigs for antimicrobial resistance and virulence genes. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Abricate](https://github.com/tseemann/abricate) + Seemann T [Abricate: mass screening of contigs for antimicrobial and virulence genes](https://github.com/tseemann/abricate) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/abricate) diff --git a/developers/subworkflows/abritamr.mdx b/developers/subworkflows/abritamr.mdx new file mode 100644 index 00000000..acbc0379 --- /dev/null +++ b/developers/subworkflows/abritamr.mdx @@ -0,0 +1,82 @@ +--- +title: abritamr +description: "Identify antimicrobial resistance genes using AMRFinderPlus." +tags: + - bacteria + - antimicrobial-resistance + - amr + - amrfinderplus + - classification + - sample-scope +--- + +# abritamr + +**Tags:** bacteria antimicrobial-resistance amr amrfinderplus classification sample-scope + +Identify antimicrobial resistance genes using AMRFinderPlus. + +This subworkflow uses [abriTAMR](https://github.com/MDU-PHL/abritamr) to identify +antimicrobial resistance genes in bacterial genomes. It runs AMRFinderPlus on +each sample and collates the results into functional classes, producing detailed +reports on resistance genes, partial matches, and virulence factors. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `summary` | Tab-delimited NATA-accredited AMR report summary | +| `matches` | Tab-delimited list of matched AMR genes | +| `partials` | Tab-delimited list of partially matched AMR genes | +| `virulence` | Tab-delimited list of detected virulence genes | +| `amrfinder` | Raw AMRFinderPlus output | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [abritamr_run](/developers/modules/abritamr_run) - Detect antimicrobial resistance and virulence genes. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [abritamr](/bactopia-tools/abritamr) - A NATA accredited tool for reporting the presence of antimicrobial resistance genes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [abriTAMR](https://github.com/MDU-PHL/abritamr) + Sherry NL, Horan KA, Ballard SA, Gonçalves da Silva A, Gorrie CL, Schultz MB, Stevens K, Valcanis M, Sait ML, Stinear TP, Howden BP, and Seemann T [An ISO-certified genomics workflow for identification and surveillance of antimicrobial resistance.](https://doi.org/10.1038/s41467-022-35713-4) _Nature Communications_, 14(1), 60. 
(2023) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/abritamr) diff --git a/developers/subworkflows/agrvate.mdx b/developers/subworkflows/agrvate.mdx new file mode 100644 index 00000000..9a235da2 --- /dev/null +++ b/developers/subworkflows/agrvate.mdx @@ -0,0 +1,77 @@ +--- +title: agrvate +description: "Identify Staphylococcus aureus agr locus type and operon variants." +tags: + - staphylococcus-aureus + - assembly + - agr-locus + - virulence + - quorum-sensing + - sample-scope +--- + +# agrvate + +**Tags:** staphylococcus-aureus assembly agr-locus virulence quorum-sensing sample-scope + +Identify Staphylococcus aureus agr locus type and operon variants. + +This subworkflow uses [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) to rapidly identify the +accessory gene regulator (agr) locus type and detect agr operon variants in Staphylococcus aureus. +The agr system is a key quorum-sensing regulator of virulence in S. aureus. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format for agr locus detection | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `summary` | Tab-delimited summary of agr locus type and operon variants | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [agrvate](/developers/modules/agrvate) - Determine the agr locus type and operon variants in Staphylococcus aureus. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. 
+ +## Used By + +This subworkflow is used by the following workflows: + +- [agrvate](/bactopia-tools/agrvate) - Rapid identification of Staphylococcus aureus agr locus type and agr operon variants. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) + Raghuram V. [AgrVATE: Rapid identification of Staphylococcus aureus agr locus type and agr operon variants.](https://github.com/VishnuRaghuram94/AgrVATE) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/agrvate) diff --git a/developers/subworkflows/amrfinderplus.mdx b/developers/subworkflows/amrfinderplus.mdx new file mode 100644 index 00000000..f66edd8a --- /dev/null +++ b/developers/subworkflows/amrfinderplus.mdx @@ -0,0 +1,88 @@ +--- +title: amrfinderplus +description: "Find antimicrobial resistance genes and point mutations." +tags: + - bacteria + - assembly + - antimicrobial-resistance + - gene-prediction + - sample-scope +--- + +# amrfinderplus + +**Tags:** bacteria assembly antimicrobial-resistance gene-prediction sample-scope + +Find antimicrobial resistance genes and point mutations. + +This subworkflow uses [AMRFinderPlus](https://github.com/ncbi/amr) to identify acquired antimicrobial +resistance genes and some point mutations in protein or assembled nucleotide sequences. + +## Take + +``` +fasta: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `genes` | Nucleotide sequences of genes in FASTA format | +| `proteins` | Optional amino acid sequences of proteins in FASTA format (Path?) | +| `gff` | Optional GFF3 annotation file (Path?) 
| + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | Path to the AMRFinderPlus database directory containing reference data for AMR gene detection. | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `report` | A tab-delimited report of identified AMR genes and virulence factors | +| `mutation_report` | Organism-specific point mutations associated with antimicrobial resistance | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | A merged TSV file with AMRFinder+ results from all samples | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [amrfinderplus_run](/developers/modules/amrfinderplus_run) - Identify antimicrobial resistance and virulence genes in gene or protein sequences. + +## Used By + +This subworkflow is used by the following workflows: + +- [amrfinderplus](/bactopia-tools/amrfinderplus) - Bactopia Tool: Amrfinderplus. +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [AMRFinderPlus](https://github.com/ncbi/amr) + Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. Agents Chemother._ (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/amrfinderplus) diff --git a/developers/subworkflows/ariba.mdx b/developers/subworkflows/ariba.mdx new file mode 100644 index 00000000..1320c9e6 --- /dev/null +++ b/developers/subworkflows/ariba.mdx @@ -0,0 +1,94 @@ +--- +title: ariba +description: "Rapidly identify genes by creating local assemblies from paired-end reads." +tags: + - bacteria + - reads + - antimicrobial-resistance + - virulence + - local-assembly + - sample-scope +--- + +# ariba + +**Tags:** bacteria reads antimicrobial-resistance virulence local-assembly sample-scope + +Rapidly identify genes by creating local assemblies from paired-end reads. + +This subworkflow uses [ARIBA](https://github.com/sanger-pathogens/ariba) +(Antimicrobial Resistance Identification By Assembly) to rapidly identify genes +in a database by creating local assemblies. It first downloads and prepares an ARIBA database, +then analyzes paired-end reads to identify genes, and finally aggregates results across all samples. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? 
+ +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads (not supported by ARIBA) | +| `lr` | Long reads (not supported by ARIBA) | + +``` +db: String +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `String` | Database name for ARIBA analysis (e.g., ncbi, card, vfdb, resfinder, argannot) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `report` | Tab-delimited detailed report of gene detection results | +| `summary` | Comma-separated condensed summary of detected genes | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [ariba_getref](/developers/modules/ariba_getref) - Download and prepare reference databases for ARIBA analysis. +- [ariba_run](/developers/modules/ariba_run) - Identify genes by local assembly of reads. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [ariba](/bactopia-tools/ariba) - Gene identification through local assemblies. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Ariba](https://github.com/sanger-pathogens/ariba) + Hunt M, Mather AE, Sánchez-Busó L, Page AJ, Parkhill J, Keane JA, Harris SR [ARIBA: rapid antimicrobial resistance genotyping directly from sequencing reads](http://dx.doi.org/10.1099/mgen.0.000131). _Microb Genom_ 3, e000131 (2017) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/ariba) diff --git a/developers/subworkflows/bactopia_assembler.mdx b/developers/subworkflows/bactopia_assembler.mdx new file mode 100644 index 00000000..c9671637 --- /dev/null +++ b/developers/subworkflows/bactopia_assembler.mdx @@ -0,0 +1,181 @@ +--- +title: bactopia_assembler +description: "Assemble bacterial genomes using automated assembler selection." +tags: + - bacteria + - assembly + - hybrid + - shovill + - dragonflye + - unicycler + - illumina + - nanopore + - sample-scope +--- + +# bactopia_assembler + +**Tags:** bacteria assembly hybrid shovill dragonflye unicycler illumina nanopore sample-scope + +Assemble bacterial genomes using automated assembler selection. 
+ +This subworkflow automatically selects the optimal assembly strategy based on input read types: +- **Short Paired-End Reads:** Uses [Shovill](https://github.com/tseemann/shovill) (SKESA/SPAdes wrapper) +- **Short Single-End Reads:** Uses [Shovill-SE](https://github.com/rpetit3/shovill) (SKESA/SPAdes wrapper) +- **Long Reads:** Uses [Dragonflye](https://github.com/rpetit3/dragonflye) (Flye/Miniasm wrapper) +- **Hybrid Assembly:** Uses [Unicycler](https://github.com/rrwick/Unicycler) or Dragonflye with short-read polishing + +The workflow performs individual assemblies per sample and aggregates assembly statistics +across all samples using [assembly-scan](https://github.com/rpetit3/assembly-scan) for +comprehensive quality assessment. + +## Take + +``` +samples: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end forward) | +| `r2` | Illumina R2 reads (paired-end reverse) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) for long-read or hybrid assembly | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited report of assembly statistics (N50, length, coverage) | +| `supplemental` | Supplemental files including assembly graphs and tool-specific logs | +| `error` | Captured error messages if assembly fails | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated assembly statistics from all samples | + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. 
+ +#### `assembly` + +| Output | Description | +|--------|-------------| +| `fna` | Assembled contigs for downstream annotation and analysis | + +#### `assembly_reads` + +| Output | Description | +|--------|-------------| +| `fna` | Assembled contigs | +| `r1` | Illumina R1 reads (paired-end forward) | +| `r2` | Illumina R2 reads (paired-end reverse) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +## Module Composition + +This subworkflow calls the following modules: + +- [bactopia_assembler](/developers/modules/bactopia_assembler) - Assemble bacterial genomes using short read, long read, or hybrid strategies. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [any2fasta](https://github.com/tseemann/any2fasta) + Seemann T [any2fasta: Convert various sequence formats to FASTA](https://github.com/tseemann/any2fasta) (GitHub) + +- [assembly-scan](https://github.com/rpetit3/assembly-scan) + Petit III RA [assembly-scan: generate basic stats for an assembly](https://github.com/rpetit3/assembly-scan) (GitHub) + +- [BWA](https://github.com/lh3/bwa/) + Li H [Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM](http://arxiv.org/abs/1303.3997). 
_arXiv_ [q-bio.GN] (2013) + +- [Dragonflye](https://github.com/rpetit3/dragonflye) + Petit III RA [Dragonflye: Assemble bacterial isolate genomes from Nanopore reads.](https://github.com/rpetit3/dragonflye) (GitHub) + +- [FLASH](https://ccb.jhu.edu/software/FLASH/) + Magoč T, Salzberg SL [FLASH: fast length adjustment of short reads to improve genome assemblies.](https://doi.org/10.1093/bioinformatics/btr507) _Bioinformatics_ 27.21 2957-2963 (2011) + +- [Flye](https://github.com/fenderglass/Flye) + Kolmogorov M, Yuan J, Lin Y, Pevzner P [Assembly of Long Error-Prone Reads Using Repeat Graphs](https://doi.org/10.1038/s41587-019-0072-8) _Nature Biotechnology_ (2019) + +- [Medaka](https://github.com/nanoporetech/medaka) + ONT Research [Medaka: Sequence correction provided by ONT Research](https://github.com/nanoporetech/medaka) (GitHub) + +- [MEGAHIT](https://github.com/voutcn/megahit) + Li D, Liu C-M, Luo R, Sadakane K, Lam T-W [MEGAHIT: an ultra-fast single-node solution for large and complex metagenomics assembly via succinct de Bruijn graph.](https://doi.org/10.1093/bioinformatics/btv033) _Bioinformatics_ 31.10 1674-1676 (2015) + +- [Miniasm](https://github.com/lh3/miniasm) + Li H [Miniasm: Ultrafast de novo assembly for long noisy reads](https://github.com/lh3/miniasm) (GitHub) + +- [Minimap2](https://github.com/lh3/minimap2) + Li H [Minimap2: pairwise alignment for nucleotide sequences.](https://doi.org/10.1093/bioinformatics/bty191) _Bioinformatics_ 34:3094-3100 (2018) + +- [Nanoq](https://github.com/esteinig/nanoq) + Steinig E [Nanoq: Minimal but speedy quality control for nanopore reads in Rust](https://github.com/esteinig/nanoq) (GitHub) + +- [Pigz](https://zlib.net/pigz/) + Adler M. 
[pigz: A parallel implementation of gzip for modern multi-processor, multi-core machines.](https://zlib.net/pigz/) _Jet Propulsion Laboratory_ (2015) + +- [Pilon](https://github.com/broadinstitute/pilon/) + Walker BJ, Abeel T, Shea T, Priest M, Abouelliel A, Sakthikumar S, Cuomo CA, Zeng Q, Wortman J, Young SK, Earl AM [Pilon: an integrated tool for comprehensive microbial variant detection and genome assembly improvement.](https://doi.org/10.1371/journal.pone.0112963) _PloS one_ 9.11 e112963 (2014) + +- [Racon](https://github.com/lbcb-sci/racon) + Vaser R, Sović I, Nagarajan N, Šikić M [Fast and accurate de novo genome assembly from long uncorrected reads.](http://dx.doi.org/10.1101/gr.214270.116) _Genome Res_ 27, 737-746 (2017) + +- [Rasusa](https://github.com/mbhall88/rasusa) + Hall MB [Rasusa: Randomly subsample sequencing reads to a specified coverage.](https://doi.org/10.5281/zenodo.3731394) (2019). + +- [Raven](https://github.com/lbcb-sci/raven) + Vaser R, Šikić M [Time- and memory-efficient genome assembly with Raven.](https://doi.org/10.1038/s43588-021-00073-4) _Nat Comput Sci_ 1, 332-336 (2021) + +- [samclip](https://github.com/tseemann/samclip) + Seemann T [Samclip: Filter SAM file for soft and hard clipped alignments](https://github.com/tseemann/samclip) (GitHub) + +- [Samtools](https://github.com/samtools/samtools) + Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R [The Sequence Alignment/Map format and SAMtools](http://dx.doi.org/10.1093/bioinformatics/btp352). 
_Bioinformatics_ 25, 2078-2079 (2009) + +- [Shovill](https://github.com/tseemann/shovill) + Seemann T [Shovill: De novo assembly pipeline for Illumina paired reads](https://github.com/tseemann/shovill) (GitHub) + +- [Shovill-SE](https://github.com/rpetit3/shovill) + Petit III RA [Shovill-SE: A fork of Shovill that includes support for single end reads.](https://github.com/rpetit3/shovill) (GitHub) + +- [SKESA](https://github.com/ncbi/SKESA) + Souvorov A, Agarwala R, Lipman DJ [SKESA: strategic k-mer extension for scrupulous assemblies.](https://doi.org/10.1186/s13059-018-1540-z) _Genome Biology_ 19:153 (2018) + +- [SPAdes](https://github.com/ablab/spades) + Bankevich A, Nurk S, Antipov D, Gurevich AA, Dvorkin M, Kulikov AS, Lesin VM, Nikolenko SI, Pham S, Prjibelski AD, Pyshkin AV, Sirotkin AV, Vyahhi N, Tesler G, Alekseyev MA, Pevzner PA [SPAdes: a new genome assembly algorithm and its applications to single-cell sequencing.](https://doi.org/10.1089/cmb.2012.0021) _Journal of computational biology_ 19.5 455-477 (2012) + +- [Unicycler](https://github.com/rrwick/Unicycler) + Wick RR, Judd LM, Gorrie CL, Holt KE [Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads.](http://dx.doi.org/10.1371/journal.pcbi.1005595) _PLoS Comput. 
Biol._ 13, e1005595 (2017) + +- [Velvet](https://github.com/dzerbino/velvet) + Zerbino DR, Birney E [Velvet: algorithms for de novo short read assembly using de Bruijn graphs.](http://www.genome.org/cgi/doi/10.1101/gr.074492.107) _Genome research_ 18.5 821-829 (2008) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/bactopia/assembler) diff --git a/developers/subworkflows/bactopia_datasets.mdx b/developers/subworkflows/bactopia_datasets.mdx new file mode 100644 index 00000000..d5e20b67 --- /dev/null +++ b/developers/subworkflows/bactopia_datasets.mdx @@ -0,0 +1,87 @@ +--- +title: bactopia_datasets +description: "Download and provide pre-compiled datasets required by Bactopia." +tags: + - download + - database + - setup + - amr + - mlst + - minhash + - sourmash + - gtdb + - custom-scope +--- + +# bactopia_datasets + +**Tags:** download database setup amr mlst minhash sourmash gtdb custom-scope + +Download and provide pre-compiled datasets required by Bactopia. + +This subworkflow wraps the DATASETS module and extracts individual database +paths as separate channel emissions for downstream consumption. + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. + +#### `amrfinderplus_db` + +Path to the AMRFinderPlus database tarball + +#### `mlst_db` + +Path to the PubMLST database tarball + +#### `mash_db` + +Path to the Mash RefSeq sketch + +#### `sourmash_db` + +Path to the Sourmash GTDB signatures + +## Module Composition + +This subworkflow calls the following modules: + +- [bactopia_datasets](/developers/modules/bactopia_datasets) - Download pre-compiled datasets required by Bactopia. 
+ +## Used By + +This subworkflow is used by the following workflows: + +- [amrfinderplus](/bactopia-tools/amrfinderplus) - Bactopia Tool: Amrfinderplus. +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [merlin](/bactopia-tools/merlin) - MinMER-assisted species-specific tool selection and execution. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [AMRFinderPlus](https://github.com/ncbi/amr) + Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. 
Agents Chemother._ (2019) + +- [Mash RefSeq (release 88) Sketch](https://mash.readthedocs.io/en/latest/data.html) + Ondov BD, Starrett GJ, Sappington A, Kostic A, Koren S, Buck CB, Phillippy AM [Mash Screen: high-throughput sequence containment estimation for genome discovery](https://doi.org/10.1186/s13059-019-1841-x) _Genome Biol_ 20, 232 (2019) + +- [PubMLST.org](https://pubmlst.org/) + Jolley KA, Bray JE, Maiden MCJ [Open-access bacterial population genomics: BIGSdb software, the PubMLST.org website and their applications.](http://dx.doi.org/10.12688/wellcomeopenres.14826.1) _Wellcome Open Res_ 3, 124 (2018) + +- [Sourmash GenBank LCA Signature](https://sourmash.readthedocs.io/en/latest/databases.html) + Brown CT, Irber L [sourmash: a library for MinHash sketching of DNA](http://dx.doi.org/10.21105/joss.00027). _JOSS_ 1, 27 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/bactopia/datasets) diff --git a/developers/subworkflows/bactopia_gather.mdx b/developers/subworkflows/bactopia_gather.mdx new file mode 100644 index 00000000..d48cf176 --- /dev/null +++ b/developers/subworkflows/bactopia_gather.mdx @@ -0,0 +1,118 @@ +--- +title: bactopia_gather +description: "Search, validate, gather, and standardize input samples." +tags: + - validation + - download + - merging + - simulation + - metadata + - fastq + - sra + - ena + - art + - sample-scope +--- + +# bactopia_gather + +**Tags:** validation download merging simulation metadata fastq sra ena art sample-scope + +Search, validate, gather, and standardize input samples. + +This subworkflow processes raw input samples through validation, standardization, and metadata +collection. It handles various input types including local FASTQ files, SRA/ENA accessions, +NCBI assembly accessions, and assemblies.
The workflow can merge multiple sequencing runs, +download remote data, and simulate reads from assemblies using [ART](https://www.niehs.nih.gov/research/resources/software/biostatistics/art). + +Uses explicit positional record fields for reads: +- Input: `record(meta, r1_files, r2_files, se_files, lr_files)` with `Set<Path>` slots (pre-merge) +- Output: `record(meta, r1, r2, se, lr)` with `Path?` slots (post-merge, consolidated) + +## Take + +``` +samples: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1_files` | Illumina R1 read files (Set, elements may be null) | +| `r2_files` | Illumina R2 read files (Set, elements may be null) | +| `se_files` | Single-end read files (Set, elements may be null) | +| `lr_files` | Long read files (ONT/PacBio) or assembly for simulation (Set, elements may be null) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited metadata file describing the valid samples | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated metadata from all samples | + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. + +#### `reads` + +| Output | Description | +|--------|-------------| +| `r1` | Illumina R1 reads (paired-end forward) | +| `r2` | Illumina R2 reads (paired-end reverse) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | +| `fna` | Assembly file for assembly-based samples | + +## Module Composition + +This subworkflow calls the following modules: + +- [bactopia_gather](/developers/modules/bactopia_gather) - Search, validate, gather, or simulate input samples.
+- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [cleanyerreads](/bactopia-pipelines/cleanyerreads) - Quality control and optional host read removal from raw sequencing reads. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. +- [teton](/bactopia-pipelines/teton) - Taxonomic classification and abundance profiling of metagenomic reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ART](https://www.niehs.nih.gov/research/resources/software/biostatistics/art/index.cfm) + Huang W, Li L, Myers JR, Marth GT [ART: a next-generation sequencing read simulator.](http://dx.doi.org/10.1093/bioinformatics/btr708) _Bioinformatics_ 28, 593-594 (2012) + +- [fastq-dl](https://github.com/rpetit3/fastq-dl) + Petit III RA [fastq-dl: Download FASTQ files from SRA or ENA repositories.](https://github.com/rpetit3/fastq-dl) (GitHub) + +- [fastq-scan](https://github.com/rpetit3/fastq-scan) + Petit III RA [fastq-scan: generate summary statistics of input FASTQ sequences.](https://github.com/rpetit3/fastq-scan) (GitHub) + +- [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) + Blin K [ncbi-genome-download: Scripts to download genomes from the NCBI FTP servers](https://github.com/kblin/ncbi-genome-download) (GitHub) + +- [Pigz](https://zlib.net/pigz/) + Adler M. 
[pigz: A parallel implementation of gzip for modern multi-processor, multi-core machines.](https://zlib.net/pigz/) _Jet Propulsion Laboratory_ (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/bactopia/gather) diff --git a/developers/subworkflows/bactopia_qc.mdx b/developers/subworkflows/bactopia_qc.mdx new file mode 100644 index 00000000..a864e93d --- /dev/null +++ b/developers/subworkflows/bactopia_qc.mdx @@ -0,0 +1,145 @@ +--- +title: bactopia_qc +description: "Perform comprehensive quality control on sequencing reads." +tags: + - quality-control + - adapters + - error-correction + - subsampling + - fastq + - illumina + - nanopore + - fastp + - bbduk + - nanoq + - sample-scope +--- + +# bactopia_qc + +**Tags:** quality-control adapters error-correction subsampling fastq illumina nanopore fastp bbduk nanoq sample-scope + +Perform comprehensive quality control on sequencing reads. + +This subworkflow processes raw sequencing reads through a comprehensive quality control pipeline. +It adapts to different read types: +- **Illumina:** Adapter/PhiX removal ([Fastp](https://github.com/OpenGene/fastp) or + [BBDuk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/)), Error Correction + ([Lighter](https://github.com/mourisl/Lighter)), and Subsampling ([Rasusa](https://github.com/mbhall88/rasusa)) +- **Nanopore:** Adapter removal ([Porechop](https://github.com/rrwick/Porechop)), Quality filtering + ([Nanoq](https://github.com/esteinig/nanoq)), and Subsampling ([Rasusa](https://github.com/mbhall88/rasusa)) +- **Hybrid:** Processes both short and long reads through their respective pipelines +- **Assembly:** Passes through simulated reads from assemblies + +Generates quality metrics using [fastq-scan](https://github.com/rpetit3/fastq-scan) and optional +quality reports using [FastQC](https://github.com/s-andrews/FastQC) (Illumina) and +[NanoPlot](https://github.com/wdecoster/NanoPlot) (ONT). 
+ +## Take + +``` +samples: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information (must include `runtype`, `genome_size`, `species`) | +| `r1` | Illumina R1 reads (paired-end forward) | +| `r2` | Illumina R2 reads (paired-end reverse) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT) | +| `assembly` | Assembly file (FASTA) for assembly-based simulations | + +``` +adapters: Path? +phix: Path? +``` + +| Name | Type | Description | +|------|------|-------------| +| `adapters` | `Path?` | Optional adapter sequences in FASTA format for removal from Illumina reads | +| `phix` | `Path?` | Optional PhiX sequences in FASTA format for removal from Illumina reads | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `reads_grouped` | All output FASTQs for publishing | +| `supplemental` | QC reports (FastQC/NanoPlot), JSON metrics, and error FASTQs if QC failed | +| `error` | Captured error messages if QC failed (e.g., reads empty after trimming) | + +#### `run_outputs` + +No run-scope outputs. + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. + +#### `reads` + +| Output | Description | +|--------|-------------| +| `r1` | QC-filtered Illumina R1 reads | +| `r2` | QC-filtered Illumina R2 reads | +| `se` | QC-filtered single-end reads | +| `lr` | QC-filtered long reads | +| `fna` | Assembly file (passed through for assembly-based samples) | + +## Module Composition + +This subworkflow calls the following modules: + +- [bactopia_qc](/developers/modules/bactopia_qc) - Automated quality control, error correction, and read subsampling. 
+ +## Used By + +This subworkflow is used by the following workflows: + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [cleanyerreads](/bactopia-pipelines/cleanyerreads) - Quality control and optional host read removal from raw sequencing reads. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BBTools](https://jgi.doe.gov/data-and-tools/bbtools/) + Bushnell B [BBMap short read aligner, and other bioinformatic tools.](http://sourceforge.net/projects/bbmap/) (Link) + +- [fastp](https://github.com/OpenGene/fastp) + Chen S, Zhou Y, Chen Y, and Gu J [fastp: an ultra-fast all-in-one FASTQ preprocessor.](https://doi.org/10.1093/bioinformatics/bty560) _Bioinformatics_, 34(17), i884-i890. (2018) + +- [FastQC](https://github.com/s-andrews/FastQC) + Andrews S [FastQC: a quality control tool for high throughput sequence data.](http://www.bioinformatics.babraham.ac.uk/projects/fastqc) (WebLink) + +- [fastq-scan](https://github.com/rpetit3/fastq-scan) + Petit III RA [fastq-scan: generate summary statistics of input FASTQ sequences.](https://github.com/rpetit3/fastq-scan) (GitHub) + +- [Lighter](https://github.com/mourisl/Lighter) + Song L, Florea L, Langmead B [Lighter: Fast and Memory-efficient Sequencing Error Correction without Counting](https://doi.org/10.1186/s13059-014-0509-9). 
_Genome Biol._ 15(11):509 (2014) + +- [NanoPlot](https://github.com/wdecoster/NanoPlot) + De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C [NanoPack: visualizing and processing long-read sequencing data](https://doi.org/10.1093/bioinformatics/bty149) _Bioinformatics_ Volume 34, Issue 15 (2018) + +- [Nanoq](https://github.com/esteinig/nanoq) + Steinig E [Nanoq: Minimal but speedy quality control for nanopore reads in Rust](https://github.com/esteinig/nanoq) (GitHub) + +- [Porechop](https://github.com/rrwick/Porechop) + Wick RR, Judd LM, Gorrie CL, Holt KE. [Completing bacterial genome assemblies with multiplex MinION sequencing.](https://doi.org/10.1099/mgen.0.000132) _Microb Genom._ 3(10):e000132 (2017) + +- [Rasusa](https://github.com/mbhall88/rasusa) + Hall MB [Rasusa: Randomly subsample sequencing reads to a specified coverage.](https://doi.org/10.5281/zenodo.3731394) (2019). + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/bactopia/qc) diff --git a/developers/subworkflows/bactopia_sketcher.mdx b/developers/subworkflows/bactopia_sketcher.mdx new file mode 100644 index 00000000..b4a85566 --- /dev/null +++ b/developers/subworkflows/bactopia_sketcher.mdx @@ -0,0 +1,95 @@ +--- +title: bactopia_sketcher +description: "Create genomic sketches and perform rapid taxonomic classification." +tags: + - taxonomy + - classification + - minhash + - sketch + - mash + - sourmash + - refseq + - gtdb + - sample-scope +--- + +# bactopia_sketcher + +**Tags:** taxonomy classification minhash sketch mash sourmash refseq gtdb sample-scope + +Create genomic sketches and perform rapid taxonomic classification. + +This subworkflow generates MinHash sketches from assembled genomes using [Mash](https://github.com/marbl/Mash) +and [Sourmash](https://github.com/dib-lab/sourmash). The sketches are compared against reference databases +to identify taxonomic classification and find closely related genomes. 
Mash queries against RefSeq while +Sourmash uses the GTDB database for comprehensive taxonomic placement. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +``` +mash_db: Path +sourmash_db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `mash_db` | `Path` | Path to the Mash RefSeq database for taxonomic classification | +| `sourmash_db` | `Path` | Path to the Sourmash GTDB LCA database for taxonomic classification | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `sig` | Sourmash signature file | +| `msh` | Mash sketch files for k=21 and k=31 | +| `mash` | Mash Screen classification report against RefSeq | +| `sourmash` | Sourmash LCA classification report against GTDB | + +#### `run_outputs` + +No run-scope outputs. + +## Module Composition + +This subworkflow calls the following modules: + +- [bactopia_sketcher](/developers/modules/bactopia_sketcher) - Create genomic sketches and perform rapid taxonomic classification. + +## Used By + +This subworkflow is used by the following workflows: + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mash](https://github.com/marbl/Mash) + Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). _Genome Biol_ 17, 132 (2016) + +- [Sourmash](https://github.com/dib-lab/sourmash) + Brown CT, Irber L [sourmash: a library for MinHash sketching of DNA](http://dx.doi.org/10.21105/joss.00027). _JOSS_ 1, 27 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/bactopia/sketcher) diff --git a/developers/subworkflows/bakta.mdx b/developers/subworkflows/bakta.mdx new file mode 100644 index 00000000..e6c93a6c --- /dev/null +++ b/developers/subworkflows/bakta.mdx @@ -0,0 +1,119 @@ +--- +title: bakta +description: "Rapid bacterial genome annotation." +tags: + - bacteria + - annotation + - genome + - functional-annotation + - taxonomy + - sample-scope +--- + +# bakta + +**Tags:** bacteria annotation genome functional-annotation taxonomy sample-scope + +Rapid bacterial genome annotation. + +This subworkflow uses [Bakta](https://github.com/oschwengers/bakta) to provide +rapid, comprehensive annotation of bacterial genomes. It can download and prepare +the Bakta database on-demand or use a pre-existing database. The workflow processes +each sample individually, producing multiple output formats including GFF3, GenBank, +protein sequences, nucleotide sequences, and a BLAST database. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +``` +database: Path? +download_bakta: Boolean +save_as_tarball: Boolean +proteins: Path? 
+prodigal_tf: Path? +replicons: Path? +``` + +| Name | Type | Description | +|------|------|-------------| +| `database` | `Path?` | Optional pre-existing Bakta database path | +| `download_bakta` | `Boolean` | Boolean flag to trigger automatic database download | +| `save_as_tarball` | `Boolean` | Boolean flag to save downloaded database as tarball | +| `proteins` | `Path?` | Optional trusted protein sequences for homology search | +| `prodigal_tf` | `Path?` | Optional Prodigal training file for improved gene prediction | +| `replicons` | `Path?` | Optional replicon sequences for plasmid identification | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `embl` | Annotations and sequences in EMBL format | +| `faa` | CDS/sORF amino acid sequences as FASTA | +| `ffn` | Feature nucleotide sequences as FASTA | +| `fna` | Replicon/contig DNA sequences as FASTA | +| `gbff` | Annotations and sequences in GenBank format | +| `gff` | Annotations and sequences in GFF3 format | +| `hypotheticals_tsv` | Further information on hypothetical protein CDS as tab-separated values | +| `hypotheticals_faa` | Hypothetical protein CDS amino acid sequences as FASTA | +| `tsv` | Annotations as simple human readable tab-separated values | +| `txt` | Broad summary of Bakta annotations | +| `blastdb` | A compressed tar.gz archive of BLAST+ databases of the contigs, genes, and proteins | + +#### `run_outputs` + +No run-scope outputs. + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. 
+ +#### `annotations` + +| Output | Description | +|--------|-------------| +| `fna` | Annotated nucleotide sequences in FASTA format | +| `faa` | Protein sequences in FASTA format | +| `gff` | Annotations in GFF3 format | + +## Module Composition + +This subworkflow calls the following modules: + +- [bakta_download](/developers/modules/bakta_download) - Download the Bakta annotation database. +- [bakta_run](/developers/modules/bakta_run) - Rapid and standardized annotation of bacterial genomes and plasmids. + +## Used By + +This subworkflow is used by the following workflows: + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [bakta](/bactopia-tools/bakta) - Rapid annotation of bacterial genomes and plasmids. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Bakta](https://github.com/oschwengers/bakta) + Schwengers O, Jelonek L, Dieckmann MA, Beyvers S, Blom J, Goesmann A [Bakta - rapid and standardized annotation of bacterial genomes via alignment-free sequence identification.](https://doi.org/10.1099/mgen.0.000685) _Microbial Genomics_ 7(11) (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/bakta) diff --git a/developers/subworkflows/blastn.mdx b/developers/subworkflows/blastn.mdx new file mode 100644 index 00000000..590e3517 --- /dev/null +++ b/developers/subworkflows/blastn.mdx @@ -0,0 +1,87 @@ +--- +title: blastn +description: "Search a nucleotide database using nucleotide query sequences." 
+tags: + - blast + - alignment + - nucleotide + - search + - fasta + - database + - sample-scope +--- + +# blastn + +**Tags:** blast alignment nucleotide search fasta database sample-scope + +Search a nucleotide database using nucleotide query sequences. + +This subworkflow uses [BLASTN](https://blast.ncbi.nlm.nih.gov/Blast.cgi) to align +nucleotide query sequences against a nucleotide BLAST database. It processes each +sample individually and aggregates the alignment results into a single consolidated +report for all samples. + +## Take + +``` +blastdb: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `blastdb` | A compressed tarball containing the BLAST database | + +``` +query: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `query` | `Path` | FASTA file containing nucleotide query sequences to search for in the assemblies | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited summary of alignments (standard BLAST outfmt 6) | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [blast_blastn](/developers/modules/blast_blastn) - Search a nucleotide database using a nucleotide query. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [blastn](/bactopia-tools/blastn) - Search against nucleotide BLAST databases using nucleotide queries. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/blastn) diff --git a/developers/subworkflows/blastp.mdx b/developers/subworkflows/blastp.mdx new file mode 100644 index 00000000..6071f24d --- /dev/null +++ b/developers/subworkflows/blastp.mdx @@ -0,0 +1,84 @@ +--- +title: blastp +description: "Search protein sequences against protein database." +tags: + - blast + - protein + - alignment + - database + - sample-scope +--- + +# blastp + +**Tags:** blast protein alignment database sample-scope + +Search protein sequences against protein database. + +This subworkflow uses [BLASTP](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch&PROGRAM=blastp) +from the NCBI BLAST+ suite to search protein sequences against a protein database. +It processes each assembly individually and aggregates the results into a single consolidated report. + +## Take + +``` +blastdb: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `blastdb` | A compressed tarball containing the BLAST database | + +``` +query: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `query` | `Path` | FASTA file containing protein query sequences to search against the protein BLAST database | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow.
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited summary of alignments (standard BLAST outfmt 6) | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [blast_blastp](/developers/modules/blast_blastp) - Search a protein database using a protein query. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [blastp](/bactopia-tools/blastp) - Search against protein BLAST databases using protein queries. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/blastp) diff --git a/developers/subworkflows/blastx.mdx b/developers/subworkflows/blastx.mdx new file mode 100644 index 00000000..a16ea145 --- /dev/null +++ b/developers/subworkflows/blastx.mdx @@ -0,0 +1,86 @@ +--- +title: blastx +description: "Translate nucleotide sequences and search protein database." +tags: + - blast + - protein + - translation + - alignment + - database + - sample-scope +--- + +# blastx + +**Tags:** blast protein translation alignment database sample-scope + +Translate nucleotide sequences and search protein database. 
+ +This subworkflow uses [BLASTX](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch&PROGRAM=blastx) +from the NCBI BLAST+ suite to translate nucleotide sequences in all six reading frames +and search them against a protein database. It processes each assembly individually +and aggregates the results into a single consolidated report. + +## Take + +``` +blastdb: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `blastdb` | A compressed tarball containing the BLAST database | + +``` +query: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `query` | `Path` | FASTA file containing nucleotide query sequences to translate and search against the protein BLAST database | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited summary of alignments (standard BLAST outfmt 6) | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [blast_blastx](/developers/modules/blast_blastx) - Search a protein database using a translated nucleotide query. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [blastx](/bactopia-tools/blastx) - Search against protein BLAST databases using translated nucleotide queries. + +## Citations + +If you use this in your analysis, please cite the following.
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/blastx) diff --git a/developers/subworkflows/bracken.mdx b/developers/subworkflows/bracken.mdx new file mode 100644 index 00000000..928bab79 --- /dev/null +++ b/developers/subworkflows/bracken.mdx @@ -0,0 +1,105 @@ +--- +title: bracken +description: "Estimate species abundance from metagenomic reads." +tags: + - metagenomics + - taxonomic-classification + - abundance-estimation + - kraken2 + - bracken + - sample-scope +--- + +# bracken + +**Tags:** metagenomics taxonomic-classification abundance-estimation kraken2 bracken sample-scope + +Estimate species abundance from metagenomic reads. + +This subworkflow performs taxonomic classification and abundance estimation using [Kraken2](https://github.com/DerrickWood/kraken2) +and [Bracken](https://github.com/jenniferlu717/Bracken). It processes metagenomic reads, classifies them against a reference database, +and generates abundance estimates at different taxonomic levels with optional abundance correction. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? 
+ +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +``` +database: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `database` | `Path` | Path to the Kraken2 database for taxonomic classification. | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited summary of Bracken primary and secondary species abundances | +| `special_meta` | A simplified metadata record for internal use | +| `classified` | Reads classified to belong to any of the taxa on the Kraken2 database | +| `unclassified` | Reads not classified to belong to any of the taxa on the Kraken2 database | +| `kraken2_report` | Kraken2 report containing stats about classified and not classified reads | +| `kraken2_output` | Kraken2 output file containing the taxonomic classification of each read | +| `bracken_report` | Bracken report containing stats about classified and not classified reads | +| `krona` | Interactive Krona HTML visualization | +| `abundances` | Bracken abundance estimates for each taxon | +| `classification` | Bracken per-read classification details | +| `adjusted_abundances` | Bracken abundance estimates adjusted for unclassified reads | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [bracken](/developers/modules/bracken) - Taxonomic classification and abundance estimation. 
+- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [bracken](/bactopia-tools/bracken) - Estimate taxonomic abundance of metagenomic samples. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) + +- [Bracken](https://github.com/jenniferlu717/Bracken) + Lu J, Breitwieser FP, Thielen P, and Salzberg SL [Bracken: estimating species abundance in metagenomics data.](https://doi.org/10.7717/peerj-cs.104) _PeerJ Computer Science_, 3, e104. (2017) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/bracken) diff --git a/developers/subworkflows/btyper3.mdx b/developers/subworkflows/btyper3.mdx new file mode 100644 index 00000000..ca024245 --- /dev/null +++ b/developers/subworkflows/btyper3.mdx @@ -0,0 +1,78 @@ +--- +title: btyper3 +description: "In silico taxonomic classification of Bacillus cereus group genomes." +tags: + - bacillus + - cereus + - taxonomy + - typing + - toxin-genes + - sample-scope +--- + +# btyper3 + +**Tags:** bacillus cereus taxonomy typing toxin-genes sample-scope + +In silico taxonomic classification of Bacillus cereus group genomes. 
+ +This subworkflow performs taxonomic classification of Bacillus cereus group +genomes using [BTyper3](https://github.com/lmc297/BTyper3), which provides +comprehensive classification including species, lineage, and toxin gene detection. +The results from individual samples are aggregated into a combined summary file. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembly files in FASTA format for Bacillus cereus group classification | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited BTyper3 typing and classification results | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [btyper3](/developers/modules/btyper3) - In silico typing and characterization of *Bacillus cereus* group genomes. + +## Used By + +This subworkflow is used by the following workflows: + +- [btyper3](/bactopia-tools/btyper3) - Taxonomic classification of Bacillus cereus group isolates. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BTyper3](https://github.com/lmc297/BTyper3) + Carroll LM, Cheng RA, Kovac J [No Assembly Required: Using BTyper3 to Assess the Congruency of a Proposed Taxonomic Framework for the Bacillus cereus Group With Historical Typing Methods.](https://doi.org/10.3389/fmicb.2020.580691) _Frontiers in Microbiology_, 11, 580691. (2020) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/btyper3) diff --git a/developers/subworkflows/busco.mdx b/developers/subworkflows/busco.mdx new file mode 100644 index 00000000..7dbc822b --- /dev/null +++ b/developers/subworkflows/busco.mdx @@ -0,0 +1,89 @@ +--- +title: busco +description: "Assess genome assembly completeness using BUSCO." +tags: + - assembly + - completeness + - quality + - assessment + - orthologs + - evaluation + - sample-scope +--- + +# busco + +**Tags:** assembly completeness quality assessment orthologs evaluation sample-scope + +Assess genome assembly completeness using BUSCO. + +This subworkflow evaluates genome assembly completeness by searching for +single-copy orthologs against the [BUSCO](https://busco.ezlab.org/) database. +It generates comprehensive completeness reports including missing, duplicated, +fragmented, and complete single-copy orthologs. The workflow includes individual +sample assessments and a merged summary report across all samples. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Genome assemblies to evaluate for completeness. 
Each record contains metadata | + +``` +busco_lineage: String +``` + +| Name | Type | Description | +|------|------|-------------| +| `busco_lineage` | `String` | BUSCO lineage dataset to use for assessment (e.g., bacteria_odb10). | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A text summary report of the completeness score (C/S/D/F/M%) | +| `supplemental` | Directory containing full tables, missing gene lists, and lineage data | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [busco](/developers/modules/busco) - Assess genome assembly completeness using single-copy orthologs. + +## Used By + +This subworkflow is used by the following workflows: + +- [busco](/bactopia-tools/busco) - Assessment of genome assembly completeness using evolutionarily informed expectations. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BUSCO](https://gitlab.com/ezlab/busco) + Manni M, Berkeley MR, Seppey M, Simão FA, Zdobnov EM [BUSCO Update: Novel and Streamlined Workflows along with Broader and Deeper Phylogenetic Coverage for Scoring of Eukaryotic, Prokaryotic, and Viral Genomes.](https://doi.org/10.1093/molbev/msab199) _Molecular Biology and Evolution_ 38(10), 4647-4654. 
(2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/busco) diff --git a/developers/subworkflows/checkm.mdx b/developers/subworkflows/checkm.mdx new file mode 100644 index 00000000..5359b6dc --- /dev/null +++ b/developers/subworkflows/checkm.mdx @@ -0,0 +1,80 @@ +--- +title: checkm +description: "Assess metagenome bin completeness using CheckM." +tags: + - metagenome + - bin + - completeness + - contamination + - mag + - quality + - sample-scope +--- + +# checkm + +**Tags:** metagenome bin completeness contamination mag quality sample-scope + +Assess metagenome bin completeness using CheckM. + +This subworkflow evaluates the quality and completeness of metagenome-assembled genomes +(MAGs) using [CheckM](https://github.com/Ecogenomics/CheckM). It provides a comprehensive +assessment based on lineage-specific marker sets, estimating completeness and contamination +of genome bins. The workflow generates detailed reports including marker gene statistics, +taxonomy predictions, and quality metrics for each bin. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Metagenome-assembled genome bins to evaluate. Each record contains metadata | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited CheckM quality assessment results | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. 
+- [checkm_lineagewf](/developers/modules/checkm_lineagewf) - Assess genome quality using lineage-specific marker sets. + +## Used By + +This subworkflow is used by the following workflows: + +- [checkm](/bactopia-tools/checkm) - Assessment of microbial genome assembly quality. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [CheckM](https://github.com/Ecogenomics/CheckM) + Parks DH, Imelfort M, Skennerton CT, Hugenholtz P, Tyson GW [CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes.](http://dx.doi.org/10.1101/gr.186072.114) _Genome Res_ 25, 1043-1055 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/checkm) diff --git a/developers/subworkflows/checkm2.mdx b/developers/subworkflows/checkm2.mdx new file mode 100644 index 00000000..63186ebb --- /dev/null +++ b/developers/subworkflows/checkm2.mdx @@ -0,0 +1,93 @@ +--- +title: checkm2 +description: "Assess metagenome bin completeness using CheckM2." +tags: + - metagenome + - bin + - completeness + - contamination + - mag + - quality + - machine-learning + - sample-scope +--- + +# checkm2 + +**Tags:** metagenome bin completeness contamination mag quality machine-learning sample-scope + +Assess metagenome bin completeness using CheckM2. + +This subworkflow evaluates the quality and completeness of metagenome-assembled genomes +(MAGs) using [CheckM2](https://github.com/chklovski/CheckM2). It provides an improved +assessment using machine learning models trained on high-quality reference genomes, +offering more accurate completeness and contamination estimates. The workflow can either +download the required database or use a user-provided database path. 
+ +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Metagenome-assembled genome bins to evaluate. Each record contains metadata | + +``` +database: Path +download_checkm2: Boolean +``` + +| Name | Type | Description | +|------|------|-------------| +| `database` | `Path` | Path to CheckM2 database directory. If download_checkm2 is true, this can be a placeholder as the database will be downloaded automatically. | +| `download_checkm2` | `Boolean` | Boolean flag to automatically download the CheckM2 database if not available. When true, downloads the required reference database before prediction. | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited report of quality metrics (Completeness, Contamination) | +| `supplemental` | Directory containing intermediate protein files and Diamond alignments | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [checkm2_predict](/developers/modules/checkm2_predict) - Assess genome quality using machine learning. +- [checkm2_download](/developers/modules/checkm2_download) - Download the pre-trained CheckM2 database. + +## Used By + +This subworkflow is used by the following workflows: + +- [checkm2](/bactopia-tools/checkm2) - Machine learning-based assessment of microbial genome assembly quality. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [CheckM2](https://github.com/chklovski/CheckM2) + Chklovski A [Rapid assessment of genome bin quality using machine learning](https://github.com/chklovski/CheckM2) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/checkm2) diff --git a/developers/subworkflows/clermontyping.mdx b/developers/subworkflows/clermontyping.mdx new file mode 100644 index 00000000..33ae9ed3 --- /dev/null +++ b/developers/subworkflows/clermontyping.mdx @@ -0,0 +1,76 @@ +--- +title: clermontyping +description: "Predict phylogroups of Escherichia coli from genome assemblies." +tags: + - escherichia-coli + - phylogroup + - typing + - clermont + - sample-scope +--- + +# clermontyping + +**Tags:** escherichia-coli phylogroup typing clermont sample-scope + +Predict phylogroups of Escherichia coli from genome assemblies. + +This subworkflow uses [ClermonTyping](https://github.com/happykhan/ClermonTyping) to determine +the phylogenetic groups of *Escherichia coli* strains from assembled genomes. It processes +each sample individually and aggregates the results into a single consolidated report. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited ClermonTyping phylogroup assignment results | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [clermontyping](/developers/modules/clermontyping) - Determine the phylogroup of Escherichia coli isolates. + +## Used By + +This subworkflow is used by the following workflows: + +- [clermontyping](/bactopia-tools/clermontyping) - In silico phylotyping of Escherichia genus. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ClermonTyping](https://github.com/happykhan/ClermonTyping) + Beghain J, Bridier-Nahmias A, Le Nagard H, Denamur E, Clermont O. [ClermonTyping: an easy-to-use and accurate in silico method for Escherichia genus strain phylotyping.](https://doi.org/10.1099/mgen.0.000192) Microbial Genomics, 4(7), e000192. (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/clermontyping) diff --git a/developers/subworkflows/clonalframeml.mdx b/developers/subworkflows/clonalframeml.mdx new file mode 100644 index 00000000..a4734ecf --- /dev/null +++ b/developers/subworkflows/clonalframeml.mdx @@ -0,0 +1,108 @@ +--- +title: clonalframeml +description: "Detect and mask recombination events in bacterial phylogenies." 
+tags: + - recombination + - phylogeny + - masking + - clonalframe + - bacterial-evolution + - run-scope +--- + +# clonalframeml + +**Tags:** recombination phylogeny masking clonalframe bacterial-evolution run-scope + +Detect and mask recombination events in bacterial phylogenies. + +This subworkflow uses [ClonalFrameML](https://github.com/xavierdidelot/ClonalFrameML) to +detect and mask recombination events in bacterial phylogenies. It first builds a quick +phylogenetic tree using [IQ-TREE](https://github.com/iqtree/iqtree2), then identifies +recombination regions and creates a recombination-masked alignment. Finally, it +calculates SNP distances from the masked alignment using [snp-dists](https://github.com/tseemann/snp-dists). + +## Take + +``` +alignment: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `aln` | Core-genome alignment in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `masked_aln` | Recombination-masked alignment with detected recombination regions removed | +| `emsim` | Uncertainty estimation results | +| `em` | Final parameter estimates from the EM algorithm | +| `status` | Predicted recombination events (importations) | +| `nwk` | Input tree with internal nodes labelled | +| `fasta` | Reconstructed ancestral sequences | +| `pos_ref` | Position cross-reference table | +| `aln` | Input multiple sequence alignment (passed through from IQ-TREE) | +| `nwk` | Quick-start maximum-likelihood phylogenetic tree from IQ-TREE | +| `tsv` | Pairwise SNP distances from masked alignment in TSV format | + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. 
+ +#### `alignment` + +| Output | Description | +|--------|-------------| +| `aln` | Recombination-masked alignment for downstream phylogenetic analysis | + +## Subworkflow Composition + +This subworkflow calls the following subworkflows: + +- [iqtree](/developers/subworkflows/iqtree) - Construct maximum likelihood phylogenetic trees from alignments. +- [snpdists](/developers/subworkflows/snpdists) - Calculate pairwise SNP distances from sequence alignments. + +## Module Composition + +This subworkflow calls the following modules: + +- [clonalframeml](/developers/modules/clonalframeml) - Inference of recombination in bacterial genomes. + +## Used By + +This subworkflow is used by the following workflows: + +- [pangenome](/bactopia-tools/pangenome) - Pangenome analysis with optional core-genome phylogeny. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ClonalFrameML](https://github.com/xavierdidelot/ClonalFrameML) + Didelot X, Wilson DJ [ClonalFrameML: Efficient Inference of Recombination in Whole Bacterial Genomes.](https://doi.org/10.1371/journal.pcbi.1004041) _PLoS Comput Biol_ 11(2) e1004041 (2015) + +- [IQ-TREE](https://github.com/Cibiv/IQ-TREE) + Nguyen L-T, Schmidt HA, von Haeseler A, Minh BQ [IQ-TREE: A fast and effective stochastic algorithm for estimating maximum likelihood phylogenies.](https://doi.org/10.1093/molbev/msu300) _Mol. Biol. 
Evol._ 32:268-274 (2015) + +- [snp-dists](https://github.com/tseemann/snp-dists) + Seemann T [snp-dists - Pairwise SNP distance matrix from a FASTA sequence alignment.](https://github.com/tseemann/snp-dists) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/clonalframeml) diff --git a/developers/subworkflows/defensefinder.mdx b/developers/subworkflows/defensefinder.mdx new file mode 100644 index 00000000..7e6c9410 --- /dev/null +++ b/developers/subworkflows/defensefinder.mdx @@ -0,0 +1,83 @@ +--- +title: defensefinder +description: "Systematically search for anti-phage defense systems." +tags: + - bacteria + - assembly + - anti-phage + - defense-systems + - immunity + - sample-scope +--- + +# defensefinder + +**Tags:** bacteria assembly anti-phage defense-systems immunity sample-scope + +Systematically search for anti-phage defense systems. + +This subworkflow uses [DefenseFinder](https://github.com/mdmparis/defense-finder) to identify and classify +anti-phage defense systems in bacterial genomes. It detects defense genes, HMM hits, and complete +defense systems, providing comprehensive analysis of bacterial antiviral mechanisms. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format for defense system detection | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `genes_tsv` | Tab-delimited list of detected defense genes | +| `hmmer_tsv` | Tab-delimited list of HMMER hits used for detection | +| `systems_tsv` | Tab-delimited summary of detected defense systems | +| `proteins` | Protein sequences of the detected defense genes | +| `proteins_index` | Index file for the protein sequences | +| `macsydata_raw` | Compressed tarball of raw MacSyFinder data (optional) | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [defensefinder_run](/developers/modules/defensefinder_run) - Detect anti-phage defense systems using HMM profiles. +- [defensefinder_update](/developers/modules/defensefinder_update) - Download and package the DefenseFinder and CasFinder model databases. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [defensefinder](/bactopia-tools/defensefinder) - Systematic identification of anti-phage defense systems. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [DefenseFinder](https://github.com/mdmparis/defense-finder) + Tesson F, Hervé A, Mordret E, Touchon M, d'Humières C, Cury J, Bernheim A [Systematic and quantitative view of the antiviral arsenal of prokaryotes.](https://doi.org/10.1038/s41467-022-30269-9) Nature Communications, 13(1), 2561. 
(2022) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/defensefinder) diff --git a/developers/subworkflows/ectyper.mdx b/developers/subworkflows/ectyper.mdx new file mode 100644 index 00000000..208a2bc1 --- /dev/null +++ b/developers/subworkflows/ectyper.mdx @@ -0,0 +1,79 @@ +--- +title: ectyper +description: "In silico prediction of Escherichia coli serotype." +tags: + - escherichia + - coli + - serotype + - o-antigen + - h-antigen + - sample-scope +--- + +# ectyper + +**Tags:** escherichia coli serotype o-antigen h-antigen sample-scope + +In silico prediction of Escherichia coli serotype. + +This subworkflow performs serotype prediction for Escherichia coli genomes +using [ECTyper](https://github.com/phac-nml/ecoli_serotyping), which predicts +O and H antigens from whole genome assemblies. The tool identifies specific +serotype markers and provides comprehensive serotype classification. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembly files in FASTA format for E. coli serotype prediction | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited ECTyper serotype prediction results | +| `txt` | ECTyper detailed results in text format | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [ectyper](/developers/modules/ectyper) - Predict *Escherichia coli* serotype (O and H antigens). 
+ +## Used By + +This subworkflow is used by the following workflows: + +- [ectyper](/bactopia-tools/ectyper) - In silico prediction of Escherichia coli serotype. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ECTyper](https://github.com/phac-nml/ecoli_serotyping) + Laing C, Bessonov K, Sung S, La Rose C [ECTyper - In silico prediction of _Escherichia coli_ serotype](https://github.com/phac-nml/ecoli_serotyping) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/ectyper) diff --git a/developers/subworkflows/eggnog.mdx b/developers/subworkflows/eggnog.mdx new file mode 100644 index 00000000..4c66f5ed --- /dev/null +++ b/developers/subworkflows/eggnog.mdx @@ -0,0 +1,97 @@ +--- +title: eggnog +description: "Functional annotation through orthology assignment." +tags: + - functional + - annotation + - orthology + - eggnog + - protein-domains + - sample-scope +--- + +# eggnog + +**Tags:** functional annotation orthology eggnog protein-domains sample-scope + +Functional annotation through orthology assignment. + +This subworkflow performs genome-wide functional annotation using +[eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper), which provides fast +functional annotation through orthology assignment. It can optionally download +the eggNOG database if not provided. The pipeline predicts orthologs, functional +categories, and various annotation formats including GFF, Excel, and detailed reports. + +## Take + +``` +proteins: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `proteins` | Protein sequences in FASTA format for functional annotation | + +``` +database: Path? 
+download_eggnog: Boolean +save_as_tarball: Boolean +``` + +| Name | Type | Description | +|------|------|-------------| +| `database` | `Path?` | Path to pre-downloaded eggNOG database (optional) | +| `download_eggnog` | `Boolean` | Boolean flag to trigger database download if not provided | +| `save_as_tarball` | `Boolean` | Boolean flag to save downloaded database as tarball | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `hits` | Raw search hits (Diamond/MMseqs2) against the eggNOG database | +| `seed_orthologs` | List of identified seed orthologs used for annotation transfer | +| `annotations` | Main tab-delimited annotation file (COGs, KEGG, GO, etc.) | +| `xlsx` | Excel format of the annotations file | +| `orthologs` | List of fine-grained orthologs (optional) | +| `genepred` | Predicted gene sequences (optional) | +| `gff` | Annotations in GFF format (optional) | +| `no_anno` | FASTA file of sequences that failed to be annotated (optional) | +| `pfam` | Raw PFAM domain hits (optional) | + +#### `run_outputs` + +No run-scope outputs. + +## Module Composition + +This subworkflow calls the following modules: + +- [eggnog_download](/developers/modules/eggnog_download) - Download the eggNOG database for functional annotation. +- [eggnog_mapper](/developers/modules/eggnog_mapper) - Functional annotation of proteins using eggNOG orthology data. + +## Used By + +This subworkflow is used by the following workflows: + +- [eggnog](/bactopia-tools/eggnog) - Functional annotation of proteins using orthologous groups and phylogenies. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) + Huerta-Cepas J, Forslund K, Coelho LP, Szklarczyk D, Jensen LJ, von Mering C, Bork P [Fast Genome-Wide Functional Annotation through Orthology Assignment by eggNOG-Mapper.](http://dx.doi.org/10.1093/molbev/msx148) _Mol. Biol. Evol._ 34, 2115-2122 (2017) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/eggnog) diff --git a/developers/subworkflows/emmtyper.mdx b/developers/subworkflows/emmtyper.mdx new file mode 100644 index 00000000..39ea64cc --- /dev/null +++ b/developers/subworkflows/emmtyper.mdx @@ -0,0 +1,84 @@ +--- +title: emmtyper +description: "Predict emm types of Streptococcus pyogenes from genome assemblies." +tags: + - streptococcus-pyogenes + - emm-typing + - gas + - m-protein + - sample-scope +--- + +# emmtyper + +**Tags:** streptococcus-pyogenes emm-typing gas m-protein sample-scope + +Predict emm types of Streptococcus pyogenes from genome assemblies. + +This subworkflow uses [emmtyper](https://github.com/MDU-PHL/emmtyper) to predict +the emm types of *Streptococcus pyogenes* strains from assembled genomes. It processes +each sample individually and aggregates the results into a single consolidated report. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Set of assembled contigs in FASTA format to be analyzed for emm genes | + +``` +blastdb: Path? 
+``` + +| Name | Type | Description | +|------|------|-------------| +| `blastdb` | `Path?` | Optional BLAST database containing emm gene reference sequences for improved typing accuracy | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited summary of the assigned emm type and cluster | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [emmtyper](/developers/modules/emmtyper) - *emm*-typing of *Streptococcus pyogenes* (Group A Strep) assemblies. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [emmtyper](/bactopia-tools/emmtyper) - emm-typing of Streptococcus pyogenes assemblies. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [emmtyper](https://github.com/MDU-PHL/emmtyper) + Tan A, Seemann T, Lacey D, Davies M, Mcintyre L, Frost H, Williamson D, Gonçalves da Silva A [emmtyper - emm Automatic Isolate Labeller](https://github.com/MDU-PHL/emmtyper) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/emmtyper) diff --git a/developers/subworkflows/fastani.mdx b/developers/subworkflows/fastani.mdx new file mode 100644 index 00000000..3e115627 --- /dev/null +++ b/developers/subworkflows/fastani.mdx @@ -0,0 +1,84 @@ +--- +title: fastani +description: "Calculate Average Nucleotide Identity (ANI) between genomes." +tags: + - ani + - average-nucleotide-identity + - taxonomy + - species + - comparison + - run-scope +--- + +# fastani + +**Tags:** ani average-nucleotide-identity taxonomy species comparison run-scope + +Calculate Average Nucleotide Identity (ANI) between genomes. + +This subworkflow uses [FastANI](https://github.com/ParBLiSS/FastANI) to compute +whole-genome Average Nucleotide Identity (ANI) values between query genomes +and reference genomes. ANI is a robust measure of genomic similarity used for +species delineation in microbial taxonomy. The results are aggregated into +a single consolidated report. 
+ +## Take + +``` +query: Channel +reference: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `fna` | Query genomes in FASTA format for ANI calculation | + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `fna` | Reference genomes in FASTA format for ANI calculation | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited summary of the ANI scores, matched fragments, and total fragments | +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [fastani](/developers/modules/fastani) - Compute whole-genome Average Nucleotide Identity (ANI). + +## Used By + +This subworkflow is used by the following workflows: + +- [fastani](/bactopia-tools/fastani) - Fast alignment-free computation of whole-genome Average Nucleotide Identity. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [FastANI](https://github.com/ParBLiSS/FastANI) + Jain C, Rodriguez-R LM, Phillippy AM, Konstantinidis KT, Aluru S [High throughput ANI analysis of 90K prokaryotic genomes reveals clear species boundaries.](http://dx.doi.org/10.1038/s41467-018-07641-9) _Nat. 
Commun._ 9, 5114 (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/fastani) diff --git a/developers/subworkflows/gamma.mdx b/developers/subworkflows/gamma.mdx new file mode 100644 index 00000000..0e65963c --- /dev/null +++ b/developers/subworkflows/gamma.mdx @@ -0,0 +1,89 @@ +--- +title: gamma +description: "Gene Allele Mutation Microbial Assessment." +tags: + - gene + - allele + - mutation + - variant + - antimicrobial-resistance + - sample-scope +--- + +# gamma + +**Tags:** gene allele mutation variant antimicrobial-resistance sample-scope + +Gene Allele Mutation Microbial Assessment. + +This subworkflow performs rapid identification, classification, and annotation of +translated gene matches from sequencing data using [GAMMA](https://github.com/rastanton/GAMMA). +The tool screens input sequences against a protein database to identify gene +variants, mutations, and allele types, providing detailed annotation and classification. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembly files in FASTA format for gene allele identification | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | Protein database file for sequence comparison (required) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `gamma` | Main GAMMA output file containing annotated gene matches | +| `psl` | Raw alignment details in PSL format | +| `gff` | Gene matches in GFF3 format | +| `fasta` | Extracted nucleotide sequences of the matched genes | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [gamma](/developers/modules/gamma) - Identification, classification, and annotation of translated gene matches. + +## Used By + +This subworkflow is used by the following workflows: + +- [gamma](/bactopia-tools/gamma) - Identification, classification, and annotation of translated gene matches. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [GAMMA](https://github.com/rastanton/GAMMA) + Stanton RA, Vlachos N, Halpin AL [GAMMA: a tool for the rapid identification, classification, and annotation of translated gene matches from sequencing data.](https://doi.org/10.1093/bioinformatics/btab607) _Bioinformatics_ (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/gamma) diff --git a/developers/subworkflows/genotyphi.mdx b/developers/subworkflows/genotyphi.mdx new file mode 100644 index 00000000..5968630a --- /dev/null +++ b/developers/subworkflows/genotyphi.mdx @@ -0,0 +1,88 @@ +--- +title: genotyphi +description: "Assign genotypes to Salmonella Typhi genomes." 
+tags: + - salmonella + - typhi + - genotype + - antimicrobial-resistance + - sample-scope +--- + +# genotyphi + +**Tags:** salmonella typhi genotype antimicrobial-resistance sample-scope + +Assign genotypes to Salmonella Typhi genomes. + +This subworkflow assigns genotypes to Salmonella Typhi genomes based on Mykrobe +results using [GenoTyphi](https://github.com/katholt/genotyphi). The workflow first +runs Mykrobe for antimicrobial resistance prediction on sequencing reads, then +processes the results with GenoTyphi to assign specific genotypes based on +the presence of known genetic markers. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? + +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited report containing the assigned GenoTyphi genotype | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [mykrobe_predict](/developers/modules/mykrobe_predict) - Predict Antimicrobial Resistance (AMR) for supported bacterial species. +- [genotyphi_parse](/developers/modules/genotyphi_parse) - Parse Mykrobe results to genotype *Salmonella* Typhi. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. 
+ +## Used By + +This subworkflow is used by the following workflows: + +- [genotyphi](/bactopia-tools/genotyphi) - Salmonella Typhi genotyping with lineage assignment. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [GenoTyphi](https://github.com/katholt/genotyphi) + Wong VK, Baker S, Connor TR, Pickard D, Page AJ, Dave J, Murphy N, Holliman R, Sefton A, Millar M, Dyson ZA, Dougan G, Holt KE, & International Typhoid Consortium. [An extended genotyping framework for Salmonella enterica serovar Typhi, the cause of human typhoid](https://doi.org/10.1038/ncomms12827) _Nature Communications_ 7, 12827. (2016) + +- [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) + Hunt M, Bradley P, Lapierre SG, Heys S, Thomsit M, Hall MB, Malone KM, Wintringer P, Walker TM, Cirillo DM, Comas I, Farhat MR, Fowler P, Gardy J, Ismail N, Kohl TA, Mathys V, Merker M, Niemann S, Omar SV, Sintchenko V, Smith G, Supply P, Tahseen S, Wilcox M, Arandjelovic I, Peto TEA, Crook DW, Iqbal Z [Antibiotic resistance prediction for Mycobacterium tuberculosis from genome sequence data with Mykrobe](https://doi.org/10.12688/wellcomeopenres.15603.1) _Wellcome Open Research_ 4, 191. 
(2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/genotyphi) diff --git a/developers/subworkflows/gigatyper.mdx b/developers/subworkflows/gigatyper.mdx new file mode 100644 index 00000000..8f9dda5b --- /dev/null +++ b/developers/subworkflows/gigatyper.mdx @@ -0,0 +1,75 @@ +--- +title: gigatyper +description: "Run all available MLST schemes for a species against an assembly" +tags: + - mlst + - typing + - multi-scheme + - sample-scope +--- + +# gigatyper + +**Tags:** mlst typing multi-scheme sample-scope + +Run all available MLST schemes for a species against an assembly + +This subworkflow uses [GigaTyper](https://github.com/rpetit3/gigatyper) to run all available MLST schemes for a species against an assembly. +It processes each sample individually and aggregates the results into +a single consolidated report. + +## Take + +``` +fna: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `fna` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | MLST results across all schemes | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | A merged TSV file with gigatyper results from all samples | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. 
+- [gigatyper](/developers/modules/gigatyper) - Run all available MLST schemes for a species against an assembly + +## Used By + +This subworkflow is used by the following workflows: + +- [gigatyper](/bactopia-tools/gigatyper) - Run all available MLST schemes for a species against an assembly + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [GigaTyper](https://github.com/rpetit3/gigatyper) + Petit III RA, Fearing T, Groves E [GigaTyper: Why choose one scheme when you can flex them all?](https://github.com/rpetit3/gigatyper) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/gigatyper) diff --git a/developers/subworkflows/gtdb.mdx b/developers/subworkflows/gtdb.mdx new file mode 100644 index 00000000..021d11c3 --- /dev/null +++ b/developers/subworkflows/gtdb.mdx @@ -0,0 +1,94 @@ +--- +title: gtdb +description: "Taxonomic classification with the Genome Taxonomy Database." +tags: + - taxonomy + - classification + - gtdb + - phylogeny + - marker-genes + - sample-scope +--- + +# gtdb + +**Tags:** taxonomy classification gtdb phylogeny marker-genes sample-scope + +Taxonomic classification with the Genome Taxonomy Database. + +This subworkflow assigns objective taxonomic classifications to bacterial and +archaeal genomes using [GTDB-Tk](https://github.com/Ecogenomics/GTDBTk), which +is based on the Genome Taxonomy Database (GTDB). The workflow can optionally +download the GTDB database and supports both unpacked and tarball database formats. +It provides taxonomic placement and phylogenetic marker gene identification. 
+ +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembly files in FASTA format for taxonomic classification | + +``` +database: Path +download_gtdb: Boolean +save_as_tarball: Boolean +``` + +| Name | Type | Description | +|------|------|-------------| +| `database` | `Path` | Path to GTDB reference database, or path to download to if `download_gtdb` is true | +| `download_gtdb` | `Boolean` | Boolean flag to trigger GTDB database download if not provided | +| `save_as_tarball` | `Boolean` | Boolean flag to use tarball format database when downloading | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `bac_tsv` | The bacterial classification summary file containing the taxonomic assignment | +| `ar_tsv` | The archaeal classification summary file containing the taxonomic assignment | +| `supplemental` | Directory containing the reference tree, alignments, and detailed logs | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [gtdbtk_classifywf](/developers/modules/gtdbtk_classifywf) - Taxonomic classification of bacterial and archaeal genomes using GTDB-Tk. +- [gtdbtk_download](/developers/modules/gtdbtk_download) - Download and configure the GTDB-Tk reference database. + +## Used By + +This subworkflow is used by the following workflows: + +- [gtdb](/bactopia-tools/gtdb) - Identify marker genes and assign taxonomic classifications using GTDB. 
+ +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [GTDB-Tk](https://github.com/Ecogenomics/GTDBTk) + Chaumeil PA, Mussig AJ, Hugenholtz P, Parks DH [GTDB-Tk: a toolkit to classify genomes with the Genome Taxonomy Database.](https://doi.org/10.1093/bioinformatics/btz848) _Bioinformatics_ (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/gtdb) diff --git a/developers/subworkflows/gubbins.mdx b/developers/subworkflows/gubbins.mdx new file mode 100644 index 00000000..76cabc54 --- /dev/null +++ b/developers/subworkflows/gubbins.mdx @@ -0,0 +1,107 @@ +--- +title: gubbins +description: "Detect and filter recombination regions in bacterial alignments." +tags: + - recombination + - phylogeny + - filter + - snp + - core-genome + - run-scope +--- + +# gubbins + +**Tags:** recombination phylogeny filter snp core-genome run-scope + +Detect and filter recombination regions in bacterial alignments. + +This subworkflow uses [Gubbins](https://github.com/nickjcroucher/gubbins) (Genealogies +Unbiased By recomBinations In Nucleotide Sequences) to identify recombination regions in +bacterial core-genome alignments. It iteratively filters out recombination to +produce a recombination-free phylogeny, then calculates SNP distances from the +masked alignment. Gubbins is essential for accurate phylogenetic reconstruction +of recombining bacterial species. 
+ +## Take + +``` +alignment: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `aln` | Multiple sequence alignment in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `masked_aln` | Recombination-masked alignment in FASTA format | +| `fasta` | Concatenated alignment before masking in FASTA format | +| `gff` | GFF file containing recombination region coordinates | +| `vcf` | VCF file containing SNPs filtered by Gubbins | +| `stats` | Summary statistics of the Gubbins analysis | +| `phylip` | Recombination-masked alignment in PHYLIP format | +| `embl_predicted` | Recombination predictions in EMBL format | +| `embl_branch` | Branch-specific recombination in EMBL format | +| `tree` | Maximum likelihood tree from filtered SNPs in Newick format | +| `tree_labelled` | Annotated tree with node labels in Newick format | +| `bootstrap_tree` | Bootstrapped phylogenetic tree in Newick format | +| `tsv` | Pairwise SNP distances from masked alignment in TSV format | + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. + +#### `alignment` + +| Output | Description | +|--------|-------------| +| `aln` | Recombination-masked alignment for downstream phylogenetic analysis | + +## Subworkflow Composition + +This subworkflow calls the following subworkflows: + +- [snpdists](/developers/subworkflows/snpdists) - Calculate pairwise SNP distances from sequence alignments. + +## Module Composition + +This subworkflow calls the following modules: + +- [gubbins](/developers/modules/gubbins) - Detect recombination and construct a recombination-free phylogeny. 
+ +## Used By + +This subworkflow is used by the following workflows: + +- [snippy](/bactopia-tools/snippy) - Rapid haplotype variant calling and core genome alignment. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Gubbins](https://github.com/nickjcroucher/gubbins) + Croucher NJ, Page AJ, Connor TR, Delaney AJ, Keane JA, Bentley SD, Parkhill J, Harris SR [Rapid phylogenetic analysis of large samples of recombinant bacterial whole genome sequences using Gubbins.](https://doi.org/10.1093/nar/gku1196) _Nucleic Acids Research_ 43(3), e15. (2015) + +- [snp-dists](https://github.com/tseemann/snp-dists) + Seemann T [snp-dists - Pairwise SNP distance matrix from a FASTA sequence alignment.](https://github.com/tseemann/snp-dists) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/gubbins) diff --git a/developers/subworkflows/hicap.mdx b/developers/subworkflows/hicap.mdx new file mode 100644 index 00000000..5063eb48 --- /dev/null +++ b/developers/subworkflows/hicap.mdx @@ -0,0 +1,90 @@ +--- +title: hicap +description: "In silico serotyping of the Haemophilus influenzae capsule locus." +tags: + - haemophilus + - influenzae + - serotype + - capsule + - cap-locus + - sample-scope +--- + +# hicap + +**Tags:** haemophilus influenzae serotype capsule cap-locus sample-scope + +In silico serotyping of the Haemophilus influenzae capsule locus. + +This subworkflow performs serotyping of Haemophilus influenzae by analyzing +the capsule locus using [hicap](https://github.com/scwatts/hicap). The tool +identifies capsule type and produces detailed annotations of the cap locus, +including graphical representation in SVG format and annotation in GenBank format. 
+ +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembly files in FASTA format for H. influenzae serotype prediction | + +``` +database_dir: Path? +model_fp: Path? +``` + +| Name | Type | Description | +|------|------|-------------| +| `database_dir` | `Path?` | Directory containing hicap reference database files (optional) | +| `model_fp` | `Path?` | Path to HMM model file for improved detection (optional) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `gbff` | GenBank file containing the annotated capsule locus region (optional) | +| `svg` | SVG visualization of the capsule locus gene arrangement (optional) | +| `tsv` | Tab-delimited summary of the predicted serotype and locus coverage | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [hicap](/developers/modules/hicap) - Predict *Haemophilus influenzae* capsule serotype. + +## Used By + +This subworkflow is used by the following workflows: + +- [hicap](/bactopia-tools/hicap) - Identify cap locus serotype and structure in Haemophilus influenzae assemblies. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [hicap](https://github.com/scwatts/hicap) + Watts SC, Holt KE [hicap: in silico serotyping of the Haemophilus influenzae capsule locus.](https://doi.org/10.1128/JCM.00190-19) _Journal of Clinical Microbiology_ JCM.00190-19 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/hicap) diff --git a/developers/subworkflows/hpsuissero.mdx b/developers/subworkflows/hpsuissero.mdx new file mode 100644 index 00000000..084da701 --- /dev/null +++ b/developers/subworkflows/hpsuissero.mdx @@ -0,0 +1,78 @@ +--- +title: hpsuissero +description: "Rapid Haemophilus parasuis serotyping." +tags: + - haemophilus + - parasuis + - serotype + - epidemiology + - sample-scope +--- + +# hpsuissero + +**Tags:** haemophilus parasuis serotype epidemiology sample-scope + +Rapid Haemophilus parasuis serotyping. + +This subworkflow performs serotyping of Haemophilus parasuis using +[HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero), which identifies +serotype-specific markers in genome assemblies. The tool provides rapid +classification of H. parasuis isolates into their respective serotypes, +which is important for epidemiological surveillance and vaccine development. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembly files in FASTA format for H. parasuis serotype prediction | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited HpsuisSero serotype prediction results | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [hpsuissero](/developers/modules/hpsuissero) - Predict *Haemophilus parasuis* serotype. + +## Used By + +This subworkflow is used by the following workflows: + +- [hpsuissero](/bactopia-tools/hpsuissero) - Serotype prediction of Haemophilus parasuis assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero) + Liu J [HpsuisSero: Rapid _Haemophilus parasuis_ serotyping](https://github.com/jimmyliu1326/HpsuisSero) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/hpsuissero) diff --git a/developers/subworkflows/index.mdx b/developers/subworkflows/index.mdx new file mode 100644 index 00000000..b9292d6a --- /dev/null +++ b/developers/subworkflows/index.mdx @@ -0,0 +1,99 @@ +--- +title: Subworkflows +description: All available Bactopia subworkflows +slug: /subworkflows +--- + +# Subworkflows + +Bactopia includes 87 subworkflows that orchestrate modules into reusable analysis units. You can also [browse by tag](/developers/tags). + +| Subworkflow | Description | +|-------------|-------------| +| [abricate](/developers/subworkflows/abricate) | Mass screening of contigs for antimicrobial and virulence genes. 
| +| [abritamr](/developers/subworkflows/abritamr) | Identify antimicrobial resistance genes using AMRFinderPlus. | +| [agrvate](/developers/subworkflows/agrvate) | Identify Staphylococcus aureus agr locus type and operon variants. | +| [amrfinderplus](/developers/subworkflows/amrfinderplus) | Find antimicrobial resistance genes and point mutations. | +| [ariba](/developers/subworkflows/ariba) | Rapidly identify genes by creating local assemblies from paired-end reads. | +| [bactopia_assembler](/developers/subworkflows/bactopia_assembler) | Assemble bacterial genomes using automated assembler selection. | +| [bactopia_datasets](/developers/subworkflows/bactopia_datasets) | Download and provide pre-compiled datasets required by Bactopia. | +| [bactopia_gather](/developers/subworkflows/bactopia_gather) | Search, validate, gather, and standardize input samples. | +| [bactopia_qc](/developers/subworkflows/bactopia_qc) | Perform comprehensive quality control on sequencing reads. | +| [bactopia_sketcher](/developers/subworkflows/bactopia_sketcher) | Create genomic sketches and perform rapid taxonomic classification. | +| [bakta](/developers/subworkflows/bakta) | Rapid bacterial genome annotation. | +| [blastn](/developers/subworkflows/blastn) | Search a nucleotide database using nucleotide query sequences. | +| [blastp](/developers/subworkflows/blastp) | Search protein sequences against protein database. | +| [blastx](/developers/subworkflows/blastx) | Translate nucleotide sequences and search protein database. | +| [bracken](/developers/subworkflows/bracken) | Estimate species abundance from metagenomic reads. | +| [btyper3](/developers/subworkflows/btyper3) | In silico taxonomic classification of Bacillus cereus group genomes. | +| [busco](/developers/subworkflows/busco) | Assess genome assembly completeness using BUSCO. | +| [checkm](/developers/subworkflows/checkm) | Assess metagenome bin completeness using CheckM. 
| +| [checkm2](/developers/subworkflows/checkm2) | Assess metagenome bin completeness using CheckM2. | +| [clermontyping](/developers/subworkflows/clermontyping) | Predict phylogroups of Escherichia coli from genome assemblies. | +| [clonalframeml](/developers/subworkflows/clonalframeml) | Detect and mask recombination events in bacterial phylogenies. | +| [defensefinder](/developers/subworkflows/defensefinder) | Systematically search for anti-phage defense systems. | +| [ectyper](/developers/subworkflows/ectyper) | In silico prediction of Escherichia coli serotype. | +| [eggnog](/developers/subworkflows/eggnog) | Functional annotation through orthology assignment. | +| [emmtyper](/developers/subworkflows/emmtyper) | Predict emm types of Streptococcus pyogenes from genome assemblies. | +| [fastani](/developers/subworkflows/fastani) | Calculate Average Nucleotide Identity (ANI) between genomes. | +| [gamma](/developers/subworkflows/gamma) | Gene Allele Mutation Microbial Assessment. | +| [genotyphi](/developers/subworkflows/genotyphi) | Assign genotypes to Salmonella Typhi genomes. | +| [gigatyper](/developers/subworkflows/gigatyper) | Run all available MLST schemes for a species against an assembly | +| [gtdb](/developers/subworkflows/gtdb) | Taxonomic classification with the Genome Taxonomy Database. | +| [gubbins](/developers/subworkflows/gubbins) | Detect and filter recombination regions in bacterial alignments. | +| [hicap](/developers/subworkflows/hicap) | In silico serotyping of the Haemophilus influenzae capsule locus. | +| [hpsuissero](/developers/subworkflows/hpsuissero) | Rapid Haemophilus parasuis serotyping. | +| [iqtree](/developers/subworkflows/iqtree) | Construct maximum likelihood phylogenetic trees from alignments. | +| [ismapper](/developers/subworkflows/ismapper) | Identify transposase insertion sites in bacterial genomes. 
| +| [kleborate](/developers/subworkflows/kleborate) | Genotyping tool for Klebsiella pneumoniae and its related species complex. | +| [kraken2](/developers/subworkflows/kraken2) | Classify metagenomic reads using Kraken2. | +| [legsta](/developers/subworkflows/legsta) | In silico Legionella pneumophila Sequence Based Typing. | +| [lissero](/developers/subworkflows/lissero) | In silico serotype prediction for Listeria monocytogenes. | +| [mashdist](/developers/subworkflows/mashdist) | Calculate Mash distances between sequences and a reference. | +| [mashtree](/developers/subworkflows/mashtree) | Create phylogenetic trees using Mash distances. | +| [mcroni](/developers/subworkflows/mcroni) | Scripts for finding and processing promoter variants upstream of mcr-1. | +| [meningotype](/developers/subworkflows/meningotype) | Predict serotypes of Neisseria meningitidis from genome assemblies. | +| [merlin](/developers/subworkflows/merlin) | MinER assisted species-specific bactopia tool seLectIoN. | +| [merlindist](/developers/subworkflows/merlindist) | Identify species from assembly and read data using Mash distances. | +| [midas](/developers/subworkflows/midas) | Species-level profiling from metagenomic data. | +| [mlst](/developers/subworkflows/mlst) | Determine multilocus sequence types (MLST) from bacterial assemblies. | +| [mobsuite](/developers/subworkflows/mobsuite) | Reconstruct and type plasmids from bacterial genome assemblies. | +| [mykrobe](/developers/subworkflows/mykrobe) | Predict antibiotic resistance from sequence reads. | +| [ncbigenomedownload](/developers/subworkflows/ncbigenomedownload) | Download bacterial genomes from NCBI's RefSeq database. | +| [ngmaster](/developers/subworkflows/ngmaster) | Perform multi-antigen sequence typing of Neisseria gonorrhoeae from genome assemblies. | +| [nohuman](/developers/subworkflows/nohuman) | Remove human reads from sequencing data using nohuman. 
| +| [panaroo](/developers/subworkflows/panaroo) | Build a pangenome from GFF3 annotations using Panaroo. | +| [pangenome](/developers/subworkflows/pangenome) | Perform pangenome analysis with optional core-genome phylogeny. | +| [pasty](/developers/subworkflows/pasty) | Predict serogroups of Pseudomonas aeruginosa from assemblies. | +| [pbptyper](/developers/subworkflows/pbptyper) | Predict penicillin binding protein (PBP) types of Streptococcus pneumoniae from genome assemblies. | +| [phispy](/developers/subworkflows/phispy) | Prediction of prophages from bacterial genomes. | +| [pirate](/developers/subworkflows/pirate) | Build a pangenome from GFF3 annotations using PIRATE. | +| [plasmidfinder](/developers/subworkflows/plasmidfinder) | Identify plasmid replicons in bacterial genome assemblies. | +| [pneumocat](/developers/subworkflows/pneumocat) | Perform capsular typing of Streptococcus pneumoniae from NGS data. | +| [prokka](/developers/subworkflows/prokka) | Annotate bacterial genomes with functional information. | +| [quast](/developers/subworkflows/quast) | Evaluate assembly quality using QUAST. | +| [rgi](/developers/subworkflows/rgi) | Predict antimicrobial resistance from protein or nucleotide data. | +| [roary](/developers/subworkflows/roary) | Build a pangenome from GFF3 annotations using Roary. | +| [sccmec](/developers/subworkflows/sccmec) | Identify SCCmec elements in Staphylococcus aureus genomes. | +| [scoary](/developers/subworkflows/scoary) | Pan-genome wide association studies. | +| [scrubber](/developers/subworkflows/scrubber) | Remove contaminant sequences from metagenomic data. | +| [seqsero2](/developers/subworkflows/seqsero2) | Predict Salmonella serotypes from genome assemblies. | +| [seroba](/developers/subworkflows/seroba) | k-mer based pipeline to identify the serotype of Streptococcus pneumoniae. | +| [shigapass](/developers/subworkflows/shigapass) | Predict serotypes of Shigella from assemblies. 
| +| [shigatyper](/developers/subworkflows/shigatyper) | Predict serotypes of Shigella from reads or assemblies. | +| [shigeifinder](/developers/subworkflows/shigeifinder) | Predict serotypes of Shigella and EIEC from assemblies. | +| [sistr](/developers/subworkflows/sistr) | Salmonella In Silico Typing Resource command-line tool. | +| [snippy_core](/developers/subworkflows/snippy_core) | Generate core-genome SNP alignment from per-sample Snippy outputs. | +| [snippy_run](/developers/subworkflows/snippy_run) | Call variants against a reference genome using Snippy. | +| [snpdists](/developers/subworkflows/snpdists) | Calculate pairwise SNP distances from sequence alignments. | +| [spatyper](/developers/subworkflows/spatyper) | Predict spa types of Staphylococcus aureus from genome assemblies. | +| [srahumanscrubber](/developers/subworkflows/srahumanscrubber) | Remove human contamination from sequencing reads for SRA submission. | +| [ssuissero](/developers/subworkflows/ssuissero) | Predict serotypes of Streptococcus suis from genome assemblies. | +| [staphopiasccmec](/developers/subworkflows/staphopiasccmec) | Identify SCCmec elements in Staphylococcus aureus genomes using Staphopia method. | +| [staphtyper](/developers/subworkflows/staphtyper) | Determine the agr, spa and SCCmec types for _Staphylococcus aureus_ genomes. | +| [stecfinder](/developers/subworkflows/stecfinder) | Identify and serotype Shiga toxin-producing E. coli (STEC) from assemblies. | +| [sylph](/developers/subworkflows/sylph) | Profile microbial composition using Sylph. | +| [tblastn](/developers/subworkflows/tblastn) | Search protein query sequences against nucleotide database. | +| [tblastx](/developers/subworkflows/tblastx) | Translate nucleotide query sequences and search nucleotide database. | +| [tbprofiler](/developers/subworkflows/tbprofiler) | Profiling tool for Mycobacterium tuberculosis to detect resistance and strain type. 
| +| [teton](/developers/subworkflows/teton) | Perform taxonomic classification and estimate bacterial genome sizes. | diff --git a/developers/subworkflows/iqtree.mdx b/developers/subworkflows/iqtree.mdx new file mode 100644 index 00000000..091d356b --- /dev/null +++ b/developers/subworkflows/iqtree.mdx @@ -0,0 +1,80 @@ +--- +title: iqtree +description: "Construct maximum likelihood phylogenetic trees from alignments." +tags: + - phylogeny + - maximum-likelihood + - tree + - bootstrap + - model-selection + - run-scope +--- + +# iqtree + +**Tags:** phylogeny maximum-likelihood tree bootstrap model-selection run-scope + +Construct maximum likelihood phylogenetic trees from alignments. + +This subworkflow uses [IQ-TREE](https://github.com/Cibiv/IQ-TREE) to build +maximum likelihood phylogenetic trees from multiple sequence alignments. IQ-TREE +implements fast and effective stochastic algorithms for phylogenetic inference, +including automatic model selection via ModelFinder. It produces phylogenetic trees +with bootstrap support and various supplementary files for tree visualization +and analysis. + +## Take + +``` +alignment: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `aln` | Multiple sequence alignment in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `aln` | Input multiple sequence alignment (passed through) | +| `nwk` | Maximum-likelihood phylogenetic tree in Newick format | +| `supplemental` | Detailed report, distance matrix, and model parameters | + +## Module Composition + +This subworkflow calls the following modules: + +- [iqtree](/developers/modules/iqtree) - Efficient phylogenomic inference using Maximum Likelihood. 
+ +## Used By + +This subworkflow is used by the following workflows: + +- [pangenome](/bactopia-tools/pangenome) - Pangenome analysis with optional core-genome phylogeny. +- [snippy](/bactopia-tools/snippy) - Rapid haplotype variant calling and core genome alignment. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [IQ-TREE](https://github.com/Cibiv/IQ-TREE) + Nguyen L-T, Schmidt HA, von Haeseler A, Minh BQ [IQ-TREE: A fast and effective stochastic algorithm for estimating maximum likelihood phylogenies.](https://doi.org/10.1093/molbev/msu300) _Mol. Biol. Evol._ 32:268-274 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/iqtree) diff --git a/developers/subworkflows/ismapper.mdx b/developers/subworkflows/ismapper.mdx new file mode 100644 index 00000000..6f52a65b --- /dev/null +++ b/developers/subworkflows/ismapper.mdx @@ -0,0 +1,90 @@ +--- +title: ismapper +description: "Identify transposase insertion sites in bacterial genomes." +tags: + - insertion + - sequence + - transposase + - mobile-genetic-elements + - sample-scope +--- + +# ismapper + +**Tags:** insertion sequence transposase mobile-genetic-elements sample-scope + +Identify transposase insertion sites in bacterial genomes. + +This subworkflow maps insertion sequence (IS) positions in bacterial genomes +using [ISMapper](https://github.com/jhawkey/IS_mapper). The tool identifies +transposase insertion sites from short read sequence data by mapping reads +to reference sequences and detecting insertion sites with high precision. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? 
+ +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads (not supported by ISMapper) | +| `lr` | Long reads (not supported by ISMapper) | + +``` +reference: Path +insertions: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `reference` | `Path` | Reference genome in FASTA format for mapping | +| `insertions` | `Path` | Insertion sequence reference file containing IS elements to map | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `supplemental` | Directory containing the final tables of insertion sites and visual summaries | + +#### `run_outputs` + +No run-scope outputs. + +## Module Composition + +This subworkflow calls the following modules: + +- [ismapper](/developers/modules/ismapper) - Identify insertion sites and orientation of mobile genetic elements. + +## Used By + +This subworkflow is used by the following workflows: + +- [ismapper](/bactopia-tools/ismapper) - Identify insertion sequence positions in bacterial genomes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ISMapper](https://github.com/jhawkey/IS_mapper) + Hawkey J, Hamidian M, Wick RR, Edwards DJ, Billman-Jacobe H, Hall RM, Holt KE [ISMapper: identifying transposase insertion sites in bacterial genomes from short read sequence data](http://dx.doi.org/10.1186/s12864-015-1860-2). 
_BMC Genomics_ 16, 667 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/ismapper) diff --git a/developers/subworkflows/kleborate.mdx b/developers/subworkflows/kleborate.mdx new file mode 100644 index 00000000..085f6315 --- /dev/null +++ b/developers/subworkflows/kleborate.mdx @@ -0,0 +1,79 @@ +--- +title: kleborate +description: "Genotyping tool for Klebsiella pneumoniae and its related species complex." +tags: + - klebsiella + - pneumoniae + - genotyping + - virulence + - capsule + - sample-scope +--- + +# kleborate + +**Tags:** klebsiella pneumoniae genotyping virulence capsule sample-scope + +Genotyping tool for Klebsiella pneumoniae and its related species complex. + +This subworkflow performs comprehensive genotyping of Klebsiella pneumoniae +and related species using [Kleborate](https://github.com/katholt/Kleborate). The tool +identifies capsular (K) and O-antigen (O) loci, virulence factors, and acquired +antimicrobial resistance genes, providing a detailed genotype for surveillance +and epidemiological studies. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembly files in FASTA format for Klebsiella genotyping | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited Kleborate results with species, MLST, virulence, and resistance predictions | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. 
+- [kleborate](/developers/modules/kleborate) - Genotyping and screening of *Klebsiella* genome assemblies. + +## Used By + +This subworkflow is used by the following workflows: + +- [kleborate](/bactopia-tools/kleborate) - Comprehensive screening of Klebsiella genomes for virulence and resistance determinants. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kleborate](https://github.com/katholt/Kleborate) + Lam MMC, Wick RR, Watts SC, Cerdeira LT, Wyres KL, Holt KE [A genomic surveillance framework and genotyping tool for Klebsiella pneumoniae and its related species complex.](https://doi.org/10.1038/s41467-021-24448-3) _Nat Commun_ 12, 4188 (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/kleborate) diff --git a/developers/subworkflows/kraken2.mdx b/developers/subworkflows/kraken2.mdx new file mode 100644 index 00000000..f451b074 --- /dev/null +++ b/developers/subworkflows/kraken2.mdx @@ -0,0 +1,90 @@ +--- +title: kraken2 +description: "Classify metagenomic reads using Kraken2." +tags: + - metagenomics + - taxonomic-classification + - kraken2 + - k-mer + - sample-scope +--- + +# kraken2 + +**Tags:** metagenomics taxonomic-classification kraken2 k-mer sample-scope + +Classify metagenomic reads using Kraken2. + +This subworkflow performs taxonomic classification of metagenomic reads using [Kraken2](https://github.com/DerrickWood/kraken2), +a fast taxonomic classification system. It assigns taxonomic labels to sequencing reads based on k-mer matching against a reference database. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? 
+ +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +``` +database: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `database` | `Path` | Path to the Kraken2 database for taxonomic classification. | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `kraken2_report` | Standard Kraken2 report containing taxonomic abundance counts | +| `scrub_report` | Summary report of reads removed during host scrubbing (optional) | +| `special_meta` | A simplified metadata record for internal use | +| `classified` | Reads assigned to a taxon in the database (FASTQ) | +| `unclassified` | Reads NOT assigned to any taxon (FASTQ) | + +#### `run_outputs` + +No run-scope outputs. + +## Module Composition + +This subworkflow calls the following modules: + +- [kraken2](/developers/modules/kraken2) - Taxonomic classification and host filtering of sequence reads. + +## Used By + +This subworkflow is used by the following workflows: + +- [kraken2](/bactopia-tools/kraken2) - Taxonomic classification of metagenomic sequence reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. 
(2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/kraken2) diff --git a/developers/subworkflows/legsta.mdx b/developers/subworkflows/legsta.mdx new file mode 100644 index 00000000..b3436c77 --- /dev/null +++ b/developers/subworkflows/legsta.mdx @@ -0,0 +1,79 @@ +--- +title: legsta +description: "In silico Legionella pneumophila Sequence Based Typing." +tags: + - legionella + - pneumophila + - sequence-typing + - st + - epidemiology + - sample-scope +--- + +# legsta + +**Tags:** legionella pneumophila sequence-typing st epidemiology sample-scope + +In silico Legionella pneumophila Sequence Based Typing. + +This subworkflow performs sequence-based typing of Legionella pneumophila +using [legsta](https://github.com/tseemann/legsta), which identifies the +Sequence Type (ST) based on the seven-locus scheme. The tool analyzes +allele profiles and provides epidemiological typing data for outbreak +investigation and population studies. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembly files in FASTA format for L. pneumophila sequence typing | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited Legionella pneumophila SBT results with allele numbers and sequence type | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [legsta](/developers/modules/legsta) - In silico Sequence Based Typing (SBT) of *Legionella pneumophila*. 
+ +## Used By + +This subworkflow is used by the following workflows: + +- [legsta](/bactopia-tools/legsta) - Sequence Based Typing (SBT) of Legionella pneumophila. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [legsta](https://github.com/tseemann/legsta) + Seemann T [legsta: In silico Legionella pneumophila Sequence Based Typing](https://github.com/tseemann/legsta) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/legsta) diff --git a/developers/subworkflows/lissero.mdx b/developers/subworkflows/lissero.mdx new file mode 100644 index 00000000..6ca832ab --- /dev/null +++ b/developers/subworkflows/lissero.mdx @@ -0,0 +1,78 @@ +--- +title: lissero +description: "In silico serotype prediction for Listeria monocytogenes." +tags: + - listeria + - monocytogenes + - serotype + - outbreak + - sample-scope +--- + +# lissero + +**Tags:** listeria monocytogenes serotype outbreak sample-scope + +In silico serotype prediction for Listeria monocytogenes. + +This subworkflow performs serotype prediction for Listeria monocytogenes +using [LisSero](https://github.com/MDU-PHL/LisSero), which identifies specific +serotype markers in genome assemblies. The tool provides rapid classification +into the major L. monocytogenes serotypes, which is important for outbreak +investigation and tracking. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembly files in FASTA format for L. 
monocytogenes serotype prediction | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited LisSero results with predicted serogroup and marker gene detection | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [lissero](/developers/modules/lissero) - Predict *Listeria monocytogenes* serogroup. + +## Used By + +This subworkflow is used by the following workflows: + +- [lissero](/bactopia-tools/lissero) - Serogroup typing prediction for Listeria monocytogenes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [LisSero](https://github.com/MDU-PHL/LisSero) + Kwong J, Zhang J, Seemann T, Horan K, Gonçalves da Silva A [LisSero - _In silico_ serotype prediction for _Listeria monocytogenes_](https://github.com/MDU-PHL/LisSero) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/lissero) diff --git a/developers/subworkflows/mashdist.mdx b/developers/subworkflows/mashdist.mdx new file mode 100644 index 00000000..b9a80fab --- /dev/null +++ b/developers/subworkflows/mashdist.mdx @@ -0,0 +1,86 @@ +--- +title: mashdist +description: "Calculate Mash distances between sequences and a reference." 
+tags: + - mash + - distance + - minhash + - comparison + - reference + - sample-scope +--- + +# mashdist + +**Tags:** mash distance minhash comparison reference sample-scope + +Calculate Mash distances between sequences and a reference. + +This subworkflow uses [Mash](https://github.com/marbl/Mash) to calculate MinHash-based +distances between query sequences and a reference sequence. It creates Mash sketches +of the input sequences and computes distance values, then aggregates all distance +calculations into a single consolidated report. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `fna` | Sequences in FASTA format to compare against reference | + +``` +reference: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `reference` | `Path` | Reference sequence in FASTA format for distance calculations | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `dist` | A tab-delimited summary of the Mash distances and p-values | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Merged Mash distance results from all samples | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [mash_dist](/developers/modules/mash_dist) - Calculate genomic distances using MinHash sketches. + +## Used By + +This subworkflow is used by the following workflows: + +- [mashdist](/bactopia-tools/mashdist) - Calculate Mash distances between sequences and reference genomes. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mash](https://github.com/marbl/Mash) + Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). _Genome Biol_ 17, 132 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/mashdist) diff --git a/developers/subworkflows/mashtree.mdx b/developers/subworkflows/mashtree.mdx new file mode 100644 index 00000000..15fb5bbe --- /dev/null +++ b/developers/subworkflows/mashtree.mdx @@ -0,0 +1,77 @@ +--- +title: mashtree +description: "Create phylogenetic trees using Mash distances." +tags: + - phylogeny + - tree + - mash + - distance + - comparison + - run-scope +--- + +# mashtree + +**Tags:** phylogeny tree mash distance comparison run-scope + +Create phylogenetic trees using Mash distances. + +This subworkflow uses [Mashtree](https://github.com/lskatz/mashtree) to rapidly compare +whole genome sequence files and generate phylogenetic trees. It creates Mash sketches +of input genomes, calculates pairwise distances, and constructs a tree based on +the distance matrix. + +## Take + +``` +assemblies: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `fna` | Pre-gathered assembled contigs in FASTA format (multiple genomes) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. 
+ +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `nwk` | Phylogenetic tree in Newick format | +| `tsv` | Pairwise distance matrix | +| `sketches` | Individual Mash sketch files | + +## Module Composition + +This subworkflow calls the following modules: + +- [mashtree](/developers/modules/mashtree) - Rapid alignment-free phylogenomic tree construction. + +## Used By + +This subworkflow is used by the following workflows: + +- [mashtree](/bactopia-tools/mashtree) - Rapid phylogenetic tree construction using Mash distances. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mashtree](https://github.com/lskatz/mashtree) + Katz LS, Griswold T, Morrison S, Caravas J, Zhang S, den Bakker HC, Deng X, Carleton HA [Mashtree: a rapid comparison of whole genome sequence files.](https://doi.org/10.21105/joss.01762) _Journal of Open Source Software_, 4(44), 1762 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/mashtree) diff --git a/developers/subworkflows/mcroni.mdx b/developers/subworkflows/mcroni.mdx new file mode 100644 index 00000000..3d58900e --- /dev/null +++ b/developers/subworkflows/mcroni.mdx @@ -0,0 +1,80 @@ +--- +title: mcroni +description: "Scripts for finding and processing promoter variants upstream of mcr-1." +tags: + - mcr-1 + - colistin + - resistance + - promoter + - variant + - sample-scope +--- + +# mcroni + +**Tags:** mcr-1 colistin resistance promoter variant sample-scope + +Scripts for finding and processing promoter variants upstream of mcr-1. + +This subworkflow identifies and characterizes promoter variants upstream of +the mcr-1 colistin resistance gene using [mcroni](https://github.com/liampshaw/mcroni). 
+The tool searches for mutations in the promoter region that may affect expression +levels of mcr-1, which is important for understanding the regulation of +plasmid-mediated colistin resistance. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembly files in FASTA format for mcr-1 promoter analysis | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited mcr-1 gene variation results | +| `fa` | Extracted mcr-1 gene sequence in FASTA format (optional) | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [mcroni](/developers/modules/mcroni) - Detect sequence variations in the *mcr-1* colistin resistance gene. + +## Used By + +This subworkflow is used by the following workflows: + +- [mcroni](/bactopia-tools/mcroni) - Sequence variation analysis of mcr-1 genes (mobilized colistin resistance). + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [mcroni](https://github.com/liampshaw/mcroni) + Shaw L [mcroni: Scripts for finding and processing promoter variants upstream of mcr-1](https://github.com/liampshaw/mcroni) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/mcroni) diff --git a/developers/subworkflows/meningotype.mdx b/developers/subworkflows/meningotype.mdx new file mode 100644 index 00000000..7a002613 --- /dev/null +++ b/developers/subworkflows/meningotype.mdx @@ -0,0 +1,78 @@ +--- +title: meningotype +description: "Predict serotypes of Neisseria meningitidis from genome assemblies." +tags: + - neisseria-meningitidis + - serotype + - finetype + - bexsero + - meningococcal + - sample-scope +--- + +# meningotype + +**Tags:** neisseria-meningitidis serotype finetype bexsero meningococcal sample-scope + +Predict serotypes of Neisseria meningitidis from genome assemblies. + +This subworkflow uses [meningotype](https://github.com/MDU-PHL/meningotype) to perform +in silico serotyping, finetyping and Bexsero antigen sequence typing of *Neisseria meningitidis* +strains from assembled genomes. It processes each sample individually and aggregates the +results into a single consolidated report. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited meningotype results with serogroup, PorA, and FetA predictions | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [meningotype](/developers/modules/meningotype) - Serotyping and finetyping of *Neisseria meningitidis*. + +## Used By + +This subworkflow is used by the following workflows: + +- [meningotype](/bactopia-tools/meningotype) - Comprehensive typing of Neisseria meningitidis. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [meningotype](https://github.com/MDU-PHL/meningotype) + Kwong JC, Gonçalves da Silva A, Stinear TP, Howden BP, & Seemann T [meningotype: in silico typing for _Neisseria meningitidis_.](https://github.com/MDU-PHL/meningotype) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/meningotype) diff --git a/developers/subworkflows/merlin.mdx b/developers/subworkflows/merlin.mdx new file mode 100644 index 00000000..985af8fa --- /dev/null +++ b/developers/subworkflows/merlin.mdx @@ -0,0 +1,119 @@ +--- +title: merlin +description: "MinmER assisted species-specific bactopia tool seLectIoN." +tags: + - species + - identification + - typing + - serotype + - virulence + - sample-scope +--- + +# merlin + +**Tags:** species identification typing serotype virulence sample-scope + +MinmER assisted species-specific bactopia tool seLectIoN. 
+ +This subworkflow performs intelligent species identification and selects appropriate +species-specific typing tools based on the detected organism. It first identifies +potential species using MinHash distance estimation, then runs species-specific +subworkflows for detailed characterization including serotyping, MLST, virulence +factor detection, and antimicrobial resistance profiling. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `fna` | Assembly file for species identification and typing | +| `r1` | Illumina R1 reads (paired-end) or null | +| `r2` | Illumina R2 reads (paired-end) or null | +| `se` | Single-end Illumina reads or null | +| `lr` | Long reads (ONT/PacBio) or null | + +``` +mash_db: Path +emmtyper_blastdb: Path? +hicap_database_dir: Path? +hicap_model_fp: Path? +staphtyper_repeats: Path? +staphtyper_repeat_order: Path? +``` + +| Name | Type | Description | +|------|------|-------------| +| `mash_db` | `Path` | Mash sketch database for rapid species identification | +| `emmtyper_blastdb` | `Path?` | EMMTyper BLAST database for Streptococcus pyogenes emm typing (optional) | +| `hicap_database_dir` | `Path?` | HiCAP database directory for Haemophilus influenzae serotyping (optional) | +| `hicap_model_fp` | `Path?` | HiCAP HMM model file for improved detection (optional) | +| `staphtyper_repeats` | `Path?` | Staphylococcus aureus repeat sequences for spa typing (optional) | +| `staphtyper_repeat_order` | `Path?` | Staphylococcus aureus repeat order file for spa typing (optional) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +Mixed per-sample records from merlindist and all activated species-specific typing subworkflows (e.g., ectyper, sistr, kleborate). Each record carries tool-specific fields. 
+ +#### `run_outputs` + +Mixed aggregated results from all activated species-specific typing subworkflows. Each record contains tool-specific cross-sample summaries. + +## Subworkflow Composition + +This subworkflow calls the following subworkflows: + +- [merlindist](/developers/subworkflows/merlindist) - Identify species from assembly and read data using Mash distances. +- [clermontyping](/developers/subworkflows/clermontyping) - Predict phylogroups of Escherichia coli from genome assemblies. +- [ectyper](/developers/subworkflows/ectyper) - In silico prediction of Escherichia coli serotype. +- [emmtyper](/developers/subworkflows/emmtyper) - Predict emm types of Streptococcus pyogenes from genome assemblies. +- [genotyphi](/developers/subworkflows/genotyphi) - Assign genotypes to Salmonella Typhi genomes. +- [hicap](/developers/subworkflows/hicap) - In silico serotyping of the Haemophilus influenzae capsule locus. +- [hpsuissero](/developers/subworkflows/hpsuissero) - Rapid Haemophilus parasuis serotyping. +- [kleborate](/developers/subworkflows/kleborate) - Genotyping tool for Klebsiella pneumoniae and its related species complex. +- [legsta](/developers/subworkflows/legsta) - In silico Legionella pneumophila Sequence Based Typing. +- [lissero](/developers/subworkflows/lissero) - In silico serotype prediction for Listeria monocytogenes. +- [ngmaster](/developers/subworkflows/ngmaster) - Perform multi-antigen sequence typing of Neisseria gonorrhoeae from genome assemblies. +- [pasty](/developers/subworkflows/pasty) - Predict serogroups of Pseudomonas aeruginosa from assemblies. +- [pbptyper](/developers/subworkflows/pbptyper) - Predict penicillin binding protein (PBP) types of Streptococcus pneumoniae from genome assemblies. +- [seqsero2](/developers/subworkflows/seqsero2) - Predict Salmonella serotypes from genome assemblies. +- [seroba](/developers/subworkflows/seroba) - k-mer based pipeline to identify the serotype of Streptococcus pneumoniae. 
+- [shigapass](/developers/subworkflows/shigapass) - Predict serotypes of Shigella from assemblies. +- [shigatyper](/developers/subworkflows/shigatyper) - Predict serotypes of Shigella from reads or assemblies. +- [shigeifinder](/developers/subworkflows/shigeifinder) - Predict serotypes of Shigella and EIEC from assemblies. +- [sistr](/developers/subworkflows/sistr) - Salmonella In Silico Typing Resource command-line tool. +- [ssuissero](/developers/subworkflows/ssuissero) - Predict serotypes of Streptococcus suis from genome assemblies. +- [staphtyper](/developers/subworkflows/staphtyper) - Determine the agr, spa and SCCmec types for _Staphylococcus aureus_ genomes. +- [stecfinder](/developers/subworkflows/stecfinder) - Identify and serotype Shiga toxin-producing E. coli (STEC) from assemblies. +- [tbprofiler](/developers/subworkflows/tbprofiler) - Profiling tool for Mycobacterium tuberculosis to detect resistance and strain type. + +## Used By + +This subworkflow is used by the following workflows: + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [merlin](/bactopia-tools/merlin) - MinmER-assisted species-specific tool selection and execution. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mash](https://github.com/marbl/Mash) + Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). 
_Genome Biol_ 17, 132 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/merlin) diff --git a/developers/subworkflows/merlindist.mdx b/developers/subworkflows/merlindist.mdx new file mode 100644 index 00000000..3168fd7d --- /dev/null +++ b/developers/subworkflows/merlindist.mdx @@ -0,0 +1,100 @@ +--- +title: merlindist +description: "Identify species from assembly and read data using Mash distances." +tags: + - species + - identification + - mash + - distance + - classification + - taxonomy + - sample-scope +--- + +# merlindist + +**Tags:** species identification mash distance classification taxonomy sample-scope + +Identify species from assembly and read data using Mash distances. + +This subworkflow performs rapid species identification using [Mash](https://github.com/marbl/Mash) +distance calculations against a reference database. It is a core component of the MERLIN +(MinmER assisted species-specific bactopia tool seLectIoN) pipeline, responsible for determining +which species-specific typing tools should be run based on the detected organism. The workflow +outputs channels filtered by detected genera for downstream species-specific analysis. + +## Take + +``` +ch_seqs: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `fna` | Assembled contigs in FASTA format for species identification | +| `r1` | Illumina R1 reads (paired-end) or null | +| `r2` | Illumina R2 reads (paired-end) or null | +| `se` | Single-end Illumina reads or null | +| `lr` | Long reads (ONT/PacBio) or null | + +``` +ch_mash_db: Path +``` + +| Name | Description | +|------|-------------| +| `mash_db` | Mash sketch database for rapid species identification | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `dist` | The raw Mash distance results | +| `fna` | Passthrough of assembled contigs | +| `r1` | Passthrough of Illumina R1 reads | +| `r2` | Passthrough of Illumina R2 reads | +| `se` | Passthrough of single-end reads | +| `lr` | Passthrough of long reads | +| `escherichia` | Conditional marker file triggering Escherichia analysis tools | +| `haemophilus` | Conditional marker file triggering Haemophilus analysis tools | +| `klebsiella` | Conditional marker file triggering Klebsiella analysis tools | +| `legionella` | Conditional marker file triggering Legionella analysis tools | +| `listeria` | Conditional marker file triggering Listeria analysis tools | +| `mycobacterium` | Conditional marker file triggering Mycobacterium analysis tools | +| `neisseria` | Conditional marker file triggering Neisseria analysis tools | +| `pseudomonas` | Conditional marker file triggering Pseudomonas analysis tools | +| `salmonella` | Conditional marker file triggering Salmonella analysis tools | +| `staphylococcus` | Conditional marker file triggering Staphylococcus analysis tools | +| `streptococcus` | Conditional marker file triggering Streptococcus analysis tools | +| `genus` | A marker file indicating the detected genus (for debugging) | + +#### `run_outputs` + +No run-scope outputs. + +## Module Composition + +This subworkflow calls the following modules: + +- [merlin_dist](/developers/modules/merlin_dist) - Identify species to trigger genus-specific downstream analyses (Merlin). + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mash](https://github.com/marbl/Mash) + Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). _Genome Biol_ 17, 132 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/merlindist) diff --git a/developers/subworkflows/midas.mdx b/developers/subworkflows/midas.mdx new file mode 100644 index 00000000..8107d0fe --- /dev/null +++ b/developers/subworkflows/midas.mdx @@ -0,0 +1,99 @@ +--- +title: midas +description: "Species-level profiling from metagenomic data." +tags: + - metagenomics + - species + - profiling + - abundance + - strain + - sample-scope +--- + +# midas + +**Tags:** metagenomics species profiling abundance strain sample-scope + +Species-level profiling from metagenomic data. + +This subworkflow estimates strain-level genomic variation from metagenomic data +using [MIDAS](https://github.com/snayfach/MIDAS). The pipeline identifies bacterial +species abundances and provides strain-level profiling including SNP analysis. +It uses a comprehensive reference database for accurate species identification +and quantification in complex microbial communities. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? + +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (not supported by MIDAS) | + +``` +database: Path? 
+download_midas: Boolean +save_as_tarball: Boolean +``` + +| Name | Type | Description | +|------|------|-------------| +| `database` | `Path?` | MIDAS reference database for species identification and quantification | +| `download_midas` | `Boolean` | Boolean flag to automatically download the MIDAS database if not available | +| `save_as_tarball` | `Boolean` | Boolean flag to save downloaded database as tarball | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited summary of species abundance and coverage | +| `abundances` | Detailed species abundance profile | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Merged species abundance results from all samples | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [midas_species](/developers/modules/midas_species) - Estimate bacterial species abundance from metagenomic reads. +- [midas_download](/developers/modules/midas_download) - Download the MIDAS reference database. + +## Used By + +This subworkflow is used by the following workflows: + +- [midas](/bactopia-tools/midas) - Estimate species abundances from metagenomic samples. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [MIDAS](https://github.com/snayfach/MIDAS) + Nayfach S, Rodriguez-Mueller B, Garud N, and Pollard KS [An integrated metagenomics pipeline for strain profiling reveals novel patterns of bacterial transmission and biogeography.](https://doi.org/10.1101/gr.201863.115) _Genome Research_, 26(11), 1612-1625. (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/midas) diff --git a/developers/subworkflows/mlst.mdx b/developers/subworkflows/mlst.mdx new file mode 100644 index 00000000..618ecbce --- /dev/null +++ b/developers/subworkflows/mlst.mdx @@ -0,0 +1,86 @@ +--- +title: mlst +description: "Determine multilocus sequence types (MLST) from bacterial assemblies." +tags: + - mlst + - sequence-typing + - pubmlst + - bacteria + - sample-scope +--- + +# mlst + +**Tags:** mlst sequence-typing pubmlst bacteria sample-scope + +Determine multilocus sequence types (MLST) from bacterial assemblies. + +This subworkflow uses [mlst](https://github.com/tseemann/mlst) to scan assembled +contigs against PubMLST typing schemes and determine sequence types (STs). It processes +each sample individually and aggregates the results into a single consolidated report. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +``` +db: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path` | PubMLST database to use for MLST typing | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited summary containing the Sample, Scheme, ST, and Allele IDs | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | A merged TSV file with mlst results from all samples | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [mlst](/developers/modules/mlst) - Automatic Multi-Locus Sequence Typing (MLST) of genome assemblies. + +## Used By + +This subworkflow is used by the following workflows: + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [mlst](/bactopia-tools/mlst) - Automatic Multi-Locus Sequence Type (MLST) calling from assembled contigs. +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [mlst](https://github.com/tseemann/mlst) + Seemann T [mlst: scan contig files against PubMLST typing schemes](https://github.com/tseemann/mlst) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/mlst) diff --git a/developers/subworkflows/mobsuite.mdx b/developers/subworkflows/mobsuite.mdx new file mode 100644 index 00000000..287de39f --- /dev/null +++ b/developers/subworkflows/mobsuite.mdx @@ -0,0 +1,81 @@ +--- +title: mobsuite +description: "Reconstruct and type plasmids from bacterial genome assemblies." 
+tags: + - plasmid + - reconstruction + - typing + - mobilome + - bacterial-genome + - sample-scope +--- + +# mobsuite + +**Tags:** plasmid reconstruction typing mobilome bacterial-genome sample-scope + +Reconstruct and type plasmids from bacterial genome assemblies. + +This subworkflow uses [MOB-suite](https://github.com/phac-nml/mob-suite) to reconstruct +and type plasmids from draft genome assemblies. It separates plasmid from chromosomal +sequences, determines plasmid replicon types using the MOB-suite database, and provides +comprehensive reports on plasmid content and organization. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `chromosome` | Chromosomal sequences separated from plasmid contigs (gzipped FASTA) | +| `contig_report` | Tab-delimited report assigning each contig to chromosome or plasmid | +| `txt` | MOB-typer results with replicon type, mobility, and incompatibility group (optional) | +| `plasmids` | Reconstructed plasmid sequences in gzipped FASTA format (optional) | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [mobsuite_recon](/developers/modules/mobsuite_recon) - Reconstruct and type plasmids from a bacterial genome assembly. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. 
+ +## Used By + +This subworkflow is used by the following workflows: + +- [mobsuite](/bactopia-tools/mobsuite) - Reconstruction and annotation of plasmids from bacterial genome assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [MOB-suite](https://github.com/phac-nml/mob-suite) + Robertson J, Nash JHE [MOB-suite: software tools for clustering, reconstruction and typing of plasmids from draft assemblies.](https://doi.org/10.1099/mgen.0.000206) _Microbial Genomics_ 4(8). (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/mobsuite) diff --git a/developers/subworkflows/mykrobe.mdx b/developers/subworkflows/mykrobe.mdx new file mode 100644 index 00000000..1b5feb7d --- /dev/null +++ b/developers/subworkflows/mykrobe.mdx @@ -0,0 +1,91 @@ +--- +title: mykrobe +description: "Predict antibiotic resistance from sequence reads." +tags: + - bacteria + - reads + - antimicrobial-resistance + - genotype-prediction + - sample-scope +--- + +# mykrobe + +**Tags:** bacteria reads antimicrobial-resistance genotype-prediction sample-scope + +Predict antibiotic resistance from sequence reads. + +This subworkflow uses [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) to predict antibiotic +resistance directly from sequencing reads. It provides rapid genotype-based resistance predictions +for specific bacterial species. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? 
+ +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +``` +mykrobe_species: String +``` + +| Name | Type | Description | +|------|------|-------------| +| `mykrobe_species` | `String` | Target bacterial species for resistance prediction (e.g., "staphylococcus_aureus", "mycobacterium_tuberculosis", or "enterococcus_faecium"). | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | AMR predictions in machine-readable CSV format | +| `json` | Detailed AMR prediction results in JSON format | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Merged AMR predictions from all samples | + +## Module Composition + +This subworkflow calls the following modules: + +- [mykrobe_predict](/developers/modules/mykrobe_predict) - Predict Antimicrobial Resistance (AMR) for supported bacterial species. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [mykrobe](/bactopia-tools/mykrobe) - Antimicrobial resistance detection for specific bacterial species. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) + Hunt M, Bradley P, Lapierre SG, Heys S, Thomsit M, Hall MB, Malone KM, Wintringer P, Walker TM, Cirillo DM, Comas I, Farhat MR, Fowler P, Gardy J, Ismail N, Kohl TA, Mathys V, Merker M, Niemann S, Omar SV, Sintchenko V, Smith G, Supply P, Tahseen S, Wilcox M, Arandjelovic I, Peto TEA, Crook DW, Iqbal Z [Antibiotic resistance prediction for Mycobacterium tuberculosis from genome sequence data with Mykrobe](https://doi.org/10.12688/wellcomeopenres.15603.1) _Wellcome Open Research_ 4, 191. (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/mykrobe) diff --git a/developers/subworkflows/ncbigenomedownload.mdx b/developers/subworkflows/ncbigenomedownload.mdx new file mode 100644 index 00000000..73c84d77 --- /dev/null +++ b/developers/subworkflows/ncbigenomedownload.mdx @@ -0,0 +1,109 @@ +--- +title: ncbigenomedownload +description: "Download bacterial genomes from NCBI's RefSeq database." +tags: + - download + - ncbi + - refseq + - genome + - assembly + - database + - sample-scope +--- + +# ncbigenomedownload + +**Tags:** download ncbi refseq genome assembly database sample-scope + +Download bacterial genomes from NCBI's RefSeq database. + +This subworkflow downloads complete and draft bacterial genomes using the +[ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) tool. It fetches +genome assemblies in various formats including GenBank, GFF, and FASTA files +along with associated annotation files and statistics. + +## Take + +``` +accessions: Path? +``` + +| Name | Type | Description | +|------|------|-------------| +| `accessions` | `Path?` | A file containing NCBI accession numbers, one per line. 
If empty, all genomes matching the specified criteria are downloaded. | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `gbff` | GenBank format genome sequences | +| `fna` | Genomic nucleotide sequences in FASTA format | +| `gff` | Genome annotations in GFF3 format | +| `faa` | Protein sequences in FASTA format | +| `gpff` | Protein sequences in GenPept format | +| `wgs_gbk` | WGS master records in GenBank format | +| `cds` | CDS nucleotide sequences in FASTA format | +| `rna` | RNA product sequences in FASTA format | +| `rna_fna` | RNA feature nucleotide sequences in FASTA format | +| `features` | Feature table with locations and attributes | +| `rm` | RepeatMasker output (optional) | +| `report` | Assembly report with unit and sequence relationships | +| `stats` | Assembly statistics | +| `accessions` | Generated accession list files | + +#### `run_outputs` + +No run-scope outputs. + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. + +#### `bactopia_tools` + +Downloaded files formatted for Bactopia Tools workflows + +#### `assemblies` + +| Output | Description | +|--------|-------------| +| `fna` | Individual downloaded assembly in FASTA format | + +#### `reference` + +First downloaded assembly file for use as a reference genome + +## Module Composition + +This subworkflow calls the following modules: + +- [ncbigenomedownload](/developers/modules/ncbigenomedownload) - Download assemblies and annotation files from NCBI's Assembly database. + +## Used By + +This subworkflow is used by the following workflows: + +- [fastani](/bactopia-tools/fastani) - Fast alignment-free computation of whole-genome Average Nucleotide Identity. 
+- [mashtree](/bactopia-tools/mashtree) - Rapid phylogenetic tree construction using Mash distances. +- [pangenome](/bactopia-tools/pangenome) - Pangenome analysis with optional core-genome phylogeny. +- [snippy](/bactopia-tools/snippy) - Rapid haplotype variant calling and core genome alignment. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) + Blin K [ncbi-genome-download: Scripts to download genomes from the NCBI FTP servers](https://github.com/kblin/ncbi-genome-download) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/ncbigenomedownload) diff --git a/developers/subworkflows/ngmaster.mdx b/developers/subworkflows/ngmaster.mdx new file mode 100644 index 00000000..212db1e0 --- /dev/null +++ b/developers/subworkflows/ngmaster.mdx @@ -0,0 +1,78 @@ +--- +title: ngmaster +description: "Perform multi-antigen sequence typing of Neisseria gonorrhoeae from genome assemblies." +tags: + - neisseria-gonorrhoeae + - ng-mast + - typing + - gonococcal + - antigen + - sample-scope +--- + +# ngmaster + +**Tags:** neisseria-gonorrhoeae ng-mast typing gonococcal antigen sample-scope + +Perform multi-antigen sequence typing of Neisseria gonorrhoeae from genome assemblies. + +This subworkflow uses [ngmaster](https://github.com/MDU-PHL/ngmaster) to perform +in silico multi-antigen sequence typing (NG-MAST) for *Neisseria gonorrhoeae* +strains from assembled genomes. It processes each sample individually and aggregates +the results into a single consolidated report. 
+ +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited NG-MASTER results with porB and tbpB alleles and sequence type | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [ngmaster](/developers/modules/ngmaster) - Serotyping and Multi-Antigen Sequence Typing (MAST) of *Neisseria gonorrhoeae*. + +## Used By + +This subworkflow is used by the following workflows: + +- [ngmaster](/bactopia-tools/ngmaster) - Multi-antigen sequence typing of Neisseria gonorrhoeae. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ngmaster](https://github.com/MDU-PHL/ngmaster) + Kwong J, Gonçalves da Silva A, Schultz M, Seemann T [ngmaster - _In silico_ multi-antigen sequence typing for _Neisseria gonorrhoeae_ (NG-MAST)](https://github.com/MDU-PHL/ngmaster) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/ngmaster) diff --git a/developers/subworkflows/nohuman.mdx b/developers/subworkflows/nohuman.mdx new file mode 100644 index 00000000..45719bdb --- /dev/null +++ b/developers/subworkflows/nohuman.mdx @@ -0,0 +1,87 @@ +--- +title: nohuman +description: "Remove human reads from sequencing data using nohuman." +tags: + - human + - contamination + - decontamination + - scrubbing + - reads + - nohuman + - kraken2 + - sample-scope +--- + +# nohuman + +**Tags:** human contamination decontamination scrubbing reads nohuman kraken2 sample-scope + +Remove human reads from sequencing data using nohuman. + +This subworkflow uses [nohuman](https://github.com/mbhall88/nohuman) to identify and remove +human reads from FASTQ files using a Kraken2 database built from Human Pangenome Reference +Consortium (HPRC) genomes. It optionally downloads the database if not already available. + +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end forward) | +| `r2` | Illumina R2 reads (paired-end reverse) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +``` +database: Path? 
+download_nohuman: Boolean +save_as_tarball: Boolean +``` + +| Name | Type | Description | +|------|------|-------------| +| `database` | `Path?` | Path to nohuman database directory or tarball (ignored if download_nohuman is true) | +| `download_nohuman` | `Boolean` | Boolean flag to download the database instead of using the provided path | +| `save_as_tarball` | `Boolean` | Boolean flag to save downloaded database as tarball | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `scrubbed` | FASTQ files with human reads removed | +| `scrub_report` | Kraken2 classification report (optional) | + +#### `run_outputs` + +No run-scope outputs. + +## Module Composition + +This subworkflow calls the following modules: + +- [nohuman_download](/developers/modules/nohuman_download) - Download the nohuman database for human read removal. +- [nohuman_run](/developers/modules/nohuman_run) - Remove human reads from sequencing data. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. 
(2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/nohuman) diff --git a/developers/subworkflows/panaroo.mdx b/developers/subworkflows/panaroo.mdx new file mode 100644 index 00000000..d61e1157 --- /dev/null +++ b/developers/subworkflows/panaroo.mdx @@ -0,0 +1,73 @@ +--- +title: panaroo +description: "Build a pangenome from GFF3 annotations using Panaroo." +tags: + - pangenome + - pan-genome + - comparative-genomics + - core-genome + - alignment + - run-scope +--- + +# panaroo + +**Tags:** pangenome pan-genome comparative-genomics core-genome alignment run-scope + +Build a pangenome from GFF3 annotations using Panaroo. + +This subworkflow creates a pangenome from bacterial genome annotations using [Panaroo](https://github.com/gtonkinhill/panaroo). +Panaroo is a pangenome pipeline that produces polished pangenomes by removing errors and +contamination from input annotations. It generates gene presence/absence matrices and core-genome +alignments suitable for downstream phylogenetic analysis. + +## Take + +``` +gff: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `gff` | Set of GFF3 annotation files representing the genomic annotations for each sample | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. 
+ +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `aln` | Core genome alignment in FASTA format (optional) | +| `filtered_aln` | Core genome alignment with recombinant regions filtered out (optional) | +| `csv` | Gene presence/absence matrix in Roary-compatible CSV format (optional) | +| `panaroo_csv` | Gene presence/absence matrix in Panaroo's native CSV format (optional) | +| `supplemental` | Directory containing Panaroo intermediate files and data structures | + +## Module Composition + +This subworkflow calls the following modules: + +- [panaroo_run](/developers/modules/panaroo_run) - Fast and scalable bacterial pangenome analysis using a graph-based approach. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Panaroo](https://github.com/gtonkinhill/panaroo) + Tonkin-Hill G, MacAlasdair N, Ruis C, Weimann A, Horesh G, Lees JA, Gladstone RA, Lo S, Beaudoin C, Floto RA, Frost SDW, Corander J, Bentley SD, Parkhill J [Producing polished prokaryotic pangenomes with the Panaroo pipeline.](https://doi.org/10.1186/s13059-020-02090-4) _Genome Biology_ 21(1), 180. (2020) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/panaroo) diff --git a/developers/subworkflows/pangenome.mdx b/developers/subworkflows/pangenome.mdx new file mode 100644 index 00000000..c7e73a1c --- /dev/null +++ b/developers/subworkflows/pangenome.mdx @@ -0,0 +1,125 @@ +--- +title: pangenome +description: "Perform pangenome analysis with optional core-genome phylogeny." 
+tags: + - alignment + - core-genome + - pan-genome + - phylogeny + - comparative-genomics + - run-scope +--- + +# pangenome + +**Tags:** alignment core-genome pan-genome phylogeny comparative-genomics run-scope + +Perform pangenome analysis with optional core-genome phylogeny. + +This subworkflow creates a pangenome from GFF3 annotation files using one of three +tools: [Panaroo](https://github.com/gtonkinhill/panaroo) (default), +[PIRATE](https://github.com/SionBayliss/PIRATE), or +[Roary](https://github.com/sanger-pathogens/roary). It generates core-genome alignments +and gene presence/absence matrices, followed by SNP distance calculations using +[snp-dists](https://github.com/tseemann/snp-dists). The workflow conditionally executes +the selected pangenome tool based on Boolean parameters. + +## Take + +``` +gff: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `gff` | Set of GFF3 annotation files from assembled genomes | + +``` +use_pirate: Boolean +use_roary: Boolean +``` + +| Name | Type | Description | +|------|------|-------------| +| `use_pirate` | `Boolean` | Boolean flag to use PIRATE for pangenome analysis | +| `use_roary` | `Boolean` | Boolean flag to use Roary for pangenome analysis | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `aln` | Core-genome alignment in FASTA format | +| `csv` | Gene presence/absence matrix | +| `supplemental` | Intermediate files and detailed outputs | +| `tsv` | Pairwise SNP distance matrix from core-genome alignment | + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. 
+ +#### `alignment` + +| Output | Description | +|--------|-------------| +| `aln` | Core-genome alignment for downstream analysis (e.g., recombination detection) | + +#### `phylogeny_input` + +| Output | Description | +|--------|-------------| +| `aln` | Core-genome alignment with iqtree-ready meta for phylogeny construction | + +#### `csv` + +| Output | Description | +|--------|-------------| +| `csv` | Gene presence/absence matrix for downstream analysis (e.g., pan-GWAS) | + +## Subworkflow Composition + +This subworkflow calls the following subworkflows: + +- [pirate](/developers/subworkflows/pirate) - Build a pangenome from GFF3 annotations using PIRATE. +- [roary](/developers/subworkflows/roary) - Build a pangenome from GFF3 annotations using Roary. +- [panaroo](/developers/subworkflows/panaroo) - Build a pangenome from GFF3 annotations using Panaroo. +- [snpdists](/developers/subworkflows/snpdists) - Calculate pairwise SNP distances from sequence alignments. + +## Used By + +This subworkflow is used by the following workflows: + +- [pangenome](/bactopia-tools/pangenome) - Pangenome analysis with optional core-genome phylogeny. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PIRATE](http://github.com/SionBayliss/PIRATE) + Bayliss SC, Thorpe HA, Coyle NM, Sheppard SK, Feil EJ [PIRATE: A fast and scalable pangenomics toolbox for clustering diverged orthologues in bacteria.](https://doi.org/10.1093/gigascience/giz119) _Gigascience_ 8 (2019) + +- [Panaroo](https://github.com/gtonkinhill/panaroo) + Tonkin-Hill G, MacAlasdair N, Ruis C, Weimann A, Horesh G, Lees JA, Gladstone RA, Lo S, Beaudoin C, Floto RA, Frost SDW, Corander J, Bentley SD, Parkhill J [Producing polished prokaryotic pangenomes with the Panaroo pipeline.](https://doi.org/10.1186/s13059-020-02090-4) _Genome Biology_ 21(1), 180. (2020) + +- [Roary](https://github.com/sanger-pathogens/Roary) + Page AJ, Cummins CA, Hunt M, Wong VK, Reuter S, Holden MTG, Fookes M, Falush D, Keane JA, Parkhill J [Roary: rapid large-scale prokaryote pan genome analysis.](https://doi.org/10.1093/bioinformatics/btv421) _Bioinformatics_ 31, 3691-3693 (2015) + +- [snp-dists](https://github.com/tseemann/snp-dists) + Seemann T [snp-dists - Pairwise SNP distance matrix from a FASTA sequence alignment.](https://github.com/tseemann/snp-dists) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/pangenome) diff --git a/developers/subworkflows/pasty.mdx b/developers/subworkflows/pasty.mdx new file mode 100644 index 00000000..5c7f7557 --- /dev/null +++ b/developers/subworkflows/pasty.mdx @@ -0,0 +1,80 @@ +--- +title: pasty +description: "Predict serogroups of Pseudomonas aeruginosa from assemblies." 
+tags: + - pseudomonas-aeruginosa + - serogroup + - typing + - o-antigen + - prediction + - sample-scope +--- + +# pasty + +**Tags:** pseudomonas-aeruginosa serogroup typing o-antigen prediction sample-scope + +Predict serogroups of Pseudomonas aeruginosa from assemblies. + +This subworkflow uses [Pasty](https://github.com/rpetit3/pasty) to perform in silico +serogrouping of *Pseudomonas aeruginosa* isolates from assembled genomes. It identifies +O-antigen biosynthesis genes to classify isolates into their known serogroups using +BLAST-based homology searches. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited summary file with the predicted O-antigen serogroup | +| `blast` | A tab-delimited file of all raw BLAST hits used for the prediction | +| `details` | A tab-delimited file with detailed gene hits for each serogroup tested | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [pasty](/developers/modules/pasty) - Predict O-antigen serogroup of Pseudomonas aeruginosa isolates. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [pasty](/bactopia-tools/pasty) - In silico serogrouping of Pseudomonas aeruginosa isolates. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [pasty](https://github.com/rpetit3/pasty) + Petit III RA [pasty: in silico serogrouping of _Pseudomonas aeruginosa_ isolates](https://github.com/rpetit3/pasty) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/pasty) diff --git a/developers/subworkflows/pbptyper.mdx b/developers/subworkflows/pbptyper.mdx new file mode 100644 index 00000000..702b6117 --- /dev/null +++ b/developers/subworkflows/pbptyper.mdx @@ -0,0 +1,79 @@ +--- +title: pbptyper +description: "Predict penicillin binding protein (PBP) types of Streptococcus pneumoniae from genome assemblies." +tags: + - streptococcus-pneumoniae + - pbp-typing + - penicillin + - antimicrobial-resistance + - sample-scope +--- + +# pbptyper + +**Tags:** streptococcus-pneumoniae pbp-typing penicillin antimicrobial-resistance sample-scope + +Predict penicillin binding protein (PBP) types of Streptococcus pneumoniae from genome assemblies. + +This subworkflow uses [pbptyper](https://github.com/rpetit3/pbptyper) to predict +the penicillin binding protein (PBP) types and predict antimicrobial susceptibility +of *Streptococcus pneumoniae* strains from assembled genomes. It processes each sample +individually and aggregates the results into a single consolidated report. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | A tab-delimited summary file with the predicted PBP type for each gene | +| `blast` | A tab-delimited file of the raw TBLASTN hits used for gene identification | +| `details` | Detailed PBP typing results for each gene analyzed | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [pbptyper](/developers/modules/pbptyper) - Predict Penicillin Binding Protein (PBP) type of *Streptococcus pneumoniae* assemblies. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [pbptyper](/bactopia-tools/pbptyper) - Penicillin Binding Protein (PBP) typing for Streptococcus pneumoniae. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [pbptyper](https://github.com/rpetit3/pbptyper) + Petit III RA [pbptyper: In silico Penicillin Binding Protein (PBP) typer for _Streptococcus pneumoniae_ assemblies](https://github.com/rpetit3/pbptyper) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/pbptyper) diff --git a/developers/subworkflows/phispy.mdx b/developers/subworkflows/phispy.mdx new file mode 100644 index 00000000..a8dd567a --- /dev/null +++ b/developers/subworkflows/phispy.mdx @@ -0,0 +1,78 @@ +--- +title: phispy +description: "Prediction of prophages from bacterial genomes." 
+tags: + - prophage + - phage + - bacterial + - genome + - mobile-genetic-elements + - sample-scope +--- + +# phispy + +**Tags:** prophage phage bacterial genome mobile-genetic-elements sample-scope + +Prediction of prophages from bacterial genomes. + +This subworkflow identifies prophages in bacterial genomes using [PhiSpy](https://github.com/linsalrob/PhiSpy), +which combines similarity-based and composition-based strategies for accurate detection. +The tool identifies integrated phage sequences, extracts bacterial and phage regions, +and provides comprehensive annotation including GFF format for downstream analysis. + +## Take + +``` +gbff: Channel +``` + +| Name | Description | +|------|-------------| +| `gbff` | Annotated bacterial genomes in GenBank format for prophage prediction | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Coordinates (start/end) of each predicted prophage region in the genome | +| `supplemental` | Directory containing detailed prophage information, sequences, and annotations | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Merged prophage prediction results from all samples | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [phispy](/developers/modules/phispy) - Predict prophage regions integrated into bacterial genomes. + +## Used By + +This subworkflow is used by the following workflows: + +- [phispy](/bactopia-tools/phispy) - Prediction of prophages in bacterial and archaeal genomes. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PhiSpy](https://github.com/linsalrob/PhiSpy) + Akhter S, Aziz RK, and Edwards RA [PhiSpy: a novel algorithm for finding prophages in bacterial genomes that combines similarity- and composition-based strategies.](https://doi.org/10.1093/nar/gks406) _Nucleic Acids Research_, 40(16), e126. (2012) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/phispy) diff --git a/developers/subworkflows/pirate.mdx b/developers/subworkflows/pirate.mdx new file mode 100644 index 00000000..94d58389 --- /dev/null +++ b/developers/subworkflows/pirate.mdx @@ -0,0 +1,71 @@ +--- +title: pirate +description: "Build a pangenome from GFF3 annotations using PIRATE." +tags: + - pangenome + - pan-genome + - comparative-genomics + - core-genome + - alignment + - run-scope +--- + +# pirate + +**Tags:** pangenome pan-genome comparative-genomics core-genome alignment run-scope + +Build a pangenome from GFF3 annotations using PIRATE. + +This subworkflow creates a pangenome from bacterial genome annotations using [PIRATE](https://github.com/SionBayliss/PIRATE). +PIRATE is a scalable pangenome toolbox that clusters orthologous genes at multiple identity thresholds. +It is particularly useful for highly diverse datasets as it can handle divergent gene families +and provides flexible clustering options for different analytical needs. + +## Take + +``` +gff: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `gff` | Set of GFF3 annotation files representing the genomic annotations for each sample | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +No sample-scope outputs. + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `aln` | Core genome alignment in FASTA format (optional) | +| `csv` | Gene presence/absence matrix in CSV format | +| `supplemental` | Directory containing PIRATE intermediate files and detailed outputs | + +## Module Composition + +This subworkflow calls the following modules: + +- [pirate](/developers/modules/pirate) - Pangenome Identification and Reconciliation Analysis Tool for Epidemiology (PIRATE). + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PIRATE](http://github.com/SionBayliss/PIRATE) + Bayliss SC, Thorpe HA, Coyle NM, Sheppard SK, Feil EJ [PIRATE: A fast and scalable pangenomics toolbox for clustering diverged orthologues in bacteria.](https://doi.org/10.1093/gigascience/giz119) _Gigascience_ 8 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/pirate) diff --git a/developers/subworkflows/plasmidfinder.mdx b/developers/subworkflows/plasmidfinder.mdx new file mode 100644 index 00000000..bb6f6446 --- /dev/null +++ b/developers/subworkflows/plasmidfinder.mdx @@ -0,0 +1,82 @@ +--- +title: plasmidfinder +description: "Identify plasmid replicons in bacterial genome assemblies." +tags: + - plasmid + - replicon + - typing + - antimicrobial-resistance + - mobilome + - sample-scope +--- + +# plasmidfinder + +**Tags:** plasmid replicon typing antimicrobial-resistance mobilome sample-scope + +Identify plasmid replicons in bacterial genome assemblies. + +This subworkflow uses [PlasmidFinder](https://bitbucket.org/genomicepidemiology/plasmidfinder) to +identify plasmid replicons in bacterial genome assemblies. 
It screens assemblies against +the PlasmidFinder database to detect known plasmid replicon types and provides detailed +results including hit sequences and classification information. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `json` | PlasmidFinder results in JSON format | +| `txt` | PlasmidFinder results in text format | +| `tsv` | Tab-delimited PlasmidFinder results with replicon typing information | +| `genome_seq` | FASTA sequences of plasmid hits found in the genome (gzipped) | +| `plasmid_seq` | Reference plasmid sequences matched (gzipped) | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [plasmidfinder](/developers/modules/plasmidfinder) - Identify plasmid replicon types in bacterial sequences and assemblies. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [plasmidfinder](/bactopia-tools/plasmidfinder) - Bactopia Tool: Plasmidfinder. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PlasmidFinder](https://bitbucket.org/genomicepidemiology/plasmidfinder) + Carattoli A, Zankari E, García-Fernández A, Voldby Larsen M, Lund O, Villa L, Møller Aarestrup F, Hasman H [In silico detection and typing of plasmids using PlasmidFinder and plasmid multilocus sequence typing.](https://doi.org/10.1128/AAC.02412-14) _Antimicrobial Agents and Chemotherapy_ 58(7), 3895-3903. (2014) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/plasmidfinder) diff --git a/developers/subworkflows/pneumocat.mdx b/developers/subworkflows/pneumocat.mdx new file mode 100644 index 00000000..ad87b99c --- /dev/null +++ b/developers/subworkflows/pneumocat.mdx @@ -0,0 +1,81 @@ +--- +title: pneumocat +description: "Perform capsular typing of Streptococcus pneumoniae from NGS data." +tags: + - streptococcus-pneumoniae + - serotype + - capsular-typing + - typing + - sample-scope +--- + +# pneumocat + +**Tags:** streptococcus-pneumoniae serotype capsular-typing typing sample-scope + +Perform capsular typing of Streptococcus pneumoniae from NGS data. + +This subworkflow uses [PneumoCaT](https://github.com/ukhsa-collaboration/PneumoCaT) to +identify serotype-specific capsular loci and determine serotypes from next-generation +sequencing data. It provides comprehensive serotype determination including coverage +statistics and confidence scores for each sample. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? 
+ +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads (not supported by PneumoCaT) | +| `lr` | Long reads (not supported by PneumoCaT) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `xml` | The PneumoCaT result files in XML format | +| `txt` | A file containing the coverage information across the genes | + +#### `run_outputs` + +No run-scope outputs. + +## Module Composition + +This subworkflow calls the following modules: + +- [pneumocat](/developers/modules/pneumocat) - Capsular typing of Streptococcus pneumoniae from Illumina reads. + +## Used By + +This subworkflow is used by the following workflows: + +- [pneumocat](/bactopia-tools/pneumocat) - Capsular type assignment to Streptococcus pneumoniae from sequence reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [PneumoCaT](https://github.com/ukhsa-collaboration/PneumoCaT) + Kapatai G, Sheppard CL, Al-Shahib A, Litt DJ, Underwood AP, Harrison TG, and Fry NK [Whole genome sequencing of Streptococcus pneumoniae: development, evaluation and verification of targets for serogroup and serotype prediction using an automated pipeline.](https://doi.org/10.7717/peerj.2477) PeerJ, 4, e2477. 
(2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/pneumocat) diff --git a/developers/subworkflows/prokka.mdx b/developers/subworkflows/prokka.mdx new file mode 100644 index 00000000..7db3fe49 --- /dev/null +++ b/developers/subworkflows/prokka.mdx @@ -0,0 +1,117 @@ +--- +title: prokka +description: "Annotate bacterial genomes with functional information." +tags: + - bacteria + - annotation + - genome + - prokaryote + - functional-annotation + - genes + - sample-scope +--- + +# prokka + +**Tags:** bacteria annotation genome prokaryote functional-annotation genes sample-scope + +Annotate bacterial genomes with functional information. + +This subworkflow annotates bacterial assemblies using [Prokka](https://github.com/tseemann/prokka). +It rapidly calls genes, translates them, and searches them against multiple protein databases +to produce comprehensive annotation in various standard formats. Optional protein sequences +and Prodigal training files can be provided to improve annotation accuracy. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Bacterial assembly files in FASTA format to be annotated | + +``` +proteins: Path? +prodigal_tf: Path? +``` + +| Name | Type | Description | +|------|------|-------------| +| `proteins` | `Path?` | Optional protein sequences for homology search to improve annotation accuracy | +| `prodigal_tf` | `Path?` | Optional Prodigal training file for improved gene prediction accuracy | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `gff` | Annotation in GFF3 format, containing both sequences and annotations | +| `gbff` | Annotation in GenBank format, containing both sequences and annotations | +| `fna` | Nucleotide FASTA file of the input contig sequences | +| `faa` | Protein FASTA file of the translated CDS sequences | +| `ffn` | Nucleotide FASTA file of all prediction transcripts (CDS, rRNA, tRNA, tmRNA, misc_RNA) | +| `sqn` | An ASN1 format "Sequin" file for submission to GenBank | +| `fsa` | Nucleotide FASTA file of the input contig sequences, used by tbl2asn | +| `tbl` | Feature Table file for NCBI submission | +| `txt` | Summary statistics relating to the annotated features found | +| `tsv` | Tab-separated file of all features | +| `blastdb` | A compressed tar.gz archive of BLAST+ databases | + +#### `run_outputs` + +No run-scope outputs. + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. + +#### `annotations` + +| Output | Description | +|--------|-------------| +| `fna` | Annotated nucleotide sequences in FASTA format | +| `faa` | Protein sequences in FASTA format | +| `gff` | Annotations in GFF3 format | + +#### `gffs` + +| Output | Description | +|--------|-------------| +| `gff` | GFF3 annotation file for pangenome analysis | + +## Module Composition + +This subworkflow calls the following modules: + +- [prokka](/developers/modules/prokka) - Annotate prokaryotic genomes. + +## Used By + +This subworkflow is used by the following workflows: + +- [bactopia](/full-guide) - Comprehensive bacterial analysis pipeline for complete genomic characterization. +- [pangenome](/bactopia-tools/pangenome) - Pangenome analysis with optional core-genome phylogeny. +- [prokka](/bactopia-tools/prokka) - Rapid whole genome annotation of bacterial, archaeal, and viral genomes. 
+- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Prokka](https://github.com/tseemann/prokka) + Seemann T [Prokka: rapid prokaryotic genome annotation](http://dx.doi.org/10.1093/bioinformatics/btu153) _Bioinformatics_ 30, 2068-2069 (2014) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/prokka) diff --git a/developers/subworkflows/quast.mdx b/developers/subworkflows/quast.mdx new file mode 100644 index 00000000..f5943d26 --- /dev/null +++ b/developers/subworkflows/quast.mdx @@ -0,0 +1,82 @@ +--- +title: quast +description: "Evaluate assembly quality using QUAST." +tags: + - assembly + - quality + - assessment + - metrics + - n50 + - evaluation + - sample-scope +--- + +# quast + +**Tags:** assembly quality assessment metrics n50 evaluation sample-scope + +Evaluate assembly quality using QUAST. + +This subworkflow assesses genome assembly quality using [QUAST](https://quast.sourceforge.net/) +(Quality Assessment Tool for Genome Assemblies). It provides comprehensive metrics +including N50, L50, GC content, total length, and other quality statistics. The workflow +generates both individual sample reports and a combined summary for comparative analysis +across all assemblies. 
+ +## Take + +``` +fasta: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format (Path) | +| `meta_file` | Meta file containing reference size information (Path) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Transposed report in TSV format | +| `supplemental` | Supplemental files including plots and HTML reports | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [quast](/developers/modules/quast) - Quality Assessment Tool for Genome Assemblies. + +## Used By + +This subworkflow is used by the following workflows: + +- [quast](/bactopia-tools/quast) - Quality assessment of assembled contigs using QUAST. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [QUAST](http://quast.sourceforge.net/) + Gurevich A, Saveliev V, Vyahhi N, Tesler G [QUAST: quality assessment tool for genome assemblies.](http://dx.doi.org/10.1093/bioinformatics/btt086) _Bioinformatics_ 29, 1072-1075 (2013) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/quast) diff --git a/developers/subworkflows/rgi.mdx b/developers/subworkflows/rgi.mdx new file mode 100644 index 00000000..0967db10 --- /dev/null +++ b/developers/subworkflows/rgi.mdx @@ -0,0 +1,79 @@ +--- +title: rgi +description: "Predict antimicrobial resistance from protein or nucleotide data." +tags: + - bacteria + - assembly + - antimicrobial-resistance + - resistome + - homology + - sample-scope +--- + +# rgi + +**Tags:** bacteria assembly antimicrobial-resistance resistome homology sample-scope + +Predict antimicrobial resistance from protein or nucleotide data. + +This subworkflow uses the [Resistance Gene Identifier (RGI)](https://github.com/arpcard/rgi) to predict +resistomes based on homology and SNP models. It includes analysis of resistance genes, +creation of summary visualizations, and aggregation of results across samples. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format for resistome prediction | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | RGI results in tab-separated format | +| `json` | RGI results in JSON format (optional) | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [rgi_main](/developers/modules/rgi_main) - Predict antibiotic resistance from assemblies. +- [rgi_heatmap](/developers/modules/rgi_heatmap) - Create heatmaps of resistance gene presence/absence. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [rgi](/bactopia-tools/rgi) - Prediction of antibiotic resistance genes using RGI. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Resistance Gene Identifier (RGI)](https://github.com/arpcard/rgi) + Alcock BP, Raphenya AR, Lau TTY, Tsang KK, Bouchard M, Edalatmand A, Huynh W, Nguyen A-L V, Cheng AA, Liu S, Min SY, Miroshnichenko A, Tran H-K, Werfalli RE, Nasir JA, Oloni M, Speicher DJ, Florescu A, Singh B, Faltyn M, Hernandez-Koutoucheva A, Sharma AN, Bordeleau E, Pawlowski AC, Zubyk HL, Dooley D, Griffiths E, Maguire F, Winsor GL, Beiko RG, Brinkman FSL, Hsiao WWL, Domselaar GV, McArthur AG [CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database.](https://doi.org/10.1093/nar/gkz935) _Nucleic acids research_ 48.D1, D517-D525 (2020) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/rgi) diff --git a/developers/subworkflows/roary.mdx b/developers/subworkflows/roary.mdx new file mode 
100644 index 00000000..3856b4f8 --- /dev/null +++ b/developers/subworkflows/roary.mdx @@ -0,0 +1,71 @@ +--- +title: roary +description: "Build a pangenome from GFF3 annotations using Roary." +tags: + - pangenome + - pan-genome + - comparative-genomics + - core-genome + - alignment + - run-scope +--- + +# roary + +**Tags:** pangenome pan-genome comparative-genomics core-genome alignment run-scope + +Build a pangenome from GFF3 annotations using Roary. + +This subworkflow creates a pangenome from bacterial genome annotations using [Roary](https://github.com/sanger-pathogens/Roary). +Roary is a rapid pangenome pipeline that processes large numbers of annotated genomes to produce +gene presence/absence matrices and core-genome alignments. It is particularly optimized for +bacterial datasets and can handle hundreds of genomes efficiently. + +## Take + +``` +gff: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `gff` | Set of GFF3 annotation files representing the genomic annotations for each sample | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `aln` | Core genome alignment in FASTA format (optional) | +| `csv` | Gene presence/absence table | +| `supplemental` | Supplemental files including accessory binary genes and graphs | + +## Module Composition + +This subworkflow calls the following modules: + +- [roary](/developers/modules/roary) - Rapid large-scale prokaryote pan genome analysis. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Roary](https://github.com/sanger-pathogens/Roary) + Page AJ, Cummins CA, Hunt M, Wong VK, Reuter S, Holden MTG, Fookes M, Falush D, Keane JA, Parkhill J [Roary: rapid large-scale prokaryote pan genome analysis.](https://doi.org/10.1093/bioinformatics/btv421) _Bioinformatics_ 31, 3691-3693 (2015) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/roary) diff --git a/developers/subworkflows/sccmec.mdx b/developers/subworkflows/sccmec.mdx new file mode 100644 index 00000000..bae48a9a --- /dev/null +++ b/developers/subworkflows/sccmec.mdx @@ -0,0 +1,82 @@ +--- +title: sccmec +description: "Identify SCCmec elements in Staphylococcus aureus genomes." +tags: + - sccmec + - staphylococcus-aureus + - mrsa + - antimicrobial-resistance + - typing + - sample-scope +--- + +# sccmec + +**Tags:** sccmec staphylococcus-aureus mrsa antimicrobial-resistance typing sample-scope + +Identify SCCmec elements in Staphylococcus aureus genomes. + +This subworkflow uses [SCCmec](https://github.com/rpetit3/sccmec) to identify the +Staphylococcal Cassette Chromosome mec (SCCmec) element in *Staphylococcus aureus* +assemblies. It predicts the type based on the presence of specific *mec* and *ccr* +gene complexes, generating detailed BLAST results and typing information. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Main results file with SCCmec typing | +| `targets` | BLAST results for target sequences | +| `target_details` | Detailed results for target matches | +| `regions` | BLAST results for SCCmec regions | +| `regions_details` | Detailed results for SCCmec region matches | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [sccmec](/developers/modules/sccmec) - Identify SCCmec elements in Staphylococcus aureus genomes. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [sccmec](/bactopia-tools/sccmec) - Typing of SCCmec cassettes in Staphylococcus aureus assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [sccmec](https://github.com/rpetit3/sccmec) + Petit III RA, Read TD [sccmec: A tool for typing SCCmec cassettes in assemblies](https://github.com/rpetit3/sccmec) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/sccmec) diff --git a/developers/subworkflows/scoary.mdx b/developers/subworkflows/scoary.mdx new file mode 100644 index 00000000..640e2808 --- /dev/null +++ b/developers/subworkflows/scoary.mdx @@ -0,0 +1,82 @@ +--- +title: scoary +description: "Pan-genome wide association studies." 
+tags: + - gwas + - association + - pan-genome + - traits + - statistical + - run-scope +--- + +# scoary + +**Tags:** gwas association pan-genome traits statistical run-scope + +Pan-genome wide association studies. + +This subworkflow performs genome-wide association studies (GWAS) on pan-genome data +using [Scoary](https://github.com/AdmiralenOla/Scoary). The tool identifies genes +associated with binary traits such as pathogenicity, host specificity, or antibiotic +resistance. It calculates statistical associations between gene presence/absence +and phenotypic traits across multiple bacterial isolates. + +## Take + +``` +csv: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `csv` | Gene presence/absence matrix from pan-genome analysis in CSV format | + +``` +traits: Path? +``` + +| Name | Type | Description | +|------|------|-------------| +| `traits` | `Path?` | Trait file containing binary phenotypic characteristics for each isolate (optional) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. + +#### `run_outputs` + +No run-scope outputs. + +## Module Composition + +This subworkflow calls the following modules: + +- [scoary](/developers/modules/scoary) - Pan-genome wide association studies. + +## Used By + +This subworkflow is used by the following workflows: + +- [pangenome](/bactopia-tools/pangenome) - Pangenome analysis with optional core-genome phylogeny. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Scoary](https://github.com/AdmiralenOla/Scoary) + Brynildsrud O, Bohlin J, Scheffer L, Eldholm V [Rapid scoring of genes in microbial pan-genome-wide association studies with Scoary.](https://doi.org/10.1186/s13059-016-1108-8) _Genome Biol._ 17:238 (2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/scoary) diff --git a/developers/subworkflows/scrubber.mdx b/developers/subworkflows/scrubber.mdx new file mode 100644 index 00000000..53e6a491 --- /dev/null +++ b/developers/subworkflows/scrubber.mdx @@ -0,0 +1,139 @@ +--- +title: scrubber +description: "Remove contaminant sequences from metagenomic data." +tags: + - metagenomics + - decontamination + - human-removal + - read-filtering + - sample-scope +--- + +# scrubber + +**Tags:** metagenomics decontamination human-removal read-filtering sample-scope + +Remove contaminant sequences from metagenomic data. + +This subworkflow removes human and other contaminant sequences from metagenomic reads using either +the [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) or [nohuman](https://github.com/mbhall88/nohuman) +with the HPRC human database. It provides flexible contamination removal with detailed reporting +and aggregates results across multiple samples. + +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +``` +use_srascrubber: Boolean +nohuman_db: Path? 
+download_nohuman: Boolean +nohuman_save_as_tarball: Boolean +``` + +| Name | Type | Description | +|------|------|-------------| +| `use_srascrubber` | `Boolean` | Boolean flag to choose between SRA Human Scrubber (true) or nohuman (false) for decontamination. | +| `nohuman_db` | `Path?` | Path to nohuman database directory or tarball (used when use_srascrubber is false) | +| `download_nohuman` | `Boolean` | Boolean flag to download the nohuman database instead of using the provided path | +| `nohuman_save_as_tarball` | `Boolean` | Boolean flag to save downloaded nohuman database as tarball | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `special_meta` | Simplified metadata record for downstream report joining | +| `r1` | Scrubbed paired-end forward reads | +| `r2` | Scrubbed paired-end reverse reads | +| `se` | Scrubbed single-end reads | +| `lr` | Scrubbed long reads | +| `scrub_report` | Contamination removal statistics report | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated contamination reports across all samples | + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. 
+ +#### `scrubbed` + +| Output | Description | +|--------|-------------| +| `r1` | Scrubbed paired-end forward reads | +| `r2` | Scrubbed paired-end reverse reads | +| `se` | Scrubbed single-end reads | +| `lr` | Scrubbed long reads | + +#### `scrubbed_extra` + +| Output | Description | +|--------|-------------| +| `r1` | Scrubbed paired-end forward reads | +| `r2` | Scrubbed paired-end reverse reads | +| `se` | Scrubbed single-end reads | +| `lr` | Scrubbed long reads | +| `fna` | Assembly file (passed through) | + +#### `special_tsv` + +| Output | Description | +|--------|-------------| +| `special_meta` | Simplified metadata record for downstream report joining | +| `scrub_report` | Contamination removal statistics report | + +## Subworkflow Composition + +This subworkflow calls the following subworkflows: + +- [srahumanscrubber](/developers/subworkflows/srahumanscrubber) - Remove human contamination from sequencing reads for SRA submission. +- [nohuman](/developers/subworkflows/nohuman) - Remove human reads from sequencing data using nohuman. + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [cleanyerreads](/bactopia-pipelines/cleanyerreads) - Quality control and optional host read removal from raw sequencing reads. +- [scrubber](/bactopia-tools/scrubber) - Removal of human and contaminant sequences from metagenomic reads. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) + +- [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) + Katz KS, Shutov O, Lapoint R, Kimelman M, Brister JR, and O'Sullivan C [STAT: a fast, scalable, MinHash-based k-mer tool to assess Sequence Read Archive next-generation sequence submissions.](https://doi.org/10.1186/s13059-021-02490-0) _Genome Biology_, 22(1), 270 (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/scrubber) diff --git a/developers/subworkflows/seqsero2.mdx b/developers/subworkflows/seqsero2.mdx new file mode 100644 index 00000000..1088a89d --- /dev/null +++ b/developers/subworkflows/seqsero2.mdx @@ -0,0 +1,78 @@ +--- +title: seqsero2 +description: "Predict Salmonella serotypes from genome assemblies." +tags: + - salmonella + - serotype + - prediction + - foodborne + - enteric + - sample-scope +--- + +# seqsero2 + +**Tags:** salmonella serotype prediction foodborne enteric sample-scope + +Predict Salmonella serotypes from genome assemblies. + +This subworkflow uses [SeqSero2](https://github.com/denglab/SeqSero2) to predict +the serotypes of *Salmonella* strains from assembled genomes. It processes each +sample individually and aggregates the results into a single consolidated report. 
+ +## Take + +``` +seqs: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | SeqSero2 serotype prediction results in TSV format | +| `txt` | SeqSero2 serotype prediction results in text format | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [seqsero2](/developers/modules/seqsero2) - Salmonella serotype prediction from genome sequencing data. + +## Used By + +This subworkflow is used by the following workflows: + +- [seqsero2](/bactopia-tools/seqsero2) - Salmonella serotype prediction from sequencing reads or assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SeqSero2](https://github.com/denglab/SeqSero2) + Zhang S, Den-Bakker HC, Li S, Dinsmore BA, Lane C, Lauer AC, Fields PI, Deng X. 
[SeqSero2: rapid and improved Salmonella serotype determination using whole genome sequencing data.](https://doi.org/10.1128/AEM.01746-19) _Appl Environ Microbiology_ 85(23):e01746-19 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/seqsero2) diff --git a/developers/subworkflows/seroba.mdx b/developers/subworkflows/seroba.mdx new file mode 100644 index 00000000..ec2ea9d4 --- /dev/null +++ b/developers/subworkflows/seroba.mdx @@ -0,0 +1,82 @@ +--- +title: seroba +description: "k-mer based pipeline to identify the serotype of Streptococcus pneumoniae." +tags: + - streptococcus + - pneumoniae + - serotype + - k-mer + - capsular + - sample-scope +--- + +# seroba + +**Tags:** streptococcus pneumoniae serotype k-mer capsular sample-scope + +k-mer based pipeline to identify the serotype of Streptococcus pneumoniae. + +This subworkflow performs serotyping of Streptococcus pneumoniae from Illumina +next-generation sequencing reads using [Seroba](https://github.com/sanger-pathogens/seroba). +The tool uses a k-mer based approach to rapidly classify pneumococcal isolates into +their respective serotypes based on the capsular polysaccharide synthesis locus. + +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Serotype prediction results with predicted serotype and confidence in TSV format | +| `txt` | Detailed information about the predicted serogroup and allele matches | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [seroba_run](/developers/modules/seroba_run) - k-mer based Streptococcus pneumoniae serotyping. + +## Used By + +This subworkflow is used by the following workflows: + +- [seroba](/bactopia-tools/seroba) - Serotyping of Streptococcus pneumoniae from Illumina paired-end reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Seroba](https://github.com/sanger-pathogens/seroba) + Epping L, van Tonder AJ, Gladstone RA, The Global Pneumococcal Sequencing Consortium, Bentley SD, Page AJ, Keane JA [SeroBA: rapid high-throughput serotyping of Streptococcus pneumoniae from whole genome sequence data.](https://doi.org/10.1099/mgen.0.000186) _Microbial Genomics_, 4(7) (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/seroba) diff --git a/developers/subworkflows/shigapass.mdx b/developers/subworkflows/shigapass.mdx new file mode 100644 index 00000000..9cfe090d --- /dev/null +++ b/developers/subworkflows/shigapass.mdx @@ -0,0 +1,78 @@ +--- +title: shigapass +description: "Predict serotypes of Shigella from assemblies." 
+tags: + - shigella + - serotype + - typing + - prediction + - antigen-genes + - sample-scope +--- + +# shigapass + +**Tags:** shigella serotype typing prediction antigen-genes sample-scope + +Predict serotypes of Shigella from assemblies. + +This subworkflow uses [ShigaPass](https://github.com/imanyass/ShigaPass) to predict +serotypes of *Shigella* strains from assembled genomes. It analyzes the presence +and composition of antigen-encoding genes to classify isolates into their known serotypes. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | ShigaPass summary results in TSV format | +| `flex_tsv` | ShigaPass Flex summary results in TSV format | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [shigapass](/developers/modules/shigapass) - Predict Shigella serotypes and differentiate Shigella/EIEC. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [shigapass](/bactopia-tools/shigapass) - Prediction of Shigella serotypes and differentiation from EIEC. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [shigapass](https://github.com/imanyass/ShigaPass) + Yassine I, Hansen EE, Lefèvre S, Ruckly C, Carle I, Lejay-Collin M, Fabre L, Rafei R, Pardos de la Gandara M, Daboussi F, Shahin A, Weill FX [ShigaPass: an in silico tool predicting Shigella serotypes from whole-genome sequencing assemblies.](https://doi.org/10.1099/mgen.0.000961) _Microb Genomics_ 9(3) (2023) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/shigapass) diff --git a/developers/subworkflows/shigatyper.mdx b/developers/subworkflows/shigatyper.mdx new file mode 100644 index 00000000..2fbf7a1b --- /dev/null +++ b/developers/subworkflows/shigatyper.mdx @@ -0,0 +1,84 @@ +--- +title: shigatyper +description: "Predict serotypes of Shigella from reads or assemblies." +tags: + - shigella + - serotype + - typing + - prediction + - antigen-genes + - sample-scope +--- + +# shigatyper + +**Tags:** shigella serotype typing prediction antigen-genes sample-scope + +Predict serotypes of Shigella from reads or assemblies. + +This subworkflow uses [ShigaTyper](https://github.com/CFSAN-Biostatistics/shigatyper) to predict +serotypes of *Shigella* strains from either Illumina/Nanopore reads or assembled genomes. +It analyzes antigen-encoding genes to determine the serotype classification of each isolate. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path?
+ +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | ShigaTyper results in TSV format | +| `hits` | Detailed hits from ShigaTyper | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Merged serotype predictions from all samples | + +## Module Composition + +This subworkflow calls the following modules: + +- [shigatyper](/developers/modules/shigatyper) - Shigella serotype from Illumina or Oxford Nanopore reads. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [shigatyper](/bactopia-tools/shigatyper) - Rapid determination of Shigella serotypes from sequencing reads. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ShigaTyper](https://github.com/CFSAN-Biostatistics/shigatyper) + Wu Y, Lau HK, Lee T, Lau DK, Payne J [In Silico Serotyping Based on Whole-Genome Sequencing Improves the Accuracy of Shigella Identification.](https://doi.org/10.1128/AEM.00165-19) *Applied and Environmental Microbiology*, 85(7). 
(2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/shigatyper) diff --git a/developers/subworkflows/shigeifinder.mdx b/developers/subworkflows/shigeifinder.mdx new file mode 100644 index 00000000..0ddaa844 --- /dev/null +++ b/developers/subworkflows/shigeifinder.mdx @@ -0,0 +1,78 @@ +--- +title: shigeifinder +description: "Predict serotypes of Shigella and EIEC from assemblies." +tags: + - shigella + - eiec + - serotype + - typing + - cluster-analysis + - sample-scope +--- + +# shigeifinder + +**Tags:** shigella eiec serotype typing cluster-analysis sample-scope + +Predict serotypes of Shigella and EIEC from assemblies. + +This subworkflow uses [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) to predict +serotypes of *Shigella* and Enteroinvasive *E. coli* (EIEC) from assembled genomes. +It uses a cluster-informed approach to identify specific serotype markers and classify +isolates based on their antigenic profiles. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | ShigEiFinder results in TSV format | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [shigeifinder](/developers/modules/shigeifinder) - Shigella and EIEC serotyping from assemblies. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. 
+ +## Used By + +This subworkflow is used by the following workflows: + +- [shigeifinder](/bactopia-tools/shigeifinder) - In silico serotype prediction for Shigella and Enteroinvasive E. coli (EIEC). + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) + Zhang X, Payne M, Nguyen T, Kaur S, Lan R [Cluster-specific gene markers enhance Shigella and enteroinvasive Escherichia coli in silico serotyping.](https://doi.org/10.1099/mgen.0.000704) _Microbial Genomics_, 7(12). (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/shigeifinder) diff --git a/developers/subworkflows/sistr.mdx b/developers/subworkflows/sistr.mdx new file mode 100644 index 00000000..571e9e80 --- /dev/null +++ b/developers/subworkflows/sistr.mdx @@ -0,0 +1,82 @@ +--- +title: sistr +description: "Salmonella In Silico Typing Resource command-line tool." +tags: + - salmonella + - serotype + - mlst + - cgmlst + - typing + - sample-scope +--- + +# sistr + +**Tags:** salmonella serotype mlst cgmlst typing sample-scope + +Salmonella In Silico Typing Resource command-line tool. + +This subworkflow performs comprehensive typing of Salmonella genomes using +[SISTR](https://github.com/phac-nml/sistr_cmd), which predicts serotype, +determines subspecies, performs MLST typing, and calculates core genome +MLST distances. The tool provides a one-stop solution for Salmonella +classification and epidemiological typing.
+ +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembly files in FASTA format for Salmonella typing | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | SISTR prediction results in TSV format | +| `allele_fasta` | Novel alleles in FASTA format | +| `allele_json` | Alleles in JSON format | +| `cgmlst_csv` | cgMLST profile in CSV format | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [sistr](/developers/modules/sistr) - Serovar prediction of Salmonella assemblies. + +## Used By + +This subworkflow is used by the following workflows: + +- [sistr](/bactopia-tools/sistr) - Serovar prediction of Salmonella enterica from assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SISTR](https://github.com/phac-nml/sistr_cmd) + Yoshida CE, Kruczkiewicz P, Laing CR, Lingohr EJ, Gannon VPJ, Nash JHE, Taboada EN [The Salmonella In Silico Typing Resource (SISTR): An Open Web-Accessible Tool for Rapidly Typing and Subtyping Draft Salmonella Genome Assemblies.](https://doi.org/10.1371/journal.pone.0147101) _PloS One_, 11(1), e0147101. 
(2016) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/sistr) diff --git a/developers/subworkflows/snippy_core.mdx b/developers/subworkflows/snippy_core.mdx new file mode 100644 index 00000000..a89ab64e --- /dev/null +++ b/developers/subworkflows/snippy_core.mdx @@ -0,0 +1,109 @@ +--- +title: snippy_core +description: "Generate core-genome SNP alignment from per-sample Snippy outputs." +tags: + - variant-calling + - core-genome + - snp + - alignment + - phylogenetics + - run-scope +--- + +# snippy_core + +**Tags:** variant-calling core-genome snp alignment phylogenetics run-scope + +Generate core-genome SNP alignment from per-sample Snippy outputs. + +This subworkflow aggregates individual Snippy variant calls to produce a core-genome +alignment using [snippy-core](https://github.com/tseemann/snippy). It identifies core +SNPs present across all samples, generates a clean alignment suitable for phylogenetic +analysis, and calculates pairwise SNP distances using snp-dists. The output can be +used directly with tree-building tools like IQ-TREE, RAxML, or Gubbins. + +## Take + +``` +alignments: Channel +``` + +``` +reference: Path +mask: Path? +``` + +| Name | Type | Description | +|------|------|-------------| +| `alignments` | `Channel` | Channel containing per-sample aligned FASTA files and VCFs from Snippy runs | +| `reference` | `Path` | Reference genome in GenBank or FASTA format used for variant calling | +| `mask` | `Path?` | Optional BED file of regions to mask from the core alignment (e.g., recombinant regions, repeat regions) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. 
+ +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `aln` | Core SNP alignment in FASTA format (polymorphic sites only) | +| `full_aln` | Full core alignment including monomorphic sites | +| `clean_full_aln` | Cleaned full alignment with constant sites for phylogenetic inference | +| `tab` | Core SNPs in TAB format | +| `vcf` | Core SNPs in VCF format | +| `txt` | Core summary statistics (number of SNPs, core genome size) | +| `samples` | List of samples included in the core alignment | +| `supplemental` | Individual sample alignments and intermediate files | +| `tsv` | Pairwise SNP distance matrix from snp-dists | + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. + +#### `alignment` + +| Output | Description | +|--------|-------------| +| `aln` | Core-SNP alignment for downstream phylogenetic analysis | + +## Subworkflow Composition + +This subworkflow calls the following subworkflows: + +- [snpdists](/developers/subworkflows/snpdists) - Calculate pairwise SNP distances from sequence alignments. + +## Module Composition + +This subworkflow calls the following modules: + +- [snippy_core](/developers/modules/snippy_core) - Core-SNP alignment from Snippy outputs. + +## Used By + +This subworkflow is used by the following workflows: + +- [snippy](/bactopia-tools/snippy) - Rapid haploid variant calling and core genome alignment. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Snippy](https://github.com/tseemann/snippy) + Seemann T [Snippy: fast bacterial variant calling from NGS reads](https://github.com/tseemann/snippy) (GitHub) + +- [snp-dists](https://github.com/tseemann/snp-dists) + Seemann T [snp-dists - Pairwise SNP distance matrix from a FASTA sequence alignment.](https://github.com/tseemann/snp-dists) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/snippy/core) diff --git a/developers/subworkflows/snippy_run.mdx b/developers/subworkflows/snippy_run.mdx new file mode 100644 index 00000000..f02c27e5 --- /dev/null +++ b/developers/subworkflows/snippy_run.mdx @@ -0,0 +1,118 @@ +--- +title: snippy_run +description: "Call variants against a reference genome using Snippy." +tags: + - variant-calling + - snp + - reference-mapping + - phylogenetics + - outbreak + - sample-scope +--- + +# snippy_run + +**Tags:** variant-calling snp reference-mapping phylogenetics outbreak sample-scope + +Call variants against a reference genome using Snippy. + +This subworkflow performs rapid haploid variant calling from bacterial sequence reads +using [Snippy](https://github.com/tseemann/snippy). It maps reads to a reference genome, +identifies SNPs and indels, and generates consensus sequences. The tool produces multiple +output formats including VCF, aligned FASTA, and annotated variants for downstream +phylogenetic analysis with snippy-core. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? 
+ +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +``` +reference: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `reference` | `Path` | Reference genome in GenBank format (preferred, for annotation) or FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `aligned_fa` | A version of the reference with - at zero coverage positions | +| `vcf` | The final annotated variants in VCF format | +| `aligned_fa_error` | Aligned FASTA file generated during error state | +| `vcf_error` | VCF file generated during error state | +| `error` | Error log text file | +| `annotated_vcf` | Annotated VCF file | +| `bam` | The alignments in BAM format (includes unmapped/multimapping) | +| `bai` | Index for the BAM file | +| `bed` | The variants in BED format | +| `consensus_fa` | Reference genome with all variants instantiated | +| `consensus_subs_fa` | Reference genome with only substitution variants instantiated | +| `consensus_subs_masked_fa` | Reference genome with substitutions instantiated and low coverage masked | +| `coverage` | Per-base coverage depth information | +| `csv` | A comma-separated summary of variants | +| `filt_vcf` | The filtered variant calls from Freebayes | +| `gff` | The variants in GFF3 format | +| `html` | A HTML summary of the variants | +| `raw_vcf` | The unfiltered variant calls from Freebayes | +| `subs_vcf` | VCF containing only substitution variants | +| `tab` | A simple tab-separated summary of all variants | +| `txt` | Tab-separated columnar list of alignment 
statistics | + +#### `run_outputs` + +No run-scope outputs. + +### Downstream Inputs + +The following emissions are meant to be used as inputs to downstream subworkflows. + +#### `variants` + +Per-sample VCFs and aligned FAs filtered to only samples with variant data + +## Module Composition + +This subworkflow calls the following modules: + +- [snippy_run](/developers/modules/snippy_run) - Rapid haploid variant calling and core genome alignment. + +## Used By + +This subworkflow is used by the following workflows: + +- [snippy](/bactopia-tools/snippy) - Rapid haploid variant calling and core genome alignment. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Snippy](https://github.com/tseemann/snippy) + Seemann T [Snippy: fast bacterial variant calling from NGS reads](https://github.com/tseemann/snippy) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/snippy/run) diff --git a/developers/subworkflows/snpdists.mdx b/developers/subworkflows/snpdists.mdx new file mode 100644 index 00000000..f7dd4af8 --- /dev/null +++ b/developers/subworkflows/snpdists.mdx @@ -0,0 +1,70 @@ +--- +title: snpdists +description: "Calculate pairwise SNP distances from sequence alignments." +tags: + - snp + - distance + - alignment + - phylogeny + - core-genome + - run-scope +--- + +# snpdists + +**Tags:** snp distance alignment phylogeny core-genome run-scope + +Calculate pairwise SNP distances from sequence alignments. + +This subworkflow uses [snp-dists](https://github.com/tseemann/snp-dists) to compute +pairwise SNP distance matrices from multiple sequence alignments. 
It reads an +alignment file (typically a core-genome alignment) and calculates the number +of SNP differences between each pair of sequences, producing a distance matrix +useful for phylogenetic and epidemiological analyses. + +## Take + +``` +alignment: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `aln` | Multiple sequence alignment in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Pairwise SNP distance matrix in TSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [snpdists](/developers/modules/snpdists) - Create a SNP distance matrix from a multiple sequence alignment. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [snp-dists](https://github.com/tseemann/snp-dists) + Seemann T [snp-dists - Pairwise SNP distance matrix from a FASTA sequence alignment.](https://github.com/tseemann/snp-dists) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/snpdists) diff --git a/developers/subworkflows/spatyper.mdx b/developers/subworkflows/spatyper.mdx new file mode 100644 index 00000000..d866e7c3 --- /dev/null +++ b/developers/subworkflows/spatyper.mdx @@ -0,0 +1,87 @@ +--- +title: spatyper +description: "Predict spa types of Staphylococcus aureus from genome assemblies." 
+tags: + - staphylococcus-aureus + - spa-typing + - protein-a + - mrsa + - sample-scope +--- + +# spatyper + +**Tags:** staphylococcus-aureus spa-typing protein-a mrsa sample-scope + +Predict spa types of Staphylococcus aureus from genome assemblies. + +This subworkflow uses [spaTyper](https://github.com/HCGB-IGTP/spaTyper) to predict +the spa types of *Staphylococcus aureus* strains from assembled genomes based on +the polymorphic X region of the protein A gene (spa). It processes each sample +individually and aggregates the results into a single consolidated report. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +``` +repeats: Path? +repeat_order: Path? +``` + +| Name | Type | Description | +|------|------|-------------| +| `repeats` | `Path?` | Optional custom repeats database for spa typing | +| `repeat_order` | `Path?` | Optional custom repeat order file for spa typing | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | spa typing results in TSV format | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. +- [spatyper](/developers/modules/spatyper) - Finding spa types in Staphylococcus aureus. + +## Used By + +This subworkflow is used by the following workflows: + +- [spatyper](/bactopia-tools/spatyper) - spa typing of Staphylococcus aureus assemblies. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [spaTyper](https://github.com/HCGB-IGTP/spaTyper) + Sanchez-Herrero JF, and Sullivan M [spaTyper: Staphylococcal protein A (spa) characterization pipeline](http://doi.org/10.5281/zenodo.4063625). Zenodo. (2020) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/spatyper) diff --git a/developers/subworkflows/srahumanscrubber.mdx b/developers/subworkflows/srahumanscrubber.mdx new file mode 100644 index 00000000..a30983e9 --- /dev/null +++ b/developers/subworkflows/srahumanscrubber.mdx @@ -0,0 +1,79 @@ +--- +title: srahumanscrubber +description: "Remove human contamination from sequencing reads for SRA submission." +tags: + - contamination + - human + - scrub + - sra + - sequencing + - fastq + - sample-scope +--- + +# srahumanscrubber + +**Tags:** contamination human scrub sra sequencing fastq sample-scope + +Remove human contamination from sequencing reads for SRA submission. + +This subworkflow uses the [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) to identify +and remove human reads from sequencing data. It first initializes a human reference database +and then scrubs the input reads to ensure they meet SRA submission requirements. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? 
+ +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `special_meta` | Groovy Record with name for downstream aggregation | +| `scrubbed` | Scrubbed FASTQ files with human reads removed | +| `scrubbed_extra` | Placeholder files for pipeline compatibility | +| `scrub_report` | Report of scrubbing statistics | + +#### `run_outputs` + +No run-scope outputs. + +## Module Composition + +This subworkflow calls the following modules: + +- [srahumanscrubber_initdb](/developers/modules/srahumanscrubber_initdb) - Initialize human read removal database for SRA Human Scrubber. +- [srahumanscrubber_scrub](/developers/modules/srahumanscrubber_scrub) - Scrub human reads from FASTQ files. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) + Katz KS, Shutov O, Lapoint R, Kimelman M, Brister JR, and O'Sullivan C [STAT: a fast, scalable, MinHash-based k-mer tool to assess Sequence Read Archive next-generation sequence submissions.](https://doi.org/10.1186/s13059-021-02490-0) _Genome Biology_, 22(1), 270 (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/srahumanscrubber) diff --git a/developers/subworkflows/ssuissero.mdx b/developers/subworkflows/ssuissero.mdx new file mode 100644 index 00000000..6c674d2d --- /dev/null +++ b/developers/subworkflows/ssuissero.mdx @@ -0,0 +1,78 @@ +--- +title: ssuissero +description: "Predict serotypes of Streptococcus suis from genome assemblies." +tags: + - streptococcus-suis + - serotype + - typing + - prediction + - capsular-genes + - sample-scope +--- + +# ssuissero + +**Tags:** streptococcus-suis serotype typing prediction capsular-genes sample-scope + +Predict serotypes of Streptococcus suis from genome assemblies. + +This subworkflow uses [SsuisSero](https://github.com/jimmyliu1326/SsuisSero) to predict +serotypes of *Streptococcus suis* strains from genome assemblies based on the presence +of specific capsular genes. It processes each sample individually and aggregates the +results into a single consolidated report. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | SsuisSero results in TSV format | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [ssuissero](/developers/modules/ssuissero) - Serotype prediction of Streptococcus suis assemblies. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [ssuissero](/bactopia-tools/ssuissero) - Serotype prediction of Streptococcus suis assemblies. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [SsuisSero](https://github.com/jimmyliu1326/SsuisSero) + Liu J [SsuisSero: Rapid _Streptococcus suis_ serotyping](https://github.com/jimmyliu1326/SsuisSero) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/ssuissero) diff --git a/developers/subworkflows/staphopiasccmec.mdx b/developers/subworkflows/staphopiasccmec.mdx new file mode 100644 index 00000000..1a3f37db --- /dev/null +++ b/developers/subworkflows/staphopiasccmec.mdx @@ -0,0 +1,73 @@ +--- +title: staphopiasccmec +description: "Identify SCCmec elements in Staphylococcus aureus genomes using Staphopia method." +tags: + - sccmec + - staphylococcus-aureus + - mrsa + - antimicrobial-resistance + - typing + - sample-scope +--- + +# staphopiasccmec + +**Tags:** sccmec staphylococcus-aureus mrsa antimicrobial-resistance typing sample-scope + +Identify SCCmec elements in Staphylococcus aureus genomes using Staphopia method. 
+ +This subworkflow uses [staphopia-sccmec](https://github.com/staphopia/staphopia-sccmec) to +identify Staphylococcal Cassette Chromosome mec (SCCmec) elements in *Staphylococcus aureus* +assemblies. This is the standalone version of the SCCmec typing method developed for +the Staphopia project, which predicts SCCmec types based on the presence of specific +*mec* and *ccr* gene complexes. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | TSV file with SCCmec typing results | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [staphopiasccmec](/developers/modules/staphopiasccmec) - Primer based SCCmec typing of S. aureus genomes. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [staphopia-sccmec](https://github.com/staphopia/staphopia-sccmec) + Petit III RA, Read TD [_Staphylococcus aureus_ viewed from the perspective of 40,000+ genomes.](http://dx.doi.org/10.7717/peerj.5261) _PeerJ_ 6, e5261 (2018) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/staphopiasccmec) diff --git a/developers/subworkflows/staphtyper.mdx b/developers/subworkflows/staphtyper.mdx new file mode 100644 index 00000000..b41ff862 --- /dev/null +++ b/developers/subworkflows/staphtyper.mdx @@ -0,0 +1,95 @@ +--- +title: staphtyper +description: "Determine the agr, spa and SCCmec types for _Staphylococcus aureus_ genomes." +tags: + - staphylococcus-aureus + - agr-typing + - spa-typing + - sccmec + - strain-characterization + - run-scope +--- + +# staphtyper + +**Tags:** staphylococcus-aureus agr-typing spa-typing sccmec strain-characterization run-scope + +Determine the agr, spa and SCCmec types for _Staphylococcus aureus_ genomes. + +This subworkflow performs comprehensive typing of *Staphylococcus aureus* genomes by +determining the agr locus type using [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE), +spa repeat type using [spaTyper](https://github.com/HCGB-IGTP/spaTyper), and SCCmec element +type using SCCmec typing. It combines results from multiple typing methods to provide +a complete characterization of *S. aureus* strains. + +## Take + +``` +assembly: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `assembly` | Assembled contigs in FASTA format | + +``` +repeats: Path? +repeat_order: Path? 
+``` + +| Name | Type | Description | +|------|------|-------------| +| `repeats` | `Path?` | Optional spa repeats database for improved spa typing | +| `repeat_order` | `Path?` | Optional spa repeat order file for improved spa typing | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Subworkflow Composition + +This subworkflow calls the following subworkflows: + +- [agrvate](/developers/subworkflows/agrvate) - Identify Staphylococcus aureus agr locus type and operon variants. +- [spatyper](/developers/subworkflows/spatyper) - Predict spa types of Staphylococcus aureus from genome assemblies. +- [sccmec](/developers/subworkflows/sccmec) - Identify SCCmec elements in Staphylococcus aureus genomes. + +## Used By + +This subworkflow is used by the following workflows: + +- [staphopia](/bactopia-pipelines/staphopia) - Comprehensive analysis pipeline for Staphylococcus aureus isolates. +- [staphtyper](/bactopia-tools/staphtyper) - Comprehensive typing of Staphylococcus aureus genomes. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) + Raghuram V. [AgrVATE: Rapid identification of Staphylococcus aureus agr locus type and agr operon variants.](https://github.com/VishnuRaghuram94/AgrVATE) (GitHub) + +- [spaTyper](https://github.com/HCGB-IGTP/spaTyper) + Sanchez-Herrero JF, and Sullivan M [spaTyper: Staphylococcal protein A (spa) characterization pipeline](http://doi.org/10.5281/zenodo.4063625). 
Zenodo. (2020) + +- [sccmec](https://github.com/rpetit3/sccmec) + Petit III RA, Read TD [sccmec: A tool for typing SCCmec cassettes in assemblies](https://github.com/rpetit3/sccmec) (GitHub) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/staphtyper) diff --git a/developers/subworkflows/stecfinder.mdx b/developers/subworkflows/stecfinder.mdx new file mode 100644 index 00000000..7722efac --- /dev/null +++ b/developers/subworkflows/stecfinder.mdx @@ -0,0 +1,82 @@ +--- +title: stecfinder +description: "Identify and serotype Shiga toxin-producing E. coli (STEC) from assemblies." +tags: + - escherichia-coli + - stec + - serotype + - virulence-genes + - shiga-toxin + - sample-scope +--- + +# stecfinder + +**Tags:** escherichia-coli stec serotype virulence-genes shiga-toxin sample-scope + +Identify and serotype Shiga toxin-producing E. coli (STEC) from assemblies. + +This subworkflow uses [STECFinder](https://github.com/LanLab/STECFinder) to identify +and serotype Shiga toxin-producing *E. coli* (STEC) strains using genomic cluster-specific +markers. It screens assemblies for virulence genes and serotype markers to classify +STEC isolates into their known serotypes. + +## Take + +``` +seqs: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `fna` | Assembled contigs in FASTA format | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | TSV file with STEC gene markers results | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Merged STEC results from all samples | + +## Module Composition + +This subworkflow calls the following modules: + +- [stecfinder](/developers/modules/stecfinder) - Serotype of Shiga toxin-producing E. coli using reads/assemblies. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [stecfinder](/bactopia-tools/stecfinder) - Serotype identification of Shiga toxin-producing E. coli. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [STECFinder](https://github.com/LanLab/STECFinder) + Zhang X, Payne M, Kaur S, and Lan R [Improved Genomic Identification, Clustering, and Serotyping of Shiga Toxin-Producing Escherichia coli Using Cluster/Serotype-Specific Gene Markers.](https://doi.org/10.3389/fcimb.2021.772574) _Frontiers in Cellular and Infection Microbiology_, 11, 772574. (2021) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/stecfinder) diff --git a/developers/subworkflows/sylph.mdx b/developers/subworkflows/sylph.mdx new file mode 100644 index 00000000..36ec21c4 --- /dev/null +++ b/developers/subworkflows/sylph.mdx @@ -0,0 +1,94 @@ +--- +title: sylph +description: "Profile microbial composition using Sylph." 
+tags: + - metagenome + - profiling + - composition + - abundance + - kmer + - taxonomic + - sample-scope +--- + +# sylph + +**Tags:** metagenome profiling composition abundance kmer taxonomic sample-scope + +Profile microbial composition using Sylph. + +This subworkflow estimates microbial composition directly from sequencing reads using +[Sylph](https://github.com/bluenote-1/sylph). It provides rapid and accurate abundance +estimates by comparing k-mer signatures against a reference genome database. Sylph can +process both short and long reads, offering taxonomic profiling from species to strain level +with confidence estimates for each identification. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? + +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +``` +database: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `database` | `Path` | Path to Sylph reference database directory containing pre-computed k-mer signatures of reference genomes for taxonomic classification. | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | TSV file with profiling results | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated profiling results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [sylph_profile](/developers/modules/sylph_profile) - Profile metagenome samples against a database using Sylph. 
+- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [sylph](/bactopia-tools/sylph) - Taxonomic profiling by abundance-corrected MinHash. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Sylph](https://github.com/bluenote-1/sylph) + Shaw J, and Yu YW [Rapid species-level metagenome profiling and containment estimation with sylph.](https://doi.org/10.1038/s41587-024-02412-y) _Nature Biotechnology_ (2024) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/sylph) diff --git a/developers/subworkflows/tblastn.mdx b/developers/subworkflows/tblastn.mdx new file mode 100644 index 00000000..d9cc30ed --- /dev/null +++ b/developers/subworkflows/tblastn.mdx @@ -0,0 +1,86 @@ +--- +title: tblastn +description: "Search protein query sequences against nucleotide database." +tags: + - blast + - protein + - nucleotide + - alignment + - database + - sample-scope +--- + +# tblastn + +**Tags:** blast protein nucleotide alignment database sample-scope + +Search protein query sequences against nucleotide database. + +This subworkflow uses [TBLASTN](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch&PROGRAM=tblastn) +from the NCBI BLAST+ suite to search protein query sequences against a nucleotide database +translated in all six reading frames. It processes each assembly individually +and aggregates the results into a single consolidated report. 
+ +## Take + +``` +blastdb: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `blastdb` | A compressed tarball containing the BLAST database | + +``` +query: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `query` | `Path` | Path to protein query sequences for searching against translated nucleotide database | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited protein-to-translated nucleotide alignment results (BLAST outfmt 6) | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [blast_tblastn](/developers/modules/blast_tblastn) - Search a translated nucleotide database using a protein query. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [tblastn](/bactopia-tools/tblastn) - Search against translated nucleotide databases using protein queries. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). 
_BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/tblastn) diff --git a/developers/subworkflows/tblastx.mdx b/developers/subworkflows/tblastx.mdx new file mode 100644 index 00000000..9b58d41a --- /dev/null +++ b/developers/subworkflows/tblastx.mdx @@ -0,0 +1,86 @@ +--- +title: tblastx +description: "Translate nucleotide query sequences and search nucleotide database." +tags: + - blast + - nucleotide + - translation + - alignment + - database + - sample-scope +--- + +# tblastx + +**Tags:** blast nucleotide translation alignment database sample-scope + +Translate nucleotide query sequences and search nucleotide database. + +This subworkflow uses [TBLASTX](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch&PROGRAM=tblastx) +from the NCBI BLAST+ suite to translate nucleotide query sequences in all six reading frames +and search them against a nucleotide database also translated in all six reading frames. +It processes each assembly individually and aggregates the results into a single consolidated report. + +## Take + +``` +blastdb: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `blastdb` | A compressed tarball containing the BLAST database | + +``` +query: Path +``` + +| Name | Type | Description | +|------|------|-------------| +| `query` | `Path` | Path to nucleotide query sequences that will be translated and searched against translated database | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `tsv` | Tab-delimited translated nucleotide-to-translated nucleotide alignment results (BLAST outfmt 6) | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Aggregated results in CSV format | + +## Module Composition + +This subworkflow calls the following modules: + +- [blast_tblastx](/developers/modules/blast_tblastx) - Search a translated nucleotide database using a translated nucleotide query. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [tblastx](/bactopia-tools/tblastx) - Search against translated nucleotide databases using translated nucleotide queries. + +## Citations + +If you use this in your analysis, please cite the following. + +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) + Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/tblastx) diff --git a/developers/subworkflows/tbprofiler.mdx b/developers/subworkflows/tbprofiler.mdx new file mode 100644 index 00000000..0fc7d4e5 --- /dev/null +++ b/developers/subworkflows/tbprofiler.mdx @@ -0,0 +1,92 @@ +--- +title: tbprofiler +description: "Profiling tool for Mycobacterium tuberculosis to detect resistance and strain type." 
+tags: + - mycobacterium + - tuberculosis + - drug-resistance + - lineage + - variants + - sample-scope +--- + +# tbprofiler + +**Tags:** mycobacterium tuberculosis drug-resistance lineage variants sample-scope + +Profiling tool for Mycobacterium tuberculosis to detect resistance and strain type. + +This subworkflow performs comprehensive profiling of Mycobacterium tuberculosis +from sequencing reads using [TBProfiler](https://github.com/jodyphelan/TBProfiler). +The tool detects drug resistance mutations, determines lineage and strain type, +and provides detailed variant calling results. It combines individual sample +results with population-level analysis for surveillance and epidemiological studies. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? + +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. 
+ +#### `sample_outputs` + +| Output | Description | +|--------|-------------| +| `bam` | Aligned BAM file | +| `csv` | Results in CSV format | +| `json` | Compressed JSON results file | +| `txt` | Results in text format | +| `vcf` | Compressed VCF file with variants | + +#### `run_outputs` + +| Output | Description | +|--------|-------------| +| `csv` | Main collated results in CSV format | +| `variants_csv` | Collated variants in CSV format | +| `variants_txt` | Collated variants in text format | +| `itol` | iTOL formatted files for visualization | + +## Module Composition + +This subworkflow calls the following modules: + +- [tbprofiler_profile](/developers/modules/tbprofiler_profile) - Detect resistance and lineages of Mycobacterium tuberculosis genomes. +- [tbprofiler_collate](/developers/modules/tbprofiler_collate) - Collate TB-Profiler results from multiple samples. + +## Used By + +This subworkflow is used by the following workflows: + +- [tbprofiler](/bactopia-tools/tbprofiler) - Detection of antimicrobial resistance and lineage typing of Mycobacterium tuberculosis. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [TBProfiler](https://github.com/jodyphelan/TBProfiler) + Phelan JE, O'Sullivan DM, Machado D, Ramos J, Oppong YEA, Campino S, O'Grady J, McNerney R, Hibberd ML, Viveiros M, Huggett JF, Clark TG [Integrating informatics tools and portable sequencing technology for rapid detection of resistance to anti-tuberculous drugs.](https://doi.org/10.1186/s13073-019-0650-x) _Genome Med_ 11, 41 (2019) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/tbprofiler) diff --git a/developers/subworkflows/teton.mdx b/developers/subworkflows/teton.mdx new file mode 100644 index 00000000..3ca07de5 --- /dev/null +++ b/developers/subworkflows/teton.mdx @@ -0,0 +1,106 @@ +--- +title: teton +description: "Perform taxonomic classification and estimate bacterial genome sizes." +tags: + - metagenomics + - taxonomy + - classification + - kraken + - bracken + - genome-size + - run-scope +--- + +# teton + +**Tags:** metagenomics taxonomy classification kraken bracken genome-size run-scope + +Perform taxonomic classification and estimate bacterial genome sizes. + +This subworkflow processes raw sequencing reads through a taxonomic classification +pipeline using [Kraken2](https://github.com/DerrickWood/kraken2) and [Bracken](https://github.com/jenniferlu717/Bracken) +to estimate bacterial genome sizes and separate bacterial from non-bacterial organisms. +It first removes host reads using the scrubber subworkflow, then classifies reads, +and finally creates sample sheets with genome size estimates for downstream Bactopia analysis. + +Uses explicit positional record fields for reads: +- Input: record(meta, r1, r2, se, lr) where each read slot is Path? 
+ +## Take + +``` +reads: Channel +``` + +| Field | Description | +|-------|-------------| +| `meta` | Groovy Record containing sample information | +| `r1` | Illumina R1 reads (paired-end) | +| `r2` | Illumina R2 reads (paired-end) | +| `se` | Single-end Illumina reads | +| `lr` | Long reads (ONT/PacBio) | + +``` +db: Path? +use_srascrubber: Boolean +nohuman_db: Path? +download_nohuman: Boolean +nohuman_save_as_tarball: Boolean +``` + +| Name | Type | Description | +|------|------|-------------| +| `db` | `Path?` | Optional Kraken2 database path for taxonomic classification | +| `use_srascrubber` | `Boolean` | Boolean flag to use SRA scrubber for host read removal | + +## Emit + +### Published + +The `sample_outputs` and `run_outputs` emissions are aggregates of output files that will be published in the entry workflow. + +#### `sample_outputs` + +No sample-scope outputs. + +#### `run_outputs` + +No run-scope outputs. + +## Subworkflow Composition + +This subworkflow calls the following subworkflows: + +- [scrubber](/developers/subworkflows/scrubber) - Remove contaminant sequences from metagenomic data. +- [bracken](/developers/subworkflows/bracken) - Estimate species abundance from metagenomic reads. + +## Module Composition + +This subworkflow calls the following modules: + +- [bactopia_teton](/developers/modules/bactopia_teton) - Predict genome size and route samples based on taxonomic classification. +- [csvtk_join](/developers/modules/csvtk_join) - Join two CSV or TSV files based on common fields. +- [csvtk_concat](/developers/modules/csvtk_concat) - Concatenate multiple CSV or TSV files into a single table. + +## Used By + +This subworkflow is used by the following workflows: + +- [teton](/bactopia-pipelines/teton) - Taxonomic classification and abundance profiling of metagenomic reads. + +## Citations + +If you use this in your analysis, please cite the following. 
+ +- [Bactopia](https://bactopia.github.io/) + Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) + +- [Kraken2](https://github.com/DerrickWood/kraken2) + Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) + +- [Bracken](https://github.com/jenniferlu717/Bracken) + Lu J, Breitwieser FP, Thielen P, and Salzberg SL [Bracken: estimating species abundance in metagenomics data.](https://doi.org/10.7717/peerj-cs.104) _PeerJ Computer Science_, 3, e104. (2017) + +## Source + +[View source on GitHub](https://github.com/bactopia/bactopia/tree/main/subworkflows/teton) diff --git a/docs/assets/bactopia-logo.png b/docs/assets/bactopia-logo.png deleted file mode 100644 index eeae7c9b..00000000 Binary files a/docs/assets/bactopia-logo.png and /dev/null differ diff --git a/docs/assets/bactopia-overview.png b/docs/assets/bactopia-overview.png deleted file mode 100644 index 16ad6f98..00000000 Binary files a/docs/assets/bactopia-overview.png and /dev/null differ diff --git a/docs/assets/bactopia-small-logo.png b/docs/assets/bactopia-small-logo.png deleted file mode 100644 index 25540c2e..00000000 Binary files a/docs/assets/bactopia-small-logo.png and /dev/null differ diff --git a/docs/assets/cape-banner.png b/docs/assets/cape-banner.png deleted file mode 100644 index 3747fec5..00000000 Binary files a/docs/assets/cape-banner.png and /dev/null differ diff --git a/docs/assets/favicon.ico b/docs/assets/favicon.ico deleted file mode 100644 index 93e75548..00000000 Binary files a/docs/assets/favicon.ico and /dev/null differ diff --git a/docs/assets/gaeip-banner.png b/docs/assets/gaeip-banner.png deleted file mode 100644 index da092a15..00000000 Binary files a/docs/assets/gaeip-banner.png and /dev/null differ diff --git a/docs/assets/wyphd-banner.jpg b/docs/assets/wyphd-banner.jpg 
deleted file mode 100644 index 7aac1c48..00000000 Binary files a/docs/assets/wyphd-banner.jpg and /dev/null differ diff --git a/docs/bactopia-tools/abricate.md b/docs/bactopia-tools/abricate.md deleted file mode 100644 index d7cd76d4..00000000 --- a/docs/bactopia-tools/abricate.md +++ /dev/null @@ -1,256 +0,0 @@ ---- -title: abricate -description: A Bactopia Tool which uses Abricate to screen assemblies for antimicrobial resistance and virulence genes ---- -# Bactopia Tool - `abricate` -The `abricate` module uses [Abricate](https://github.com/tseemann/abricate) to screen assemblies -for antimicrobial resistance and virulence genes. - - -## Example Usage -``` -bactopia --wf abricate \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `abricate` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── abricate -│ ├── .txt -│ └── logs -│ ├── nf-abricate.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── abricate- - ├── merged-results - │ ├── abricate.tsv - │ └── logs - │ └── abricate-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── abricate-dag.dot - ├── abricate-report.html - ├── abricate-timeline.html - └── abricate-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| abricate.tsv | A merged TSV file with `Abricate` results from all samples | - - -#### Abricate - -Below is a description of the _per-sample_ results from [Abricate](https://github.com/tseemann/abricate). 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.txt | A tab-delimited report of hits, for full details please see [Abricate - Output](https://github.com/tseemann/abricate#output) | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| abricate-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| abricate-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| abricate-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| abricate-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### Abricate Parameters - - -| Parameter | Description | -|:---|---| -| ` --abricate_db` | Database to use
**Type:** `string`, **Default:** `ncbi` | -| ` --minid` | Minimum DNA percent identity
**Type:** `integer`, **Default:** `80` | -| ` --mincov` | Minimum DNA percent coverage
**Type:** `integer`, **Default:** `80` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularityu. (e.g. Singularity: '-D `pwd`'
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `abricate` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Abricate](https://github.com/tseemann/abricate) - Seemann T [Abricate: mass screening of contigs for antimicrobial and virulence genes](https://github.com/tseemann/abricate) (GitHub) - -- [ARG-ANNOT](http://en.mediterranee-infection.com/article.php?laref=283%26titre=arg-annot) - Gupta SK, Padmanabhan BR, Diene SM, Lopez-Rojas R, Kempf M, Landraud L, Rolain J-M [ARG-ANNOT, a new bioinformatic tool to discover antibiotic resistance genes in bacterial genomes.](https://doi.org/10.1128/aac.01310-13) _Antimicrob. Agents Chemother_ 58, 212–220 (2014) - -- [CARD](https://card.mcmaster.ca/) - Alcock BP, Raphenya AR, Lau TTY, Tsang KK, Bouchard M, Edalatmand A, Huynh W, Nguyen A-L V, Cheng AA, Liu S, Min SY, Miroshnichenko A, Tran H-K, Werfalli RE, Nasir JA, Oloni M, Speicher DJ, Florescu A, Singh B, Faltyn M, Hernandez-Koutoucheva A, Sharma AN, Bordeleau E, Pawlowski AC, Zubyk HL, Dooley D, Griffiths E, Maguire F, Winsor GL, Beiko RG, Brinkman FSL, Hsiao WWL, Domselaar GV, McArthur AG [CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database.](https://doi.org/10.1093/nar/gkz935) _Nucleic acids research_ 48.D1, D517-D525 (2020) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [EcOH](https://dx.doi.org/10.1099%2Fmgen.0.000064) - Ingle DJ, Valcanis M, Kuzevski A, Tauschek M, Inouye M, Stinear T, Levine MM, Robins-Browne RM, Holt KE [In silico serotyping of E. 
coli from short read data identifies limited novel O-loci but extensive diversity of O:H serotype combinations within and between pathogenic lineages.](https://doi.org/10.1099/mgen.0.000064) _Microbial Genomics_, 2(7), e000064. (2016) - -- [MEGARes 2.0](https://megares.meglab.org/) - Doster E, Lakin SM, Dean CJ, Wolfe C, Young JG, Boucher C, Belk KE, Noyes NR, Morley PS [MEGARes 2.0: a database for classification of antimicrobial drug, biocide and metal resistance determinants in metagenomic sequence data.](https://doi.org/10.1093/nar/gkz1010) _Nucleic Acids Research_, 48(D1), D561–D569. (2020) - -- [NCBI Reference Gene Catalog](https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA313047) - Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. Agents Chemother._ (2019) - -- [PlasmidFinder](https://bitbucket.org/genomicepidemiology/plasmidfinder) - Carattoli A, Zankari E, García-Fernández A, Voldby Larsen M, Lund O, Villa L, Møller Aarestrup F, Hasman H [In silico detection and typing of plasmids using PlasmidFinder and plasmid multilocus sequence typing.](https://doi.org/10.1128/AAC.02412-14) _Antimicrobial Agents and Chemotherapy_ 58(7), 3895–3903. (2014) - -- [ResFinder](https://cge.cbs.dtu.dk//services/ResFinder/) - Zankari E, Hasman H, Cosentino S, Vestergaard M, Rasmussen S, Lund O, Aarestrup FM, Larsen MV [Identification of acquired antimicrobial resistance genes.](https://doi.org/10.1093/jac/dks261) _J. Antimicrob. 
Chemother._ 67, 2640–2644 (2012) - -- [VFDB](http://www.mgc.ac.cn/VFs/) - Chen L, Zheng D, Liu B, Yang J, Jin Q [VFDB 2016: hierarchical and refined dataset for big data analysis--10 years on.](https://doi.org/10.1093/nar/gkv1239) _Nucleic Acids Res._ 44, D694–7 (2016) - diff --git a/docs/bactopia-tools/abritamr.md b/docs/bactopia-tools/abritamr.md deleted file mode 100644 index c36780b2..00000000 --- a/docs/bactopia-tools/abritamr.md +++ /dev/null @@ -1,246 +0,0 @@ ---- -title: abritamr -description: A Bactopia Tool which uses abriTAMR to screen assemblies for antimicrobial resistance and virulence genes. - ---- -# Bactopia Tool - `abritamr` -The `abritamr` module uses [abriTAMR](https://github.com/MDU-PHL/abritamr) for the -detection of antimicrobial resistance and virulence genes. It makes use of -[AMRFinderPlus](https://github.com/ncbi/amr) and its accredited by NATA for use in -reporting presence of reportable AMR genes in Victoria Australia. - - -## Example Usage -``` -bactopia --wf abritamr \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `abritamr` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── abritamr -│ ├── .abritamr.txt -│ ├── .amrfinder.out -│ ├── .summary_matches.txt -│ ├── .summary_partials.txt -│ ├── .summary_virulence.txt -│ └── logs -│ ├── abritamr.log -│ ├── nf-abritamr.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── abritamr- - ├── merged-results - │ ├── abritamr.tsv - │ └── logs - │ └── abritamr-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── abritamr-dag.dot - ├── abritamr-report.html - ├── abritamr-timeline.html - └── abritamr-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. 
- - -| Filename | Description | -|-------------------------------|-------------| -| abritamr.tsv | Tab-delimited report of results from all samples | - - -#### abritamr - -Below is a description of the _per-sample_ results from [abriTAMR](https://github.com/MDU-PHL/abritamr). - - -| Extension | Description | -|-------------------------------|-------------| -| .abritamr.txt | Tab-delimited file, combining non-empty summary files from abriTAMR | -| .amrfinder.out | raw output from AMRFinder plus (per sequence) | -| .summary_matches.txt | Tab-delimited file, with a row per sequence, and columns representing functional drug classes | -| .summary_partials.txt | Tab-delimited file, with a row per sequence, and columns representing partial hits to functional drug classes | -| .summary_virulence.txt | Tab-delimited file, with a row per sequence, and columns representing AMRFinderPlus virulence gene classification | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. 
These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| abritamr-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| abritamr-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| abritamr-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| abritamr-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### abriTAMR Parameters - - -| Parameter | Description | -|:---|---| -| ` --abritamr_species` | Use species specific point mutations, must provide a valid species
**Type:** `string` | -| ` --abritamr_identity` | Minimum identity of matches with amrfinder (0 - 1.0), defaults to amrfinder preset
**Type:** `integer` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `abritamr` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [abriTAMR](https://github.com/MDU-PHL/abritamr) - Sherry NL, Horan KA, Ballard SA, Gonҫalves da Silva A, Gorrie CL, Schultz MB, Stevens K, Valcanis M, Sait ML, Stinear TP, Howden BP, and Seemann T [An ISO-certified genomics workflow for identification and surveillance of antimicrobial resistance.](https://doi.org/10.1038/s41467-022-35713-4) _Nature Communications_, 14(1), 60. (2023) - -- [AMRFinderPlus](https://github.com/ncbi/amr) - Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. Agents Chemother._ (2019) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/agrvate.md b/docs/bactopia-tools/agrvate.md deleted file mode 100644 index 26c325ce..00000000 --- a/docs/bactopia-tools/agrvate.md +++ /dev/null @@ -1,242 +0,0 @@ ---- -title: argvate -description: A Bactopia Tool which uses AgrVATE to rapidly identify the _agr_ locus type in _Staphylococcus aureus_ assemblies. ---- -# Bactopia Tool - `agrvate` -The `agrvate` module uses [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) -to rapidly identify the _agr_ locus type in _Staphylococcus aureus_ assemblies. 
- - -## Example Usage -``` -bactopia --wf agrvate \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `agrvate` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── agrvate -│ ├── -agr_gp.tab -│ ├── -blastn_log.txt -│ ├── -summary.tab -│ └── logs -│ ├── nf-agrvate.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── agrvate- - ├── merged-results - │ ├── agrvate.tsv - │ └── logs - │ └── agrvate-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── agrvate-dag.dot - ├── agrvate-report.html - ├── agrvate-timeline.html - └── agrvate-trace.txt - -``` - -:::info[Directory structure might be different] - -`agrvate` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `agrvate` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| agrvate.tsv | A merged TSV file with `AgrVATE` results from all samples | - - -#### AgrVATE - -Below is a description of the _per-sample_ results from [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE). - - -| Extension | Description | -|-------------------------------|-------------| -| -agr_gp.tab | A detailed report for _agr_ kmer matches | -| -blastn_log.txt | Log files from programs called by `AgrVATE` | -| -summary.tab | A final summary report for _agr_ typing | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. 
In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| agrvate-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| agrvate-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| agrvate-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| agrvate-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### AgrVATE Parameters - - -| Parameter | Description | -|:---|---| -| ` --typing_only` | agr typing only. Skips agr operon extraction and frameshift detection
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `agrvate` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) - Raghuram V. [AgrVATE: Rapid identification of Staphylococcus aureus agr locus type and agr operon variants.](https://github.com/VishnuRaghuram94/AgrVATE) (GitHub) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/amrfinderplus.md b/docs/bactopia-tools/amrfinderplus.md deleted file mode 100644 index a1a55307..00000000 --- a/docs/bactopia-tools/amrfinderplus.md +++ /dev/null @@ -1,242 +0,0 @@ ---- -title: amrfinderplus -description: A Bactopia Tool which uses AMRFinder+ to screen assemblies and proteins for antimicrobial resistance and virulence genes. ---- -# Bactopia Tool - `amrfinderplus` -The `amrfinderplus` module uses [AMRFinder+](https://github.com/ncbi/amr) to screen assemblies and proteins -for antimicrobial resistance and virulence genes. - - -## Example Usage -``` -bactopia --wf amrfinderplus \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `amrfinderplus` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── amrfinderplus -│ ├── -genes.tsv -│ ├── -proteins.tsv -│ └── logs -│ ├── nf-amrfinderplus.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── amrfinderplus- - ├── merged-results - │ ├── amrfinderplus-genes.tsv - │ ├── amrfinderplus-proteins.tsv - │ └── logs - │ └── amrfinderplus-{genes|proteins|-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── amrfinderplus-dag.dot - ├── amrfinderplus-report.html - ├── amrfinderplus-timeline.html - └── amrfinderplus-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| amrfinderplus-genes.tsv | A merged TSV file with `AMRFinder+` results using nucleotide inputs | -| amrfinderplus-proteins.tsv | A merged TSV file with `AMRFinder+` results using protein inputs | - - -#### AMRFinder+ - -Below is a description of the _per-sample_ results from [AMRFinder+](https://github.com/ncbi/amr). - - -| Extension | Description | -|-------------------------------|-------------| -| -genes.tsv | A TSV file with `AMRFinder+` results using nucleotide inputs | -| -proteins.tsv | A TSV file with `AMRFinder+` results using protein inputs | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| amrfinderplus-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| amrfinderplus-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| amrfinderplus-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| amrfinderplus-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### AMRFinder+ Parameters - - -| Parameter | Description | -|:---|---| -| ` --ident_min` | Minimum proportion of identical amino acids in alignment for hit (0..1)
**Type:** `number`, **Default:** `-1` | -| ` --coverage_min` | Minimum coverage of the reference protein (0..1)
**Type:** `number`, **Default:** `0.5` | -| ` --organism` | Taxonomy group to run additional screens against
**Type:** `string` | -| ` --translation_table` | NCBI genetic code for translated BLAST
**Type:** `integer`, **Default:** `11` | -| ` --amrfinder_noplus` | Disable running AMRFinder+ with the --plus option
**Type:** `boolean` | -| ` --report_common` | Report proteins common to a taxonomy group
**Type:** `boolean` | -| ` --report_all_equal` | Report all equally-scoring BLAST and HMM matches
**Type:** `boolean` | -| ` --amrfinder_opts` | Extra AMRFinder+ options in quotes.
**Type:** `string` | -| ` --amrfinder_db` | A custom AMRFinder+ database to use, either a tarball or a folder
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `amrfinderplus` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [AMRFinderPlus](https://github.com/ncbi/amr) - Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. Agents Chemother._ (2019) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/ariba.md b/docs/bactopia-tools/ariba.md deleted file mode 100644 index 76780002..00000000 --- a/docs/bactopia-tools/ariba.md +++ /dev/null @@ -1,268 +0,0 @@ ---- -title: ariba -description: A Bactopia Tool which uses ARIBA to rapidly identify genes in a database by creating local assemblies. ---- -# Bactopia Tool - `ariba` -The `ariba` module uses [ARIBA](https://github.com/sanger-pathogens/ariba) -to rapidly identify genes in a database by creating local assemblies. - - -## Example Usage -``` -bactopia --wf ariba \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `ariba` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── ariba -│ └── card -│ ├── -report.tsv -│ ├── -summary.csv -│ ├── assembled_genes.fa.gz -│ ├── assembled_seqs.fa.gz -│ ├── assemblies.fa.gz -│ ├── debug.report.tsv -│ ├── log.clusters.gz -│ ├── logs -│ │ ├── nf-ariba.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ └── version_info.txt -└── bactopia-runs - └── ariba- - ├── merged-results - │ ├── card-report.tsv - │ ├── card-summary.csv - │ └── logs - │ ├── card-report - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── card-summary - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── ariba-dag.dot - ├── ariba-report.html - ├── ariba-timeline.html - └── ariba-trace.txt - -``` - -:::info[Directory structure might be different] - -`ariba` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `ariba` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Extension | Description | -|-------------------------------|-------------| -| -report.tsv | A merged TSV file with `ariba` results from all samples | -| -summary.csv | A merged CSV file created with `ariba summary` | - - -#### Ariba - -Below is a description of the _per-sample_ results from [ARIBA](https://github.com/sanger-pathogens/ariba/wiki/Task:-run). 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-report.tsv | A report of the ARIBA analysis results | -| <SAMPLE_NAME>-summary.csv | A summary of the report created using `ariba summary` | -| assembled_genes.fa.gz | All the assembled genes | -| assembled_seqs.fa.gz | All the assembled sequences that match the reference | -| assemblies.fa.gz | All the raw local assembles | -| debug.report.tsv | Contains the results from `report.tsv` in addition to synonymous mutations | -| log.clusters.gz | A log of the ARIBA analysis | -| version_info.txt | Contains info on the versions of ARIBA and its dependencies | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| ariba-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| ariba-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| ariba-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| ariba-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### Ariba Run Parameters - - -| Parameter | Description | -|:---|---| -| ` --ariba_db` | A database to query, if unavailable it will be downloaded to the path given by --datasets_cache
**Type:** `string` | -| ` --nucmer_min_id` | Minimum alignment identity (delta-filter -i)
**Type:** `integer`, **Default:** `90` | -| ` --nucmer_min_len` | Minimum alignment length (delta-filter -l)
**Type:** `integer`, **Default:** `20` | -| ` --nucmer_breaklen` | Value to use for -breaklen when running nucmer
**Type:** `integer`, **Default:** `200` | -| ` --assembly_cov` | Target read coverage when sampling reads for assembly
**Type:** `integer`, **Default:** `50` | -| ` --min_scaff_depth` | Minimum number of read pairs needed as evidence for scaffold link between two contigs
**Type:** `integer`, **Default:** `10` | -| ` --spades_options` | Extra options to pass to Spades assembler
**Type:** `string` | -| ` --assembled_threshold` | If proportion of gene assembled (regardless of into how many contigs) is at least this value then the flag gene_assembled is set
**Type:** `number`, **Default:** `0.95` | -| ` --gene_nt_extend` | Max number of nucleotides to extend ends of gene matches to look for start/stop codons
**Type:** `integer`, **Default:** `30` | -| ` --unique_threshold` | If proportion of bases in gene assembled more than once is <= this value, then the flag unique_contig is set
**Type:** `number`, **Default:** `0.03` | -| ` --ariba_no_clean` | Do not clean up intermediate files created by Ariba.
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `ariba` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Ariba](https://github.com/sanger-pathogens/ariba) - Hunt M, Mather AE, Sánchez-Busó L, Page AJ, Parkhill J, Keane JA, Harris SR [ARIBA: rapid antimicrobial resistance genotyping directly from sequencing reads](http://dx.doi.org/10.1099/mgen.0.000131). _Microb Genom_ 3, e000131 (2017) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/bakta.md b/docs/bactopia-tools/bakta.md deleted file mode 100644 index cea8a22b..00000000 --- a/docs/bactopia-tools/bakta.md +++ /dev/null @@ -1,259 +0,0 @@ ---- -title: bakta -description: A Bactopia Tool which uses Bakta to rapidly provide extensive annotations (tRNA, tmRNA, rRNA, ncRNA, CRISPR, CDS, pseudogenes, and sORFs) in a standardized fashion. ---- -# Bactopia Tool - `bakta` -The `bakta` module uses [Bakta](https://github.com/oschwengers/bakta) to rapidly annotate bacterial -genomes and plasmids in a standardized fashion. Bakta makes use of a large database ([40+ GB](https://doi.org/10.5281/zenodo.4247252)) -to provide extensive annotations including: tRNA, tmRNA, rRNA, ncRNA, CRISPR, CDS, and sORFs. - - -## Example Usage -``` -bactopia --wf bakta \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `bakta` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── main -│ └── annotator -│ └── bakta -│ ├── -blastdb.tar.gz -│ ├── .embl.gz -│ ├── .faa.gz -│ ├── .ffn.gz -│ ├── .fna.gz -│ ├── .gbff.gz -│ ├── .gff3.gz -│ ├── .hypotheticals.faa.gz -│ ├── .hypotheticals.tsv -│ ├── .tsv -│ ├── .txt -│ └── logs -│ ├── nf-bakta.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── bakta- - └── nf-reports - ├── bakta-dag.dot - ├── bakta-report.html - ├── bakta-timeline.html - └── bakta-trace.txt - -``` - - - -### Results - -#### Bakta - -Below is a description of the _per-sample_ results from [Bakta](https://github.com/oschwengers/bakta). - - -| Extension | Description | -|-------------------------------|-------------| -| .blastdb.tar.gz | A gzipped tar archive of BLAST+ database of the contigs, genes, and proteins | -| .embl.gz | Annotations & sequences in (multi) EMBL format | -| .faa.gz | CDS/sORF amino acid sequences as FASTA | -| .ffn.gz | Feature nucleotide sequences as FASTA | -| .fna.gz | Replicon/contig DNA sequences as FASTA | -| .gbff.gz | Annotations & sequences in (multi) GenBank format | -| .gff3.gz | Annotations & sequences in GFF3 format | -| .hypotheticals.faa.gz | Hypothetical protein CDS amino acid sequences as FASTA | -| .hypotheticals.tsv | Further information on hypothetical protein CDS as simple human readable tab separated values | -| .tsv | Annotations as simple human readable tab separated values | -| .txt | Broad summary of `Bakta` annotations | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| bakta-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| bakta-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| bakta-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| bakta-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### Bakta Download Parameters - - -| Parameter | Description | -|:---|---| -| ` --bakta_db` | Tarball or path to the Bakta database
**Type:** `string` | -| ` --bakta_db_type` | Which Bakta DB to download 'full' (~30GB) or 'light' (~2GB)
**Type:** `string`, **Default:** `full` | -| ` --bakta_save_as_tarball` | Save the Bakta database as a tarball
**Type:** `boolean` | -| ` --download_bakta` | Download the Bakta database to the path given by --bakta_db
**Type:** `boolean` | - -### Bakta Parameters - - -| Parameter | Description | -|:---|---| -| ` --proteins` | FASTA file of trusted proteins to first annotate from
**Type:** `string` | -| ` --prodigal_tf` | Training file to use for Prodigal
**Type:** `string` | -| ` --replicons` | Replicon information table (tsv/csv)
**Type:** `string` | -| ` --min_contig_length` | Minimum contig size to annotate
**Type:** `integer`, **Default:** `1` | -| ` --keep_contig_headers` | Keep original contig headers
**Type:** `boolean` | -| ` --compliant` | Force GenBank/ENA/DDBJ compliance
**Type:** `boolean` | -| ` --skip_trna` | Skip tRNA detection & annotation
**Type:** `boolean` | -| ` --skip_tmrna` | Skip tmRNA detection & annotation
**Type:** `boolean` | -| ` --skip_rrna` | Skip rRNA detection & annotation
**Type:** `boolean` | -| ` --skip_ncrna` | Skip ncRNA detection & annotation
**Type:** `boolean` | -| ` --skip_ncrna_region` | Skip ncRNA region detection & annotation
**Type:** `boolean` | -| ` --skip_crispr` | Skip CRISPR array detection & annotation
**Type:** `boolean` | -| ` --skip_cds` | Skip CDS detection & annotation
**Type:** `boolean` | -| ` --skip_sorf` | Skip sORF detection & annotation
**Type:** `boolean` | -| ` --skip_gap` | Skip gap detection & annotation
**Type:** `boolean` | -| ` --skip_ori` | Skip oriC/oriT detection & annotation
**Type:** `boolean` | -| ` --bakta_opts` | Extra Bakta options in quotes. Example: '--gram +'
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `bakta` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Bakta](https://github.com/oschwengers/bakta) - Schwengers O, Jelonek L, Dieckmann MA, Beyvers S, Blom J, Goesmann A [Bakta - rapid and standardized annotation of bacterial genomes via alignment-free sequence identification.](https://doi.org/10.1099/mgen.0.000685) _Microbial Genomics_ 7(11) (2021) - diff --git a/docs/bactopia-tools/blastn.md b/docs/bactopia-tools/blastn.md deleted file mode 100644 index 23676dc6..00000000 --- a/docs/bactopia-tools/blastn.md +++ /dev/null @@ -1,237 +0,0 @@ ---- -title: blastn -description: A Bactopia Tool which uses BLASTN to query nucleotide sequences against nucleotide databases (contigs or genes). - ---- -# Bactopia Tool - `blastn` -The `blastn` module uses [BLASTN](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs) -to query nucleotide sequences against nucleotide databases (contigs or genes) for each sample. - - -## Example Usage -``` -bactopia --wf blastn \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `blastn` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── blastn -│ ├── .blastn.tsv -│ └── logs -│ ├── nf-blastn.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── blastn- - ├── merged-results - │ ├── blastn.tsv - │ └── logs - │ └── blastn-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── blastn-dag.dot - ├── blastn-report.html - ├── blastn-timeline.html - └── blastn-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| blastn.tsv | A merged TSV file with `blastn` results from all samples | - - -#### blastn - -Below is a description of the _per-sample_ results from [BLASTN](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.blastn.tsv | A TSV file with `blastn` results for a single sample | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| blastn-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| blastn-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| blastn-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| blastn-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### BLASTN Parameters - - -| Parameter | Description | -|:---|---| -| ` --blastn_query` | A fasta file containing the query sequences to BLAST against the database
**Type:** `string` | -| ` --blastn_outfmt` | The columns to include with -outfmt 6
**Type:** `string`, **Default:** `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | -| ` --blastn_opts` | Additional options to pass to BLASTN
**Type:** `string` | -| ` --blastn_perc_identity` | Percent identity
**Type:** `integer`, **Default:** `50` | -| ` --blastn_qcov_hsp_perc` | Percent query coverage per hsp
**Type:** `integer`, **Default:** `50` | -| ` --blastn_max_target_seqs` | Maximum number of aligned sequences to keep
**Type:** `integer`, **Default:** `2000` | -| ` --blastn_use_genes` | Blast against genes sequences instead of contigs
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `blastn` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) - Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/blastp.md b/docs/bactopia-tools/blastp.md deleted file mode 100644 index aa026343..00000000 --- a/docs/bactopia-tools/blastp.md +++ /dev/null @@ -1,235 +0,0 @@ ---- -title: blastp -description: A Bactopia Tool which uses BLASTP to query protein sequences against protein databases. - ---- -# Bactopia Tool - `blastp` -The `blastp` module uses [BLASTP](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs) -to query protein sequences against protein databases for each sample. - - -## Example Usage -``` -bactopia --wf blastp \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `blastp` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── blastp -│ ├── .blastp.tsv -│ └── logs -│ ├── nf-blastp.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── blastp- - ├── merged-results - │ ├── blastp.tsv - │ └── logs - │ └── blastp-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── blastp-dag.dot - ├── blastp-report.html - ├── blastp-timeline.html - └── blastp-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| blastp.tsv | A merged TSV file with `blastp` results from all samples | - - -#### blastp - -Below is a description of the _per-sample_ results from [BLASTP](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.blastp.tsv | A TSV file with `blastp` results for a single sample | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| blastp-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| blastp-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| blastp-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| blastp-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### BLASTP Parameters - - -| Parameter | Description | -|:---|---| -| ` --blastp_query` | A fasta file containing the query sequences to BLAST against the database
**Type:** `string` | -| ` --blastp_outfmt` | The columns to include with -outfmt 6
**Type:** `string`, **Default:** `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | -| ` --blastp_opts` | Additional options to pass to BLASTP
**Type:** `string` | -| ` --blastp_qcov_hsp_perc` | Percent query coverage per hsp
**Type:** `integer`, **Default:** `50` | -| ` --blastp_max_target_seqs` | Maximum number of aligned sequences to keep
**Type:** `integer`, **Default:** `2000` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `blastp` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) - Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/blastx.md b/docs/bactopia-tools/blastx.md deleted file mode 100644 index a7d8dea5..00000000 --- a/docs/bactopia-tools/blastx.md +++ /dev/null @@ -1,235 +0,0 @@ ---- -title: blastx -description: A Bactopia Tool which uses BLASTX to query translated nucleotide sequences against protein databases. - ---- -# Bactopia Tool - `blastx` -The `blastx` module uses [BLASTX](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs) -to query translated nucleotide sequences against protein databases for each sample. - - -## Example Usage -``` -bactopia --wf blastx \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `blastx` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── blastx -│ ├── .blastx.tsv -│ └── logs -│ ├── nf-blastx.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── blastx- - ├── merged-results - │ ├── blastx.tsv - │ └── logs - │ └── blastx-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── blastx-dag.dot - ├── blastx-report.html - ├── blastx-timeline.html - └── blastx-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| blastx.tsv | A merged TSV file with `blastx` results from all samples | - - -#### blastx - -Below is a description of the _per-sample_ results from [BLASTX](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.blastx.tsv | A TSV file with `blastx` results for a single sample | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| blastx-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| blastx-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| blastx-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| blastx-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### BLASTX Parameters - - -| Parameter | Description | -|:---|---| -| ` --blastx_query` | A fasta file containing the query sequences to BLAST against the database
**Type:** `string` | -| ` --blastx_outfmt` | The columns to include with -outfmt 6
**Type:** `string`, **Default:** `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | -| ` --blastx_opts` | Additional options to pass to BLASTX
**Type:** `string` | -| ` --blastx_qcov_hsp_perc` | Percent query coverage per hsp
**Type:** `integer`, **Default:** `50` | -| ` --blastx_max_target_seqs` | Maximum number of aligned sequences to keep
**Type:** `integer`, **Default:** `2000` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `blastx` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) - Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/bracken.md b/docs/bactopia-tools/bracken.md deleted file mode 100644 index 089b423f..00000000 --- a/docs/bactopia-tools/bracken.md +++ /dev/null @@ -1,252 +0,0 @@ ---- -title: bracken -description: A Bactopia Tool which uses Bracken (and Kraken2) to estimate taxonomic abundance of samples. - ---- -# Bactopia Tool - `bracken` -The `bracken` module uses [Bracken](https://github.com/jenniferlu717/Bracken) to estimate -taxonomic abundance of samples. This Bactopia Tool will also run [Kraken2](https://ccb.jhu.edu/software/kraken2/), -automatically and generate [Krona](https://github.com/marbl/Krona) charts for both Bracken and Kraken2. - - -## Example Usage -``` -bactopia --wf bracken \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `bracken` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── bracken -│ ├── .bracken.abundances.txt -│ ├── .bracken.adjusted.abundances.txt -│ ├── .bracken.krona.html -│ ├── .bracken.report.txt -│ ├── .bracken.tsv -│ ├── .classified_{1,2}.fastq.gz -│ ├── .kraken2.krona.html -│ ├── .kraken2.output.txt -│ ├── .kraken2.report.txt -│ ├── .unclassified_{1,2}.fastq.gz -│ └── logs -│ ├── nf-bracken.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── bracken- - └── nf-reports - ├── bracken-dag.dot - ├── bracken-report.html - ├── bracken-timeline.html - └── bracken-trace.txt - -``` - - - -### Results - -#### Bracken & Kraken2 - -Below is a description of the _per-sample_ results from [Bracken](https://github.com/jenniferlu717/Bracken) -and [Kraken2](https://github.com/DerrickWood/kraken2). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.bracken.abundances.txt | Bracken abundance estimates for each taxon. | -| <SAMPLE_NAME>.bracken.adjusted.abundances.txt | Bracken abundance estimates for each taxon adjusted for inclusion of unclassified reads | -| <SAMPLE_NAME>.bracken.krona.html | Krona chart of Bracken abundance estimates | -| <SAMPLE_NAME>.bracken.report.txt | Bracken report containing stats about classified and not classified reads See [Bracken - Output Formats](https://ccb.jhu.edu/software/bracken/index.shtml?t=manual) | -| <SAMPLE_NAME>.classified_{1|2}.fastq.gz | Reads classified to belong to any of the taxa on the Kraken2 database. 
| -| <SAMPLE_NAME>.kraken2.krona.html | Krona chart of Kraken2 abundance estimates | -| <SAMPLE_NAME>.kraken2.output.txt | Kraken2 output file containing the taxonomic classification of each read | -| <SAMPLE_NAME>.kraken2.report.txt | Kraken2 report containing stats about classified and not classified reads See [Kraken2 - Output Formats](https://github.com/DerrickWood/kraken2/wiki/Manual#output-formats) for more details | -| <SAMPLE_NAME>.unclassified_{1,2}.fastq.gz | Reads not classified to belong to any of the taxa on the Kraken2 database. | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| bracken-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| bracken-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| bracken-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| bracken-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### Kraken2 and Bracken Parameters - - -| Parameter | Description | -|:---|---| -| ` --kraken2_db` | A single tarball or path to a Kraken2 formatted database
**Type:** `string` | -| ` --kraken2_quick_mode` | Quick operation (use first hit or hits)
**Type:** `boolean` | -| ` --kraken2_confidence` | Confidence score threshold between 0 and 1
**Type:** `number` | -| ` --kraken2_minimum_base_quality` | Minimum base quality used in classification
**Type:** `integer` | -| ` --kraken2_use_mpa_style` | Format report output like Kraken 1's kraken-mpa-report
**Type:** `boolean` | -| ` --kraken2_report_zero_counts` | Report counts for ALL taxa, even if counts are zero
**Type:** `boolean` | -| ` --kraken2_report_minimizer_data` | Include minimizer and distinct minimizer count information in report
**Type:** `boolean` | -| ` --kraken2_use_names` | Print scientific names instead of just taxids
**Type:** `boolean` | -| ` --kraken2_memory_mapping` | Avoid loading database into RAM
**Type:** `boolean` | -| ` --kraken2_minimum_hit_groups` | Minimum number of hit groups needed to make a call
**Type:** `integer`, **Default:** `2` | -| ` --kraken2_remove_filtered_reads` | Discard the classified and unclassified FASTQs produced by Kraken2
**Type:** `boolean` | -| ` --kraken2_keep_raw_output` | Keep the STDOUT file produced from Kraken2
**Type:** `boolean` | -| ` --bracken_read_length` | Read length to get all classifications for (0 = determine at runtime)
**Type:** `integer` | -| ` --bracken_level` | Level to estimate abundance at
**Type:** `string`, **Default:** `S` | -| ` --bracken_threshold` | Reads required PRIOR to abundance estimation to perform re-estimation
**Type:** `integer` | -| ` --skip_krona` | Skip the creation of a Krona report
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `bracken` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Bracken](https://github.com/jenniferlu717/Bracken) - Lu J, Breitwieser FP, Thielen P, and Salzberg SL [Bracken: estimating species abundance in metagenomics data.](https://doi.org/10.7717/peerj-cs.104) _PeerJ Computer Science_, 3, e104. (2017) - -- [Kraken2](https://github.com/DerrickWood/kraken2) - Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) - -- [Krona](https://github.com/marbl/Krona) - Ondov BD, Bergman NH, and Phillippy AM [Interactive metagenomic visualization in a Web browser.](https://doi.org/10.1186/1471-2105-12-385) _BMC Bioinformatics_, 12, 385. (2011) - diff --git a/docs/bactopia-tools/btyper3.md b/docs/bactopia-tools/btyper3.md deleted file mode 100644 index 0bc5416d..00000000 --- a/docs/bactopia-tools/btyper3.md +++ /dev/null @@ -1,258 +0,0 @@ ---- -title: btyper3 -description: A Bactopia Tool which uses BTyper3 to classify Bacillus cereus group isolates from genome assemblies. - ---- -# Bactopia Tool - `btyper3` -The `btyper3` module uses [BTyper3](https://github.com/lmc297/BTyper3) to classify -Bacillus cereus group isolates from genome assemblies. - - -## Example Usage -``` -bactopia --wf btyper3 \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `btyper3` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── btyper3 -│ ├── _final_results.txt -│ ├── bt -│ │ └── _bt.txt -│ ├── logs -│ │ ├── .log -│ │ ├── nf-btyper3.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── mlst -│ │ └── _mlst.txt -│ ├── panC -│ │ └── _panC.txt -│ ├── species -│ │ └── _species_fastani.txt -│ ├── subspecies -│ │ └── _subspecies_fastani.txt -│ ├── typestrains -│ │ └── _typestrains_fastani.txt -│ └── virulence -│ └── _virulence.txt -└── bactopia-runs - └── btyper3- - ├── merged-results - │ ├── btyper3.tsv - │ └── logs - │ └── btyper3-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── btyper3-dag.dot - ├── btyper3-report.html - ├── btyper3-timeline.html - └── btyper3-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| btyper3.tsv | A merged TSV file with `BTyper3` results from all samples | - - -#### btyper3 - -Below is a description of the _per-sample_ results from [BTyper3](https://github.com/lmc297/BTyper3). - - -| Extension | Description | -|-------------------------------|-------------| -| _final_results.txt | A final tab-delimited file of BTyper3 results | -| _bt.txt | BLAST results from Bt genes detection | -| _mlst.txt | BLAST results against a MLST database | -| _panC.txt | BLAST results from panC group assignment | -| _species_fastani.txt | FastANI results for species assignment | -| _subspecies_fastani.txt | FastANI results for subspecies assignment | -| _typestrains_fastani.txt | FastANI results for type strain comparison | -| _virulence.txt | BLAST results against a virulence database | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. 
In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| btyper3-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| btyper3-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| btyper3-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| btyper3-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### BTyper3 Parameters - - -| Parameter | Description | -|:---|---| -| ` --bt_virulence_identity` | Minimum percent amino acid/nucleotide identity threshold for a virulence gene to be considered present
**Type:** `integer`, **Default:** `70` | -| ` --bt_virulence_coverage` | Minimum percent coverage threshold for a virulence gene to be considered present
**Type:** `integer`, **Default:** `80` | -| ` --bt_identity` | Minimum percent amino acid identity threshold for a Bt toxin gene to be considered present
**Type:** `integer`, **Default:** `50` | -| ` --bt_coverage` | Minimum percent coverage threshold for a Bt toxin gene to be considered present
**Type:** `integer`, **Default:** `70` | -| ` --bt_overlap` | Specify maximum proportion of overlap for overlapping Bt toxin genes to be considered separate genes
**Type:** `integer`, **Default:** `70` | -| ` --bt_opts` | Additional options to pass to BTyper3
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `btyper3` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [BTyper3](https://github.com/lmc297/BTyper3) - Carroll LM, Wiedmann M, Kovac J [Proposal of a Taxonomic Nomenclature for the Bacillus cereus Group Which Reconciles Genomic Definitions of Bacterial Species with Clinical and Industrial Phenotypes.](https://doi.org/10.1128/mBio.00034-20) _mBio_, 11(1). (2020) - -- [BTyper3](https://github.com/lmc297/BTyper3) - Carroll LM, Cheng RA, Kovac J [No Assembly Required: Using BTyper3 to Assess the Congruency of a Proposed Taxonomic Framework for the Bacillus cereus Group With Historical Typing Methods.](https://doi.org/10.3389/fmicb.2020.580691) _Frontiers in Microbiology_, 11, 580691. (2020) - diff --git a/docs/bactopia-tools/busco.md b/docs/bactopia-tools/busco.md deleted file mode 100644 index e9cb23ca..00000000 --- a/docs/bactopia-tools/busco.md +++ /dev/null @@ -1,453 +0,0 @@ ---- -title: busco -description: A Bactopia Tool which uses BUSCO, or Benchmarking Universal Single-Copy Orthologs, to assess the completeness of your assembly. ---- -# Bactopia Tool - `busco` -The `busco` module uses [BUSCO](https://gitlab.com/ezlab/busco) (_or Benchmarking Universal Single-Copy Orthologs_) -to assess the completeness of your assembly. - - -## Example Usage -``` -bactopia --wf busco \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `busco` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── busco -│ └── -│ ├── -summary.txt -│ ├── logs -│ │ ├── bbtools_{err|out}.log -│ │ ├── busco.log -│ │ ├── hmmsearch_{err|out}.log -│ │ ├── nf-busco.{begin,err,log,out,run,sh,trace} -│ │ ├── prodigal_{err|out}.log -│ │ ├── prodigal_mode_single_code_#_{err,out}.log -│ │ └── versions.yml -│ ├── prodigal_output -│ │ └── predicted_genes -│ │ ├── predicted.{faa,fna} -│ │ └── tmp -│ │ └── prodigal_mode_single_code_#.{faa,fna} -│ ├── run_ -│ │ ├── busco_sequences -│ │ │ ├── fragmented_busco_sequences -│ │ │ │ └── .{faa,fna -│ │ │ ├── multi_copy_busco_sequences -│ │ │ └── single_copy_busco_sequences -│ │ │ └── .{faa,fna} -│ │ ├── full_table.tsv -│ │ ├── hmmer_output -│ │ │ └── .out -│ │ ├── missing_busco_list.tsv -│ │ └── short_summary.{json|txt} -│ └── short_summary.specific.bacteria_odb10.GCF_000292685.fna.{json|txt} -└── bactopia-runs - └── busco- - ├── merged-results - │ ├── busco.tsv - │ └── logs - │ └── busco-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── busco-dag.dot - ├── busco-report.html - ├── busco-timeline.html - └── busco-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. 
- - -| Filename | Description | -|-------------------------------|-------------| -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | -| | | - - -#### busco - -Below is a description of the _per-lineage_ results from [BUSCO](https://gitlab.com/ezlab/busco). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-summary.txt | A summary of the BUSCO results | -| prodigal_output | Below are the outputs from the Prodigal gene-prediction step. 
| -| run_<BUSCO_LINEAGE>/busco_sequences | FASTA format file for each BUSCO gene identified | -| run_<BUSCO_LINEAGE>/full_table.tsv | Complete results in a tab-delimited format with scores and lengths of BUSCO matches | -| run_<BUSCO_LINEAGE>/hmmer_output | Tab-delimited output from HMMER | -| run_<BUSCO_LINEAGE>/missing_busco_list.tsv | Tab-delimited list of missing BUSCOs | -| run_<BUSCO_LINEAGE>/short_summary.json | A summary of BUSCO matches in JSON format | -| run_<BUSCO_LINEAGE>/short_summary.txt | A summary of BUSCO matches | -| short_summary.specific.<BUSCO_LINEAGE>.<SAMPLE_NAME>.json | A summary of BUSCO matches in JSON format per-sample | -| short_summary.specific.<BUSCO_LINEAGE>.<SAMPLE_NAME>.txt | A summary of BUSCO matches per-sample | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| busco-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| busco-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| busco-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| busco-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### BUSCO Parameters - - -| Parameter | Description | -|:---|---| -| ` --busco_lineage` | Specify the name of the BUSCO lineage to be used
**Type:** `string`, **Default:** `bacteria_odb10` | -| ` --busco_evalue` | E-value cutoff for BLAST searches. Allowed formats, 0.001 or 1e-03
**Type:** `string`, **Default:** `1e-03` | -| ` --busco_limit` | Total candidate regions to consider per BUSCO
**Type:** `integer`, **Default:** `3` | -| ` --metaeuk_parameters` | Additional Metaeuk first-pass arguments contained within a single pair of quotation marks, separated by commas
**Type:** `string` | -| ` --metaeuk_rerun_parameters` | Additional Metaeuk second-pass arguments contained within a single pair of quotation marks, separated by commas
**Type:** `string` | -| ` --use_augustus` | Use augustus gene predictor for eukaryote runs
**Type:** `boolean` | -| ` --augustus_parameters` | Additional Augustus arguments contained within a single pair of quotation marks, separated by commas
**Type:** `string` | -| ` --augustus_species` | Specify a species for Augustus training
**Type:** `string` | -| ` --augustus_long` | Optimization Augustus self-training mode
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularityu. (e.g. Singularity: '-D `pwd`'
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `busco` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [BUSCO](https://gitlab.com/ezlab/busco) - Manni M, Berkeley MR, Seppey M, Simão FA, Zdobnov EM [BUSCO Update: Novel and Streamlined Workflows along with Broader and Deeper Phylogenetic Coverage for Scoring of Eukaryotic, Prokaryotic, and Viral Genomes.](https://doi.org/10.1093/molbev/msab199) _Molecular Biology and Evolution_ 38(10), 4647–4654. (2021) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/checkm.md b/docs/bactopia-tools/checkm.md deleted file mode 100644 index 4b835c94..00000000 --- a/docs/bactopia-tools/checkm.md +++ /dev/null @@ -1,251 +0,0 @@ ---- -title: checkm -description: A Bactopia Tool which uses CheckM to assess the quality of microbial genomes recovered from isolates. ---- -# Bactopia Tool - `checkm` -The `checkm` module is used [CheckM](https://github.com/Ecogenomics/CheckM) to assess the quality of microbial -genomes recovered from isolates, single cells, and metagenomes. - - -## Example Usage -``` -bactopia --wf checkm \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `checkm` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── checkm -│ ├── -genes.aln -│ ├── -results.txt -│ ├── bins/ -│ ├── lineage.ms -│ ├── logs -│ │ ├── checkm.log -│ │ ├── nf-checkm.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ └── storage/ -└── bactopia-runs - └── checkm- - ├── merged-results - │ ├── checkm.tsv - │ └── logs - │ └── checkm-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── checkm-dag.dot - ├── checkm-report.html - ├── checkm-timeline.html - └── checkm-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| checkm.tsv | A merged TSV file with `checkm` results from all samples | - - -#### CheckM - -Below is a description of the _per-sample_ results from [CheckM](https://github.com/Ecogenomics/CheckM). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-genes.aln | Alignment of multi-copy genes and their AAI identity | -| <SAMPLE_NAME>-results.txt | Final results of Final results of [CheckM's lineage_wf](https://github.com/Ecogenomics/CheckM/wiki/Workflows#lineage-specific-workflow) | -| bins/ | A folder with inputs (e.g. proteins) for processing by `CheckM` | -| lineage.ms | Output file describing marker set for each bin | -| storage/ | A folder with intermediate results from `CheckM` processing | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| checkm-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| checkm-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| checkm-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| checkm-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### CheckM Parameters - - -| Parameter | Description | -|:---|---| -| ` --checkm_unique` | Minimum number of unique phylogenetic markers required to use lineage-specific marker set.
**Type:** `integer`, **Default:** `10` | -| ` --checkm_multi` | Maximum number of multi-copy phylogenetic markers before defaulting to domain-level marker set.
**Type:** `integer`, **Default:** `10` | -| ` --aai_strain` | AAI threshold used to identify strain heterogeneity
**Type:** `number`, **Default:** `0.9` | -| ` --checkm_length` | Percent overlap between target and query
**Type:** `number`, **Default:** `0.7` | -| ` --full_tree` | Use the full tree (requires ~40GB of memory) for determining lineage of each bin.
**Type:** `boolean` | -| ` --skip_pseudogene_correction` | Skip identification and filtering of pseudogene
**Type:** `boolean` | -| ` --ignore_thresholds` | Ignore model-specific score thresholds
**Type:** `boolean` | -| ` --checkm_ali` | Generate HMMER alignment file for each bin
**Type:** `boolean` | -| ` --checkm_nt` | Generate nucleotide gene sequences for each bin
**Type:** `boolean` | -| ` --force_domain` | Use domain-level sets for all bins
**Type:** `boolean` | -| ` --no_refinement` | Do not perform lineage-specific marker set refinement
**Type:** `boolean` | -| ` --individual_markers` | Treat marker as independent
**Type:** `boolean` | -| ` --skip_adj_correction` | Do not exclude adjacent marker genes when estimating contamination
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularityu. (e.g. Singularity: '-D `pwd`'
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `checkm` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [CheckM](https://github.com/Ecogenomics/CheckM) - Parks DH, Imelfort M, Skennerton CT, Hugenholtz P, Tyson GW [CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes.](http://dx.doi.org/10.1101/gr.186072.114) _Genome Res_ 25, 1043–1055 (2015) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/clermontyping.md b/docs/bactopia-tools/clermontyping.md deleted file mode 100644 index e95fcf22..00000000 --- a/docs/bactopia-tools/clermontyping.md +++ /dev/null @@ -1,246 +0,0 @@ ---- -title: clermontyping -description: A Bactopia Tool which uses ClermonTyping to conduct _in silico_ phylotyping of _Escherichia_ genomes. ---- -# Bactopia Tool - `clermontyping` -The `clermontyping` module used [ClermonTyping](https://github.com/happykhan/ClermonTyping) -to conduct _in silico_ prediction of phylotype for _Escherichia_ genomes. It uses the -genome assemblies to be assign them to _E. albertii_, _E. fergusonii_, _Escherichia_ -clades I–V, _E. coli sensu stricto_ as well as to the main _E. coli_ phylogroups - - -## Example Usage -``` -bactopia --wf clermontyping \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `clermontyping` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── clermontyping -│ ├── .blast.xml -│ ├── .html -│ ├── .mash.tsv -│ ├── .phylogroups.txt -│ └── logs -│ ├── nf-clermontyping.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── clermontyping - ├── merged-results - │ ├── clermontyping.tsv - │ └── logs - │ └── clermontyping-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── clermontyping-dag.dot - ├── clermontyping-report.html - ├── clermontyping-timeline.html - └── clermontyping-trace.txt - -``` - -:::info[Directory structure might be different] - -`clermontyping` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `clermontyping` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| clermontyping.csv | A merged TSV file with `ClermonTyping` results from all samples | - - -#### ClermonTyping - -Below is a description of the _per-sample_ results from [ClermonTyping](https://github.com/happykhan/ClermonTyping). - - -| Extension | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.blast.xml | A BLAST XML file with the results of the ClermonTyping analysis | -| <SAMPLE_NAME>.html | A HTML file with the results of the ClermonTyping analysis | -| <SAMPLE_NAME>.mash.tsv | A TSV file with the Mash distances | -| <SAMPLE_NAME>.phylogroups.txt | A TSV file with the final phylogroup assignments | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. 
In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| clermontyping-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| clermontyping-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| clermontyping-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| clermontyping-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. 
- -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### ClermonTyping Parameters - - -| Parameter | Description | -|:---|---| -| ` --clermon_threshold` | Do not use contigs under this size
**Type:** `number` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularityu. (e.g. Singularity: '-D `pwd`'
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `clermontyping` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [ClermontTyping](https://github.com/happykhan/ClermonTyping) - Beghain J, Bridier-Nahmias A, Le Nagard H, Denamur E, Clermont O. [ClermonTyping: an easy-to-use and accurate in silico method for Escherichia genus strain phylotyping.](https://doi.org/10.1099/mgen.0.000192) Microbial Genomics, 4(7), e000192. (2018) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/clonalframeml.md b/docs/bactopia-tools/clonalframeml.md deleted file mode 100644 index c4eedff3..00000000 --- a/docs/bactopia-tools/clonalframeml.md +++ /dev/null @@ -1,132 +0,0 @@ ---- -tags: - - alignment - - recombination ---- - - - -# Bactopia Tool - `clonalframeml` -The `clonalframeml` module uses [ClonalFrameML](https://github.com/xavierdidelot/ClonalFrameML) to predict -recombination in bacterial genomes. A starting tree is first created with [IQ-TREE](https://github.com/Cibiv/IQ-TREE) -(`-fast` mode) using the input alignment. Then the alignment and tree are provided ClonalFrameML which uses -maximum likelihood to predict recombinant regions in the alignment. Finally, [maskrc-svg](https://github.com/kwongj/maskrc-svg) -is used to create and additional alignment with the recombinant regions masked. - - -## Example Usage -``` -bactopia --wf clonalframeml \ - --bactopia /path/to/your/bactopia/results \ - --include includes.txt -``` - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | Default | -|---|---|---| -| `--bactopia` | The path to bactopia results to use as inputs | | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | Default | -|---|---|---| -| `--include` | A text file containing sample names (one per line) to include from the analysis | | -| `--exclude` | A text file containing sample names (one per line) to exclude from the analysis | | - - -### ClonalFrameML Parameters - - -| Parameter | Description | Default | -|---|---|---| -| `--emsim` | Number of simulations to estimate uncertainty in the EM results | 100 | -| `--clonal_opts` | Extra ClonalFrameML options in quotes | | -| `--skip_recombination` | Skip ClonalFrameML execution in subworkflows | False | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | Default | -|---|---|---| -| `--outdir` | Base directory to write results to | ./ | -| `--run_name` | Name of the directory to hold results | bactopia | -| `--skip_compression` | Ouput files will not be compressed | False | -| `--keep_all_files` | Keeps all analysis files created | False | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | Default | -|---|---|---| -| `--max_retry` | Maximum times to retry a process before allowing it to fail. | 3 | -| `--max_cpus` | Maximum number of CPUs that can be requested for any single job. | 4 | -| `--max_memory` | Maximum amount of memory (in GB) that can be requested for any single job. | 32 | -| `--max_time` | Maximum amount of time (in minutes) that can be requested for any single job. | 120 | -| `--max_downloads` | Maximum number of samples to download at a time | 3 | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. 
- -| Parameter | Description | Default | -|---|---|---| -| `--nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set. | | -| `--publish_dir_mode` | Method used to save pipeline results to output directory. | copy | -| `--infodir` | Directory to keep pipeline Nextflow logs and reports. | ${params.outdir}/pipeline_info | -| `--force` | Nextflow will overwrite existing output files. | False | -| `--cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted. | False | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | Default | -|---|---|---| -| `--condadir` | Directory to Nextflow should use for Conda environments | | -| `--registry` | Docker registry to pull containers from. | dockerhub | -| `--singularity_cache` | Directory where remote Singularity images are stored. | | -| `--singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead. | | -| `--force_rebuild` | Force overwrite of existing pre-built environments. | False | -| `--queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM) | general,high-memory | -| `--cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name' | | -| `--disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node. | False | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | Default | -|---|---|---| -| `--monochrome_logs` | Do not use coloured log outputs. | | -| `--nfdir` | Print directory Nextflow has pulled Bactopia to | | -| `--sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution. 
| 5 | -| `--validate_params` | Boolean whether to validate parameters against the schema at runtime | True | -| `--help` | Display help text. | | -| `--wf` | Specify which workflow or Bactopia Tool to execute | bactopia | -| `--list_wfs` | List the available workflows and Bactopia Tools to use with '--wf' | | -| `--show_hidden_params` | Show all params when using `--help` | | -| `--help_all` | An alias for --help --show_hidden_params | | -| `--version` | Display version text. | | - -## Citations -If you use Bactopia and `clonalframeml` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [ClonalFramML](https://github.com/xavierdidelot/ClonalFrameML) - Didelot X, Wilson DJ [ClonalFrameML: Efficient Inference of Recombination in Whole Bacterial Genomes.](https://doi.org/10.1371/journal.pcbi.1004041) _PLoS Comput Biol_ 11(2) e1004041 (2015) - -- [IQ-TREE](https://github.com/Cibiv/IQ-TREE) - Nguyen L-T, Schmidt HA, von Haeseler A, Minh BQ [IQ-TREE: A fast and effective stochastic algorithm for estimating maximum likelihood phylogenies.](https://doi.org/10.1093/molbev/msu300) _Mol. Biol. Evol._ 32:268-274 (2015) - -- [ModelFinder](https://github.com/Cibiv/IQ-TREE) - Kalyaanamoorthy S, Minh BQ, Wong TKF, von Haeseler A, Jermiin LS [ModelFinder - Fast model selection for accurate phylogenetic estimates.](https://doi.org/10.1038/nmeth.4285) _Nat. 
Methods_ 14:587-589 (2017) - -- [maskrc-svg](https://github.com/kwongj/maskrc-svg) - Kwong J [maskrc-svg - Masks recombination as detected by ClonalFrameML or Gubbins and draws an SVG.](https://github.com/kwongj/maskrc-svg) (GitHub) diff --git a/docs/bactopia-tools/defensefinder.md b/docs/bactopia-tools/defensefinder.md deleted file mode 100644 index dd50e769..00000000 --- a/docs/bactopia-tools/defensefinder.md +++ /dev/null @@ -1,244 +0,0 @@ ---- -title: defensefinder -description: A Bactopia Tool which uses defense-finder to rapidly identify all known anti-phage systems in a bacterial genome. - ---- -# Bactopia Tool - `defensefinder` -The `defensefinder` module uses the [defense-finder](https://github.com/mdmparis/defense-finder) -for the identification of all known anti-phage systems. - - -## Example Usage -``` -bactopia --wf defensefinder \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `defensefinder` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── defensefinder -│ ├── .fna.prt -│ ├── .fna.prt.idx -│ ├── .fna_defense_finder_{genes|hmmer|systems}.tsv -│ ├── .macsydata.tar.gz -│ └── logs -│ ├── nf-defensefinder.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── defensefinder- - ├── merged-results - │ ├── defensefinder-{genes|hmmer|systems}.tsv - │ └── logs - │ └── defensefinder-{genes|hmmer|systems}-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── defensefinder-dag.dot - ├── defensefinder-report.html - ├── defensefinder-timeline.html - └── defensefinder-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. 
- - -| Filename | Description | -|-------------------------------|-------------| -| defensefinder-genes.tsv | A merged TSV of all genes found in the system for all samples | -| defensefinder-hmmer.tsv | A merged TSV of all HMM hits for all samples | -| defensefinder-systems.tsv | A merged TSV of all information about systems found for all samples | - - -#### defensefinder - -Below is a description of the _per-sample_ results from [defense-finder](https://github.com/mdmparis/defense-finder). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.fna.prt | A FASTA file containing all proteins found in the system for a single sample | -| <SAMPLE_NAME>.fna.prt.idx | A index file for the proteins file | -| <SAMPLE_NAME>.fna_defense_finder_genes.tsv | A tab-delimited file with each gene found in the system for a single sample | -| <SAMPLE_NAME>.fna_defense_finder_hmmer.tsv | A tab-delimited file with each HMM hit for a single sample | -| <SAMPLE_NAME>.fna_defense_finder_systems.tsv | A tab-delimited file with information about each system found for a single sample | -| <SAMPLE_NAME>.macsydata.tar.gz | A raw MACSyFinder output file (requires --df_preserveraw) | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| defensefinder-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| defensefinder-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| defensefinder-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| defensefinder-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### defense-finder Parameters - - -| Parameter | Description | -|:---|---| -| ` --df_coverage` | Minimal percentage of coverage for each profile
**Type:** `number`, **Default:** `0.4` | -| ` --df_dbtype` | The macsyfinder --db-type option
**Type:** `string`, **Default:** `ordered_replicon` | -| ` --df_preserveraw` | Preserve raw MacsyFinder outputs alongside Defense Finder results inside the output directory
**Type:** `boolean` | -| ` --df_nocutga` | Advanced! Run macsyfinder in no-cut-ga mode. The validity of the genes and systems found is not guaranteed!
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `defensefinder` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [DefenseFinder](https://github.com/mdmparis/defense-finder) - Tesson F, Hervé A, Mordret E, Touchon M, d’Humières C, Cury J, Bernheim A [Systematic and quantitative view of the antiviral arsenal of prokaryotes.](https://doi.org/10.1038/s41467-022-30269-9) Nature Communications, 13(1), 2561. (2022) - diff --git a/docs/bactopia-tools/ectyper.md b/docs/bactopia-tools/ectyper.md deleted file mode 100644 index 0cec806f..00000000 --- a/docs/bactopia-tools/ectyper.md +++ /dev/null @@ -1,246 +0,0 @@ ---- -title: ectyper -description: A Bactopia Tool which uses ECTyper to conduct _in silico_ prediction of serotype for _Escherichia coli_ genomes. ---- -# Bactopia Tool - `ectyper` -The `ectyper` module used [ECTyper](https://github.com/phac-nml/ecoli_serotyping) to conduct -_in silico_ prediction of serotype for _Escherichia coli_ genomes. It uses the genome assemblies -tp provide basic species identification and the predicted _E. coli_ serotype (e.g. O174:H21). - - -## Example Usage -``` -bactopia --wf ectyper \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `ectyper` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── ectyper -│ ├── .tsv -│ ├── blast_output_alleles.txt -│ └── logs -│ ├── ectyper.log -│ ├── nf-ectyper.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── ectyper- - ├── merged-results - │ ├── ectyper.tsv - │ └── logs - │ └── ectyper-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── ectyper-dag.dot - ├── ectyper-report.html - ├── ectyper-timeline.html - └── ectyper-trace.txt - -``` - -:::info[Directory structure might be different] - -`ectyper` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `ectyper` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| ectyper.tsv | A merged TSV file with `ECTyper` results from all samples | - - -#### ECTyper - -Below is a description of the _per-sample_ results from [ECTyper](https://github.com/phac-nml/ecoli_serotyping). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `ECTyper` result, see [ECTyper - Report format](https://github.com/phac-nml/ecoli_serotyping#report-format) for details | -| blast_output_alleles.txt | Allele report generated from BLAST results | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| ectyper-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| ectyper-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| ectyper-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| ectyper-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### ECTyper Parameters - - -| Parameter | Description | -|:---|---| -| ` --opid` | Percent identity required for an O antigen allele match
**Type:** `integer`, **Default:** `90` | -| ` --opcov` | Minimum percent coverage required for an O antigen allele match
**Type:** `integer`, **Default:** `90` | -| ` --hpid` | Percent identity required for an H antigen allele match
**Type:** `integer`, **Default:** `95` | -| ` --hpcov` | Minimum percent coverage required for an H antigen allele match
**Type:** `integer`, **Default:** `50` | -| ` --verify` | Enable E. coli species verification
**Type:** `boolean` | -| ` --print_alleles` | Prints the allele sequences if enabled as the final column
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `ectyper` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [ECTyper](https://github.com/phac-nml/ecoli_serotyping) - Laing C, Bessonov K, Sung S, La Rose C [ECTyper - In silico prediction of _Escherichia coli_ serotype](https://github.com/phac-nml/ecoli_serotyping) (GitHub) diff --git a/docs/bactopia-tools/eggnog.md b/docs/bactopia-tools/eggnog.md deleted file mode 100644 index 83e9a128..00000000 --- a/docs/bactopia-tools/eggnog.md +++ /dev/null @@ -1,243 +0,0 @@ ---- -title: eggnog -description: A Bactopia Tool which uses eggNOG-mapper to assign functional annotation to protein sequences. ---- -# Bactopia Tool - `eggnog` -The `eggnog` module uses [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) to assign -functional annotation to protein sequences. eggNOG-mapper uses orthologous groups and phylogenies -from the eggNOG database to more precisely functionally annotate than traditional homology methods. - - -## Example Usage -``` -bactopia --wf eggnog \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `eggnog` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── eggnog -│ ├── .emapper.annotations -│ ├── .emapper.hits -│ ├── .emapper.seed_orthologs -│ └── logs -│ ├── nf-eggnog.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── eggnog- - └── nf-reports - ├── eggnog-dag.dot - ├── eggnog-report.html - ├── eggnog-timeline.html - └── eggnog-trace.txt - -``` - - - -### Results - -#### eggNOG-mapper - -Below is a description of the _per-sample_ results from [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper). -For full details about each of the eggNOG output files, see -[eggNOG-mapper - Outputs](https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.7#Output_format). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.emapper.annotations | Results from the annotation phase | -| <SAMPLE_NAME>.emapper.hits | Results from the search phase, from HMMER, Diamond or MMseqs2 | -| <SAMPLE_NAME>.emapper.seed_orthologs | Results from parsing the hits | -| <SAMPLE_NAME>.emapper.annotations.xlsx | (Optional) Annotations in .xlsx format | -| <SAMPLE_NAME>.emapper.orthologs | (Optional) List of orthologs found for each query | -| <SAMPLE_NAME>.emapper.genepred.fasta | (Optional) Sequences of predicted CDS | -| <SAMPLE_NAME>.emapper.gff | (Optional) GFF of predicted CDS | -| <SAMPLE_NAME>.emapper.no_annotations.fasta | (Optional) Sequences without annotation | -| <SAMPLE_NAME>.emapper.pfam | (Optional) Positions of the PFAM domains identified | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| eggnog-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| eggnog-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| eggnog-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| eggnog-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### eggNOG Downloader Parameters - - -| Parameter | Description | -|:---|---| -| ` --eggnog_db` | Tarball or path to eggNOG databases
**Type:** `string` | -| ` --download_eggnog` | Required if downloading latest eggNOG database, will overwrite existing databases.
**Type:** `boolean` | -| ` --eggnog_save_as_tarball` | Save the eggNOG database as a single tarball
**Type:** `string` | -| ` --skip_diamond` | Do not install the diamond database
**Type:** `boolean` | -| ` --install_mmseq` | Install the MMseqs2 database
**Type:** `boolean` | -| ` --install_pfam` | Install the Pfam database, required for de novo annotation or realignment
**Type:** `boolean` | -| ` --install_hmm` | Install the HMMER database specified with --hmmer_taxid
**Type:** `boolean` | -| ` --hmmer_taxid` | Tax ID of eggNOG HMM database to download
**Type:** `integer`, **Default:** `2` | - -### eggNOG Mapper Parameters - - -| Parameter | Description | -|:---|---| -| ` --genepred` | Method to use for gene prediction
**Type:** `string`, **Default:** `search` | -| ` --mode` | Method to search against eggNOG sequences
**Type:** `string`, **Default:** `diamond` | -| ` --eggnog_opts` | Extra eggNOG Mapper options in quotes
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
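**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments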
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `eggnog` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [eggNOG 5.0 Database](http://eggnog.embl.de/) - Huerta-Cepas J, Szklarczyk D, Heller D, Hernández-Plaza A, Forslund SK, Cook H, Mende DR, Letunic I, Rattei T, Jensen LJ, von Mering C, Bork P [eggNOG 5.0: a hierarchical, functionally and phylogenetically annotated orthology resource based on 5090 organisms and 2502 viruses.](https://doi.org/10.1093/nar/gky1085) _Nucleic Acids Res._ 47, D309–D314 (2019) - -- [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) - Huerta-Cepas J, Forslund K, Coelho LP, Szklarczyk D, Jensen LJ, von Mering C, Bork P [Fast Genome-Wide Functional Annotation through Orthology Assignment by eggNOG-Mapper.](http://dx.doi.org/10.1093/molbev/msx148) _Mol. Biol. Evol._ 34, 2115–2122 (2017) - diff --git a/docs/bactopia-tools/emmtyper.md b/docs/bactopia-tools/emmtyper.md deleted file mode 100644 index 2ec34e34..00000000 --- a/docs/bactopia-tools/emmtyper.md +++ /dev/null @@ -1,247 +0,0 @@ ---- -title: emmtyper -description: A Bactopia Tool which uses emmtyper for emm-typing of _Streptococcus pyogenes_ using a de novo or complete assembly. ---- -# Bactopia Tool - `emmtyper` -The `emmtyper` module uses [emmtyper](https://github.com/MDU-PHL/emmtyper) for -emm-typing of _Streptococcus pyogenes_ using a de novo or complete assembly. - - -## Example Usage -``` -bactopia --wf emmtyper \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `emmtyper` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── emmtyper -│ ├── .tsv -│ └── logs -│ ├── nf-emmtyper.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── emmtyper- - ├── merged-results - │ ├── emmtyper.tsv - │ └── logs - │ └── emmtyper-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── emmtyper-dag.dot - ├── emmtyper-report.html - ├── emmtyper-timeline.html - └── emmtyper-trace.txt - -``` - -:::info[Directory structure might be different] - -`emmtyper` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `emmtyper` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| emmtyper.tsv | A merged TSV file with `emmtyper` results from all samples | - - -#### emmtyper - -Below is a description of the _per-sample_ results from [emmtyper](https://github.com/MDU-PHL/emmtyper). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `emmtyper` result, see [emmtyper - Result format](https://github.com/MDU-PHL/emmtyper#result-format) for details | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| emmtyper-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| emmtyper-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| emmtyper-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| emmtyper-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### emmtyper Parameters - - -| Parameter | Description | -|:---|---| -| ` --emmtyper_wf` | Workflow for emmtyper to use.
**Type:** `string`, **Default:** `blast` | -| ` --cluster_distance` | Distance between cluster of matches to consider as different clusters
**Type:** `integer`, **Default:** `500` | -| ` --percid` | Minimal percent identity of sequence
**Type:** `integer`, **Default:** `95` | -| ` --culling_limit` | Total hits to return in a position
**Type:** `integer`, **Default:** `5` | -| ` --mismatch` | Threshold for number of mismatch to allow in BLAST hit
**Type:** `integer`, **Default:** `5` | -| ` --align_diff` | Threshold for difference between alignment length and subject length in BLAST
**Type:** `integer`, **Default:** `5` | -| ` --gap` | Threshold gap to allow in BLAST hit
**Type:** `integer`, **Default:** `2` | -| ` --min_perfect` | Minimum size of perfect match at 3 primer end
**Type:** `integer`, **Default:** `2` | -| ` --min_perfect` | Minimum size of perfect match at 3' primer end
**Type:** `integer`, **Default:** `15` | -| ` --max_size` | Maximum size of PCR product
**Type:** `integer`, **Default:** `2000` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
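**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments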
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `emmtyper` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [emmtyper](https://github.com/MDU-PHL/emmtyper) - Tan A, Seemann T, Lacey D, Davies M, Mcintyre L, Frost H, Williamson D, Gonçalves da Silva A [emmtyper - emm Automatic Isolate Labeller](https://github.com/MDU-PHL/emmtyper) (GitHub) - diff --git a/docs/bactopia-tools/fastani.md b/docs/bactopia-tools/fastani.md deleted file mode 100644 index ec1eebdb..00000000 --- a/docs/bactopia-tools/fastani.md +++ /dev/null @@ -1,240 +0,0 @@ ---- -title: fastani -description: A Bactopia Tool which uses FastANI to calculate the average nucleotide identity (ANI) between your samples. ---- -# Bactopia Tool - `fastani` -The `fastani` module uses [FastANI](https://github.com/ParBLiSS/FastANI) to calculate the average -nucleotide identity (ANI) between your samples. - -Although, sometimes you might be more interested in calculating the ANI of your samples against -a reference genome. Fortunately, using [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download), -the `fastani` module allows you specify either a specific NCBI Assembly RefSeq accession (`--accession`) -or a species name (`--species`) for which to download all RefSeq genomes. - - -## Example Usage -``` -bactopia --wf fastani \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `fastani` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -└── bactopia-runs - └── fastani- - ├── fastani - │ ├── .tsv - │ └── logs - │ └── - │ ├── nf-fastani.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - ├── merged-results - │ ├── fastani.tsv - │ └── logs - │ └── fastani-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── fastani-dag.dot - ├── fastani-report.html - ├── fastani-timeline.html - └── fastani-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| fastani.tsv | A merged TSV file with `FastANI` results from all samples | - - -#### FastANI - -Below is a description of the _per-sample_ results from [FastANI](https://github.com/ParBLiSS/FastANI). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | FastANI results of all samples against a single samples | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| fastani-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| fastani-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| fastani-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| fastani-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### fastANI Parameters - - -| Parameter | Description | -|:---|---| -| ` --kmer` | kmer size (<= 16) for ANI calculation
**Type:** `integer`, **Default:** `16` | -| ` --min_fraction` | Minimum fraction of genome that must be shared for trusting ANI.
**Type:** `number`, **Default:** `0.2` | -| ` --frag_len` | fragment length
**Type:** `integer`, **Default:** `3000` | -| ` --skip_pairwise` | Only use RefSeq or local assemblies for ANI calculations
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
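**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments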
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `fastani` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [FastANI](https://github.com/ParBLiSS/FastANI) - Jain C, Rodriguez-R LM, Phillippy AM, Konstantinidis KT, Aluru S [High throughput ANI analysis of 90K prokaryotic genomes reveals clear species boundaries.](http://dx.doi.org/10.1038/s41467-018-07641-9) _Nat. Commun._ 9, 5114 (2018) - -- [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) - Blin K [ncbi-genome-download: Scripts to download genomes from the NCBI FTP servers](https://github.com/kblin/ncbi-genome-download) (GitHub) - diff --git a/docs/bactopia-tools/gamma.md b/docs/bactopia-tools/gamma.md deleted file mode 100644 index 26cb2e8d..00000000 --- a/docs/bactopia-tools/gamma.md +++ /dev/null @@ -1,237 +0,0 @@ ---- -title: gamma -description: A Bactopia Tool which uses GAMMA to identify, classify, and annotate translated gene matches from assemblies. ---- -# Bactopia Tool - `gamma` -The `gamma` module uses [GAMMA](https://github.com/rastanton/GAMMA) to identify, classify, and annotate -translated gene matches from assemblies. - - -## Example Usage -``` -bactopia --wf gamma \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `gamma` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── gamma -│ └── gamma -│ ├── .gamma -│ ├── .psl -│ └── logs -│ ├── nf-gamma.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── gamma- - ├── merged-results - │ ├── gamma.tsv - │ └── logs - │ └── gamma-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── gamma-dag.dot - ├── gamma-report.html - ├── gamma-timeline.html - └── gamma-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| gamma.tsv | A merged TSV file with `GAMMA` results from all samples | - - -#### GAMMA - -Below is a description of the _per-sample_ results from [GAMMA](https://github.com/rastanton/GAMMA). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.fasta | Annotated gene sequences is FASTA format | -| <SAMPLE_NAME>.gamma | A TSV file with annotated gene matches | -| <SAMPLE_NAME>.gff | Annotated gene matches is GFF3 format | -| <SAMPLE_NAME>.psl | A PSL file with BLAT gene alignments | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| gamma-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| gamma-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| gamma-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| gamma-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### GAMMA Parameters - - -| Parameter | Description | -|:---|---| -| ` --gamma_db` | A gene database (FASTA) for GAMMA
**Type:** `string` | -| ` --percent_identity` | The minimum nucleotide sequence identity % used by the BLAT search
**Type:** `integer`, **Default:** `90` | -| ` --all_matches` | Include all gene matches, even overlaps
**Type:** `boolean` | -| ` --extended` | Writes out all protein mutations
**Type:** `boolean` | -| ` --write_fasta` | Write FASTA of gene matches
**Type:** `boolean` | -| ` --write_gff` | Write gene matches as GFF file
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `gamma` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [GAMMA](https://github.com/rastanton/GAMMA) - Stanton RA, Vlachos N, Halpin AL [GAMMA: a tool for the rapid identification, classification, and annotation of translated gene matches from sequencing data.](https://doi.org/10.1093/bioinformatics/btab607) _Bioinformatics_ (2021) - diff --git a/docs/bactopia-tools/genotyphi.md b/docs/bactopia-tools/genotyphi.md deleted file mode 100644 index 86927a4b..00000000 --- a/docs/bactopia-tools/genotyphi.md +++ /dev/null @@ -1,262 +0,0 @@ ---- -title: genotyphi -description: A Bactopia Tool which uses GenoTyphi to genotype Salmonella Typhi from the outputs of Mykrobe. ---- -# Bactopia Tool - `genotyphi` -The `genotyphi` module uses [GenoTyphi](https://github.com/typhoidgenomics/genotyphi) to -call Typhi lineages, AMR determinants, and plasmid markers in Salmonella Typhi samples. -Samples are first processed by [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) using `mykrobe predict` -with `typhi` specified as the species. Then the Mykrobe results are then processed by the -[parse_typhi_mykrobe.py](https://github.com/typhoidgenomics/genotyphi/blob/main/typhimykrobe/parse_typhi_mykrobe.py) -script available from GenoTyphi. - - -## Example Usage -``` -bactopia --wf genotyphi \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `genotyphi` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── genotyphi -│ ├── logs -│ │ ├── genotyphi -│ │ │ ├── nf-genotyphi.{begin,err,log,out,run,sh,trace} -│ │ │ └── versions.yml -│ │ └── mykrobe -│ │ ├── nf-genotyphi.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── test.csv -│ ├── test.json -│ └── test_predictResults.tsv -└── bactopia-runs - └── genotyphi- - ├── merged-results - │ ├── genotyphi.tsv - │ └── logs - │ └── genotyphi-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── genotyphi-dag.dot - ├── genotyphi-report.html - ├── genotyphi-timeline.html - └── genotyphi-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| genotyphi.tsv | A merged TSV file with `GenoTyphi` results from all samples | - - -#### GenoTyphi - -Below is a description of the _per-sample_ results from [GenoTyphi](https://github.com/typhoidgenomics/genotyphi/). A -full description of the GenoTyphi output is available at [GenoTyphi - Output](https://github.com/typhoidgenomics/genotyphi/tree/main?tab=readme-ov-file#explanation-of-columns-in-the-output) - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_predictResults.tsv | A tab-delimited file with `GenoTyphi` results | -| <SAMPLE_NAME>.csv | The output of `mykrobe predict` in comma-separated format | -| <SAMPLE_NAME>.json | The output of `mykrobe predict` in JSON format | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| genotyphi-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| genotyphi-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| genotyphi-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| genotyphi-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### GenoTyphi Parameters - - -| Parameter | Description | -|:---|---| -| ` --kmer` | K-mer length
**Type:** `integer`, **Default:** `21` | -| ` --min_depth` | Minimum depth
**Type:** `integer`, **Default:** `1` | -| ` --model` | Genotype model used.
**Type:** `string`, **Default:** `kmer_count` | -| ` --report_all_calls` | Report all calls
**Type:** `boolean` | -| ` --mykrobe_opts` | Extra Mykrobe options in quotes
**Type:** `string` | - -### Mykrobe Parameters - - -| Parameter | Description | -|:---|---| -| ` --mykrobe_species` | Species panel to use
**Type:** `string` | -| ` --kmer` | K-mer length
**Type:** `integer`, **Default:** `21` | -| ` --min_depth` | Minimum depth
**Type:** `integer`, **Default:** `1` | -| ` --model` | Genotype model used.
**Type:** `string`, **Default:** `kmer_count` | -| ` --report_all_calls` | Report all calls
**Type:** `boolean` | -| ` --mykrobe_opts` | Extra Mykrobe options in quotes
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `genotyphi` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [GenoTyphi](https://github.com/katholt/genotyphi) - Wong VK, Baker S, Connor TR, Pickard D, Page AJ, Dave J, Murphy N, Holliman R, Sefton A, Millar M, Dyson ZA, Dougan G, Holt KE, & International Typhoid Consortium. [An extended genotyping framework for Salmonella enterica serovar Typhi, the cause of human typhoid](https://doi.org/10.1038/ncomms12827) _Nature Communications_ 7, 12827. (2016) - -- [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) - Hunt M, Bradley P, Lapierre SG, Heys S, Thomsit M, Hall MB, Malone KM, Wintringer P, Walker TM, Cirillo DM, Comas I, Farhat MR, Fowler P, Gardy J, Ismail N, Kohl TA, Mathys V, Merker M, Niemann S, Omar SV, Sintchenko V, Smith G, Supply P, Tahseen S, Wilcox M, Arandjelovic I, Peto TEA, Crook, DW, Iqbal Z [Antibiotic resistance prediction for Mycobacterium tuberculosis from genome sequence data with Mykrobe](https://doi.org/10.12688/wellcomeopenres.15603.1) _Wellcome Open Research_ 4, 191. (2019) - diff --git a/docs/bactopia-tools/gtdb.md b/docs/bactopia-tools/gtdb.md deleted file mode 100644 index 827ec7ae..00000000 --- a/docs/bactopia-tools/gtdb.md +++ /dev/null @@ -1,267 +0,0 @@ ---- -title: gtdb -description: A Bactopia Tool which uses GTDB-Tk's classify workflow to assign taxonomic classifications to your set of samples. ---- -# Bactopia Tool - `gtdb` -The `gtdb` tool uses [GTDB-Tk's](https://github.com/Ecogenomics/GTDBTk) classify -workflow to assign taxonomic classifications to your set of samples. 
This is -done through the use of the [Genome Taxonomy Database](https://gtdb.ecogenomic.org/). -If you are unsure of your sequences, `gtdb` is useful tool to help determine -the taxonomy of your samples. - - -## Example Usage -``` -bactopia --wf gtdb \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `gtdb` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── gtdb -│ ├── align -│ │ ├── .{ar122|bac120}.filtered.tsv -│ │ ├── .{ar122|bac120}.msa.fasta.gz -│ │ └── .{ar122|bac120}.user_msa.fasta.gz -│ ├── classify -│ │ ├── .{ar122|bac120}.classify.tree.1.tree -│ │ ├── .{ar122|bac120}.classify.tree.6.tree -│ │ ├── .{ar122|bac120}.summary.tsv -│ │ ├── .{ar122|bac120}.tree.mapping.tsv -│ │ └── .backbone.{ar122|bac120}.classify.tree -│ ├── gtdbtk.json -│ ├── identify -│ │ ├── .ar53.markers_summary.tsv -│ │ ├── .bac120.markers_summary.tsv -│ │ ├── .failed_genomes.tsv -│ │ └── .translation_table_summary.tsv -│ ├── logs -│ │ ├── nf-gtdb.{begin,err,log,out,run,sh,trace} -│ │ ├── results -│ │ │ ├── gtdbtk.log -│ │ │ └── gtdbtk.warnings.log -│ │ └── versions.yml -│ └── .{ar122|bac120}.summary.tsv -└── bactopia-runs - └── gtdb- - ├── merged-results - │ ├── gtdb.tsv - │ └── logs - │ └── gtdb-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── gtdb-dag.dot - ├── gtdb-report.html - ├── gtdb-timeline.html - └── gtdb-trace.txt - -``` - - - -### Results - -#### GTDB-Tk - -Below is a description of the _per-sample_ results from [GTDB-Tk](https://github.com/Ecogenomics/GTDBTk). -For full details about each of the GTDB output files, see -[GTDB-Tk Classify Workflow](https://github.com/Ecogenomics/GTDBTk#classify-workflow) page. 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.{ar122|bac120}.classify.tree | Reference tree in Newick format containing query genomes placed with pplacer | -| <SAMPLE_NAME>.{ar122|bac120}.filtered.tsv | List of genomes with an insufficient number of amino acids in MSA | -| <SAMPLE_NAME>.{ar122|bac120}.markers_summary.tsv | Markers used in generation of the concatenated MSA and the order in which they were applied | -| <SAMPLE_NAME>.{ar122|bac120}.msa.fasta | FASTA file containing MSA of submitted and reference genomes | -| <SAMPLE_NAME>.{ar122|bac120}.summary.tsv | A summary of classifications provided by GTDB-Tk, see [classification summary](https://github.com/Ecogenomics/GTDBTk#classification-summary-file) for more details | -| <SAMPLE_NAME>.{ar122|bac120}.user_msa.fasta | FASTA file containing MSA of the submitted genomes | -| <SAMPLE_NAME>.translation_table_summary.tsv | Summary of the translation table used for each genome | -| <SAMPLE_NAME>.ar122.markers_summary.tsv | Summary of unique, duplicated, and missing markers within the 122 archaeal marker set for each submitted genome | -| <SAMPLE_NAME>.bac120.markers_summary.tsv | Summary of unique, duplicated, and missing markers within the 120 bacterial marker set for each submitted genome | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| gtdb-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| gtdb-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| gtdb-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| gtdb-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### GTDB Setup Parameters - - -| Parameter | Description | -|:---|---| -| ` --gtdb` | Tarball or path of a GTDB database. If a database is not found, you must use '--download_gtdb'
**Type:** `string` | -| ` --download_gtdb` | Download the latest GTDB database, even if it exists
**Type:** `boolean` | -| ` --gtdb_save_as_tarball` | Download the latest GTDB database, and save it in a single tarball
**Type:** `boolean` | -| ` --skip_check` | Do not check the installation of GTDB database
**Type:** `boolean` | - -### GTDB Classify Parameters - - -| Parameter | Description | -|:---|---| -| ` --min_af` | Minimum alignment fraction to consider closest genome
**Type:** `number`, **Default:** `0.65` | -| ` --min_perc_aa` | Filter genomes with an insufficient percentage of AA in the MSA
**Type:** `integer`, **Default:** `10` | -| ` --gtdb_tmp` | Specify alternative directory for temporary files
**Type:** `string`, **Default:** `/tmp` | -| ` --gtdb_use_scratch` | Reduce pplacer memory usage by writing to --gtdb_tmp location (slower)
**Type:** `boolean` | -| ` --gtdb_debug` | Create intermediate files for debugging purposes
**Type:** `boolean` | -| ` --force_gtdb` | Continue processing if an error occurs on a single genome
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `gtdb` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Genome Taxonomy Database](https://gtdb.ecogenomic.org/) - Parks DH, Chuvochina M, Rinke C, Mussig AJ, Chaumeil P-A, Hugenholtz P [GTDB: an ongoing census of bacterial and archaeal diversity through a phylogenetically consistent, rank normalized and complete genome-based taxonomy](https://doi.org/10.1093/nar/gkab776) _Nucleic Acids Research_ gkab776 (2021) - -- [GTDB-Tk](https://github.com/Ecogenomics/GTDBTk) - Chaumeil PA, Mussig AJ, Hugenholtz P, Parks DH [GTDB-Tk: a toolkit to classify genomes with the Genome Taxonomy Database.](https://doi.org/10.1093/bioinformatics/btz848) _Bioinformatics_ (2019) - diff --git a/docs/bactopia-tools/hicap.md b/docs/bactopia-tools/hicap.md deleted file mode 100644 index 4528aff5..00000000 --- a/docs/bactopia-tools/hicap.md +++ /dev/null @@ -1,247 +0,0 @@ ---- -title: hicap -description: A Bactopia Tool which uses hicap along wih an assembly for the _in silico_ typing of the _Haemophilus influenzae_ cap locus. ---- -# Bactopia Tool - `hicap` -The `hicap` module uses [hicap](https://github.com/scwatts/hicap) along wih an assembly for -the _in silico_ typing of the _Haemophilus influenzae_ cap locus. - - -## Example Usage -``` -bactopia --wf hicap \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `hicap` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── hicap -│ ├── .{gbk|svg|tsv} -│ └── logs -│ ├── nf-hicap.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── hicap- - ├── merged-results - │ ├── hicap.tsv - │ └── logs - │ └── hicap-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── hicap-dag.dot - ├── hicap-report.html - ├── hicap-timeline.html - └── hicap-trace.txt - -``` - -:::info[Directory structure might be different] - -`hicap` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `hicap` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| hicap.tsv | A merged TSV file with `hicap` results from all samples | - - -#### hicap - -Below is a description of the _per-sample_ results from [hicap](https://github.com/scwatts/hicap). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.gbk | GenBank file and cap locus annotations | -| <SAMPLE_NAME>.svg | Visualization of annotated cap locus | -| <SAMPLE_NAME>.tsv | A tab-delimited file with `hicap` results | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| hicap-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| hicap-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| hicap-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| hicap-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data.
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### hicap Parameters - - -| Parameter | Description | -|:---|---| -| ` --database_dir` | Directory containing locus database
**Type:** `string` | -| ` --model_fp` | Path to prodigal model
**Type:** `string` | -| ` --full_sequence` | Write the full input sequence out to the genbank file rather than just the region surrounding and including the locus
**Type:** `boolean` | -| ` --hicap_debug` | hicap will print debug messages
**Type:** `boolean` | -| ` --gene_coverage` | Minimum percentage coverage to consider a single gene complete
**Type:** `number`, **Default:** `0.8` | -| ` --gene_identity` | Minimum percentage identity to consider a single gene complete
**Type:** `number`, **Default:** `0.7` | -| ` --broken_gene_length` | Minimum length to consider a broken gene
**Type:** `integer`, **Default:** `60` | -| ` --broken_gene_identity` | Minimum percentage identity to consider a broken gene
**Type:** `number`, **Default:** `0.8` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `hicap` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [hicap](https://github.com/scwatts/hicap) - Watts SC, Holt KE [hicap: in silico serotyping of the Haemophilus influenzae capsule locus.](https://doi.org/10.1128/JCM.00190-19) _Journal of Clinical Microbiology_ JCM.00190-19 (2019) - diff --git a/docs/bactopia-tools/hpsuissero.md b/docs/bactopia-tools/hpsuissero.md deleted file mode 100644 index 901c1ebb..00000000 --- a/docs/bactopia-tools/hpsuissero.md +++ /dev/null @@ -1,232 +0,0 @@ ---- -title: hpsuissero -description: A Bactopia Tool which uses HpsuisSero to predict the serotype of _Haemophilus parasuis_ assemblies. ---- -# Bactopia Tool - `hpsuissero` -The `hpsuissero` module uses [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero) to predict -the serotype of _Haemophilus parasuis_ assemblies. - - -## Example Usage -``` -bactopia --wf hpsuissero \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `hpsuissero` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── hpsuissero -│ ├── _serotyping_res.tsv -│ └── logs -│ ├── nf-hpsuissero.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── hpsuissero- - ├── merged-results - │ ├── hpsuissero.tsv - │ └── logs - │ └── hpsuissero-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── hpsuissero-dag.dot - ├── hpsuissero-report.html - ├── hpsuissero-timeline.html - └── hpsuissero-trace.txt - -``` - -:::info[Directory structure might be different] - -`hpsuissero` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `hpsuissero` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| hpsuissero.tsv | A merged TSV file with `HpsuisSero` results from all samples | - - -#### HpsuisSero - -Below is a description of the _per-sample_ results from [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_serotyping_res.tsv | A tab-delimited file with `HpsuisSero` result | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| hpsuissero-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| hpsuissero-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| hpsuissero-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| hpsuissero-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data.
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `hpsuissero` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero) - Lui J [HpsuisSero: Rapid _Haemophilus parasuis_ serotyping](https://github.com/jimmyliu1326/HpsuisSero) (GitHub) - diff --git a/docs/bactopia-tools/index.md b/docs/bactopia-tools/index.md deleted file mode 100644 index 7bd57509..00000000 --- a/docs/bactopia-tools/index.md +++ /dev/null @@ -1,156 +0,0 @@ ---- -title: Bactopia Tools -description: >- - More than 64 additional workflows to allow you to easily - dive further into your Bactopia results. ---- -# Overview - -After you've run your samples through Bactopia, you are probably going to want to -investigate them some more, or conduct some comparative analyses. That's where -Bactopia Tools come into play! - -Bactopia Tools are a set of pre-defined workflows such as pan-genome contruction, -serotyping, and phylogenies. A main benefit of using Bactopia Tools, is they make -use of the predictable output structure of Bactopia to automate analyses. This saves -you valuable time by allowing you to make further use of the -[many output files](../full-guide.md) to extend your analyses. - -### Available Bactopia Tools -There are currently 64 Bactopia Tools that you can make use of. -Below are a list of available Bactopia Tools, grouped as __Subworkflows__ and __Modules__. - -#### Subworkflows (11) - -Subworkflows string together tools to create an complete pipeline. 
- -| Subworkflow | Description | -|-------------|-------------| -| [ariba](ariba.md) | Gene identification through local assemblies | - | [bakta](bakta.md) | Rapid annotation of bacterial genomes and plasmids | - | [eggnog](eggnog.md) | Functional annotation of proteins using orthologous groups and phylogenies | - | [gtdb](gtdb.md) | Identify marker genes and assign taxonomic classifications | - | [mashtree](mashtree.md) | Quickly create a tree using Mash distances | - | [merlin](merlin.md) | MinmER assisted species-specific bactopia tool seLectIoN | - | [pangenome](pangenome.md) | Pangenome analysis with optional core-genome phylogeny | - | [scrubber](scrubber.md) | Scrub human reads from FASTQ files | - | [snippy](snippy.md) | Rapid variant calling from Illumina sequence reads with optional core-SNP phylogeny | - | [staphtyper](staphtyper.md) | Determine the agr, spa and SCCmec types for _Staphylococcus aureus_ genomes | - | [tbprofiler](tbprofiler.md) | Detect resistance and lineages of _Mycobacterium tuberculosis_ genomes | - - -#### Modules (53) - -Modules are workflows consisting of only one tool. - -| Module | Description | -|-------------|-------------| -| [abricate](abricate.md) | Mass screening of contigs for antimicrobial and virulence genes | - | [abritamr](abritamr.md) | A NATA accredited tool for reporting the presence of antimicrobial resistance genes | - | [agrvate](agrvate.md) | Rapid identification of Staphylococcus aureus agr locus type and agr operon variants. 
| - | [amrfinderplus](amrfinderplus.md) | Identify antimicrobial resistance in genes or proteins | - | [blastn](blastn.md) | Search against nucleotide BLAST databases using nucleotide queries | - | [blastp](blastp.md) | Search against protein BLAST databases using protein queries | - | [blastx](blastx.md) | Search against protein BLAST databases using translated nucleotide queries | - | [bracken](bracken.md) | Estimate taxonomic abundance of samples from Kraken2 results | - | [btyper3](btyper3.md) | Taxonomic classification of Bacillus cereus group isolates | - | [busco](busco.md) | Assembly completeness based on evolutionarily informed expectations | - | [checkm](checkm.md) | Assess the assembly quality of your samples | - | [clermontyping](clermontyping.md) | in silico phylotyping of Escherichia genus | - | [defensefinder](defensefinder.md) | Systematic search of all known anti-phage systems | - | [ectyper](ectyper.md) | In-silico prediction of _Escherichia coli_ serotype | - | [emmtyper](emmtyper.md) | emm-typing of _Streptococcus pyogenes_ assemblies | - | [fastani](fastani.md) | fast alignment-free computation of whole-genome Average Nucleotide Identity (ANI) | - | [gamma](gamma.md) | Identification, classification, and annotation of translated gene matches | - | [genotyphi](genotyphi.md) | Salmonella Typhi genotyping with Mykrobe outputs | - | [hicap](hicap.md) | Identify cap locus serotype and structure in your _Haemophilus influenzae_ assemblies | - | [hpsuissero](hpsuissero.md) | Serotype prediction of _Haemophilus parasuis_ assemblies | - | [ismapper](ismapper.md) | Identify insertion sites positions in bacterial genomes | - | [kleborate](kleborate.md) | Screening Klebsiella genome assemblies for MLST, sub-species, and other related genes of interest | - | [kraken2](kraken2.md) | Taxonomic classifications of sequence reads | - | [legsta](legsta.md) | Typing of Legionella pneumophila assemblies | - | [lissero](lissero.md) | Serogroup typing prediction for 
_Listeria monocytogenes_ | - | [mashdist](mashdist.md) | Calculate Mash distances between sequences | - | [mcroni](mcroni.md) | Sequence variation in mcr-1 genes (mobilized colistin resistance) | - | [meningotype](meningotype.md) | Serotyping of Neisseria meningitidis | - | [midas](midas.md) | Estimate species abundances from FASTQ files | - | [mlst](mlst.md) | Automatic MLST calling from assembled contigs | - | [mobsuite](mobsuite.md) | Reconstruct and annotate plasmids in bacterial assemblies | - | [mykrobe](mykrobe.md) | Antimicrobial resistance detection for specific species | - | [ngmaster](ngmaster.md) | Multi-antigen sequence typing for _Neisseria gonorrhoeae_ | - | [pasty](pasty.md) | in silico serogrouping of Pseudomonas aeruginosa isolates | - | [pbptyper](pbptyper.md) | Penicillin Binding Protein (PBP) typer for Streptococcus pneumoniae | - | [phispy](phispy.md) | Predict prophages in bacterial genomes | - | [plasmidfinder](plasmidfinder.md) | Plasmid identification from assemblies | - | [pneumocat](pneumocat.md) | Assign capsular type to Streptococcus pneumoniae from sequence reads | - | [quast](quast.md) | A module for assessing the quality of assembled contigs | - | [rgi](rgi.md) | Predict antibiotic resistance from assemblies | - | [sccmec](sccmec.md) | A tool for typing SCCmec cassettes in assemblies | - | [seqsero2](seqsero2.md) | Salmonella serotype prediction from reads or assemblies | - | [seroba](seroba.md) | Serotyping of Streptococcus pneumoniae from sequence reads | - | [shigapass](shigapass.md) | Predict Shigella serotypes and differentiate Shigella, EIEC and non-Shigella/EIEC | - | [shigatyper](shigatyper.md) | Shigella serotype from Illumina or Oxford Nanopore reads | - | [shigeifinder](shigeifinder.md) | Shigella and EIEC serotyping from assemblies | - | [sistr](sistr.md) | Serovar prediction of Salmonella assemblies | - | [spatyper](spatyper.md) | Computational method for finding spa types in _Staphylococcus aureus_ | - | 
[ssuissero](ssuissero.md) | Serotype prediction of _Streptococcus suis_ assemblies | - | [staphopiasccmec](staphopiasccmec.md) | Primer based SCCmec typing of _Staphylococcus aureus_ genomes | - | [stecfinder](stecfinder.md) | Serotype of Shigatoxin producing E. coli using Illumina reads or assemblies | - | [tblastn](tblastn.md) | Search against translated nucleotide BLAST databases using protein queries | - | [tblastx](tblastx.md) | Search against translated nucleotide databases using a translated nucleotide query | - - -### Common Inputs - -With the exceptions of the `summary` tool, each Bactopia Tool will use the following -input parameters: -``` - --bactopia STR Directory containing Bactopia analysis results for all samples. - - --exclude STR A text file containing sample names to exclude from the - analysis. The expected format is a single sample per line. - - --include STR A text file containing sample names to include in the - analysis. The expected format is a single sample per line. -``` - -#### `--bactopia` - -This parameter tells each tool where to find your Bactopia outputs from your project. -Using this path, the tool will identify the required inputs and begin analysis. What -this means is there is no need for you to wrangle up input files for comparative analyses. - -#### `--exclude` - -What `--exclude` allows is for you to give a text file with a list of samples to -exclude from further analyses. While you can produce this list yourself, the -`summary` tool will produce a list of samples that do not pass certain thresholds. -These thresholds are based on read lengths, sequence quality, coverage and assembly -quality. You can adjust these thresholds to meet your needs. - -#### `--include` - -Similarly, `--include` allows you to give a text file with a list of samples to be -included in the analysis. This allows you to target your analyses on a specific subset -of samples. 
An example of this may be to use the `fastani` tool to determine samples -with >95% ANI to a reference, then create a pan-genome with the `pangenome` tool using -only the subset of samples. - -### nf-core/modules Availabilty - -Good news! All Bactopia Tools are also available through [nf-core/modules](https://github.com/nf-core/modules), -a repository of ready to use Nextflow DSL2 modules. This means you can leverage nf-core tools -to rapidly string together your own workflows. - -Many of the above Bactopia Tools were submitted to [nf-core/modules](https://github.com/nf-core/modules) -as part of [Bactopia V2](https://github.com/bactopia/bactopia/issues/233). The [nf-core Team](https://nf-co.re/about) -is a fun group to work with so expect many more Bactopia Tools to find their way to -[nf-core/modules](https://github.com/nf-core/modules)! - -_Thank you modules team!_ - -### Suggest A Tool - -Do you have an idea or suggestion for an analysis that should be added to the set -of Bactopia Tools? If so, please feel free to submit it to -[Bactopia GitHub Issues](https://github.com/bactopia/bactopia/issues)! \ No newline at end of file diff --git a/docs/bactopia-tools/iqtree.md b/docs/bactopia-tools/iqtree.md deleted file mode 100644 index 5136519a..00000000 --- a/docs/bactopia-tools/iqtree.md +++ /dev/null @@ -1,131 +0,0 @@ ---- -tags: - - alignment - - phylogeny ---- - - - -# Bactopia Tool - `iqtree` -The `iqtree` module uses [IQ-TREE](https://github.com/Cibiv/IQ-TREE) to create phylogenetic trees -based on a multiple sequence alignment. In most cases the `iqtree` module will be used in a -subworkflow where an alignment is created. - - -## Example Usage -``` -bactopia --wf iqtree \ - --bactopia /path/to/your/bactopia/results \ - --include includes.txt -``` - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | Default | -|---|---|---| -| `--bactopia` | The path to bactopia results to use as inputs | | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | Default | -|---|---|---| -| `--include` | A text file containing sample names (one per line) to include from the analysis | | -| `--exclude` | A text file containing sample names (one per line) to exclude from the analysis | | - - -### IQ-TREE Parameters - - -| Parameter | Description | Default | -|---|---|---| -| `--m` | Substitution model name | MFP | -| `--bb` | Ultrafast bootstrap replicates | 1000 | -| `--alrt` | SH-like approximate likelihood ratio test replicates | 1000 | -| `--asr` | Ancestral state reconstruction by empirical Bayes | False | -| `--iqtree_opts` | Extra IQ-TREE options in quotes. | | -| `--skip_phylogeny` | Skip IQ-TREE execution in subworkflows | False | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | Default | -|---|---|---| -| `--outdir` | Base directory to write results to | ./ | -| `--run_name` | Name of the directory to hold results | bactopia | -| `--skip_compression` | Ouput files will not be compressed | False | -| `--keep_all_files` | Keeps all analysis files created | False | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | Default | -|---|---|---| -| `--max_retry` | Maximum times to retry a process before allowing it to fail. | 3 | -| `--max_cpus` | Maximum number of CPUs that can be requested for any single job. | 4 | -| `--max_memory` | Maximum amount of memory (in GB) that can be requested for any single job. | 32 | -| `--max_time` | Maximum amount of time (in minutes) that can be requested for any single job. 
| 120 | -| `--max_downloads` | Maximum number of samples to download at a time | 3 | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | Default | -|---|---|---| -| `--nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set. | | -| `--publish_dir_mode` | Method used to save pipeline results to output directory. | copy | -| `--infodir` | Directory to keep pipeline Nextflow logs and reports. | ${params.outdir}/pipeline_info | -| `--force` | Nextflow will overwrite existing output files. | False | -| `--cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted. | False | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | Default | -|---|---|---| -| `--condadir` | Directory to Nextflow should use for Conda environments | | -| `--registry` | Docker registry to pull containers from. | dockerhub | -| `--singularity_cache` | Directory where remote Singularity images are stored. | | -| `--singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead. | | -| `--force_rebuild` | Force overwrite of existing pre-built environments. | False | -| `--queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM) | general,high-memory | -| `--cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name' | | -| `--disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node. | False | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | Default | -|---|---|---| -| `--monochrome_logs` | Do not use coloured log outputs. 
| | -| `--nfdir` | Print directory Nextflow has pulled Bactopia to | | -| `--sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution. | 5 | -| `--validate_params` | Boolean whether to validate parameters against the schema at runtime | True | -| `--help` | Display help text. | | -| `--wf` | Specify which workflow or Bactopia Tool to execute | bactopia | -| `--list_wfs` | List the available workflows and Bactopia Tools to use with '--wf' | | -| `--show_hidden_params` | Show all params when using `--help` | | -| `--help_all` | An alias for --help --show_hidden_params | | -| `--version` | Display version text. | | - -## Citations -If you use Bactopia and `iqtree` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [IQ-TREE](https://github.com/Cibiv/IQ-TREE) - Nguyen L-T, Schmidt HA, von Haeseler A, Minh BQ [IQ-TREE: A fast and effective stochastic algorithm for estimating maximum likelihood phylogenies.](https://doi.org/10.1093/molbev/msu300) _Mol. Biol. Evol._ 32:268-274 (2015) - -- [ModelFinder](https://github.com/Cibiv/IQ-TREE) - Kalyaanamoorthy S, Minh BQ, Wong TKF, von Haeseler A, Jermiin LS [ModelFinder - Fast model selection for accurate phylogenetic estimates.](https://doi.org/10.1038/nmeth.4285) _Nat. Methods_ 14:587-589 (2017) - -- [UFBoot2](https://github.com/Cibiv/IQ-TREE) - Hoang DT, Chernomor O, von Haeseler A, Minh BQ, Vinh LS [UFBoot2: Improving the ultrafast bootstrap approximation.](https://doi.org/10.1093/molbev/msx281) _Mol. Biol. 
Evol._ 35:518–522 (2018) - diff --git a/docs/bactopia-tools/ismapper.md b/docs/bactopia-tools/ismapper.md deleted file mode 100644 index b43b2e32..00000000 --- a/docs/bactopia-tools/ismapper.md +++ /dev/null @@ -1,236 +0,0 @@ ---- -title: ismapper -description: A Bactopia Tool which uses ISMapper to search for insertion sites in your samples. ---- -# Bactopia Tool - `ismapper` -The `ismapper` module uses [ISMapper](https://github.com/jhawkey/IS_mapper) to search for -insertion sites in your samples. - - -## Example Usage -``` -bactopia --wf ismapper \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `ismapper` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── ismapper -│ └── ismapper -│ ├── -│ │ ├── __{left_final,right_final}.fastq -│ │ ├── ___closest.bed -│ │ ├── ___intersect.bed -│ │ ├── ___table.txt -│ │ ├── _{left,right}__finalcov.bed -│ │ ├── _{left,right}__merged.sorted.bed -│ │ └── _{left,right}__unpaired.bed -│ └── logs -│ ├── SRR2838702.log -│ ├── nf-ismapper.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── ismapper- - └── nf-reports - ├── ismapper-dag.dot - ├── ismapper-report.html - ├── ismapper-timeline.html - └── ismapper-trace.txt - -``` - - - -### Results - -#### ISMapper - -Below is a description of the _per-sample_ results from [ISMapper](https://github.com/jhawkey/IS_mapper). - - -| Extension | Description | -|-------------------------------|-------------| -| _final.fastq | Sequences (FASTQ format) that mapped to the flanking regions of the IS query. | -| _closest.bed | Merged regions that are close but do not overlap. | -| _intersect.bed | An intersection of merged regions from the left and right flanks. | -| _table.txt | A [detailed description](https://github.com/jhawkey/IS_mapper#single-isolate-output) of the IS query results. 
| -| _finalcov.bed | Contains information about the coverage of the IS query | -| _merged.sorted.bed | Merged overlapping regions that passed coverage cutoffs | -| _unpaired.bed | All unpaired mappings to the IS query | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| ismapper-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| ismapper-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| ismapper-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| ismapper-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include in the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### ISMapper Parameters - - -| Parameter | Description | -|:---|---| -| ` --reference` | Reference genome for typing against in GenBank format
**Type:** `string` | -| ` --insertions` | Multifasta file with insertion sequence(s) to be mapped to
**Type:** `string` | -| ` --min_clip` | Minimum size for softclipped region to be extracted from initial mapping
**Type:** `integer`, **Default:** `10` | -| ` --max_clip` | Maximum size for softclipped regions to be included
**Type:** `integer`, **Default:** `30` | -| ` --cutoff` | Minimum depth for mapped region to be kept in bed file
**Type:** `integer`, **Default:** `6` | -| ` --novel_gap_size` | Distance in base pairs between left and right flanks to be called a novel hit
**Type:** `integer`, **Default:** `15` | -| ` --min_range` | Minimum percent size of the gap to be called a known hit
**Type:** `number`, **Default:** `0.9` | -| ` --max_range` | Maximum percent size of the gap to be called a known hit
**Type:** `number`, **Default:** `1.1` | -| ` --merging` | Value for merging left and right hits in bed files together to simplify calculation of closest and intersecting regions
**Type:** `integer`, **Default:** `100` | -| ` --ismap_all` | Switch on all alignment reporting for bwa
**Type:** `boolean` | -| ` --ismap_minqual` | Mapping quality score for bwa
**Type:** `integer`, **Default:** `30` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `ismapper` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [ISMapper](https://github.com/jhawkey/IS_mapper) - Hawkey J, Hamidian M, Wick RR, Edwards DJ, Billman-Jacobe H, Hall RM, Holt KE [ISMapper: identifying transposase insertion sites in bacterial genomes from short read sequence data](http://dx.doi.org/10.1186/s12864-015-1860-2). _BMC Genomics_ 16, 667 (2015) - diff --git a/docs/bactopia-tools/kleborate.md b/docs/bactopia-tools/kleborate.md deleted file mode 100644 index 31ef8a5a..00000000 --- a/docs/bactopia-tools/kleborate.md +++ /dev/null @@ -1,248 +0,0 @@ ---- -title: kleborate -description: A Bactopia Tool which uses Kleborate to screen genome assemblies of _Klebsiella pneumoniae_ and the _Klebsiella pneumoniae_ species complex (KpSC). ---- -# Bactopia Tool - `kleborate` -The `kleborate` module uses [Kleborate](https://github.com/katholt/Kleborate) to screen genome assemblies of -_Klebsiella pneumoniae_ and the _Klebsiella pneumoniae_ species complex (KpSC). Kleborate predicts: - -- MLST, species, -- ICEKp associated virulence loci -- virulence plasmid associated loci -- antimicrobial resistance determinants, -- K (capsule) and O antigen (LPS) serotype prediction. - - -## Example Usage -``` -bactopia --wf kleborate \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `kleborate` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── kleborate -│ ├── .txt -│ └── logs -│ ├── nf-kleborate.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── kleborate- - ├── merged-results - │ ├── kleborate.tsv - │ └── logs - │ └── kleborate-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── kleborate-dag.dot - ├── kleborate-report.html - ├── kleborate-timeline.html - └── kleborate-trace.txt - -``` - -:::info[Directory structure might be different] - -`kleborate` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `kleborate` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| kleborate.tsv | A tab-delimited file with `Kleborate` results from all samples | - - -#### Kleborate - -Below is a description of the _per-sample_ results from [Kleborate](https://github.com/katholt/Kleborate). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.txt | A tab-delimited file with `Kleborate` result, see [Kleborate - Example output](https://github.com/katholt/Kleborate/wiki/Tests-and-example-outputs#example-output) for more details. | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| kleborate-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| kleborate-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| kleborate-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| kleborate-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include in the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### Kleborate Parameters - - -| Parameter | Description | -|:---|---| -| ` --kleborate_preset` | Preset module to use for Kleborate
**Type:** `string`, **Default:** `kpsc` | -| ` --kleborate_opts` | Extra options in quotes for Kleborate
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `kleborate` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [Kaptive](https://github.com/katholt/Kaptive) - Wyres KL, Wick RR, Gorrie C, Jenney A, Follador R, Thomson NR, Holt KE [Identification of Klebsiella capsule synthesis loci from whole genome data.](https://doi.org/10.1099/mgen.0.000102) _Microbial genomics_ 2(12) (2016) - -- [Kleborate](https://github.com/katholt/Kleborate) - Lam MMC, Wick RR, Watts, SC, Cerdeira LT, Wyres KL, Holt KE [A genomic surveillance framework and genotyping tool for Klebsiella pneumoniae and its related species complex.](https://doi.org/10.1038/s41467-021-24448-3) _Nat Commun_ 12, 4188 (2021) - diff --git a/docs/bactopia-tools/kraken2.md b/docs/bactopia-tools/kraken2.md deleted file mode 100644 index e2bdeb3d..00000000 --- a/docs/bactopia-tools/kraken2.md +++ /dev/null @@ -1,225 +0,0 @@ ---- -title: kraken2 -description: A Bactopia Tool which uses Kraken2 to assign taxonomic classifications to your sequence reads. ---- -# Bactopia Tool - `kraken2` -The `kraken2` module uses [Kraken2](https://github.com/DerrickWood/kraken2) to assign taxonomic -classifications to your sequence reads. - - -## Example Usage -``` -bactopia --wf kraken2 \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `kraken2` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── kraken2 -│ ├── .classified_{1|2}.fastq.gz -│ ├── .kraken2.report.txt -│ ├── .unclassified_{1,2}.fastq.gz -│ └── logs -│ ├── nf-kraken2.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── kraken2- - └── nf-reports - ├── kraken2-dag.dot - ├── kraken2-report.html - ├── kraken2-timeline.html - └── kraken2-trace.txt - -``` - - - -### Results - -#### Kraken2 - -Below is a description of the _per-sample_ results from [Kraken2](https://github.com/DerrickWood/kraken2). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.classified_{1|2}.fastq.gz | Reads classified to belong to any of the taxa on the Kraken2 database. | -| <SAMPLE_NAME>.kraken2.report.txt | Kraken2 report containing stats about classified and not classified reads See [Kraken2 - Output Formats](https://github.com/DerrickWood/kraken2/wiki/Manual#output-formats) for more details | -| <SAMPLE_NAME>.unclassified_{1,2}.fastq.gz | Reads not classified to belong to any of the taxa on the Kraken2 database. | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| kraken2-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| kraken2-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| kraken2-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| kraken2-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include in the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### Kraken2 Parameters - - -| Parameter | Description | -|:---|---| -| ` --kraken2_db` | A single tarball or path to a Kraken2 formatted database
**Type:** `string` | -| ` --quick_mode` | Quick operation (use first hit or hits)
**Type:** `boolean` | -| ` --confidence` | Confidence score threshold between 0 and 1
**Type:** `number` | -| ` --minimum_base_quality` | Minimum base quality used in classification
**Type:** `integer` | -| ` --use_mpa_style` | Format report output like Kraken 1's kraken-mpa-report
**Type:** `boolean` | -| ` --report_zero_counts` | Report counts for ALL taxa, even if counts are zero
**Type:** `boolean` | -| ` --report_minimizer_data` | Include minimizer and distinct minimizer count information in report
**Type:** `boolean` | -| ` --use_names` | Print scientific names instead of just taxids
**Type:** `boolean` | -| ` --memory_mapping` | Avoid loading database into RAM
**Type:** `boolean` | -| ` --minimum_hit_groups` | Minimum number of hit groups needed to make a call
**Type:** `integer`, **Default:** `2` | -| ` --remove_filtered_reads` | Discard the classified and unclassified FASTQs produced by Kraken2
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `kraken2` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Kraken2](https://github.com/DerrickWood/kraken2) - Wood DE, Lu J, Langmead B [Improved metagenomic analysis with Kraken 2.](https://doi.org/10.1186/s13059-019-1891-0) *Genome Biology*, 20(1), 257. (2019) - diff --git a/docs/bactopia-tools/legsta.md b/docs/bactopia-tools/legsta.md deleted file mode 100644 index 04868a75..00000000 --- a/docs/bactopia-tools/legsta.md +++ /dev/null @@ -1,238 +0,0 @@ ---- -title: legsta -description: A Bactopia Tool which uses legsta for _in silico_ _Legionella pneumophila_ Sequence Based Typing (SBT). ---- -# Bactopia Tool - `legsta` -The `legsta` module uses [legsta](https://github.com/tseemann/legsta) for -_in silico_ _Legionella pneumophila_ Sequence Based Typing (SBT). - - -## Example Usage -``` -bactopia --wf legsta \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `legsta` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── legsta -│ ├── .tsv -│ └── logs -│ ├── nf-legsta.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── legsta- - ├── merged-results - │ ├── legsta.tsv - │ └── logs - │ └── legsta-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── legsta-dag.dot - ├── legsta-report.html - ├── legsta-timeline.html - └── legsta-trace.txt - -``` - -:::info[Directory structure might be different] - -`legsta` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). 
If executed -from Bactopia, the `legsta` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| legsta.tsv | A tab-delimited file with `legsta` results from all samples | - - -#### legsta - -Below is a description of the _per-sample_ results from [legsta](https://github.com/tseemann/legsta). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `legsta` result, see [legsta - Output](https://github.com/tseemann/legsta#output) for more details | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| legsta-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| legsta-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| legsta-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| legsta-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include in the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### legsta Parameters - - -| Parameter | Description | -|:---|---| -| ` --noheader` | Don't print header row
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `legsta` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [legsta](https://github.com/tseemann/legsta) - Seemann T [legsta: In silico Legionella pneumophila Sequence Based Typing](https://github.com/tseemann/legsta) (GitHub) - diff --git a/docs/bactopia-tools/lissero.md b/docs/bactopia-tools/lissero.md deleted file mode 100644 index 0614d8d5..00000000 --- a/docs/bactopia-tools/lissero.md +++ /dev/null @@ -1,239 +0,0 @@ ---- -title: lissero -description: A Bactopia Tool which uses LisSero for _in silico_ serotype prediction for _Listeria monocytogenes_ assemblies. ---- -# Bactopia Tool - `lissero` -The `lissero` module uses [LisSero](https://github.com/MDU-PHL/LisSero) for -_in silico_ serotype prediction for _Listeria monocytogenes_ assemblies. - - -## Example Usage -``` -bactopia --wf lissero \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `lissero` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── lissero -│ ├── .tsv -│ └── logs -│ ├── nf-lissero.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── lissero- - ├── merged-results - │ ├── lissero.tsv - │ └── logs - │ └── lissero-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── lissero-dag.dot - ├── lissero-report.html - ├── lissero-timeline.html - └── lissero-trace.txt - -``` - -:::info[Directory structure might be different] - -`lissero` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `lissero` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| lissero.tsv | A tab-delimited file with `LisSero` results from all samples | - - -#### LisSero - -Below is a description of the _per-sample_ results from [LisSero](https://github.com/MDU-PHL/LisSero). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `LisSero` results | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| lissero-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| lissero-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| lissero-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| lissero-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include in the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### LisSero Parameters - - -| Parameter | Description | -|:---|---| -| ` --min_id` | Minimum percent identity to accept a match
**Type:** `number`, **Default:** `95.0` | -| ` --min_cov` | Minimum coverage of the gene to accept a match
**Type:** `number`, **Default:** `95.0` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularityu. (e.g. Singularity: '-D `pwd`'
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `lissero` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [LisSero](https://github.com/MDU-PHL/LisSero) - Kwong J, Zhang J, Seeman T, Horan, K, Gonçalves da Silva A [LisSero - _In silico_ serotype prediction for _Listeria monocytogenes_](https://github.com/MDU-PHL/LisSero) (GitHub) - diff --git a/docs/bactopia-tools/mashdist.md b/docs/bactopia-tools/mashdist.md deleted file mode 100644 index 6beed1d2..00000000 --- a/docs/bactopia-tools/mashdist.md +++ /dev/null @@ -1,240 +0,0 @@ ---- -title: mashdist -description: A Bactopia Tool which uses Mash to determine the Mash distance from a given reference genome(s). ---- -# Bactopia Tool - `mashdist` -The `mashdist` module uses [Mash](https://github.com/marbl/Mash) to determine the Mash -distance from a given reference genome(s). - - -## Example Usage -``` -bactopia --wf mashdist \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `mashdist` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── mashdist -│ └── mashdist -│ ├── -dist.txt -│ └── logs -│ ├── nf-mashdist.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── mashdist- - ├── merged-results - │ ├── logs - │ │ └── mashdist-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── mashdist.tsv - └── nf-reports - ├── mashdist-dag.dot - ├── mashdist-report.html - ├── mashdist-timeline.html - └── mashdist-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| mashdist.tsv | A merged TSV file with `Mash` distances against a reference from all samples | - - -#### Mash - -Below is a description of the _per-sample_ results from [Mash](https://github.com/marbl/Mash). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-dist.txt | A tab-delimited file with `mash dist` results | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| mashdist-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| mashdist-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| mashdist-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| mashdist-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data.
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### mashdist Parameters - - -| Parameter | Description | -|:---|---| -| ` --mash_sketch` | The reference sequence as a Mash Sketch (.msh file)
**Type:** `string` | -| ` --mash_seed` | Seed to provide to the hash function
**Type:** `integer`, **Default:** `42` | -| ` --mash_table` | Table output (fields will be blank if they do not meet the p-value threshold)
**Type:** `boolean` | -| ` --mash_m` | Minimum copies of each k-mer required to pass noise filter for reads
**Type:** `integer`, **Default:** `1` | -| ` --mash_w` | Probability threshold for warning about low k-mer size.
**Type:** `number`, **Default:** `0.01` | -| ` --max_p` | Maximum p-value to report.
**Type:** `number`, **Default:** `1.0` | -| ` --max_dist` | Maximum distance to report.
**Type:** `number`, **Default:** `1.0` | -| ` --merlin_dist` | Maximum distance to report when using Merlin .
**Type:** `number`, **Default:** `0.1` | -| ` --full_merlin` | Go full Merlin and run all species-specific tools, no matter the Mash distance
**Type:** `boolean` | -| ` --use_fastqs` | Query with FASTQs instead of the assemblies
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularityu. (e.g. Singularity: '-D `pwd`'
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `mashdist` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [Mash](https://github.com/marbl/Mash) - Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). _Genome Biol_ 17, 132 (2016) - diff --git a/docs/bactopia-tools/mashtree.md b/docs/bactopia-tools/mashtree.md deleted file mode 100644 index 967e41b6..00000000 --- a/docs/bactopia-tools/mashtree.md +++ /dev/null @@ -1,226 +0,0 @@ ---- -title: mashtree -description: A Bactopia Tool which uses Mashtree to create a tree of your samples using Mash distances. ---- -# Bactopia Tool - `mashtree` -The `mashtree` module uses [Mashtree](https://github.com/lskatz/mashtree) to create a tree of your samples -using [Mash](https://github.com/marbl/Mash) distances. - -You can also use `--species` or `-accessions` to see how your samples compare to completed genomes. If used, -[ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) will download available completed -genomes available from RefSeq and they will be included in your tree. - - -## Example Usage -``` -bactopia --wf mashtree \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `mashtree` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -└── bactopia-runs - └── mashtree- - ├── mashtree - │ ├── logs - │ │ ├── nf-mashtree.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── mashtree.{dnd|tsv} - └── nf-reports - ├── mashtree-dag.dot - ├── mashtree-report.html - ├── mashtree-timeline.html - └── mashtree-trace.txt - -``` - - - -### Results - -#### Mashtree - -Below are results that are in the base directory. - - -| Filename | Description | -|-------------------------------|-------------| -| mashtree.dnd | A Newick formatted tree file | -| mashtree.tsv | A TSV matrix of pair-wise Mash distances | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| mashtree-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| mashtree-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| mashtree-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| mashtree-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### Mashtree Parameters - - -| Parameter | Description | -|:---|---| -| ` --trunclength` | How many characters to keep in a filename
**Type:** `integer`, **Default:** `250` | -| ` --sortorder` | For neighbor-joining, the sort order can make a difference.
**Type:** `string`, **Default:** `ABC` | -| ` --genomesize` | Genome size of the input samples
**Type:** `integer`, **Default:** `5000000` | -| ` --mindepth` | If mindepth is zero, then it will be chosen in a smart but slower method, to discard lower-abundance kmers.
**Type:** `integer`, **Default:** `5` | -| ` --kmerlength` | Hashes will be based on strings of this many nucleotides
**Type:** `integer`, **Default:** `21` | -| ` --sketchsize` | Each sketch will have at most this many non-redundant min-hashes
**Type:** `integer`, **Default:** `10000` | -| ` --save_sketches` | Save sketches created during the process
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularityu. (e.g. Singularity: '-D `pwd`'
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `mashtree` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Mash](https://github.com/marbl/Mash) - Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). _Genome Biol_ 17, 132 (2016) - -- [Mashtree](https://github.com/lskatz/mashtree) - Katz LS, Griswold T, Morrison S, Caravas J, Zhang S, den Bakker HC, Deng X, Carleton HA [Mashtree: a rapid comparison of whole genome sequence files.](https://doi.org/10.21105/joss.01762) _Journal of Open Source Software_, 4(44), 1762 (2019) - -- [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) - Blin K [ncbi-genome-download: Scripts to download genomes from the NCBI FTP servers](https://github.com/kblin/ncbi-genome-download) (GitHub) - diff --git a/docs/bactopia-tools/mcroni.md b/docs/bactopia-tools/mcroni.md deleted file mode 100644 index 5b611a55..00000000 --- a/docs/bactopia-tools/mcroni.md +++ /dev/null @@ -1,226 +0,0 @@ ---- -title: mcroni -description: A Bactopia Tool which uses mcroni to identify _mcr-1_ genes in assemblies. ---- -# Bactopia Tool - `mcroni` -The `mcroni` module uses [mcroni](https://github.com/liampshaw/mcroni) to identify _mcr-1_ genes in -assemblies. If _mcr-1_ is found, the variations will be reported and available in an output FASTA file. - - -## Example Usage -``` -bactopia --wf mcroni \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `mcroni` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── mcroni -│ ├── _sequence.fa -│ ├── _table.tsv -│ └── logs -│ ├── nf-mcroni.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── mcroni- - ├── merged-results - │ ├── logs - │ │ └── mcroni-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── mcroni.tsv - └── nf-reports - ├── mcroni-dag.dot - ├── mcroni-report.html - ├── mcroni-timeline.html - └── mcroni-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| mcroni.tsv | A merged TSV of mcroni results from all samples | - - -#### mcroni - -Below is a description of the _per-sample_ results from [mcroni](https://github.com/liampshaw/mcroni). - - -| Extension | Description | -|-------------------------------|-------------| -| _table.tsv | mcroni results in tab-delimited format | -| _sequence.fa | Sequences matching the _mcr-1_ gene | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| mcroni-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| mcroni-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| mcroni-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| mcroni-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data.
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `mcroni` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [mcroni](https://github.com/liampshaw/mcroni) - Shaw L [mcroni: Scripts for finding and processing promoter variants upstream of mcr-1](https://github.com/liampshaw/mcroni) (GitHub) - diff --git a/docs/bactopia-tools/meningotype.md b/docs/bactopia-tools/meningotype.md deleted file mode 100644 index b8a16d60..00000000 --- a/docs/bactopia-tools/meningotype.md +++ /dev/null @@ -1,244 +0,0 @@ ---- -title: meningotype -description: A Bactopia Tool which uses meningotype for _in silico_ typing of _Neisseria meningitidis_ genomes. ---- -# Bactopia Tool - `meningotype` -The `meningotype` module uses [meningotype](https://github.com/MDU-PHL/meningotype) -for _in silico_ typing of _Neisseria meningitidis_ genomes. It uses the contigs from -assemblies to determine the serotype, MLST, finetyping (_porA_, _fetA_, _porB_), and -Bexsero antigen sequence typing (BAST) (_fHbp_, _NHBA_, _NadA_, _PorA_). - - -## Example Usage -``` -bactopia --wf meningotype \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `meningotype` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── meningotype -│ ├── .tsv -│ └── logs -│ ├── nf-meningotype.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── meningotype- - ├── merged-results - │ ├── logs - │ │ └── meningotype-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── meningotype.tsv - └── nf-reports - ├── meningotype-dag.dot - ├── meningotype-report.html - ├── meningotype-timeline.html - └── meningotype-trace.txt - -``` - -:::info[Directory structure might be different] - -`meningotype` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `meningotype` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| meningotype.tsv | A merged TSV file with `meningotype` results from all samples | - - -#### meningotype - -Below is a description of the _per-sample_ results from [meningotype](https://github.com/MDU-PHL/meningotype) . - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `meningotype` result | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| meningotype-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| meningotype-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| meningotype-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| meningotype-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data.
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### meningotype Parameters -You can use these parameters to fine-tune your meningotype analysis - -| Parameter | Description | -|:---|---| -| ` --finetype` | perform porA and fetA fine typing
**Type:** `boolean` | -| ` --porB` | perform porB sequence typing (NEIS2020)
**Type:** `boolean` | -| ` --bast` | perform Bexsero antigen sequence typing (BAST)
**Type:** `boolean` | -| ` --mlst` | perform MLST
**Type:** `boolean` | -| ` --all` | perform MLST, porA, fetA, porB, BAST typing
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `meningotype` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [meningotype](https://github.com/MDU-PHL/meningotype) - Kwong JC, Gonçalves da Silva A, Stinear TP, Howden BP, & Seemann T [meningotype: in silico typing for _Neisseria meningitidis_.](https://github.com/MDU-PHL/meningotype) (GitHub) - diff --git a/docs/bactopia-tools/merlin.md b/docs/bactopia-tools/merlin.md deleted file mode 100644 index cfb658d7..00000000 --- a/docs/bactopia-tools/merlin.md +++ /dev/null @@ -1,955 +0,0 @@ ---- -title: merlin -description: A Bactopia Tool for the automatic selection and executions of species-specific tools. ---- -# Bactopia Tool - `merlin` -_MinmER assisted species-specific bactopia tool seLectIoN_, or Merlin, uses distances based -on the RefSeq sketch downloaded by `bactopia datasets` to automatically run species-specific tools. 
- -Currently Merlin knows 16 spells which cover the following: - -| Genus/Species | Tools | -|---------------|-------| -| Escherichia / Shigella | [ECTyper](../bactopia-tools/ectyper.md), [ShigaTyper](../bactopia-tools/shigatyper.md), [ShigEiFinder](../bactopia-tools/shigeifinder.md) | -| Haemophilus | [hicap](../bactopia-tools/hicap.md), [HpsuisSero](../bactopia-tools/ssuissero.md) | -| Klebsiella | [Kleborate](../bactopia-tools/kleborate.md) | -| Legionella | [legsta](../bactopia-tools/legsta.md) | -| Listeria | [LisSero](../bactopia-tools/lissero.md) | -| Mycobacterium | [TBProfiler](../bactopia-tools/tbprofiler.md) | -| Neisseria | [meningotype](../bactopia-tools/meningotype.md), [ngmaster](../bactopia-tools/ngmaster.md) | -| Pseudomonas | [pasty](../bactopia-tools/pasty.md) | -| Salmonella | [SeqSero2](../bactopia-tools/seqsero2.md), [SISTR](../bactopia-tools/sistr.md) | -| Staphylococcus | [AgrVATE](../bactopia-tools/agrvate.md), [spaTyper](../bactopia-tools/spatyper.md), [staphopia-sccmec](../bactopia-tools/staphopiasccmec.md) | -| Streptococcus | [emmtyper](../bactopia-tools/emmtyper.md), [pbptyper](../bactopia-tools/pbptyper.md), [SsuisSero](../bactopia-tools/ssuissero.md) | - -Merlin is available as an independent Bactopia Tool, or in Bactopia with the `--ask_merlin` parameter. Even better, -if you want to force Merlin to execute all species-specific tools (no matter the distance), you can use `--full_merlin`. -Then all the spells will be unleashed! - - -## Example Usage -``` -bactopia --wf merlin \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `merlin` tool. Where possible the -file descriptions below were modified from a tool's description.
- -```bash - -├── -│ └── tools -│ ├── agrvate -│ │ ├── -agr_gp.tab -│ │ ├── -blastn_log.txt -│ │ ├── -hmm-log.txt -│ │ ├── -hmm.tab -│ │ ├── -summary.tab -│ │ └── logs -│ │ ├── nf-agrvate.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── ectyper -│ │ ├── .tsv -│ │ ├── blast_output_alleles.txt -│ │ └── logs -│ │ ├── ectyper.log -│ │ ├── nf-ectyper.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── emmtyper -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-emmtyper.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── genotyphi -│ │ ├── .csv -│ │ ├── .json -│ │ ├── .tsv -│ │ └── logs -│ │ ├── genotyphi -│ │ │ ├── nf-genotyphi.{begin,err,log,out,run,sh,trace} -│ │ │ └── versions.yml -│ │ └── mykrobe -│ │ ├── nf-genotyphi.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── hicap -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-hicap.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── hpsuissero -│ │ ├── _serotyping_res.tsv -│ │ └── logs -│ │ ├── nf-hpsuissero.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── kleborate -│ │ ├── .results.txt -│ │ └── logs -│ │ ├── nf-kleborate.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── legsta -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-legsta.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── mashdist -│ │ └── merlin -│ │ ├── -dist.txt -│ │ └── logs -│ │ ├── nf-mashdist.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── meningotype -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-meningotype.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── ngmaster -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-ngmaster.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── pasty -│ │ ├── .blastn.tsv -│ │ ├── .details.tsv -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-pasty.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── pbptyper -│ │ ├── -1A.tblastn.tsv -│ │ ├── -2B.tblastn.tsv -│ │ ├── -2X.tblastn.tsv -│ │ ├── .tsv -│ │ └── logs -│ │ ├── 
nf-pbptyper.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── seqsero2 -│ │ ├── _log.txt -│ │ ├── _result.tsv -│ │ ├── _result.txt -│ │ └── logs -│ │ ├── nf-seqsero2.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── seroba -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-seroba.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── shigatyper -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-shigatyper.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── shigeifinder -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-shigeifinder.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── sistr -│ │ ├── -allele.fasta.gz -│ │ ├── -allele.json.gz -│ │ ├── -cgmlst.csv -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-sistr.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── spatyper -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-spatyper.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── ssuissero -│ │ ├── _serotyping_res.tsv -│ │ └── logs -│ │ ├── nf-ssuissero.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── staphopiasccmec -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-staphopiasccmec.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── stecfinder -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-stecfinder.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ └── tbprofiler -│ ├── .results.csv -│ ├── .results.json -│ ├── .results.txt -│ ├── bam -│ │ └── .bam -│ ├── logs -│ │ ├── nf-tbprofiler.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ └── vcf -│ └── .targets.csq.vcf.gz -└── bactopia-runs - └── merlin- - ├── merged-results - │ ├── agrvate.tsv - │ ├── ectyper.tsv - │ ├── emmtyper.tsv - │ ├── genotyphi.tsv - │ ├── hicap.tsv - │ ├── hpsuissero.tsv - │ ├── kleborate.tsv - │ ├── legsta.tsv - │ ├── logs - │ │ └── -concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ ├── meningotype.tsv - │ ├── ngmaster.tsv - │ ├── pasty.tsv - │ ├── pbptyper.tsv - │ ├── seqsero2.tsv - │ ├── seroba.tsv - │ ├── 
shigatyper.tsv - │ ├── shigeifinder.tsv - │ ├── sistr.tsv - │ ├── spatyper.tsv - │ ├── ssuissero.tsv - │ ├── staphopiasccmec.tsv - │ └── stecfinder.tsv - └── nf-reports - ├── merlin-dag.dot - ├── merlin-report.html - ├── merlin-timeline.html - └── merlin-trace.txt - -``` - -:::info[Directory structure might be different] - -`merlin` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `merlin` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| agrvate.tsv | A merged TSV file with `AgrVATE` results from all samples | -| clermontyping.csv | A merged TSV file with `ClermonTyping` results from all samples | -| ectyper.tsv | A merged TSV file with `ECTyper` results from all samples | -| emmtyper.tsv | A merged TSV file with `emmtyper` results from all samples | -| genotyphi.tsv | A merged TSV file with `genotyphi` results from all samples | -| hicap.tsv | A merged TSV file with `hicap` results from all samples | -| hpsuissero.tsv | A merged TSV file with `HpsuisSero` results from all samples | -| kleborate.tsv | A merged TSV file with `Kleborate` results from all samples | -| legsta.tsv | A merged TSV file with `legsta` results from all samples | -| lissero.tsv | A merged TSV file with `LisSero` results from all samples | -| meningotype.tsv | A merged TSV file with `meningotype` results from all samples | -| ngmaster.tsv | A merged TSV file with `ngmaster` results from all samples | -| pasty.tsv | A merged TSV file with `pasty` results from all samples | -| pbptyper.tsv | A merged TSV file with `pbptyper` results from all samples | -| seqsero2.tsv | A merged TSV file with `seqsero2` results from all samples | -| seroba.tsv | A merged TSV file 
with `seroba` results from all samples | -| shigapass.csv | A merged CSV file with `ShigaPass` results from all samples | -| shigatyper.tsv | A merged TSV file with `ShigaTyper` results from all samples | -| shigeifinder.tsv | A merged TSV file with `ShigEiFinder` results from all samples | -| sistr.tsv | A merged TSV file with `SISTR` results from all samples | -| spatyper.tsv | A merged TSV file with `spaTyper` results from all samples | -| ssuissero.tsv | A merged TSV file with `SsuisSero` results from all samples | -| staphopiasccmec.tsv | A merged TSV file with `staphopia-sccmec` results from all samples | -| stecfinder.tsv | A merged TSV file with `stecfinder` results from all samples | - - -#### AgrVATE - -Below is a description of the _per-sample_ results from [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE). - - -| Extension | Description | -|-------------------------------|-------------| -| -agr_gp.tab | A detailed report for _agr_ kmer matches | -| -blastn_log.txt | Log files from programs called by `AgrVATE` | -| -summary.tab | A final summary report for _agr_ typing | - - -#### ClermonTyping - -Below is a description of the _per-sample_ results from [ClermonTyping](https://github.com/happykhan/ClermonTyping). - - -| Extension | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.blast.xml | A BLAST XML file with the results of the ClermonTyping analysis | -| <SAMPLE_NAME>.html | A HTML file with the results of the ClermonTyping analysis | -| <SAMPLE_NAME>.mash.tsv | A TSV file with the Mash distances | -| <SAMPLE_NAME>.phylogroups.txt | A TSV file with the final phylogroup assignments | - - -#### ECTyper - -Below is a description of the _per-sample_ results from [ECTyper](https://github.com/phac-nml/ecoli_serotyping). 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `ECTyper` result, see [ECTyper - Report format](https://github.com/phac-nml/ecoli_serotyping#report-format) for details | -| blast_output_alleles.txt | Allele report generated from BLAST results | - - -#### emmtyper - -Below is a description of the _per-sample_ results from [emmtyper](https://github.com/MDU-PHL/emmtyper). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `emmtyper` result, see [emmtyper - Result format](https://github.com/MDU-PHL/emmtyper#result-format) for details | - - -#### hicap - -Below is a description of the _per-sample_ results from [hicap](https://github.com/scwatts/hicap). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.gbk | GenBank file and cap locus annotations | -| <SAMPLE_NAME>.svg | Visualization of annotated cap locus | -| <SAMPLE_NAME>.tsv | A tab-delimited file with `hicap` results | - - -#### HpsuisSero - -Below is a description of the _per-sample_ results from [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_serotyping_res.tsv | A tab-delimited file with `HpsuisSero` result | - - -#### GenoTyphi - -Below is a description of the _per-sample_ results from [GenoTyphi](https://github.com/katholt/genotyphi). 
A -full description of the GenoTyphi output is available at [GenoTyphi - Output](https://github.com/katholt/genotyphi/blob/main/README.md#explanation-of-columns-in-the-output) - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_predictResults.tsv | A tab-delimited file with `GenoTyphi` results | -| <SAMPLE_NAME>.csv | The output of `mykrobe predict` in comma-separated format | -| <SAMPLE_NAME>.json | The output of `mykrobe predict` in JSON format | - - -#### Kleborate - -Below is a description of the _per-sample_ results from [Kleborate](https://github.com/katholt/Kleborate). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.results.txt | A tab-delimited file with `Kleborate` result, see [Kleborate - Example output](https://github.com/katholt/Kleborate/wiki/Tests-and-example-outputs#example-output) for more details. | - - -#### legsta - -Below is a description of the _per-sample_ results from [legsta](https://github.com/tseemann/legsta). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `legsta` result, see [legsta - Output](https://github.com/tseemann/legsta#output) for more details | - - -#### LisSero - -Below is a description of the _per-sample_ results from [LisSero](https://github.com/MDU-PHL/LisSero). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `LisSero` results | - - -#### Mash - -Below is a description of the _per-sample_ results from [Mash](https://github.com/marbl/Mash). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-dist.txt | A tab-delimited file with `mash dist` results | - - -#### meningotype - -Below is a description of the _per-sample_ results from [meningotype](https://github.com/MDU-PHL/meningotype) . 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `meningotype` result | - - -#### ngmaster - -Below is a description of the _per-sample_ results from [ngmaster](https://github.com/MDU-PHL/ngmaster). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `ngmaster` results | - - -#### pasty - -Below is a description of the _per-sample_ results from [pasty](https://github.com/rpetit3/pasty). - - -| Extension | Description | -|-------------------------------|-------------| -| .blastn.tsv | A tab-delimited file of all blast hits | -| .details.tsv | A tab-delimited file with details for each serogroup | -| .tsv | A tab-delimited file with the predicted serogroup | - - -#### pbptyper - -Below is a description of the _per-sample_ results from [pbptyper](https://github.com/rpetit3/pbptyper). - - -| Extension | Description | -|-------------------------------|-------------| -| .tblastn.tsv | A tab-delimited file of all blast hits | -| .tsv | A tab-delimited file with the predicted PBP type | - - -#### SeqSero2 - -Below is a description of the _per-sample_ results from [SeqSero2](https://github.com/denglab/SeqSero2). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_result.tsv | A tab-delimited file with `SeqSero2` results | -| <SAMPLE_NAME>_result.txt | A text file with key-value pairs of `SeqSero2` results | - - -#### Seroba - -Below is a description of the _per-sample_ results from [Seroba](https://github.com/sanger-pathogens/seroba). -More details about the outputs are available from [Seroba - Output](https://sanger-pathogens.github.io/seroba/#output). 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with the predicted serotype | -| detailed_serogroup_info.txt | Detailed information about the predicted results | - - -#### ShigaPass - -Below is a description of the _per-sample_ results from [ShigaPass](https://github.com/imanyass/ShigaPass). - - -| Extension | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.csv | A CSV file with the predicted Shigella or EIEC serotype | - - -#### ShigaTyper - -Below is a description of the _per-sample_ results from [ShigaTyper](https://github.com/CFSAN-Biostatistics/shigatyper). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-hits.tsv | Detailed statistics about each individual gene hit | -| <SAMPLE_NAME>.tsv | The final predicted serotype by `ShigaTyper` | - - -#### ShigEiFinder - -Below is a description of the _per-sample_ results from [ShigEiFinder](https://github.com/LanLab/ShigEiFinder). - - -| Extension | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with the predicted Shigella or EIEC serotype | - - -#### SISTR - -Below is a description of the _per-sample_ results from [SISTR](https://github.com/phac-nml/sistr_cmd).
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-allele.fasta.gz | A FASTA file of the cgMLST allele search results | -| <SAMPLE_NAME>-allele.json.gz | JSON formated cgMLST allele search results, see [SISTR - cgMLST search results](https://github.com/phac-nml/sistr_cmd#cgmlst-allele-search-results) for more details | -| <SAMPLE_NAME>-cgmlst.csv | A comma-delimited summary of the cgMLST allele search results | -| <SAMPLE_NAME>.tsv | A tab-delimited file with `SISTR` results, see [SISTR - Primary results](https://github.com/phac-nml/sistr_cmd#primary-results-output--o-sistr-results) for more details | - - -#### spaTyper - -Below is a description of the _per-sample_ results from [spaTyper](https://github.com/HCGB-IGTP/spaTyper). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `spaTyper` result | - - -#### SsuisSero - -Below is a description of the _per-sample_ results from [SsuisSero](https://github.com/jimmyliu1326/SsuisSero). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_serotyping_res.tsv | A tab-delimited file with `SsuisSero` results | - - -#### staphopia-sccmec - -Below is a description of the _per-sample_ results from [staphopia-sccmec](https://github.com/staphopia/staphopia-sccmec). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `staphopia-sccmec` results | - - -#### TBProfiler - -Below is a description of the _per-sample_ results from [TBProfiler](https://github.com/jodyphelan/TBProfiler). 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.results.csv | A CSV formatted `TBProfiler` result file of resistance and strain type | -| <SAMPLE_NAME>.results.json | A JSON formatted `TBProfiler` result file of resistance and strain type | -| <SAMPLE_NAME>.results.txt | A text file with `TBProfiler` results | -| <SAMPLE_NAME>.bam | BAM file with alignment details | -| <SAMPLE_NAME>.targets.csq.vcf.gz | VCF with variant info against reference genomes | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| merlin-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| merlin-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| merlin-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| merlin-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### mashdist Parameters - - -| Parameter | Description | -|:---|---| -| ` --mash_sketch` | The reference sequence as a Mash Sketch (.msh file)
**Type:** `string` | -| ` --mash_seed` | Seed to provide to the hash function
**Type:** `integer`, **Default:** `42` | -| ` --mash_table` | Table output (fields will be blank if they do not meet the p-value threshold)
**Type:** `boolean` | -| ` --mash_m` | Minimum copies of each k-mer required to pass noise filter for reads
**Type:** `integer`, **Default:** `1` | -| ` --mash_w` | Probability threshold for warning about low k-mer size.
**Type:** `number`, **Default:** `0.01` | -| ` --max_p` | Maximum p-value to report.
**Type:** `number`, **Default:** `1.0` | -| ` --max_dist` | Maximum distance to report.
**Type:** `number`, **Default:** `1.0` | -| ` --merlin_dist` | Maximum distance to report when using Merlin.
**Type:** `number`, **Default:** `0.1` | -| ` --full_merlin` | Go full Merlin and run all species-specific tools, no matter the Mash distance
**Type:** `boolean` | -| ` --use_fastqs` | Query with FASTQs instead of the assemblies
**Type:** `boolean` | - -### AgrVATE Parameters - - -| Parameter | Description | -|:---|---| -| ` --typing_only` | agr typing only. Skips agr operon extraction and frameshift detection
**Type:** `boolean` | - -### ClermonTyping Parameters - - -| Parameter | Description | -|:---|---| -| ` --clermon_threshold` | Do not use contigs under this size
**Type:** `number` | - -### ECTyper Parameters - - -| Parameter | Description | -|:---|---| -| ` --opid` | Percent identity required for an O antigen allele match
**Type:** `integer`, **Default:** `90` | -| ` --opcov` | Minimum percent coverage required for an O antigen allele match
**Type:** `integer`, **Default:** `90` | -| ` --hpid` | Percent identity required for an H antigen allele match
**Type:** `integer`, **Default:** `95` | -| ` --hpcov` | Minimum percent coverage required for an H antigen allele match
**Type:** `integer`, **Default:** `50` | -| ` --verify` | Enable E. coli species verification
**Type:** `boolean` | -| ` --print_alleles` | Prints the allele sequences if enabled as the final column
**Type:** `boolean` | - -### emmtyper Parameters - - -| Parameter | Description | -|:---|---| -| ` --emmtyper_wf` | Workflow for emmtyper to use.
**Type:** `string`, **Default:** `blast` | -| ` --cluster_distance` | Distance between cluster of matches to consider as different clusters
**Type:** `integer`, **Default:** `500` | -| ` --percid` | Minimal percent identity of sequence
**Type:** `integer`, **Default:** `95` | -| ` --culling_limit` | Total hits to return in a position
**Type:** `integer`, **Default:** `5` | -| ` --mismatch` | Threshold for number of mismatch to allow in BLAST hit
**Type:** `integer`, **Default:** `5` | -| ` --align_diff` | Threshold for difference between alignment length and subject length in BLAST
**Type:** `integer`, **Default:** `5` | -| ` --gap` | Threshold gap to allow in BLAST hit
**Type:** `integer`, **Default:** `2` | -| ` --min_perfect` | Minimum size of perfect match at 3 primer end
**Type:** `integer`, **Default:** `15` | -| ` --min_good` | Minimum size where there must be 2 matches for each mismatch
**Type:** `integer`, **Default:** `15` | -| ` --max_size` | Maximum size of PCR product
**Type:** `integer`, **Default:** `2000` | - -### hicap Parameters - - -| Parameter | Description | -|:---|---| -| ` --database_dir` | Directory containing locus database
**Type:** `string` | -| ` --model_fp` | Path to prodigal model
**Type:** `string` | -| ` --full_sequence` | Write the full input sequence out to the genbank file rather than just the region surrounding and including the locus
**Type:** `boolean` | -| ` --hicap_debug` | hicap will print debug messages
**Type:** `boolean` | -| ` --gene_coverage` | Minimum percentage coverage to consider a single gene complete
**Type:** `number`, **Default:** `0.8` | -| ` --gene_identity` | Minimum percentage identity to consider a single gene complete
**Type:** `number`, **Default:** `0.7` | -| ` --broken_gene_length` | Minimum length to consider a broken gene
**Type:** `integer`, **Default:** `60` | -| ` --broken_gene_identity` | Minimum percentage identity to consider a broken gene
**Type:** `number`, **Default:** `0.8` | - -### GenoTyphi Parameters - - -| Parameter | Description | -|:---|---| -| ` --kmer` | K-mer length
**Type:** `integer`, **Default:** `21` | -| ` --min_depth` | Minimum depth
**Type:** `integer`, **Default:** `1` | -| ` --model` | Genotype model used.
**Type:** `string`, **Default:** `kmer_count` | -| ` --report_all_calls` | Report all calls
**Type:** `boolean` | -| ` --mykrobe_opts` | Extra Mykrobe options in quotes
**Type:** `string` | - -### Kleborate Parameters - - -| Parameter | Description | -|:---|---| -| ` --kleborate_preset` | Preset module to use for Kleborate
**Type:** `string`, **Default:** `kpsc` | -| ` --kleborate_opts` | Extra options in quotes for Kleborate
**Type:** `string` | - -### legsta Parameters - - -| Parameter | Description | -|:---|---| -| ` --noheader` | Don't print header row
**Type:** `boolean` | - -### LisSero Parameters - - -| Parameter | Description | -|:---|---| -| ` --min_id` | Minimum percent identity to accept a match
**Type:** `number`, **Default:** `95.0` | -| ` --min_cov` | Minimum coverage of the gene to accept a match
**Type:** `number`, **Default:** `95.0` | - -### meningotype Parameters -You can use these parameters to fine-tune your meningotype analysis - -| Parameter | Description | -|:---|---| -| ` --finetype` | perform porA and fetA fine typing
**Type:** `boolean` | -| ` --porB` | perform porB sequence typing (NEIS2020)
**Type:** `boolean` | -| ` --bast` | perform Bexsero antigen sequence typing (BAST)
**Type:** `boolean` | -| ` --mlst` | perform MLST
**Type:** `boolean` | -| ` --all` | perform MLST, porA, fetA, porB, BAST typing
**Type:** `boolean` | - -### ngmaster Parameters - - -| Parameter | Description | -|:---|---| -| ` --csv` | output comma-separated format (CSV) rather than tab-separated
**Type:** `boolean` | - -### pasty Parameters - - -| Parameter | Description | -|:---|---| -| ` --pasty_min_pident` | Minimum percent identity to count a hit
**Type:** `integer`, **Default:** `95` | -| ` --pasty_min_coverage` | Minimum percent coverage to count a hit
**Type:** `integer`, **Default:** `95` | - -### pbptyper Parameters - - -| Parameter | Description | -|:---|---| -| ` --pbptyper_min_pident` | Minimum percent identity to count a hit
**Type:** `integer`, **Default:** `95` | -| ` --pbptyper_min_coverage` | Minimum percent coverage to count a hit
**Type:** `integer`, **Default:** `95` | - -### SeqSero2 Parameters - - -| Parameter | Description | -|:---|---| -| ` --run_mode` | Workflow to run. 'a' allele mode, or 'k' k-mer mode
**Type:** `string`, **Default:** `k` | -| ` --input_type` | Input format to analyze. 'assembly' or 'fastq'
**Type:** `string`, **Default:** `assembly` | -| ` --bwa_mode` | Algorithms for bwa mapping for allele mode
**Type:** `string`, **Default:** `mem` | - -### SISTR Parameters - - -| Parameter | Description | -|:---|---| -| ` --full_cgmlst` | Use the full set of cgMLST alleles which can include highly similar alleles
**Type:** `boolean` | - -### spaTyper Parameters - - -| Parameter | Description | -|:---|---| -| ` --repeats` | List of spa repeats
**Type:** `string` | -| ` --repeat_order` | List spa types and order of repeats
**Type:** `string` | -| ` --do_enrich` | Do PCR product enrichment
**Type:** `boolean` | - -### staphopia-sccmec Parameters - - -| Parameter | Description | -|:---|---| -| ` --hamming` | Report the results as hamming distances
**Type:** `boolean` | - -### TBProfiler Profile Parameters - - -| Parameter | Description | -|:---|---| -| ` --call_whole_genome` | Call whole genome
**Type:** `boolean` | -| ` --mapper` | Mapping tool to use. If you are using nanopore data it will default to minimap2
**Type:** `string`, **Default:** `bwa` | -| ` --caller` | Variant calling tool to use
**Type:** `string`, **Default:** `freebayes` | -| ` --calling_params` | Extra variant caller options in quotes
**Type:** `string` | -| ` --suspect` | Use the suspect suite of tools to add ML predictions
**Type:** `boolean` | -| ` --no_flagstat` | Don't collect flagstats
**Type:** `boolean` | -| ` --no_delly` | Don't run delly
**Type:** `boolean` | -| ` --tbprofiler_opts` | Extra options in quotes for TBProfiler
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `merlin` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) - Raghuram V. [AgrVATE: Rapid identification of Staphylococcus aureus agr locus type and agr operon variants.](https://github.com/VishnuRaghuram94/AgrVATE) (GitHub) - -- [ClermontTyping](https://github.com/happykhan/ClermonTyping) - Beghain J, Bridier-Nahmias A, Le Nagard H, Denamur E, Clermont O. [ClermonTyping: an easy-to-use and accurate in silico method for Escherichia genus strain phylotyping.](https://doi.org/10.1099/mgen.0.000192) Microbial Genomics, 4(7), e000192. (2018) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [ECTyper](https://github.com/phac-nml/ecoli_serotyping) - Laing C, Bessonov K, Sung S, La Rose C [ECTyper - In silico prediction of _Escherichia coli_ serotype](https://github.com/phac-nml/ecoli_serotyping) (GitHub) -- [emmtyper](https://github.com/MDU-PHL/emmtyper) - Tan A, Seemann T, Lacey D, Davies M, Mcintyre L, Frost H, Williamson D, Gonçalves da Silva A [emmtyper - emm Automatic Isolate Labeller](https://github.com/MDU-PHL/emmtyper) (GitHub) - -- [GenoTyphi](https://github.com/katholt/genotyphi) - Wong VK, Baker S, Connor TR, Pickard D, Page AJ, Dave J, Murphy N, Holliman R, Sefton A, Millar M, Dyson ZA, Dougan G, Holt KE, & International Typhoid Consortium. [An extended genotyping framework for Salmonella enterica serovar Typhi, the cause of human typhoid](https://doi.org/10.1038/ncomms12827) _Nature Communications_ 7, 12827. 
(2016) - -- [hicap](https://github.com/scwatts/hicap) - Watts SC, Holt KE [hicap: in silico serotyping of the Haemophilus influenzae capsule locus.](https://doi.org/10.1128/JCM.00190-19) _Journal of Clinical Microbiology_ JCM.00190-19 (2019) - -- [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero) - Lui J [HpsuisSero: Rapid _Haemophilus parasuis_ serotyping](https://github.com/jimmyliu1326/HpsuisSero) (GitHub) - -- [Kleborate](https://github.com/katholt/Kleborate) - Lam MMC, Wick RR, Watts, SC, Cerdeira LT, Wyres KL, Holt KE [A genomic surveillance framework and genotyping tool for Klebsiella pneumoniae and its related species complex.](https://doi.org/10.1038/s41467-021-24448-3) _Nat Commun_ 12, 4188 (2021) - -- [legsta](https://github.com/tseemann/legsta) - Seemann T [legsta: In silico Legionella pneumophila Sequence Based Typing](https://github.com/tseemann/legsta) (GitHub) - -- [LisSero](https://github.com/MDU-PHL/LisSero) - Kwong J, Zhang J, Seeman T, Horan, K, Gonçalves da Silva A [LisSero - _In silico_ serotype prediction for _Listeria monocytogenes_](https://github.com/MDU-PHL/LisSero) (GitHub) - -- [Mash](https://github.com/marbl/Mash) - Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). 
_Genome Biol_ 17, 132 (2016) - -- [meningotype](https://github.com/MDU-PHL/meningotype) - Kwong JC, Gonçalves da Silva A, Stinear TP, Howden BP, & Seemann T [meningotype: in silico typing for _Neisseria meningitidis_.](https://github.com/MDU-PHL/meningotype) (GitHub) - -- [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) - Hunt M, Bradley P, Lapierre SG, Heys S, Thomsit M, Hall MB, Malone KM, Wintringer P, Walker TM, Cirillo DM, Comas I, Farhat MR, Fowler P, Gardy J, Ismail N, Kohl TA, Mathys V, Merker M, Niemann S, Omar SV, Sintchenko V, Smith G, Supply P, Tahseen S, Wilcox M, Arandjelovic I, Peto TEA, Crook, DW, Iqbal Z [Antibiotic resistance prediction for Mycobacterium tuberculosis from genome sequence data with Mykrobe](https://doi.org/10.12688/wellcomeopenres.15603.1) _Wellcome Open Research_ 4, 191. (2019) - -- [ngmaster](https://github.com/MDU-PHL/ngmaster) - Kwong J, Gonçalves da Silva A, Schultz M, Seeman T [ngmaster - _In silico_ multi-antigen sequence typing for _Neisseria gonorrhoeae_ (NG-MAST)](https://github.com/MDU-PHL/ngmaster) (GitHub) - -- [pasty](https://github.com/rpetit3/pasty) - Petit III RA [pasty: in silico serogrouping of _Pseudomonas aeruginosa_ isolates](https://github.com/rpetit3/pasty) (GitHub) - -- [pbptyper](https://github.com/rpetit3/pbptyper) - Petit III RA [pbptyper: In silico Penicillin Binding Protein (PBP) typer for _Streptococcus pneumoniae_ assemblies](https://github.com/rpetit3/pbptyper) (GitHub) - -- [SeqSero2](https://github.com/denglab/SeqSero2) - Zhang S, Den-Bakker HC, Li S, Dinsmore BA, Lane C, Lauer AC, Fields PI, Deng X. 
[SeqSero2: rapid and improved Salmonella serotype determination using whole genome sequencing data.](https://doi.org/10.1128/AEM.01746-19) _Appl Environ Microbiology_ 85(23):e01746-19 (2019) - -- [shigapass](https://github.com/imanyass/ShigaPass) - Yassine I, Hansen EE, Lefèvre S, Ruckly C, Carle I, Lejay-Collin M, Fabre L, Rafei R, Pardos de la Gandara M, Daboussi F, Shahin A, Weill FX [ShigaPass: an in silico tool predicting Shigella serotypes from whole-genome sequencing assemblies.](https://doi.org/10.1099%2Fmgen.0.000961) _Microb Genomics_ 9(3) (2023) - -- [ShigaTyper](https://github.com/CFSAN-Biostatistics/shigatyper) - Wu Y, Lau HK, Lee T, Lau DK, Payne J [In Silico Serotyping Based on Whole-Genome Sequencing Improves the Accuracy of Shigella Identification.](https://doi.org/10.1128/AEM.00165-19) *Applied and Environmental Microbiology*, 85(7). (2019) - -- [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) - Zhang X, Payne M, Nguyen T, Kaur S, Lan R [Cluster-specific gene markers enhance Shigella and enteroinvasive Escherichia coli in silico serotyping.](https://doi.org/10.1099/mgen.0.000704) Microbial Genomics, 7(12). (2021) - -- [SISTR](https://github.com/phac-nml/sistr_cmd) - Yoshida CE, Kruczkiewicz P, Laing CR, Lingohr EJ, Gannon VPJ, Nash JHE, Taboada EN [The Salmonella In Silico Typing Resource (SISTR): An Open Web-Accessible Tool for Rapidly Typing and Subtyping Draft Salmonella Genome Assemblies.](https://doi.org/10.1371/journal.pone.0147101) _PloS One_, 11(1), e0147101. (2016) - -- [spaTyper](https://github.com/HCGB-IGTP/spaTyper) - Sanchez-Herrero JF, and Sullivan M [spaTyper: Staphylococcal protein A (spa) characterization pipeline](http://doi.org/10.5281/zenodo.4063625). Zenodo. 
(2020) - -- [SsuisSero](https://github.com/jimmyliu1326/SsuisSero) - Lui J [SsuisSero: Rapid _Streptococcus suis_ serotyping](https://github.com/jimmyliu1326/SsuisSero) (GitHub) - -- [staphopia-sccmec](https://github.com/staphopia/staphopia-sccmec) - Petit III RA, Read TD [_Staphylococcus aureus_ viewed from the perspective of 40,000+ genomes.](http://dx.doi.org/10.7717/peerj.5261) _PeerJ_ 6, e5261 (2018) - -- [TBProfiler](https://github.com/jodyphelan/TBProfiler) - Phelan JE, O’Sullivan DM, Machado D, Ramos J, Oppong YEA, Campino S, O’Grady J, McNerney R, Hibberd ML, Viveiros M, Huggett JF, Clark TG [Integrating informatics tools and portable sequencing technology for rapid detection of resistance to anti-tuberculous drugs.](https://doi.org/10.1186/s13073-019-0650-x) _Genome Med_ 11, 41 (2019) - diff --git a/docs/bactopia-tools/midas.md b/docs/bactopia-tools/midas.md deleted file mode 100644 index bcd64057..00000000 --- a/docs/bactopia-tools/midas.md +++ /dev/null @@ -1,240 +0,0 @@ ---- -title: midas -description: A Bactopia Tool which uses MIDAS to estimate the abundance of bacterial species in a sample. - ---- -# Bactopia Tool - `midas` -The `midas` module uses [MIDAS](https://github.com/snayfach/MIDAS) to estimate the -abundance of bacterial species in a sample. MIDAS makes use of a database with -more than 30,000 reference genomes to estimate abundances. - - -## Example Usage -``` -bactopia --wf midas \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `midas` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── midas -│ ├── .midas.abundances.txt -│ ├── .midas.adjusted.abundances.txt -│ ├── .midas.tsv -│ └── logs -│ ├── nf-midas.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── midas- - ├── merged-results - │ ├── logs - │ │ └── midas-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── midas.tsv - └── nf-reports - ├── midas-dag.dot - ├── midas-report.html - ├── midas-timeline.html - └── midas-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| midas.tsv | A summary of the MIDAS results for all samples | - - -#### MIDAS - -Below is a description of the _per-sample_ results from [MIDAS](https://github.com/snayfach/MIDAS). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.midas.abundances.txt | A TSV file containing the abundance of each bacterial species in the sample | -| <SAMPLE_NAME>.midas.adjusted.abundances.txt | A TSV file containing the abundance of each bacterial species in the sample (adjusted for unclassified reads) | -| <SAMPLE_NAME>.midas.tsv | A TSV file containing the abundance of each bacterial species in the sample | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| midas-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| midas-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| midas-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| midas-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### MIDAS species Parameters - - -| Parameter | Description | -|:---|---| -| ` --midas_db` | A single tarball or path to a MIDAS formatted database
**Type:** `string` | -| ` --midas_word_size` | Word size for BLAST search
**Type:** `integer`, **Default:** `28` | -| ` --midas_aln_cov` | Discard reads with alignment coverage < ALN_COV
**Type:** `number`, **Default:** `0.75` | -| ` --midas_opts` | Extra MIDAS options
**Type:** `string` | -| ` --midas_debug` | Keep all temporary files created by MIDAS
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `midas` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [MIDAS](https://github.com/snayfach/MIDAS) - Nayfach S, Rodriguez-Mueller B, Garud N, and Pollard KS [An integrated metagenomics pipeline for strain profiling reveals novel patterns of bacterial transmission and biogeography.](https://doi.org/10.1101/gr.201863.115) _Genome Research_, 26(11), 1612–1625. (2016) - diff --git a/docs/bactopia-tools/mlst.md b/docs/bactopia-tools/mlst.md deleted file mode 100644 index ed4ece3b..00000000 --- a/docs/bactopia-tools/mlst.md +++ /dev/null @@ -1,239 +0,0 @@ ---- -title: mlst -description: A Bactopia Tool which uses mlst scan assemblies and determine the sequence type based on PubMLST schemas. ---- -# Bactopia Tool - `mlst` -The `mlst` module uses [mlst](https://github.com/tseemann/mlst) scan assemblies and determine the sequence type. -It makes use of [PubMLST](https://pubmlst.org/) schemes and by default automatically scans each schema. To specify -a specific scheme to scan, you can provide it with `--scheme`. - - -## Example Usage -``` -bactopia --wf mlst \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `mlst` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── mlst -│ ├── .tsv -│ └── logs -│ ├── nf-mlst.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── mlst- - ├── merged-results - │ ├── logs - │ │ └── mlst-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── mlst.tsv - └── nf-reports - ├── mlst-dag.dot - ├── mlst-report.html - ├── mlst-timeline.html - └── mlst-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| mlst.tsv | A merged TSV file with `mlst` results from all samples | - - -#### mlst - -Below is a description of the _per-sample_ results from [mlst](https://github.com/tseemann/mlst). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `mlst` result, see [mlst - Usage](https://github.com/tseemann/mlst#usage) for more details | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| mlst-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| mlst-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| mlst-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| mlst-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### MLST Parameters - - -| Parameter | Description | -|:---|---| -| ` --scheme` | Don't autodetect, force this scheme on all inputs
**Type:** `string` | -| ` --minid` | Minimum DNA percent identity of full allele to consider 'similar'
**Type:** `integer`, **Default:** `95` | -| ` --mincov` | Minimum DNA percent coverage to report partial allele at all
**Type:** `integer`, **Default:** `10` | -| ` --minscore` | Minimum score out of 100 to match a scheme
**Type:** `integer`, **Default:** `50` | -| ` --nopath` | Strip filename paths from FILE column
**Type:** `boolean` | -| ` --mlst_db` | A custom MLST database to use, either a tarball or a directory
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `mlst` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [mlst](https://github.com/tseemann/mlst) - Seemann T [mlst: scan contig files against PubMLST typing schemes](https://github.com/tseemann/mlst) (GitHub) - -- [PubMLST.org](https://pubmlst.org/) - Jolley KA, Bray JE, Maiden MCJ [Open-access bacterial population genomics: BIGSdb software, the PubMLST.org website and their applications.](http://dx.doi.org/10.12688/wellcomeopenres.14826.1) _Wellcome Open Res_ 3, 124 (2018) - diff --git a/docs/bactopia-tools/mobsuite.md b/docs/bactopia-tools/mobsuite.md deleted file mode 100644 index bc17ba14..00000000 --- a/docs/bactopia-tools/mobsuite.md +++ /dev/null @@ -1,240 +0,0 @@ ---- -title: mobsuite -description: A Bactopia Tool which uses MOB-suite to reconstruct and annotate plasmids in draft assemblies. ---- -# Bactopia Tool - `mobsuite` -The `mobsuite` module uses [MOB-suite](https://github.com/phac-nml/mob-suite) to reconstruct and annotate plasmids in draft assemblies. - -## Example Usage -``` -bactopia --wf mobsuite \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `mobsuite` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── mobsuite -│ ├── -mobtyper.txt -│ ├── chromosome.fasta -│ ├── contig_report.txt -│ ├── logs -│ │ ├── nf-mobsuite.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ └── plasmid_.fasta -└── bactopia-runs - └── mobsuite- - ├── merged-results - │ ├── logs - │ │ └── mobsuite-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── mobsuite.tsv - └── nf-reports - ├── mobsuite-dag.dot - ├── mobsuite-report.html - ├── mobsuite-timeline.html - └── mobsuite-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| mobsuite.tsv | A merged TSV file with `mobsuite` results from all samples | - - -#### MOB-suite - -Below is a description of the _per-sample_ results from [MOB-suite](https://github.com/phac-nml/mob-suite). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-mobtyper.txt | Aggregate MOB-typer report files for all identified plasmid, see [MOB-typer - report file](https://github.com/phac-nml/mob-suite#mob-typer-report-file-format) for more details | -| chromosome.fasta | FASTA file of all contigs found to belong to the chromosome | -| contig_report.txt | Assignment of the contig to chromosome or a particular plasmid grouping, see [MOB-recon - contig report](https://github.com/phac-nml/mob-suite#mob-recon-contig-report-format) for more details | -| plasmid_<PLASMID_NAME>.fasta | Each plasmid group is written to an individual FASTA | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| mobsuite-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| mobsuite-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| mobsuite-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| mobsuite-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### MOB-suite Recon Parameters - - -| Parameter | Description | -|:---|---| -| ` --mb_max_contig_size` | Maximum size of a contig to be considered a plasmid
**Type:** `integer`, **Default:** `310000` | -| ` --mb_min_contig_size` | Minimum length of contigs to classify
**Type:** `integer`, **Default:** `1000` | -| ` --mb_max_plasmid_size` | Maximum size of a reconstructed plasmid
**Type:** `integer`, **Default:** `350000` | -| ` --mobsuite_opts` | Extra MOB-suite options in quotes. Example: '--min_mob_evalue 0.001'
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `mobsuite` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [MOB-suite](https://github.com/phac-nml/mob-suite) - Robertson J, Nash JHE [MOB-suite: software tools for clustering, reconstruction and typing of plasmids from draft assemblies.](https://doi.org/10.1099/mgen.0.000206) _Microbial Genomics_ 4(8). (2018) - -- [MOB-suite Database](https://github.com/phac-nml/mob-suite) - Robertson J, Bessonov K, Schonfeld J, Nash JHE. [Universal whole-sequence-based plasmid typing and its utility to prediction of host range and epidemiological surveillance.](https://doi.org/10.1099/mgen.0.000435) _Microbial Genomics_, 6(10)(2020) - diff --git a/docs/bactopia-tools/mykrobe.md b/docs/bactopia-tools/mykrobe.md deleted file mode 100644 index 71837775..00000000 --- a/docs/bactopia-tools/mykrobe.md +++ /dev/null @@ -1,238 +0,0 @@ ---- -title: mykrobe -description: A Bactopia Tool which uses Mykrobe to predict antimicrobial resistance for _Mycobacterium tuberculosis_, _Staphylococcus aureus_, _Shigella sonnei_, and _Salmonella typhi_. ---- -# Bactopia Tool - `mykrobe` -The `mykrobe` module uses the command-line version of [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) to predict -antimicrobial resistance for _Mycobacterium tuberculosis_, _Staphylococcus aureus_, _Shigella sonnei_, and _Salmonella typhi_. - - -## Example Usage -``` -bactopia --wf mykrobe \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `mykrobe` tool. 
Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── mykrobe -│ ├── logs -│ │ ├── nf-mykrobe.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── .csv -│ └── .json -└── bactopia-runs - └── mykrobe- - ├── merged-results - │ ├── logs - │ │ └── mykrobe-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── mykrobe.csv - └── nf-reports - ├── mykrobe-dag.dot - ├── mykrobe-report.html - ├── mykrobe-timeline.html - └── mykrobe-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| mykrobe.tsv | A merged CSV file with `Mykrobe` results from all samples | - - -#### Mykrobe - -Below is a description of the _per-sample_ results from [Mykrobe](https://github.com/Mykrobe-tools/mykrobe). A full description of -each output file is available at [Mykrobe - AMR Prediction Output](https://github.com/Mykrobe-tools/mykrobe/wiki/AMR-prediction-output). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.csv | The output of `mykrobe predict` in comma-separated format | -| <SAMPLE_NAME>.json | The output of `mykrobe predict` in JSON format | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| mykrobe-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| mykrobe-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| mykrobe-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| mykrobe-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### Mykrobe Parameters - - -| Parameter | Description | -|:---|---| -| ` --mykrobe_species` | Species panel to use
**Type:** `string` | -| ` --kmer` | K-mer length
**Type:** `integer`, **Default:** `21` | -| ` --min_depth` | Minimum depth
**Type:** `integer`, **Default:** `1` | -| ` --model` | Genotype model used.
**Type:** `string`, **Default:** `kmer_count` | -| ` --report_all_calls` | Report all calls
**Type:** `boolean` | -| ` --mykrobe_opts` | Extra Mykrobe options in quotes
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularityu. (e.g. Singularity: '-D `pwd`'
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `mykrobe` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) - Hunt M, Bradley P, Lapierre SG, Heys S, Thomsit M, Hall MB, Malone KM, Wintringer P, Walker TM, Cirillo DM, Comas I, Farhat MR, Fowler P, Gardy J, Ismail N, Kohl TA, Mathys V, Merker M, Niemann S, Omar SV, Sintchenko V, Smith G, Supply P, Tahseen S, Wilcox M, Arandjelovic I, Peto TEA, Crook, DW, Iqbal Z [Antibiotic resistance prediction for Mycobacterium tuberculosis from genome sequence data with Mykrobe](https://doi.org/10.12688/wellcomeopenres.15603.1) _Wellcome Open Research_ 4, 191. (2019) - diff --git a/docs/bactopia-tools/ncbigenomedownload.md b/docs/bactopia-tools/ncbigenomedownload.md deleted file mode 100644 index 09c1a57c..00000000 --- a/docs/bactopia-tools/ncbigenomedownload.md +++ /dev/null @@ -1,128 +0,0 @@ ---- -tags: - - assembly - - download - - fasta ---- - - - -# Bactopia Tool - `ncbigenomedownload` -The `ncbi-genome-download` module uses [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) to -download genomes from NCBI Assembly database. This allows you to easily supplement your analyses with -publicly available genomes. - - -## Example Usage -``` -bactopia --wf ncbigenomedownload \ - --bactopia /path/to/your/bactopia/results \ - --include includes.txt -``` - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | Default | -|---|---|---| -| `--bactopia` | The path to bactopia results to use as inputs | | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | Default | -|---|---|---| -| `--include` | A text file containing sample names (one per line) to include from the analysis | | -| `--exclude` | A text file containing sample names (one per line) to exclude from the analysis | | - - -### NCBI Genome Download Parameters - - -| Parameter | Description | Default | -|---|---|---| -| `--species` | Name of the species to download assemblies | | -| `--accession` | An NCBI Assembly accession to be downloaded | | -| `--accessions` | An file of NCBI Assembly accessions (one per line) to be downloaded | | -| `--format` | Comma separated list of formats to download | fasta | -| `--section` | NCBI section to download | refseq | -| `--assembly_level` | Comma separated list of assembly levels to download | complete | -| `--kingdom` | Comma separated list of formats to download | bacteria | -| `--limit` | Limit the number of assemblies to download | | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | Default | -|---|---|---| -| `--outdir` | Base directory to write results to | ./ | -| `--run_name` | Name of the directory to hold results | bactopia | -| `--skip_compression` | Ouput files will not be compressed | False | -| `--keep_all_files` | Keeps all analysis files created | False | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | Default | -|---|---|---| -| `--max_retry` | Maximum times to retry a process before allowing it to fail. | 3 | -| `--max_cpus` | Maximum number of CPUs that can be requested for any single job. | 4 | -| `--max_memory` | Maximum amount of memory (in GB) that can be requested for any single job. 
| 32 | -| `--max_time` | Maximum amount of time (in minutes) that can be requested for any single job. | 120 | -| `--max_downloads` | Maximum number of samples to download at a time | 3 | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | Default | -|---|---|---| -| `--nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set. | | -| `--publish_dir_mode` | Method used to save pipeline results to output directory. | copy | -| `--infodir` | Directory to keep pipeline Nextflow logs and reports. | ${params.outdir}/pipeline_info | -| `--force` | Nextflow will overwrite existing output files. | False | -| `--cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted. | False | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | Default | -|---|---|---| -| `--condadir` | Directory to Nextflow should use for Conda environments | | -| `--registry` | Docker registry to pull containers from. | dockerhub | -| `--singularity_cache` | Directory where remote Singularity images are stored. | | -| `--singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead. | | -| `--force_rebuild` | Force overwrite of existing pre-built environments. | False | -| `--queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM) | general,high-memory | -| `--cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name' | | -| `--disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node. | False | - -### Helpful Parameters -Uncommonly used parameters that might be useful. 
- -| Parameter | Description | Default | -|---|---|---| -| `--monochrome_logs` | Do not use coloured log outputs. | | -| `--nfdir` | Print directory Nextflow has pulled Bactopia to | | -| `--sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution. | 5 | -| `--validate_params` | Boolean whether to validate parameters against the schema at runtime | True | -| `--help` | Display help text. | | -| `--wf` | Specify which workflow or Bactopia Tool to execute | bactopia | -| `--list_wfs` | List the available workflows and Bactopia Tools to use with '--wf' | | -| `--show_hidden_params` | Show all params when using `--help` | | -| `--help_all` | An alias for --help --show_hidden_params | | -| `--version` | Display version text. | | - -## Citations -If you use Bactopia and `ncbigenomedownload` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) - Blin K [ncbi-genome-download: Scripts to download genomes from the NCBI FTP servers](https://github.com/kblin/ncbi-genome-download) (GitHub) - diff --git a/docs/bactopia-tools/ngmaster.md b/docs/bactopia-tools/ngmaster.md deleted file mode 100644 index 3effc7f5..00000000 --- a/docs/bactopia-tools/ngmaster.md +++ /dev/null @@ -1,238 +0,0 @@ ---- -title: ngmaster -description: A Bactopia Tool which uses ngmaster for _in silico_ multi-antigen sequence typing for _Neisseria gonorrhoeae_ (NG-MAST). ---- -# Bactopia Tool - `ngmaster` -The `ngmaster` module uses [ngmaster](https://github.com/MDU-PHL/ngmaster) for -_in silico_ multi-antigen sequence typing for _Neisseria gonorrhoeae_ (NG-MAST). 
- - -## Example Usage -``` -bactopia --wf ngmaster \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `ngmaster` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── ngmaster -│ ├── .tsv -│ └── logs -│ ├── nf-ngmaster.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── ngmaster- - ├── merged-results - │ ├── logs - │ │ └── ngmaster-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── ngmaster.tsv - └── nf-reports - ├── ngmaster-dag.dot - ├── ngmaster-report.html - ├── ngmaster-timeline.html - └── ngmaster-trace.txt - -``` - -:::info[Directory structure might be different] - -`ngmaster` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `ngmaster` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| ngmaster.tsv | A merged TSV file with `ngmaster` results from all samples | - - -#### ngmaster - -Below is a description of the _per-sample_ results from [ngmaster](https://github.com/MDU-PHL/ngmaster). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `ngmaster` results | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| ngmaster-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| ngmaster-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| ngmaster-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| ngmaster-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### ngmaster Parameters - - -| Parameter | Description | -|:---|---| -| ` --csv` | output comma-separated format (CSV) rather than tab-separated
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularityu. (e.g. Singularity: '-D `pwd`'
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `ngmaster` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [ngmaster](https://github.com/MDU-PHL/ngmaster) - Kwong J, Gonçalves da Silva A, Schultz M, Seeman T [ngmaster - _In silico_ multi-antigen sequence typing for _Neisseria gonorrhoeae_ (NG-MAST)](https://github.com/MDU-PHL/ngmaster) (GitHub) - diff --git a/docs/bactopia-tools/pangenome.md b/docs/bactopia-tools/pangenome.md deleted file mode 100644 index 6bb77658..00000000 --- a/docs/bactopia-tools/pangenome.md +++ /dev/null @@ -1,642 +0,0 @@ ---- -title: pangenome -description: A Bactopia Tool which creates a pangenome, then a core-genome phylogeny with multiple different tools. ---- -# Bactopia Tool - `pangenome` -The `pangenome` subworkflow allows you to create a pan-genome with [PIRATE](https://github.com/SionBayliss/PIRATE), -[Panaroo](https://github.com/gtonkinhill/panaroo), or [Roary](https://github.com/sanger-pathogens/Roary)) of your samples. - -You can further supplement your pan-genome by including completed genomes. This is possible using the `--species` -or `--accessions` parameters. If used, [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) will -download available completed genomes available from RefSeq. Any downloaded genomes will be annotated with -[Prokka](https://github.com/tseemann/prokka) to create compatible GFF3 files. - -A phylogeny, based on the core-genome alignment, will be created by [IQ-Tree](https://github.com/Cibiv/IQ-TREE). 
Optionally -a recombination-masked core-genome alignment can be created with [ClonalFrameML](https://github.com/xavierdidelot/ClonalFrameML) -and [maskrc-svg](https://github.com/kwongj/maskrc-svg). - -Finally, the core genome pair-wise SNP distance for each sample is also calculated with -[snp-dists](https://github.com/tseemann/snp-dists) and additional pan-genome wide association studies can be conducted -using [Scoary](https://github.com/AdmiralenOla/Scoary). - - -## Example Usage -``` -bactopia --wf pangenome \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `pangenome` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -└── bactopia-runs - └── pangenome- - ├── clonalframeml - │ ├── core-genome.ML_sequence.fasta - │ ├── core-genome.em.txt - │ ├── core-genome.emsim.txt - │ ├── core-genome.importation_status.txt - │ ├── core-genome.labelled_tree.newick - │ ├── core-genome.position_cross_reference.txt - │ └── logs - │ ├── nf-clonalframeml.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - ├── core-genome.aln.gz - ├── core-genome.distance.tsv - ├── core-genome.iqtree - ├── core-genome.masked.aln.gz - ├── iqtree - │ ├── core-genome.alninfo - │ ├── core-genome.bionj - │ ├── core-genome.ckp.gz - │ ├── core-genome.contree - │ ├── core-genome.mldist - │ ├── core-genome.splits.nex - │ ├── core-genome.treefile - │ ├── core-genome.ufboot - │ └── logs - │ ├── core-genome.log - │ ├── nf-iqtree.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - ├── iqtree-fast - │ ├── logs - │ │ ├── nf-iqtree-fast.{begin,err,log,out,run,sh,trace} - │ │ ├── start-tree.log - │ │ └── versions.yml - │ ├── start-tree.bionj - │ ├── start-tree.ckp.gz - │ ├── start-tree.iqtree - │ ├── start-tree.mldist - │ ├── start-tree.model.gz - │ └── start-tree.treefile - ├── nf-reports - │ ├── pangenome-dag.dot - │ ├── pangenome-report.html - │ ├── pangenome-timeline.html - │ └── 
pangenome-trace.txt - ├── panaroo - │ ├── aligned_gene_sequences - │ ├── alignment_entropy.csv - │ ├── combined_DNA_CDS.fasta - │ ├── combined_protein_CDS.fasta - │ ├── combined_protein_cdhit_out.txt - │ ├── combined_protein_cdhit_out.txt.clstr - │ ├── core_alignment_filtered_header.embl - │ ├── core_alignment_header.embl - │ ├── core_gene_alignment_filtered.aln - │ ├── final_graph.gml - │ ├── gene_data.csv - │ ├── gene_presence_absence.Rtab - │ ├── gene_presence_absence.csv - │ ├── gene_presence_absence_roary.csv - │ ├── logs - │ │ ├── nf-panaroo.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ ├── pan_genome_reference.fa - │ ├── pre_filt_graph.gml - │ ├── struct_presence_absence.Rtab - │ └── summary_statistics.txt - ├── pirate - │ ├── PIRATE.gene_families.ordered.tsv - │ ├── PIRATE.gene_families.tsv - │ ├── PIRATE.genomes_per_allele.tsv - │ ├── PIRATE.pangenome_summary.txt - │ ├── PIRATE.unique_alleles.tsv - │ ├── binary_presence_absence.fasta.gz - │ ├── binary_presence_absence.nwk - │ ├── cluster_alleles.tab - │ ├── co-ords - │ │ └── .co-ords.tab - │ ├── core_alignment.fasta.gz - │ ├── core_alignment.gff - │ ├── feature_sequences - │ │ └── .{aa|nucleotide|.fasta.gz - │ ├── gene_presence_absence.csv - │ ├── genome2loci.tab - │ ├── genome_list.txt - │ ├── loci_list.tab - │ ├── loci_paralog_categories.tab - │ ├── logs - │ │ ├── nf-pirate.{begin,err,log,out,run,sh,trace} - │ │ ├── results - │ │ │ ├── PIRATE.log - │ │ │ ├── link_clusters.log - │ │ │ └── split_groups.log - │ │ └── versions.yml - │ ├── modified_gffs - │ ├── pan_sequences.fasta.gz - │ ├── pangenome.connected_blocks.tsv - │ ├── pangenome.edges - │ ├── pangenome.gfa - │ ├── pangenome.order.tsv - │ ├── pangenome.reversed.tsv - │ ├── pangenome.syntenic_blocks.tsv - │ ├── pangenome.temp - │ ├── pangenome_alignment.fasta.gz - │ ├── pangenome_alignment.gff - │ ├── pangenome_iterations - │ │ ├── pan_sequences.{50|60|70|80|90|95|98}.reclustered.reinflated - │ │ ├── pan_sequences.blast.output - │ │ ├── 
pan_sequences.cdhit_clusters - │ │ ├── pan_sequences.core_clusters.tab - │ │ ├── pan_sequences.mcl_log.txt - │ │ └── pan_sequences.representative.fasta.gz - │ ├── pangenome_log.txt - │ ├── paralog_clusters.tab - │ ├── representative_sequences.faa - │ └── representative_sequences.ffn - ├── roary - │ ├── accessory.header.embl - │ ├── accessory.tab - │ ├── accessory_binary_genes.fa.gz - │ ├── accessory_binary_genes.fa.newick - │ ├── accessory_graph.dot - │ ├── blast_identity_frequency.Rtab - │ ├── clustered_proteins - │ ├── core_accessory.header.embl - │ ├── core_accessory.tab - │ ├── core_accessory_graph.dot - │ ├── core_alignment_header.embl - │ ├── gene_presence_absence.Rtab - │ ├── gene_presence_absence.csv - │ ├── logs - │ │ ├── nf-roary.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ ├── number_of_conserved_genes.Rtab - │ ├── number_of_genes_in_pan_genome.Rtab - │ ├── number_of_new_genes.Rtab - │ ├── number_of_unique_genes.Rtab - │ ├── pan_genome_reference.fa.gz - │ └── summary_statistics.txt - └── snpdists - └── logs - ├── nf-snpdists.{begin,err,log,out,run,sh,trace} - └── versions.yml - -``` - - - -### Results - -#### Main Results - -Below are main results of the `pangenome` Bactopia Tool. - - -| Filename | Description | -|-------------------------------|-------------| -| core-genome.aln.gz | A multiple sequence alignment FASTA of the core genome | -| core-genome.distance.tsv | Core genome pair-wise SNP distance for each sample | -| core-genome.iqtree | Full result of the IQ-TREE core genome phylogeny | -| core-genome.masked.aln.gz | A core-genome alignment with the recombination masked | - - -#### ClonalFrameML - -Below is a description of the [ClonalFrameML](https://github.com/xavierdidelot/ClonalFrameML) results. For more details about -ClonalFrameML outputs see [ClonalFrameML - Outputs](https://github.com/xavierdidelot/clonalframeml/wiki#output). 
- - -| Filename | Description | -|-------------------------------|-------------| -| core-genome.ML_sequence.fasta | The sequence reconstructed by maximum likelihood for all internal nodes of the phylogeny, as well as for all missing data in the input sequences | -| core-genome.em.txt | The point estimates for R/theta, nu, delta and the branch lengths | -| core-genome.emsim.txt | The bootstrapped values for the three parameters R/theta, nu and delta | -| core-genome.importation_status.txt | The list of reconstructed recombination events | -| core-genome.labelled_tree.newick | The output tree with all nodes labelled so that they can be referred to in other files | -| core-genome.position_cross_reference.txt | A vector of comma-separated values indicating for each location in the input sequence file the corresponding position in the sequences in the output *ML_sequence.fasta* file | - - -#### IQ-TREE - -Below is a description of the [IQ-TREE](http://www.iqtree.org/) results. If ClonalFrameML is executed, a fast tree -is created and given the prefix `start-tree`, the final tree has the prefix `core-genome`. For more details about -IQ-TREE outputs see [IQ-TREE - Outputs](https://github.com/Cibiv/IQ-TREE/wiki/Web-Server-Tutorial#analysis-results). 
- - -| Filename | Description | -|-------------------------------|-------------| -| core-genome.alninfo | Alignment site statistics | -| {core-genome,start-tree}.bionj | A neighbor joining tree produced by BIONJ | -| {core-genome,start-tree}.ckp.gz | IQ-TREE writes a checkpoint file | -| core-genome.contree | Consensus tree with assigned branch supports where branch lengths are optimized on the original alignment; printed if Ultrafast Bootstrap is selected | -| {core-genome,start-tree}.mldist | Contains the likelihood distances | -| {core-genome,start-tree}.model.gz | Information about all models tested | -| core-genome.splits.nex | Support values in percentage for all splits (bipartitions), computed as the occurrence frequencies in the bootstrap trees | -| {core-genome,start-tree}.treefile | Maximum likelihood tree in NEWICK format, can be visualized with treeviewer programs | -| core-genome.ufboot | Trees created during the bootstrap steps | - - -#### PIRATE - -Below is a description of the [PIRATE](https://github.com/SionBayliss/PIRATE) results. For more details about -PIRATE outputs see [PIRATE - Output files](https://github.com/SionBayliss/PIRATE#output-files). - -:::note[Available by default] - -By default PIRATE is used to create the pan-genome. If `--use_panaroo` or `--use_roary` is given, `pirate` outputs will -not be available; only Panaroo or Roary outputs will be produced. 
-::: - - -| Filename | Description | -|-------------------------------|-------------| -| PIRATE.gene_families.ordered.tsv | Tabular summary of all gene families ordered on syntenic regions in the pangenome graph | -| PIRATE.gene_families.tsv | Tabular summary of all gene families | -| PIRATE.genomes_per_allele.tsv | A list of genomes associated with each allele | -| PIRATE.pangenome_summary.txt | Short summary of the number and frequency of genes in the pangenome | -| PIRATE.unique_alleles.tsv | Tabular summary of all unique alleles of each gene family | -| binary_presence_absence.{fasta.gz,nwk} | A tree (.nwk) generated by fasttree from binary gene_family presence-absence data and the fasta file used to create it | -| cluster_alleles.tab | List of alleles in paralogous clusters | -| co-ords/${SAMPLE_NAME}.co-ords.tab | Gene feature co-ordinates for each sample | -| core_alignment.fasta.gz | Gene-by-gene nucleotide alignments of the core genome created using MAFFT | -| core_alignment.gff | Annotation containing the position of the gene family within the core genome alignment | -| feature_sequences/${GENE_FAMILY}.{aa|nucleotide}.fasta | Amino acid and nucleotide sequences for each gene family | -| gene_presence_absence.csv | Lists each gene and which samples it is present in | -| genome2loci.tab | List of loci for each genome | -| genome_list.txt | List of genomes in the analysis | -| loci_list.tab | List of loci and their associated genomes | -| loci_paralog_categories.tab | Concatenation of classified paralogs | -| modified_gffs/${SAMPLE_NAME}.gff | GFF3 files which have been standardised for PIRATE | -| pan_sequences.fasta.gz | All representative sequences in the pangenome | -| pangenome.connected_blocks.tsv | List of connected blocks in the pangenome graph | -| pangenome.edges | List of classified edges in the pangenome graph | -| pangenome.gfa | GFA network file representing all unique connections between gene families | -| pangenome.order.tsv | Sorted list 
gene_families file on pangenome graph | -| pangenome.reversed.tsv | List of reversed blocks in the pangenome graph | -| pangenome.syntenic_blocks.tsv | List of syntenic blocks in the pangenome graph | -| pangenome.temp | Temporary file used by PIRATE | -| pangenome_alignment.fasta.gz | Gene-by-gene nucleotide alignments of the full pangenome created using MAFFT | -| pangenome_alignment.gff | Annotation containing the position of the gene family within the pangenome alignment | -| pangenome_iterations/pan_sequences.{50|60|70|80|90|95|98}.reclustered.reinflated | List of clusters for each reinflation threshold | -| pangenome_iterations/pan_sequences.blast.output | BLAST output of sequences against representatives and self hits. | -| pangenome_iterations/pan_sequences.cdhit_clusters | A list of CDHIT representative clusters | -| pangenome_iterations/pan_sequences.core_clusters.tab | A list of core clusters. | -| pangenome_iterations/pan_sequences.mcl_log.txt | A log file from `mcxdeblast` and `mcl` | -| pangenome_iterations/pan_sequences.representative.fasta | FASTA file with sequences for each representative cluster | -| pangenome_log.txt | Log file from PIRATE | -| paralog_clusters.tab | List of paralogous clusters | -| representative_sequences.{faa,ffn} | Representative protein and gene sequences for each gene family | - - -#### Panaroo - -Below is a description of the [Panaroo](https://github.com/gtonkinhill/panaroo) results. For more details about -Panaroo outputs see [Panaroo Documentation](https://gtonkinhill.github.io/panaroo/#/gettingstarted/output). - -:::note[Only available when `--use_panaroo` is given] - -By default PIRATE is used to create the pan-genome, unless `--use_panaroo` is given. 
-::: - - -| Filename | Description | -|-------------------------------|-------------| -| aligned_gene_sequences | A directory of per-gene alignments | -| combined_DNA_CDS.fasta.gz | All nucleotide sequence for the annotated genes | -| combined_protein_CDS.fasta.gz | All protein sequence for the annotated proteins | -| combined_protein_cdhit_out.txt | Log output from CD-HIT | -| combined_protein_cdhit_out.txt.clstr | Cluster information from CD-HIT | -| core_alignment_header.embl | The core/pan-genome alignment in EMBL format | -| core_gene_alignment.aln.gz | The core/pan-genome alignment in FASTA format | -| final_graph.gml | The final pan-genome graph generated by Panaroo | -| gene_data.csv | CSV linking each gene sequence and annotation to the internal representations | -| gene_presence_absence.Rtab | A binary tab separated version of the `gene_presence_absence.csv` | -| gene_presence_absence.csv | Lists each gene and which samples it is present in | -| gene_presence_absence_roary.csv | Lists each gene and which samples it is present in, in the same format as Roary | -| pan_genome_reference.fa.gz | FASTA file which contains a single representative nucleotide sequence from each of the clusters in the pan genome (core and accessory) | -| pre_filt_graph.gml | An intermediate pan-genome graph generated by Panaroo | -| struct_presence_absence.Rtab | A csv file which lists the presence and absence of different genomic rearrangement events | -| summary_statistics.txt | Number of genes in the core and accessory | - - -#### Roary - -Below is a description of the [Roary](https://github.com/sanger-pathogens/Roary/) results. For more details about -Roary outputs see [Roary Documentation](http://sanger-pathogens.github.io/Roary/). - -:::note[Only available when `--use_roary` is given] - -By default PIRATE is used to create the pan-genome, unless `--use_roary` is given. 
-::: - - -| Filename | Description | -|-------------------------------|-------------| -| accessory.header.embl | EMBL formatted file of accessory genes | -| accessory.tab | Tab-delimited formatted file of accessory genes | -| accessory_binary_genes.fa | A FASTA file with binary presence and absence of accessory genes | -| accessory_binary_genes.fa.newick | A tree created using the binary presence and absence of accessory genes | -| accessory_graph.dot | A graph in DOT format of how genes are linked together at the contig level in the accessory genome | -| blast_identity_frequency.Rtab | Blast results for percentage identity graph | -| clustered_proteins | Groups file where each line lists the sequences in a cluster | -| core_accessory.header.embl | EMBL formatted file of core genes | -| core_accessory.tab | Tab-delimited formatted file of core genes | -| core_accessory_graph.dot | A graph in DOT format of how genes are linked together at the contig level in the pan genome | -| core_alignment_header.embl | EMBL formatted file of core genome alignment | -| gene_presence_absence.csv | Lists each gene and which samples it is present in | -| gene_presence_absence.Rtab | Tab delimited binary matrix with the presence and absence of each gene in each sample | -| number_of_conserved_genes.Rtab | Graphs on how the pan genome varies as genomes are added (in random orders) | -| number_of_genes_in_pan_genome.Rtab | Graphs on how the pan genome varies as genomes are added (in random orders) | -| number_of_new_genes.Rtab | Graphs on how the pan genome varies as genomes are added (in random orders) | -| number_of_unique_genes.Rtab | Graphs on how the pan genome varies as genomes are added (in random orders) | -| pan_genome_reference.fa.gz | FASTA file which contains a single representative nucleotide sequence from each of the clusters in the pan genome (core and accessory) | -| summary_statistics.txt | Number of genes in the core and accessory | - - - - - -### Audit Trail - -Below 
are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| pangenome-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| pangenome-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| pangenome-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| pangenome-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. 
- -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### ClonalFrameML Parameters - - -| Parameter | Description | -|:---|---| -| ` --emsim` | Number of simulations to estimate uncertainty in the EM results
**Type:** `integer`, **Default:** `100` | -| ` --clonal_opts` | Extra ClonalFrameML options in quotes
**Type:** `string` | -| ` --skip_recombination` | Skip ClonalFrameML execution in subworkflows
**Type:** `boolean` | - -### IQ-TREE Parameters - - -| Parameter | Description | -|:---|---| -| ` --iqtree_model` | Substitution model name
**Type:** `string`, **Default:** `HKY` | -| ` --bb` | Ultrafast bootstrap replicates
**Type:** `integer`, **Default:** `1000` | -| ` --alrt` | SH-like approximate likelihood ratio test replicates
**Type:** `integer`, **Default:** `1000` | -| ` --asr` | Ancestral state reconstruction by empirical Bayes
**Type:** `boolean` | -| ` --iqtree_opts` | Extra IQ-TREE options in quotes.
**Type:** `string` | -| ` --skip_phylogeny` | Skip IQ-TREE execution in subworkflows
**Type:** `boolean` | - -### NCBI Genome Download Parameters - - -| Parameter | Description | -|:---|---| -| ` --species` | Name of the species to download assemblies
**Type:** `string` | -| ` --accession` | An NCBI Assembly accession to be downloaded
**Type:** `string` | -| ` --accessions` | A file of NCBI Assembly accessions (one per line) to be downloaded
**Type:** `string` | -| ` --format` | Comma separated list of formats to download
**Type:** `string`, **Default:** `fasta` | -| ` --section` | NCBI section to download
**Type:** `string`, **Default:** `refseq` | -| ` --assembly_level` | Comma separated list of assembly levels to download
**Type:** `string`, **Default:** `complete` | -| ` --kingdom` | Comma separated list of kingdoms to download
**Type:** `string`, **Default:** `bacteria` | -| ` --limit` | Limit the number of assemblies to download
**Type:** `string` | - -### PIRATE Parameters - - -| Parameter | Description | -|:---|---| -| ` --use_pirate` | Use PIRATE instead of panaroo in the 'pangenome' subworkflow
**Type:** `boolean` | -| ` --steps` | Percent identity thresholds to use for pangenome construction
**Type:** `string`, **Default:** `50,60,70,80,90,95,98` | -| ` --features` | Comma-delimited features to use for pangenome construction
**Type:** `string`, **Default:** `CDS` | -| ` --para_off` | Switch off paralog identification
**Type:** `boolean` | -| ` --z` | Retain all PIRATE intermediate files
**Type:** `boolean` | -| ` --pan_opt` | Additional arguments to pass to pangenome construction.
**Type:** `string` | - -### Prokka Parameters - - -| Parameter | Description | -|:---|---| -| ` --proteins` | FASTA file of trusted proteins to first annotate from
**Type:** `string` | -| ` --prodigal_tf` | Training file to use for Prodigal
**Type:** `string` | -| ` --compliant` | Force Genbank/ENA/DDBJ compliance
**Type:** `boolean` | -| ` --centre` | Sequencing centre ID
**Type:** `string`, **Default:** `Bactopia` | -| ` --prokka_coverage` | Minimum coverage on query protein
**Type:** `integer`, **Default:** `80` | -| ` --prokka_evalue` | Similarity e-value cut-off
**Type:** `string`, **Default:** `1e-09` | -| ` --prokka_opts` | Extra Prokka options in quotes.
**Type:** `string` | - -### Panaroo Parameters - - -| Parameter | Description | -|:---|---| -| ` --panaroo_mode` | The stringency mode at which to run panaroo
**Type:** `string`, **Default:** `strict` | -| ` --panaroo_alignment` | Output alignments of core genes or all genes
**Type:** `string`, **Default:** `core` | -| ` --panaroo_aligner` | Aligner to use for core/pan genome alignment
**Type:** `string`, **Default:** `mafft` | -| ` --panaroo_core_threshold` | Core-genome sample threshold
**Type:** `number`, **Default:** `0.95` | -| ` --panaroo_threshold` | Sequence identity threshold
**Type:** `number`, **Default:** `0.98` | -| ` --panaroo_family_threshold` | Protein family sequence identity threshold
**Type:** `number`, **Default:** `0.7` | -| ` --len_dif_percent` | Length difference cutoff
**Type:** `number`, **Default:** `0.98` | -| ` --merge_paralogs` | Do not split paralogs
**Type:** `boolean` | -| ` --panaroo_opts` | Additional options to pass to panaroo
**Type:** `string` | - -### Roary Parameters - - -| Parameter | Description | -|:---|---| -| ` --use_prank` | Use PRANK instead of MAFFT for core gene alignment
**Type:** `boolean` | -| ` --use_roary` | Use Roary instead of PIRATE in the 'pangenome' subworkflow
**Type:** `boolean` | -| ` --i` | Minimum percentage identity for blastp
**Type:** `integer`, **Default:** `95` | -| ` --cd` | Percentage of isolates a gene must be in to be core
**Type:** `integer`, **Default:** `99` | -| ` --g` | Maximum number of clusters
**Type:** `integer`, **Default:** `50000` | -| ` --s` | Do not split paralogs
**Type:** `boolean` | -| ` --ap` | Allow paralogs in core alignment
**Type:** `boolean` | -| ` --iv` | MCL inflation value
**Type:** `number`, **Default:** `1.5` | - -### Scoary Parameters - - -| Parameter | Description | -|:---|---| -| ` --traits` | Input trait table (CSV) to test for associations
**Type:** `string` | -| ` --p_value_cutoff` | For statistical tests, genes with higher p-values will not be reported
**Type:** `number`, **Default:** `0.05` | -| ` --correction` | Apply the indicated filtration measure.
**Type:** `string`, **Default:** `I` | -| ` --permute` | Perform N number of permutations of the significant results post-analysis
**Type:** `integer` | -| ` --start_col` | On which column in the gene presence/absence file do individual strain info start
**Type:** `integer`, **Default:** `15` | - -### SNP-Dists Parameters - - -| Parameter | Description | -|:---|---| -| ` --a` | Count all differences not just [AGTC]
**Type:** `boolean` | -| ` --b` | Keep top left corner cell
**Type:** `boolean` | -| ` --csv` | Output CSV instead of TSV
**Type:** `boolean` | -| ` --k` | Keep case, don't uppercase all letters
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `pangenome` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [ClonalFrameML](https://github.com/xavierdidelot/ClonalFrameML) - Didelot X, Wilson DJ [ClonalFrameML: Efficient Inference of Recombination in Whole Bacterial Genomes.](https://doi.org/10.1371/journal.pcbi.1004041) _PLoS Comput Biol_ 11(2) e1004041 (2015) - -- [IQ-TREE](https://github.com/Cibiv/IQ-TREE) - Nguyen L-T, Schmidt HA, von Haeseler A, Minh BQ [IQ-TREE: A fast and effective stochastic algorithm for estimating maximum likelihood phylogenies.](https://doi.org/10.1093/molbev/msu300) _Mol. Biol. Evol._ 32:268-274 (2015) - -- [ModelFinder](https://github.com/Cibiv/IQ-TREE) - Kalyaanamoorthy S, Minh BQ, Wong TKF, von Haeseler A, Jermiin LS [ModelFinder - Fast model selection for accurate phylogenetic estimates.](https://doi.org/10.1038/nmeth.4285) _Nat. Methods_ 14:587-589 (2017) - -- [UFBoot2](https://github.com/Cibiv/IQ-TREE) - Hoang DT, Chernomor O, von Haeseler A, Minh BQ, Vinh LS [UFBoot2: Improving the ultrafast bootstrap approximation.](https://doi.org/10.1093/molbev/msx281) _Mol. Biol. Evol._ 35:518–522 (2018) - -- [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) - Blin K [ncbi-genome-download: Scripts to download genomes from the NCBI FTP servers](https://github.com/kblin/ncbi-genome-download) (GitHub) - -- [Panaroo](https://github.com/gtonkinhill/panaroo) - Tonkin-Hill G, MacAlasdair N, Ruis C, Weimann A, Horesh G, Lees JA, Gladstone RA, Lo S, Beaudoin C, Floto RA, Frost SDW, Corander J, Bentley SD, Parkhill J [Producing polished prokaryotic pangenomes with the Panaroo pipeline.](https://doi.org/10.1186/s13059-020-02090-4) _Genome Biology_ 21(1), 180. 
(2020) - -- [PIRATE](http://github.com/SionBayliss/PIRATE) - Bayliss SC, Thorpe HA, Coyle NM, Sheppard SK, Feil EJ [PIRATE: A fast and scalable pangenomics toolbox for clustering diverged orthologues in bacteria.](https://doi.org/10.1093/gigascience/giz119) _Gigascience_ 8 (2019) - -- [Prokka](https://github.com/tseemann/prokka) - Seemann T [Prokka: rapid prokaryotic genome annotation](http://dx.doi.org/10.1093/bioinformatics/btu153) _Bioinformatics_ 30, 2068–2069 (2014) - -- [Roary](https://github.com/sanger-pathogens/Roary) - Page AJ, Cummins CA, Hunt M, Wong VK, Reuter S, Holden MTG, Fookes M, Falush D, Keane JA, Parkhill J [Roary: rapid large-scale prokaryote pan genome analysis.](https://doi.org/10.1093/bioinformatics/btv421) _Bioinformatics_ 31, 3691–3693 (2015) - -- [Scoary](https://github.com/AdmiralenOla/Scoary) - Brynildsrud O, Bohlin J, Scheffer L, Eldholm V [Rapid scoring of genes in microbial pan-genome-wide association studies with Scoary.](https://doi.org/10.1186/s13059-016-1108-8) _Genome Biol._ 17:238 (2016) - -- [snp-dists](https://github.com/tseemann/snp-dists) - Seemann T [snp-dists - Pairwise SNP distance matrix from a FASTA sequence alignment.](https://github.com/tseemann/snp-dists) (GitHub) - diff --git a/docs/bactopia-tools/pasty.md b/docs/bactopia-tools/pasty.md deleted file mode 100644 index bfdd017c..00000000 --- a/docs/bactopia-tools/pasty.md +++ /dev/null @@ -1,243 +0,0 @@ ---- -title: pasty -description: A Bactopia Tool which uses pasty for serogrouping _Pseudomonas aeruginosa_ isolates. ---- -# Bactopia Tool - `pasty` -The `pasty` module uses [pasty](https://github.com/rpetit3/pasty) for -serogrouping of _Pseudomonas aeruginosa_ isolates. - - -## Example Usage -``` -bactopia --wf pasty \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `pasty` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── pasty -│ ├── .blastn.tsv -│ ├── .details.tsv -│ ├── .tsv -│ └── logs -│ ├── nf-pasty.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── pasty- - ├── merged-results - │ ├── logs - │ │ └── pasty-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── pasty.tsv - └── nf-reports - ├── pasty-dag.dot - ├── pasty-report.html - ├── pasty-timeline.html - └── pasty-trace.txt - -``` - -:::info[Directory structure might be different] - -`pasty` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `pasty` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| pasty.tsv | A merged TSV file with `pasty` results from all samples | - - -#### pasty - -Below is a description of the _per-sample_ results from [pasty](https://github.com/rpetit3/pasty). - - -| Extension | Description | -|-------------------------------|-------------| -| .blastn.tsv | A tab-delimited file of all blast hits | -| .details.tsv | A tab-delimited file with details for each serogroup | -| .tsv | A tab-delimited file with the predicted serogroup | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| pasty-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| pasty-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| pasty-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| pasty-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### pasty Parameters - - -| Parameter | Description | -|:---|---| -| ` --pasty_min_pident` | Minimum percent identity to count a hit
**Type:** `integer`, **Default:** `95` | -| ` --pasty_min_coverage` | Minimum percent coverage to count a hit
**Type:** `integer`, **Default:** `95` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `pasty` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [pasty](https://github.com/rpetit3/pasty) - Petit III RA [pasty: in silico serogrouping of _Pseudomonas aeruginosa_ isolates](https://github.com/rpetit3/pasty) (GitHub) - diff --git a/docs/bactopia-tools/pbptyper.md b/docs/bactopia-tools/pbptyper.md deleted file mode 100644 index f751a5aa..00000000 --- a/docs/bactopia-tools/pbptyper.md +++ /dev/null @@ -1,241 +0,0 @@ ---- -title: pbptyper -description: A Bactopia Tool which uses pbptyper for typing the Penicillin Binding Protein (PBP) of _Streptococcus pneumoniae_ assemblies. ---- -# Bactopia Tool - `pbptyper` -The `pbptyper` module uses [pbptyper](https://github.com/rpetit3/pbptyper) for typing -the Penicillin Binding Protein (PBP) of _Streptococcus pneumoniae_ assemblies. - - -## Example Usage -``` -bactopia --wf pbptyper \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `pbptyper` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── pbptyper -│ ├── -{1A,2B,2X}.tblastn.tsv -│ ├── .tsv -│ └── logs -│ ├── nf-pbptyper.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── pbptyper- - ├── merged-results - │ ├── logs - │ │ └── pbptyper-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── pbptyper.tsv - └── nf-reports - ├── pbptyper-dag.dot - ├── pbptyper-report.html - ├── pbptyper-timeline.html - └── pbptyper-trace.txt - -``` - -:::info[Directory structure might be different] - -`pbptyper` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `pbptyper` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| pbptyper.tsv | A merged TSV file with `pbptyper` results from all samples | - - -#### pbptyper - -Below is a description of the _per-sample_ results from [pbptyper](https://github.com/rpetit3/pbptyper). - - -| Extension | Description | -|-------------------------------|-------------| -| .tblastn.tsv | A tab-delimited file of all blast hits | -| .tsv | A tab-delimited file with the predicted PBP type | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| pbptyper-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| pbptyper-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| pbptyper-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| pbptyper-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### pbptyper Parameters - - -| Parameter | Description | -|:---|---| -| ` --pbptyper_min_pident` | Minimum percent identity to count a hit
**Type:** `integer`, **Default:** `95` | -| ` --pbptyper_min_coverage` | Minimum percent coverage to count a hit
**Type:** `integer`, **Default:** `95` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `pbptyper` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [pbptyper](https://github.com/rpetit3/pbptyper) - Petit III RA [pbptyper: In silico Penicillin Binding Protein (PBP) typer for _Streptococcus pneumoniae_ assemblies](https://github.com/rpetit3/pbptyper) (GitHub) - diff --git a/docs/bactopia-tools/phispy.md b/docs/bactopia-tools/phispy.md deleted file mode 100644 index 78bb602e..00000000 --- a/docs/bactopia-tools/phispy.md +++ /dev/null @@ -1,253 +0,0 @@ ---- -title: phispy -description: A Bactopia Tool which uses PhiSpy to identify prophages from bacterial and archaeal genomes. - ---- -# Bactopia Tool - `phispy` -The `phispy` module uses [PhiSpy](https://github.com/linsalrob/PhiSpy) to identify prophages -from bacterial and archaeal genomes. - - -## Example Usage -``` -bactopia --wf phispy \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `phispy` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── phispy -| ├── _prophage_information.tsv -| ├── _bacteria.fasta -| ├── _bacteria.gbk -| ├── _phage.fasta -| ├── _phage.gbk -| ├── _prophage.gff3 -| ├── _prophage.tbl -| └── _prophage.tsv -│ ├── .tsv -│ └── logs -│ ├── .log -│ ├── nf-phispy.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── phispy- - ├── merged-results - │ ├── logs - │ │ └── phispy-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── phispy.tsv - └── nf-reports - ├── phispy-dag.dot - ├── phispy-report.html - ├── phispy-timeline.html - └── phispy-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| phispy.tsv | A summary of the `phispy` results for all samples | - - -#### PhiSpy - -Below is a description of the _per-sample_ results from [PhiSpy](https://github.com/linsalrob/PhiSpy). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_prophage_information.tsv | Contains prophage information for all the genes of the genome, one per line. | -| <SAMPLE_NAME>_bacteria.fasta | Genome with prophage regions masked with N. | -| <SAMPLE_NAME>_bacteria.gbk | Genome sequences identified as bacterial. | -| <SAMPLE_NAME>_phage.fasta | Phage sequences extracted from the genome in FASTA format. | -| <SAMPLE_NAME>_phage.gbk | Phage sequences extracted from the genome in GenBank format. | -| <SAMPLE_NAME>_prophage.gff3 | Prophage information in GFF3 format. | -| <SAMPLE_NAME>_prophage.tbl | Prophage number and its location in the genome. | -| <SAMPLE_NAME>.tsv | Coordinates of each prophage identified in the genome, and their att sites (if found). | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. 
- -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| phispy-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| phispy-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| phispy-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| phispy-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. 
- -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### PhiSpy Parameters - - -| Parameter | Description | -|:---|---| -| ` --phispy_number` | Number of consecutive genes in a region of window size that must be prophage genes to be called
**Type:** `integer`, **Default:** `5` | -| ` --phispy_mincontigsize` | Minimum contig size (in bp) to be included in the analysis. Smaller contigs will be dropped.
**Type:** `integer`, **Default:** `5000` | -| ` --phispy_windowsize` | Window size of consecutive genes to look through to find phages
**Type:** `integer`, **Default:** `30` | -| ` --phispy_nonprophage_genegaps` | The number of non phage genes between prophages
**Type:** `integer`, **Default:** `10` | -| ` --phispy_phage_genes` | The minimum number of genes that must be identified as belonging to a phage for the region to be included
**Type:** `integer`, **Default:** `1` | -| ` --phispy_randomforest_trees` | Number of trees generated by Random Forest classifier
**Type:** `integer`, **Default:** `500` | -| ` --phispy_opts` | Extra options in quotes for PhiSpy
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `phispy` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [PhiSpy](https://github.com/linsalrob/PhiSpy) - Akhter S, Aziz RK, and Edwards RA [PhiSpy: a novel algorithm for finding prophages in bacterial genomes that combines similarity- and composition-based strategies.](https://doi.org/10.1093/nar/gks406) _Nucleic Acids Research_, 40(16), e126. (2012) - diff --git a/docs/bactopia-tools/phyloflash.md b/docs/bactopia-tools/phyloflash.md deleted file mode 100644 index 7419de20..00000000 --- a/docs/bactopia-tools/phyloflash.md +++ /dev/null @@ -1,346 +0,0 @@ -# Bactopia Tools - *phyloflash* -The `phyloflash` tool uses [phyloFlash](https://github.com/HRGV/phyloFlash) to -resconstruct 16S rRNA genes from your input samples. Optionally these reconstructed -genes can then be aligned to one another with [MAFFT](https://mafft.cbrc.jp/alignment/software/) -and a phylogenetic representation created using [IQ-TREE](http://www.iqtree.org/) - -## Example -The following command will reconstruct the 16S rRNA gene for each sample except those listed in the *exclude* file. -``` -bactopia tools phyloflash \ - --bactopia ~/bactopia-tutorial/bactopia \ - --phyloflash ~/bactopia-tutorial/bactopia-datasets/16s/138 \ - --exclude ~/bactopia-tutorial/bactopia-tools/summary/bactopia-exclude.txt -``` - -## Output Overview -Below is the default output structure for the `phyloflash` tool. Where possible the -file descriptions below were modified from a tools description. 
- -``` -bactopia-tools/ -└── phyloflash/ - └── ${PREFIX} - ├── alignment - │   ├── phyloflash-alignment.fasta - │   └── phyloflash-matches.txt - ├── bactopia-info - │   ├── phyloflash-report.html - │   ├── phyloflash-timeline.html - │   └── phyloflash-trace.txt - ├── iqtree - │   ├── 16s.alninfo - │   ├── 16s.bionj - │   ├── 16s.ckp.gz - │   ├── 16s.iqtree - │   ├── 16s.log - │   ├── 16s.mldist - │   ├── 16s.model.gz - │   ├── 16s.treefile - │   └── 16s.uniqueseq.phy - ├── phyloflash.iqtree - ├── phyloflash-summary.txt - └── samples - └── ${SAMPLE_NAME} - ├── ${SAMPLE_NAME}.all.dbhits.NR97.fa - ├── ${SAMPLE_NAME}.all.final.fasta - ├── ${SAMPLE_NAME}.all.final.phyloFlash.dbhits.fa - ├── ${SAMPLE_NAME}.all.final.phyloFlash.notmatched.fa - ├── ${SAMPLE_NAME}.all.vsearch.csv - ├── ${SAMPLE_NAME}.assemratio.csv - ├── ${SAMPLE_NAME}.assemratio.csv.svg - ├── ${SAMPLE_NAME}.bbmap.out - ├── ${SAMPLE_NAME}.bbmap.sam - ├── ${SAMPLE_NAME}.hitstats - ├── ${SAMPLE_NAME}.idhistogram - ├── ${SAMPLE_NAME}.idhistogram.svg - ├── ${SAMPLE_NAME}.inserthistogram - ├── ${SAMPLE_NAME}.inserthistogram.svg - ├── ${SAMPLE_NAME}.mapratio.csv - ├── ${SAMPLE_NAME}.mapratio.csv.svg - ├── ${SAMPLE_NAME}.phyloFlash - ├── ${SAMPLE_NAME}.phyloFlash.extractedSSUclassifications.csv - ├── ${SAMPLE_NAME}.phyloFlash.html - ├── ${SAMPLE_NAME}.phyloFlash.json - ├── ${SAMPLE_NAME}.phyloFlash.NTUabundance.csv - ├── ${SAMPLE_NAME}.phyloFlash.NTUabundance.csv.svg - ├── ${SAMPLE_NAME}.phyloFlash.NTUfull_abundance.csv - ├── ${SAMPLE_NAME}.phyloFlash.report.csv - ├── ${SAMPLE_NAME}.phyloFlash.unassembled.NTUabundance.csv - ├── ${SAMPLE_NAME}.remap_spades.bbmap.out - ├── ${SAMPLE_NAME}.spades.out - ├── ${SAMPLE_NAME}.spades_rRNAs.final.fasta - ├── ${SAMPLE_NAME}.${SAMPLE_NAME}_R1.fastq.gz.SSU.1.fq - ├── ${SAMPLE_NAME}.${SAMPLE_NAME}_R1.fastq.gz.SSU.2.fq - ├── ${SAMPLE_NAME}.${SAMPLE_NAME}_R1.fastq.gz.SSU.sam - ├── ${SAMPLE_NAME}.${SAMPLE_NAME}_R1.fastq.gz.SSU_spades.sam - ├── 
${SAMPLE_NAME}.SSU.collection.alignment.fasta - ├── ${SAMPLE_NAME}.SSU.collection.fasta - ├── ${SAMPLE_NAME}.SSU.collection.fasta.tree - ├── ${SAMPLE_NAME}.SSU.collection.fasta.tree.svg - ├── ${SAMPLE_NAME}.toalign.fasta - └── ${SAMPLE_NAME}-unprocessed.txt -``` - -| Filename | Description | -|----------|-------------| -| phyloflash.iqtree | Full result of the run, this is the main report file (a copy of *iqtree/16s.iqtree*) | -| phyloflash-summary.txt | The aggregated phyloFlash results of all samples | - -### Directory Description -#### alignment -| Filename | Description | -|----------|-------------| -| phyloflash-alignment.fasta | The multiple sequence alignment produced by MAFFT. | -| phyloflash-matches.txt | A list of reconstructed 16S genes and their match | - -#### bactopia-info -| Filename | Description | -|----------|-------------| -| phyloflash-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| phyloflash-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| phyloflash-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - -#### iqtree -Where possible descriptions were taken from -IQ-TREE's [Command Reference](https://github.com/Cibiv/IQ-TREE/wiki/Command-Reference) -page, [Web Server Tutorial](https://github.com/Cibiv/IQ-TREE/wiki/Web-Server-Tutorial) page, -and the [Tutorial](http://www.iqtree.org/doc/Tutorial) page. 
- -| Filename | Description | -|----------|-------------| -| 16s.alninfo | Alignment site statistics | -| 16s.bionj | A neighbor joining tree produced by BIONJ | -| 16s.ckp.gz | IQ-TREE writes a checkpoint file | -| 16s.contree | Consensus tree with assigned branch supports where branch lengths are optimized on the original alignment; printed if Ultrafast Bootstrap is selected | -| 16s.iqtree | Full result of the run, this is the main report file | -| 16s.log | Run log | -| 16s.mldist | Contains the likelihood distances | -| 16s.model.gz | Information about all models tested | -| 16s.splits.nex | Support values in percentage for all splits (bipartitions), computed as the occurence frequencies in the bootstrap trees | -| 16s.treefile | Maximum likelihood tree in NEWICK format, can be visualized with treeviewer programs | -| 16s.ufboot | Trees created during the bootstrap steps | -| 16s.uniqueseq.phy | Unique sequences indentified by IQ-TREE | - - -#### samples -Where possible descriptions were taken from phyloFlash's [Output Summary](https://hrgv.github.io/phyloFlash/output.html) -and the phyloFlash source [PhyloFlash.pm](https://github.com/HRGV/phyloFlash/blob/master/PhyloFlash.pm) - -| Filename | Description | -|----------|-------------| -| ${SAMPLE_NAME}.all.dbhits.NR97.fa | Reference sequences from database with hits from the supplied reads, clustered at 97% identity | -| ${SAMPLE_NAME}.all.final.fasta | All assembled and reconstructed sequences from SPAdes in a single file | -| ${SAMPLE_NAME}.all.final.phyloFlash.dbhits.fa | FASTA file of all sequences in database with hits to reconstructed sequences | -| ${SAMPLE_NAME}.all.final.phyloFlash.notmatched.fa | FASTA file of full-length sequences without any database hits | -| ${SAMPLE_NAME}.all.vsearch.csv | CSV file of Vsearch output | -| ${SAMPLE_NAME}.assemratio.csv | CSV file of ratio assembled to unassembled | -| ${SAMPLE_NAME}.assemratio.csv.svg | A SVG image of the above ratios | -| ${SAMPLE_NAME}.bbmap.out 
| The bbmap log | -| ${SAMPLE_NAME}.bbmap.sam | The alignment of reads against 16S genes | -| ${SAMPLE_NAME}.hitstats | A SVG image of the above ratios | -| ${SAMPLE_NAME}.idhistogram | Histogram of the % identity of reads vs. reference database sequences, in tab-separated format | -| ${SAMPLE_NAME}.idhistogram.svg | A SVG image of the histogram above | -| ${SAMPLE_NAME}.inserthistogram | Histogram of detected insert sizes in tab-separated format, if paired-end reads were input | -| ${SAMPLE_NAME}.inserthistogram.svg | A SVG image of the above histogram | -| ${SAMPLE_NAME}.mapratio.csv | Ratios of mapped vs unmapped to report | -| ${SAMPLE_NAME}.mapratio.csv.svg | A SVG image of the above ratio | -| ${SAMPLE_NAME}.phyloFlash | Plain text file version of the HTML report | -| ${SAMPLE_NAME}.phyloFlash.extractedSSUclassifications.csv | Taxonomic classification of full-length sequences, in CSV format | -| ${SAMPLE_NAME}.phyloFlash.html | phyloFlash report file in HTML format, with a report on the taxonomic composition of SSU rRNA reads, quality metrics for the library, and affiliation of the reconstructed/assembled full-length sequences | -| ${SAMPLE_NAME}.phyloFlash.json | JSON version of *${SAMPLE_NAME}.phyloFlash* | -| ${SAMPLE_NAME}.phyloFlash.NTUabundance.csv | The list of uniqe higher level taxa (e.g. 
orders for bacteria) in the order of their appearance | -| ${SAMPLE_NAME}.phyloFlash.NTUabundance.csv.svg | A SVG image depicting the NTU abundances | -| ${SAMPLE_NAME}.phyloFlash.NTUfull_abundance.csv | NTU abundances (untruncated) from initial mapping, in CSV format | -| ${SAMPLE_NAME}.phyloFlash.report.csv | phyloFlash report in CSV format | -| ${SAMPLE_NAME}.phyloFlash.unassembled.NTUabundance.csv | Taxonomic composition of unassembled SSU reads in CSV format | -| ${SAMPLE_NAME}.remap_spades.bbmap.out | SAM file of re-mapping extracted reads to SPAdes full-length sequences | -| ${SAMPLE_NAME}.spades.out | The SPAdes log | -| ${SAMPLE_NAME}.spades_rRNAs.final.fasta | Assembled OTUs from SPAdes with phyloFlash simplified headers | -| ${SAMPLE_NAME}.${SAMPLE_NAME}_R1.fastq.gz.SSU.1.fq | The filtered SSU reads and their paired read, forward read file | -| ${SAMPLE_NAME}.${SAMPLE_NAME}_R1.fastq.gz.SSU.2.fq | The filtered SSU reads and their paired read, reverse read file | -| ${SAMPLE_NAME}.${SAMPLE_NAME}_R1.fastq.gz.SSU.sam | SAM file of initial read mapping to SSU rRNA database | -| ${SAMPLE_NAME}.${SAMPLE_NAME}_R1.fastq.gz.SSU_spades.sam | SAM file of re-mapping extracted reads to SPAdes full-length sequences | -| ${SAMPLE_NAME}.SSU.collection.alignment.fasta | An aligned multifasta of all the predicted OTUs and the references | -| ${SAMPLE_NAME}.SSU.collection.fasta | A multifasta of all the predicted OTUs and the references | -| ${SAMPLE_NAME}.SSU.collection.fasta.tree | An NJ tree of the MAFFT alignment of all the predicted OTUs and the references | -| ${SAMPLE_NAME}.SSU.collection.fasta.tree.svg | An SVG image of the tree above | -| ${SAMPLE_NAME}.toalign.fasta | Sequences from the sample that were used in the MAFFT alignment | -| ${SAMPLE_NAME}-unprocessed.txt | Text file with reason for not processing sample | - -## Usage -``` -Required Parameters: - --bactopia STR Directory containing Bactopia analysis results for all samples. 
- - --phyloflash STR Directory containing a pre-built phyloFlash database. - -Optional Parameters: - --include STR A text file containing sample names to include in the - analysis. The expected format is a single sample per line. - - --exclude STR A text file containing sample names to exclude from the - analysis. The expected format is a single sample per line. - - --prefix DIR Prefix to use for final output files - Default: phyloflash - - --outdir DIR Directory to write results to - Default: ./ - - --max_time INT The maximum number of minutes a job should run before being halted. - Default: 1440 minutes - - --max_memory INT The maximum amount of memory (Gb) allowed to a single process. - Default: 32 Gb - - --cpus INT Number of processors made available to a single - process. - Default: 4 - -phyloFlash Related Parameters: - --download_phyloflash Download the latest phyloFlash database, even it exists. - - --yes You acknowledge SILVAs license. - - --taxlevel INT Level in the taxonomy string to summarize read counts per taxon. - Numeric and 1-based (i.e. "1" corresponds to "Domain"). - Default: 6 - - --phyloflash_opts STR Extra phyloFlash options in quotes. - Default: '' - - --allow_multiple_16s Include samples with multiple reconstructed 16S genes. Due to - high sequence similarity in true multi-copy 16S genes, it - is unlikely each copy will be reconstructed, instead only - one. In order to get more than one reconstructed 16S gene - there must be a significant difference in the sequence - identity. As a consequence, any samples that have multiple - 16S genes reconstructed contain multiple different species - within their sequencing. - Default: Exclude samples with multiple 16S genes - - -MAFFT Related Parameters: - --align_all Include reconstructed 16S genes as well as the corresponding - reference 16S genes in the alignment. - - --mafft_opts STR MAFFT options to include (in quotes). 
- Default: '' - -IQ-TREE Related Parameters: - --skip_phylogeny Skip the creation a core-genome based phylogeny - - --m STR Substitution model name - Default: MFP - - --bb INT Ultrafast bootstrap replicates - Default: 1000 - - --alrt INT SH-like approximate likelihood ratio test replicates - Default: 1000 - - --asr Ancestral state reconstruction by empirical Bayes - Default: false - - --iqtree_opts STR Extra IQ-TREE options in quotes. - Default: '' - -Nextflow Related Parameters: - --condadir DIR Directory to Nextflow should use for Conda environments - Default: Bactopia's Nextflow directory - - --publish_mode Set Nextflow's method for publishing output files. Allowed methods are: - 'copy' (default) Copies the output files into the published directory. - - 'copyNoFollow' Copies the output files into the published directory - without following symlinks ie. copies the links themselves. - - 'link' Creates a hard link in the published directory for each - process output file. - - 'rellink' Creates a relative symbolic link in the published directory - for each process output file. - - 'symlink' Creates an absolute symbolic link in the published directory - for each process output file. - - Default: copy - - --force Nextflow will overwrite existing output files. - Default: false - - --conatainerPath Path to Singularity containers to be used by the 'slurm' - profile. - Default: /opt/bactopia/singularity - - --sleep_time After reading datases, the amount of time (seconds) Nextflow - will wait before execution. - Default: 5 seconds -Useful Parameters: - --version Print workflow version information - --help Show this message and exit -``` - -## Conda Environment -Below is the command that was used to create the Conda environment. -``` -conda create -y -n bactopia-phyloflash -c conda-forge -c bioconda \ - phyloflash \ - mafft \ - iqtree \ - pigz -``` - -## References -* __[Barrnap](https://github.com/tseemann/barrnap)__ -_Seemann, T. 
[Barrnap: Bacterial ribosomal RNA predictor](https://github.com/tseemann/barrnap)._ - -* __[BBTools](https://jgi.doe.gov/data-and-tools/bbtools/)__ -_Bushnell, B. [BBMap short read aligner, and other bioinformatic tools.](http://sourceforge.net/projects/bbmap/)_ - -* __[Bedtools](https://github.com/arq5x/bedtools2)__ -_Quinlan, A. R. & Hall, I. M. [BEDTools: a flexible suite of utilities for -comparing genomic features](http://dx.doi.org/10.1093/bioinformatics/btq033). -Bioinformatics 26, 841–842 (2010)._ - -* __[IQ-TREE](https://github.com/Cibiv/IQ-TREE)__ -_L.-T. Nguyen, H.A. Schmidt, A. von Haeseler, B.Q. Minh (2015) -[IQ-TREE: A fast and effective stochastic algorithm for estimating maximum likelihood phylogenies.](https://doi.org/10.1093/molbev/msu300) - Mol. Biol. Evol., 32:268-274._ -_S. Kalyaanamoorthy, B.Q. Minh, T.K.F. Wong, A. von Haeseler, L.S. Jermiin (2017) -[ModelFinder: Fast model selection for accurate phylogenetic estimates.](https://doi.org/10.1038/nmeth.4285) -Nat. Methods, 14:587-589._ -_D.T. Hoang, O. Chernomor, A. von Haeseler, B.Q. Minh, L.S. Vinh (2018) [ -UFBoot2: Improving the ultrafast bootstrap approximation.](https://doi.org/10.1093/molbev/msx281) - Mol. Biol. Evol., 35:518–522._ - -* __[MAFFT](https://mafft.cbrc.jp/alignment/software/)__ -_Katoh, K. & Standley, D. M. -[MAFFT multiple sequence alignment software version 7: improvements in performance and usability.](https://doi.org/10.1093/molbev/mst010) -Mol. Biol. Evol. 30, 772–780 (2013)_ - -* __[nhmmer](http://hmmer.org/)__ -_Wheeler, T. J. & Eddy, S. R. -[nhmmer: DNA homology search with profile HMMs.](https://doi.org/10.1093/bioinformatics/btt403) - Bioinformatics 29, 2487–2489 (2013)_ - -* __[phyloFlash](https://github.com/HRGV/phyloFlash)__ -_H. R. Gruber-Vodicka, B.KB. Seah, E. Pruesse. 
-[phyloFlash — Rapid SSU rRNA profiling and targeted assembly from metagenomes.](https://doi.org/10.1101/521922) -bioRxiv 521922_ - -* __[SILVA rRNA Database](https://www.arb-silva.de/)__ -_Quast, C. et al. -[The SILVA ribosomal RNA gene database project: improved data processing and web-based tools.](https://doi.org/10.1093/nar/gks1219) -Nucleic Acids Res. 41, D590–6 (2013)_ - -* __[SPAdes](https://github.com/ablab/spades)__ -_Bankevich, A., et al. -[SPAdes: a new genome assembly algorithm and its applications to single-cell sequencing.](https://doi.org/10.1089/cmb.2012.0021) -Journal of computational biology 19.5 (2012): 455-477._ - -* __[VSEARCH](https://github.com/torognes/vsearch)__ -_Rognes, T., Flouri, T., Nichols, B., Quince, C. & Mahé, F. -[VSEARCH: a versatile open source tool for metagenomics.](https://doi.org/10.7717/peerj.2584) - PeerJ 4, e2584 (2016)_ diff --git a/docs/bactopia-tools/pirate.md b/docs/bactopia-tools/pirate.md deleted file mode 100644 index f5205ad1..00000000 --- a/docs/bactopia-tools/pirate.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -tags: - - alignment - - gff - - pan-genome ---- - - - -# Bactopia Tool - `pirate` -The `pirate` module uses [PIRATE](https://github.com/SionBayliss/PIRATE) to create a pan-genome of -your samples. - - -## Example Usage -``` -bactopia --wf pirate \ - --bactopia /path/to/your/bactopia/results \ - --include includes.txt -``` - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | Default | -|---|---|---| -| `--bactopia` | The path to bactopia results to use as inputs | | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. 
- -| Parameter | Description | Default | -|---|---|---| -| `--include` | A text file containing sample names (one per line) to include from the analysis | | -| `--exclude` | A text file containing sample names (one per line) to exclude from the analysis | | - - -### PIRATE Parameters - - -| Parameter | Description | Default | -|---|---|---| -| `--steps` | Percent identity thresholds to use for pangenome construction | 50,60,70,80,90,95,98 | -| `--features` | Comma-delimited features to use for pangenome construction | CDS | -| `--para_off` | Switch off paralog identification | False | -| `--z` | Retain all PIRATE intermediate files | False | -| `--pan_opt` | Additional arguments to pass to pangenome contruction. | | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | Default | -|---|---|---| -| `--outdir` | Base directory to write results to | ./ | -| `--run_name` | Name of the directory to hold results | bactopia | -| `--skip_compression` | Ouput files will not be compressed | False | -| `--keep_all_files` | Keeps all analysis files created | False | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | Default | -|---|---|---| -| `--max_retry` | Maximum times to retry a process before allowing it to fail. | 3 | -| `--max_cpus` | Maximum number of CPUs that can be requested for any single job. | 4 | -| `--max_memory` | Maximum amount of memory (in GB) that can be requested for any single job. | 32 | -| `--max_time` | Maximum amount of time (in minutes) that can be requested for any single job. | 120 | -| `--max_downloads` | Maximum number of samples to download at a time | 3 | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. 
- -| Parameter | Description | Default | -|---|---|---| -| `--nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set. | | -| `--publish_dir_mode` | Method used to save pipeline results to output directory. | copy | -| `--infodir` | Directory to keep pipeline Nextflow logs and reports. | ${params.outdir}/pipeline_info | -| `--force` | Nextflow will overwrite existing output files. | False | -| `--cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted. | False | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | Default | -|---|---|---| -| `--condadir` | Directory to Nextflow should use for Conda environments | | -| `--registry` | Docker registry to pull containers from. | dockerhub | -| `--singularity_cache` | Directory where remote Singularity images are stored. | | -| `--singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead. | | -| `--force_rebuild` | Force overwrite of existing pre-built environments. | False | -| `--queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM) | general,high-memory | -| `--cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name' | | -| `--disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node. | False | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | Default | -|---|---|---| -| `--monochrome_logs` | Do not use coloured log outputs. | | -| `--nfdir` | Print directory Nextflow has pulled Bactopia to | | -| `--sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution. 
| 5 | -| `--validate_params` | Boolean whether to validate parameters against the schema at runtime | True | -| `--help` | Display help text. | | -| `--wf` | Specify which workflow or Bactopia Tool to execute | bactopia | -| `--list_wfs` | List the available workflows and Bactopia Tools to use with '--wf' | | -| `--show_hidden_params` | Show all params when using `--help` | | -| `--help_all` | An alias for --help --show_hidden_params | | -| `--version` | Display version text. | | - -## Citations -If you use Bactopia and `pirate` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [PIRATE](http://github.com/SionBayliss/PIRATE) - Bayliss SC, Thorpe HA, Coyle NM, Sheppard SK, Feil EJ [PIRATE: A fast and scalable pangenomics toolbox for clustering diverged orthologues in bacteria.](https://doi.org/10.1093/gigascience/giz119) _Gigascience_ 8 (2019) - diff --git a/docs/bactopia-tools/plasmidfinder.md b/docs/bactopia-tools/plasmidfinder.md deleted file mode 100644 index 51429ef0..00000000 --- a/docs/bactopia-tools/plasmidfinder.md +++ /dev/null @@ -1,237 +0,0 @@ ---- -title: Bactopia Tools - -description: A Bactopia Tool which uses - ---- -# Bactopia Tool - `plasmidfinder` -The `plasmidfinder` module identifies plasmids in total or partial sequenced isolates of bacteria. - - -## Example Usage -``` -bactopia --wf plasmidfinder \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `plasmidfinder` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── plasmidfinder -│ ├── -hit_in_genome_seq.fsa -│ ├── -plasmid_seqs.fsa -│ ├── .{json|tsv|txt} -│ └── logs -│ ├── nf-plasmidfinder.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── plasmidfinder- - ├── merged-results - │ ├── logs - │ │ └── plasmidfinder-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── plasmidfinder.tsv - └── nf-reports - ├── plasmidfinder-dag.dot - ├── plasmidfinder-report.html - ├── plasmidfinder-timeline.html - └── plasmidfinder-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| plasmidfinder.tsv | A merged TSV file with `PlasmidFinder` results from all samples | - - -#### PlasmidFinder - -Below is a description of the _per-sample_ results from [PlasmidFinder](https://bitbucket.org/genomicepidemiology/plasmidfinder/src/master/). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.json | The results from analysis in JSON format | -| <SAMPLE_NAME>.tsv | The results from analysis in TSV format | -| <SAMPLE_NAME>.txt | A text file containing the result table and alignments | -| <SAMPLE_NAME>-hit_in_genome_seq.fsa | A fasta file containing the best matching sequences from the query genome. | -| <SAMPLE_NAME>-plasmid_seqs.fsa | A fasta file containing the best matching plasmid genes from the database. | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| plasmidfinder-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| plasmidfinder-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| plasmidfinder-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| plasmidfinder-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include in the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### PlasmidFinder Parameters - - -| Parameter | Description | -|:---|---| -| ` --pf_mincov` | Minimum percent coverage to be considered a hit
**Type:** `number`, **Default:** `0.6` | -| ` --pf_threshold` | Minimum threshold for identity
**Type:** `number`, **Default:** `0.9` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `plasmidfinder` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [PlasmidFinder](https://bitbucket.org/genomicepidemiology/plasmidfinder) - Carattoli A, Zankari E, García-Fernández A, Voldby Larsen M, Lund O, Villa L, Møller Aarestrup F, Hasman H [In silico detection and typing of plasmids using PlasmidFinder and plasmid multilocus sequence typing.](https://doi.org/10.1128/AAC.02412-14) _Antimicrobial Agents and Chemotherapy_ 58(7), 3895–3903. (2014) - diff --git a/docs/bactopia-tools/pneumocat.md b/docs/bactopia-tools/pneumocat.md deleted file mode 100644 index 0578776e..00000000 --- a/docs/bactopia-tools/pneumocat.md +++ /dev/null @@ -1,209 +0,0 @@ ---- -title: pneumocat -description: A Bactopia Tool which uses PneumoCaT to assign capsular type to _Streptococcus pneumoniae_ from sequence reads. - ---- -# Bactopia Tool - `pneumocat` -The `pneumocat` module uses [PneumoCaT](https://github.com/ukhsa-collaboration/PneumoCaT) to assign capsular -type to _Streptococcus pneumoniae_ from sequence reads. - - -## Example Usage -``` -bactopia --wf pneumocat \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `pneumocat` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── pneumocat -│ ├── .results.xml -│ ├── coverage_summary.txt -│ └── logs -│ ├── nf-pneumocat.{begin,err,log,out,run,sh,trace} -| ├── pneumo_capsular_typing.std{err|out} -│ └── versions.yml -└── bactopia-runs - └── pneumocat- - └── nf-reports - ├── pneumocat-dag.dot - ├── pneumocat-report.html - ├── pneumocat-timeline.html - └── pneumocat-trace.txt - -``` - - - -### Results - -#### PneumoCaT - -Below is a description of the _per-sample_ results from [PneumoCaT](https://github.com/ukhsa-collaboration/PneumoCaT). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.results.xml | An XML that details the final assigned capsular type | -| coverage_summary.txt | A summary of the coverage for each capsular type | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| pneumocat-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| pneumocat-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| pneumocat-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| pneumocat-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularityu. (e.g. Singularity: '-D `pwd`'
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `pneumocat` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [PneumoCaT](https://github.com/ukhsa-collaboration/PneumoCaT) - Kapatai G, Sheppard CL, Al-Shahib A, Litt DJ, Underwood AP, Harrison TG, and Fry NK [Whole genome sequencing of Streptococcus pneumoniae: development, evaluation and verification of targets for serogroup and serotype prediction using an automated pipeline.](https://doi.org/10.7717/peerj.2477) PeerJ, 4, e2477. (2016) - diff --git a/docs/bactopia-tools/prokka.md b/docs/bactopia-tools/prokka.md deleted file mode 100644 index 5b4fc149..00000000 --- a/docs/bactopia-tools/prokka.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -tags: - - annotation - - fasta ---- - - - -# Bactopia Tool - `prokka` -The `prokka` module uses [Prokka](https://github.com/tseemann/prokka) to rapidly annotate bacterial -genomes and plasmids. - - -## Example Usage -``` -bactopia --wf prokka \ - --bactopia /path/to/your/bactopia/results \ - --include includes.txt -``` - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | Default | -|---|---|---| -| `--bactopia` | The path to bactopia results to use as inputs | | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. 
- -| Parameter | Description | Default | -|---|---|---| -| `--include` | A text file containing sample names (one per line) to include from the analysis | | -| `--exclude` | A text file containing sample names (one per line) to exclude from the analysis | | - - -### Prokka Parameters - - -| Parameter | Description | Default | -|---|---|---| -| `--proteins` | FASTA file of trusted proteins to first annotate from | | -| `--prodigal_tf` | Training file to use for Prodigal | | -| `--prokka_coverage` | Minimum coverage on query protein | 80 | -| `--prokka_evalue` | Similarity e-value cut-off | 1e-09 | -| `--prokka_opts` | Extra Prokka options in quotes. | | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | Default | -|---|---|---| -| `--outdir` | Base directory to write results to | ./ | -| `--run_name` | Name of the directory to hold results | bactopia | -| `--skip_compression` | Ouput files will not be compressed | False | -| `--keep_all_files` | Keeps all analysis files created | False | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | Default | -|---|---|---| -| `--max_retry` | Maximum times to retry a process before allowing it to fail. | 3 | -| `--max_cpus` | Maximum number of CPUs that can be requested for any single job. | 4 | -| `--max_memory` | Maximum amount of memory (in GB) that can be requested for any single job. | 32 | -| `--max_time` | Maximum amount of time (in minutes) that can be requested for any single job. | 120 | -| `--max_downloads` | Maximum number of samples to download at a time | 3 | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | Default | -|---|---|---| -| `--nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set. 
| | -| `--publish_dir_mode` | Method used to save pipeline results to output directory. | copy | -| `--infodir` | Directory to keep pipeline Nextflow logs and reports. | ${params.outdir}/pipeline_info | -| `--force` | Nextflow will overwrite existing output files. | False | -| `--cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted. | False | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | Default | -|---|---|---| -| `--condadir` | Directory to Nextflow should use for Conda environments | | -| `--registry` | Docker registry to pull containers from. | dockerhub | -| `--singularity_cache` | Directory where remote Singularity images are stored. | | -| `--singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead. | | -| `--force_rebuild` | Force overwrite of existing pre-built environments. | False | -| `--queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM) | general,high-memory | -| `--cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name' | | -| `--disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node. | False | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | Default | -|---|---|---| -| `--monochrome_logs` | Do not use coloured log outputs. | | -| `--nfdir` | Print directory Nextflow has pulled Bactopia to | | -| `--sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution. | 5 | -| `--validate_params` | Boolean whether to validate parameters against the schema at runtime | True | -| `--help` | Display help text. 
| | -| `--wf` | Specify which workflow or Bactopia Tool to execute | bactopia | -| `--list_wfs` | List the available workflows and Bactopia Tools to use with '--wf' | | -| `--show_hidden_params` | Show all params when using `--help` | | -| `--help_all` | An alias for --help --show_hidden_params | | -| `--version` | Display version text. | | - -## Citations -If you use Bactopia and `prokka` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Prokka](https://github.com/tseemann/prokka) - Seemann T [Prokka: rapid prokaryotic genome annotation](http://dx.doi.org/10.1093/bioinformatics/btu153) _Bioinformatics_ 30, 2068–2069 (2014) - diff --git a/docs/bactopia-tools/quast.md b/docs/bactopia-tools/quast.md deleted file mode 100644 index 0325d46c..00000000 --- a/docs/bactopia-tools/quast.md +++ /dev/null @@ -1,249 +0,0 @@ ---- -title: quast -description: A Bactopia Tool which uses QUAST to assess the quality of assembled contigs. - ---- -# Bactopia Tool - `quast` -The `quast` module uses [QUAST](https://github.com/ablab/quast) to assess the quality of -assembled contigs. To assist assessment, QUAST produces a number of plots and tables that -can be evaluated. - - -## Example Usage -``` -bactopia --wf quast \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `quast` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── quast -│ ├── .tsv -│ ├── basic_stats/ -│ ├── icarus.html -│ ├── icarus_viewers -│ │ └── contig_size_viewer.html -│ ├── logs -│ │ ├── nf-quast.{begin,err,log,out,run,sh,trace} -│ │ ├── quast.log -│ │ └── versions.yml -│ ├── predicted_genes -│ │ ├── .stderr -│ │ └── _glimmer_genes.gff -│ ├── report.{html|pdf|tex|tsv|txt} -│ └── transposed_report.{tex|tsv|txt} -└── bactopia-runs - └── quast- - ├── merged-results - │ ├── logs - │ │ └── quast-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── quast.tsv - └── nf-reports - ├── quast-dag.dot - ├── quast-report.html - ├── quast-timeline.html - └── quast-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A summary of the quast results for all samples | - - -#### QUAST - -Below is a description of the _per-sample_ results from [QUAST](https://github.com/ablab/quast). - - -| Filename | Description | -|-------------------------------|-------------| -| basic_stats/ | A folder with plots of assembly metrics (e.g. GC content, NGx, Nx) | -| icarus.html | Icarus main menu with links to interactive viewers. | -| icarus_viewers/ | Additional reports for Icarus | -| predicted_genes/ | Predicted gene output | -| quast.log | Detailed information about the QUAST run | -| report.{html\|pdf\|tex\|tsv\|txt} | Assessment summary including all tables and plots | -| transposed_report.{tex\|tsv\|txt} | Transposed version of the assessment summary | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| quast-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| quast-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| quast-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| quast-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files is merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data.
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### Quast Parameters - - -| Parameter | Description | -|:---|---| -| ` --contig_thresholds` | Comma-separated list of contig length thresholds
**Type:** `string`, **Default:** `0,1000,10000,100000,250000,1000000` | -| ` --plots_format` | Save plots in specified format
**Type:** `string`, **Default:** `pdf` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name'
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularityu. (e.g. Singularity: '-D `pwd`'
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `quast` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [QUAST](http://quast.sourceforge.net/) - Gurevich A, Saveliev V, Vyahhi N, Tesler G [QUAST: quality assessment tool for genome assemblies.](http://dx.doi.org/10.1093/bioinformatics/btt086) _Bioinformatics_ 29, 1072–1075 (2013) - diff --git a/docs/bactopia-tools/rgi.md b/docs/bactopia-tools/rgi.md deleted file mode 100644 index bee463a8..00000000 --- a/docs/bactopia-tools/rgi.md +++ /dev/null @@ -1,243 +0,0 @@ ---- -title: rgi -description: A Bactopia Tool which uses Resistance Gene Identifier (RGI) to identify antibiotic resistance genes in assemblies. ---- -# Bactopia Tool - `rgi` -The `rgi` module uses [Resistance Gene Identifier (RGI)](https://github.com/arpcard/rgi) to identify antibiotic -resistance genes in assemblies. - - -## Example Usage -``` -bactopia --wf rgi \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `rgi` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── rgi -│ ├── .json -│ ├── .txt -│ └── logs -│ ├── nf-rgi.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── rgi- - ├── merged-results - │ ├── logs - │ │ ├── rgi-concat - │ │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ │ └── versions.yml - │ │ └── rgi-heatmap - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ ├── rgi-2.{csv,eps,png} - │ └── rgi.tsv - └── nf-reports - ├── rgi-dag.dot - ├── rgi-report.html - ├── rgi-timeline.html - └── rgi-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| rgi.tsv | A merged TSV file with `rgi` results from all samples | -| rgi-2.{csv,eps,png} | Heatmap representations as text and images, see [RGI - Heatmap](https://github.com/arpcard/rgi#generating-heat-maps-of-rgi-main-results) for more details | - - -#### rgi - -Below is a description of the _per-sample_ results from [RGI](https://github.com/arpcard/rgi). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.json | A JSON file with `rgi` results | -| <SAMPLE_NAME>.txt | A tab-delimited file with `egi` results, see [RGI - Output Details](https://github.com/arpcard/rgi#rgi-main-tab-delimited-output-details) for more details | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| rgi-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| rgi-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| rgi-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| rgi-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files is merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data.
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### RGI Parameters - - -| Parameter | Description | -|:---|---| -| ` --use_diamond` | Use DIAMOND for alignments instead of BLAST
**Type:** `boolean` | -| ` --include_loose` | Include loose hits in addition to strict and perfect hits
**Type:** `boolean` | -| ` --exclude_nudge` | Exclude hits nudged from loose to strict hits
**Type:** `boolean` | -| ` --rgi_frequency` | Represent samples based on resistance profile
**Type:** `boolean` | -| ` --rgi_category` | Organize resistance genes based on a category
**Type:** `string` | -| ` --rgi_cluster` | Use SciPy's hiearchical clustering algorithm to cluster rows (AMR genes) or columns (samples)
**Type:** `string` | -| ` --rgi_display` | Specify display options for categories
**Type:** `string`, **Default:** `plain` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Ouput files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory to Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `rgi` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [Resistance Gene Identifier (RGI)](https://github.com/arpcard/rgi) - Alcock BP, Raphenya AR, Lau TTY, Tsang KK, Bouchard M, Edalatmand A, Huynh W, Nguyen A-L V, Cheng AA, Liu S, Min SY, Miroshnichenko A, Tran H-K, Werfalli RE, Nasir JA, Oloni M, Speicher DJ, Florescu A, Singh B, Faltyn M, Hernandez-Koutoucheva A, Sharma AN, Bordeleau E, Pawlowski AC, Zubyk HL, Dooley D, Griffiths E, Maguire F, Winsor GL, Beiko RG, Brinkman FSL, Hsiao WWL, Domselaar GV, McArthur AG [CARD 2020: antibiotic resistome surveillance with the comprehensive antibiotic resistance database.](https://doi.org/10.1093/nar/gkz935) _Nucleic acids research_ 48.D1, D517-D525 (2020) - diff --git a/docs/bactopia-tools/roary.md b/docs/bactopia-tools/roary.md deleted file mode 100644 index 0035c3d9..00000000 --- a/docs/bactopia-tools/roary.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -tags: ---- - - - -# Bactopia Tool - `roary` -The `roary` module uses [Roary](https://github.com/sanger-pathogens/Roary) to create a pan-genome of -your samples. - - -## Example Usage -``` -bactopia --wf roary \ - --bactopia /path/to/your/bactopia/results \ - --include includes.txt -``` - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | Default | -|---|---|---| -| `--bactopia` | The path to bactopia results to use as inputs | | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. 
- -| Parameter | Description | Default | -|---|---|---| -| `--include` | A text file containing sample names (one per line) to include from the analysis | | -| `--exclude` | A text file containing sample names (one per line) to exclude from the analysis | | - - -### Roary Parameters - - -| Parameter | Description | Default | -|---|---|---| -| `--use_prank` | Use PRANK instead of MAFFT for core gene | False | -| `--use_roary` | Use Roary instead of PIRATE in the 'pangenome' subworkflow | False | -| `--i` | Minimum percentage identity for blastp | 95 | -| `--cd` | Percentage of isolates a gene must be in to be core | 99 | -| `--g` | Maximum number of clusters | 50000 | -| `--s` | Do not split paralogs | False | -| `--ap` | Allow paralogs in core alignment | False | -| `--iv` | MCL inflation value | 1.5 | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | Default | -|---|---|---| -| `--outdir` | Base directory to write results to | ./ | -| `--run_name` | Name of the directory to hold results | bactopia | -| `--skip_compression` | Ouput files will not be compressed | False | -| `--keep_all_files` | Keeps all analysis files created | False | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | Default | -|---|---|---| -| `--max_retry` | Maximum times to retry a process before allowing it to fail. | 3 | -| `--max_cpus` | Maximum number of CPUs that can be requested for any single job. | 4 | -| `--max_memory` | Maximum amount of memory (in GB) that can be requested for any single job. | 32 | -| `--max_time` | Maximum amount of time (in minutes) that can be requested for any single job. | 120 | -| `--max_downloads` | Maximum number of samples to download at a time | 3 | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. 
- -| Parameter | Description | Default | -|---|---|---| -| `--nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set. | | -| `--publish_dir_mode` | Method used to save pipeline results to output directory. | copy | -| `--infodir` | Directory to keep pipeline Nextflow logs and reports. | ${params.outdir}/pipeline_info | -| `--force` | Nextflow will overwrite existing output files. | False | -| `--cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted. | False | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | Default | -|---|---|---| -| `--condadir` | Directory to Nextflow should use for Conda environments | | -| `--registry` | Docker registry to pull containers from. | dockerhub | -| `--singularity_cache` | Directory where remote Singularity images are stored. | | -| `--singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead. | | -| `--force_rebuild` | Force overwrite of existing pre-built environments. | False | -| `--queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM) | general,high-memory | -| `--cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name' | | -| `--disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node. | False | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | Default | -|---|---|---| -| `--monochrome_logs` | Do not use coloured log outputs. | | -| `--nfdir` | Print directory Nextflow has pulled Bactopia to | | -| `--sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution. 
| 5 | -| `--validate_params` | Boolean whether to validate parameters against the schema at runtime | True | -| `--help` | Display help text. | | -| `--wf` | Specify which workflow or Bactopia Tool to execute | bactopia | -| `--list_wfs` | List the available workflows and Bactopia Tools to use with '--wf' | | -| `--show_hidden_params` | Show all params when using `--help` | | -| `--help_all` | An alias for --help --show_hidden_params | | -| `--version` | Display version text. | | - -## Citations -If you use Bactopia and `roary` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Roary](https://github.com/sanger-pathogens/Roary) - Page AJ, Cummins CA, Hunt M, Wong VK, Reuter S, Holden MTG, Fookes M, Falush D, Keane JA, Parkhill J [Roary: rapid large-scale prokaryote pan genome analysis.](https://doi.org/10.1093/bioinformatics/btv421) _Bioinformatics_ 31, 3691–3693 (2015) - diff --git a/docs/bactopia-tools/sccmec.md b/docs/bactopia-tools/sccmec.md deleted file mode 100644 index 934f1bbf..00000000 --- a/docs/bactopia-tools/sccmec.md +++ /dev/null @@ -1,249 +0,0 @@ ---- -title: sccmec -description: A Bactopia Tool which uses sccmec for typing SCCmec cassettes in _Staphylococcus aureus_ assemblies. ---- -# Bactopia Tool - `sccmec` -The `sccmec` module uses [sccmec](https://github.com/rpetit3/sccmec) for typing -SCCmec cassettes in _Staphylococcus aureus_ assemblies. - - -## Example Usage -``` -bactopia --wf sccmec \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `sccmec` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── sccmec - │ ├── .targets.blastn.tsv - │ ├── .targets.details.tsv - │ ├── .regions.blastn.tsv - │ ├── .regions.details.tsv -│ ├── .tsv -│ └── logs -│ ├── nf-sccmec.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── sccmec- - ├── merged-results - │ ├── logs - │ │ └── sccmec-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── sccmec.tsv - └── nf-reports - ├── sccmec-dag.dot - ├── sccmec-report.html - ├── sccmec-timeline.html - └── sccmec-trace.txt - -``` - -:::info[Directory structure might be different] - -`sccmec` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `sccmec` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| sccmec.tsv | A merged TSV file with `sccmec` results from all samples | - - -#### sccmec - -Below is a description of the _per-sample_ results from [sccmec](https://github.com/rpetit3/sccmec). - - -| Extension | Description | -|-------------------------------|-------------| -| .tsv | A tab-delimited file with the predicted type | -| .targets.blastn.tsv | A tab-delimited file of all target-specific blast hits | -| .targets.details.tsv | A tab-delimited file with details for each type based on targets | -| .regions.blastn.tsv | A tab-delimited file of all full cassette blast hits | -| .regions.details.tsv | A tab-delimited file with details for each type based on full cassettes | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. 
In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| sccmec-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| sccmec-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| sccmec-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| sccmec-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### sccmec Parameters - - -| Parameter | Description | -|:---|---| -| ` --sccmec_min_targets_pident` | Minimum percent identity to count a target hit
**Type:** `integer`, **Default:** `90` | -| ` --sccmec_min_targets_coverage` | Minimum percent coverage to count a target hit
**Type:** `integer`, **Default:** `80` | -| ` --sccmec_min_regions_pident` | Minimum percent identity to count a region hit
**Type:** `integer`, **Default:** `85` | -| ` --sccmec_min_regions_coverage` | Minimum percent coverage to count a region hit
**Type:** `integer`, **Default:** `93` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `sccmec` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [sccmec](https://github.com/rpetit3/sccmec) - Petit III RA, Read TD [sccmec: A tool for typing SCCmec cassettes in assemblies](https://github.com/rpetit3/sccmec) (GitHub) - diff --git a/docs/bactopia-tools/scoary.md b/docs/bactopia-tools/scoary.md deleted file mode 100644 index 0a2e5eab..00000000 --- a/docs/bactopia-tools/scoary.md +++ /dev/null @@ -1,122 +0,0 @@ ---- -tags: ---- - - - -# Bactopia Tool - `scoary` -The `scoary` module uses [Scoary](https://github.com/AdmiralenOla/Scoary) to conduct -pan-genome wide association studies of [PIRATE](https://github.com/SionBayliss/PIRATE) -and [Roary](https://github.com/sanger-pathogens/Roary) outputs. - - -## Example Usage -``` -bactopia --wf scoary \ - --bactopia /path/to/your/bactopia/results \ - --include includes.txt -``` - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | Default | -|---|---|---| -| `--bactopia` | The path to bactopia results to use as inputs | | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. 
- -| Parameter | Description | Default | -|---|---|---| -| `--include` | A text file containing sample names (one per line) to include from the analysis | | -| `--exclude` | A text file containing sample names (one per line) to exclude from the analysis | | - - -### Scoary Parameters - - -| Parameter | Description | Default | -|---|---|---| -| `--traits` | Input trait table (CSV) to test for associations | | -| `--p_value_cutoff` | For statistical tests, genes with higher p-values will not be reported | 0.05 | -| `--correction` | Apply the indicated filtration measure. | I | -| `--permute` | Perform N number of permutations of the significant results post-analysis | 0 | -| `--start_col` | On which column in the gene presence/absence file do individual strain info start | 15 | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | Default | -|---|---|---| -| `--outdir` | Base directory to write results to | ./ | -| `--run_name` | Name of the directory to hold results | bactopia | -| `--skip_compression` | Ouput files will not be compressed | False | -| `--keep_all_files` | Keeps all analysis files created | False | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | Default | -|---|---|---| -| `--max_retry` | Maximum times to retry a process before allowing it to fail. | 3 | -| `--max_cpus` | Maximum number of CPUs that can be requested for any single job. | 4 | -| `--max_memory` | Maximum amount of memory (in GB) that can be requested for any single job. | 32 | -| `--max_time` | Maximum amount of time (in minutes) that can be requested for any single job. | 120 | -| `--max_downloads` | Maximum number of samples to download at a time | 3 | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. 
- -| Parameter | Description | Default | -|---|---|---| -| `--nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set. | | -| `--publish_dir_mode` | Method used to save pipeline results to output directory. | copy | -| `--infodir` | Directory to keep pipeline Nextflow logs and reports. | ${params.outdir}/pipeline_info | -| `--force` | Nextflow will overwrite existing output files. | False | -| `--cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted. | False | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | Default | -|---|---|---| -| `--condadir` | Directory to Nextflow should use for Conda environments | | -| `--registry` | Docker registry to pull containers from. | dockerhub | -| `--singularity_cache` | Directory where remote Singularity images are stored. | | -| `--singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead. | | -| `--force_rebuild` | Force overwrite of existing pre-built environments. | False | -| `--queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM) | general,high-memory | -| `--cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name' | | -| `--disable_scratch` | All intermediate files created on worker nodes of will be transferred to the head node. | False | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | Default | -|---|---|---| -| `--monochrome_logs` | Do not use coloured log outputs. | | -| `--nfdir` | Print directory Nextflow has pulled Bactopia to | | -| `--sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution. 
| 5 | -| `--validate_params` | Boolean whether to validate parameters against the schema at runtime | True | -| `--help` | Display help text. | | -| `--wf` | Specify which workflow or Bactopia Tool to execute | bactopia | -| `--list_wfs` | List the available workflows and Bactopia Tools to use with '--wf' | | -| `--show_hidden_params` | Show all params when using `--help` | | -| `--help_all` | An alias for --help --show_hidden_params | | -| `--version` | Display version text. | | - -## Citations -If you use Bactopia and `scoary` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Scoary](https://github.com/AdmiralenOla/Scoary) - Brynildsrud O, Bohlin J, Scheffer L, Eldholm V [Rapid scoring of genes in microbial pan-genome-wide association studies with Scoary.](https://doi.org/10.1186/s13059-016-1108-8) _Genome Biol._ 17:238 (2016) - diff --git a/docs/bactopia-tools/scrubber.md b/docs/bactopia-tools/scrubber.md deleted file mode 100644 index 53e1f04e..00000000 --- a/docs/bactopia-tools/scrubber.md +++ /dev/null @@ -1,208 +0,0 @@ ---- -title: scrubber -description: A Bactopia Tool which uses sra-human-scrubber to identify and remove any potential human reads. ---- -# Bactopia Tool - `scrubber` -The `scrubber` module uses [sra-human-scrubber](https://github.com/ncbi/sra-human-scrubber/) -to identify and remove any potential human reads. - - -## Example Usage -``` -bactopia --wf scrubber \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `scrubber` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── srahumanscrubber -│ ├── _R1.scrubbed.fastq.gz -│ ├── _R2.scrubbed.fastq.gz -│ ├── .scrubbed.fastq.gz -│ └── logs -│ ├── nf-srahumanscrubber.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── scrubber- - └── nf-reports - ├── scrubber-dag.dot - ├── scrubber-report.html - ├── scrubber-timeline.html - └── scrubber-trace.txt - -``` - - - -### Results - -#### scrubber - -Below is a description of the _per-sample_ results from -[sra-human-scrubber](https://github.com/ncbi/sra-human-scrubber/). - - -| Extension | Description | -|-------------------------------|-------------| -| .scrubbed.fastq.gz | FASTQs with any flagged human reads removed. | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| scrubber-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| scrubber-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| scrubber-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| scrubber-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `scrubber` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [SRA Human Scrubber](https://github.com/ncbi/sra-human-scrubber) - Katz KS, Shutov O, Lapoint R, Kimelman M, Brister JR, and O’Sullivan C [STAT: a fast, scalable, MinHash-based k-mer tool to assess Sequence Read Archive next-generation sequence submissions.](https://doi.org/10.1186/s13059-021-02490-0) _Genome Biology_, 22(1), 270 (2021) - diff --git a/docs/bactopia-tools/seqsero2.md b/docs/bactopia-tools/seqsero2.md deleted file mode 100644 index 7b4fac92..00000000 --- a/docs/bactopia-tools/seqsero2.md +++ /dev/null @@ -1,243 +0,0 @@ ---- -title: seqsero2 -description: A Bactopia Tool which uses SeqSero2 for Salmonella serotype prediction from reads and assemblies. ---- -# Bactopia Tool - `seqsero2` -The `seqsero2` module uses [SeqSero2](https://github.com/denglab/SeqSero2) for Salmonella -serotype prediction from reads and assemblies. - - -## Example Usage -``` -bactopia --wf seqsero2 \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `seqsero2` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── seqsero2 -│ ├── _log.txt -│ ├── _result.tsv -│ ├── _result.txt -│ └── logs -│ ├── nf-seqsero2.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── seqsero2- - ├── merged-results - │ ├── logs - │ │ └── seqsero2-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── seqsero2.tsv - └── nf-reports - ├── seqsero2-dag.dot - ├── seqsero2-report.html - ├── seqsero2-timeline.html - └── seqsero2-trace.txt - -``` - -:::info[Directory structure might be different] - -`seqsero2` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `seqsero2` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| seqsero2.tsv | A merged TSV file with `SeqSero2` results from all samples | - - -#### SeqSero2 - -Below is a description of the _per-sample_ results from [SeqSero2](https://github.com/denglab/SeqSero2). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_result.tsv | A tab-delimited file with `SeqSero2` results | -| <SAMPLE_NAME>_result.txt | A text file with key-value pairs of `SeqSero2` results | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| seqsero2-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| seqsero2-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| seqsero2-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| seqsero2-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files is merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include in the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### SeqSero2 Parameters - - -| Parameter | Description | -|:---|---| -| ` --run_mode` | Workflow to run. 'a' allele mode, or 'k' k-mer mode
**Type:** `string`, **Default:** `k` | -| ` --input_type` | Input format to analyze. 'assembly' or 'fastq'
**Type:** `string`, **Default:** `assembly` | -| ` --bwa_mode` | Algorithms for bwa mapping for allele mode
**Type:** `string`, **Default:** `mem` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `seqsero2` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [SeqSero2](https://github.com/denglab/SeqSero2) - Zhang S, Den-Bakker HC, Li S, Dinsmore BA, Lane C, Lauer AC, Fields PI, Deng X. [SeqSero2: rapid and improved Salmonella serotype determination using whole genome sequencing data.](https://doi.org/10.1128/AEM.01746-19) _Appl Environ Microbiology_ 85(23):e01746-19 (2019) - diff --git a/docs/bactopia-tools/seroba.md b/docs/bactopia-tools/seroba.md deleted file mode 100644 index d80f12b1..00000000 --- a/docs/bactopia-tools/seroba.md +++ /dev/null @@ -1,241 +0,0 @@ ---- -title: seroba -description: A Bactopia Tool which uses Seroba to predict the serotype of _Streptococcus pneumoniae_ samples with Illumina paired-end reads. ---- -# Bactopia Tool - `seroba` -The `seroba` module uses [Seroba](https://github.com/sanger-pathogens/seroba) to predict the -serotype of _Streptococcus pneumoniae_ samples with Illumina paired-end reads. - - -## Example Usage -``` -bactopia --wf seroba \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `seroba` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── seroba -│ ├── logs -│ │ ├── nf-seroba.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ └── .tsv -└── bactopia-runs - └── seroba- - ├── merged-results - │ ├── logs - │ │ └── seroba-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── seroba.tsv - └── nf-reports - ├── seroba-dag.dot - ├── seroba-report.html - ├── seroba-timeline.html - └── seroba-trace.txt - -``` - -:::info[Directory structure might be different] - -`seroba` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `seroba` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| seroba.tsv | A merged TSV file with `seroba` results from all samples | - - -#### Seroba - -Below is a description of the _per-sample_ results from [Seroba](https://github.com/sanger-pathogens/seroba). -More details about the outputs are available from [Seroba - Output](https://sanger-pathogens.github.io/seroba/#output). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with the predicted serotype | -| detailed_serogroup_info.txt | Detailed information about the predicted results | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| seroba-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| seroba-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| seroba-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| seroba-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files is merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include in the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### Seroba Parameters - - -| Parameter | Description | -|:---|---| -| ` --seroba_noclean` | Do not clean up intermediate files
**Type:** `boolean` | -| ` --seroba_coverage` | Threshold for k-mer coverage of the reference sequence
**Type:** `integer`, **Default:** `20` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `seroba` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [Seroba](https://github.com/sanger-pathogens/seroba) - Epping L, van Tonder AJ, Gladstone RA, The Global Pneumococcal Sequencing Consortium, Bentley SD, Page AJ, Keane JA [SeroBA: rapid high-throughput serotyping of Streptococcus pneumoniae from whole genome sequence data.](https://doi.org/10.1099/mgen.0.000186) _Microbial Genomics_, 4(7) (2018) - diff --git a/docs/bactopia-tools/shigapass.md b/docs/bactopia-tools/shigapass.md deleted file mode 100644 index eba76618..00000000 --- a/docs/bactopia-tools/shigapass.md +++ /dev/null @@ -1,233 +0,0 @@ ---- -title: shigapass -description: A Bactopia Tool which uses ShigaPass to conduct _in silico_ prediction of serotype for Shigella and EIEC genomes. ---- -# Bactopia Tool - `shigapass` -The `shigapass` module uses [ShigaPass](https://github.com/imanyass/ShigaPass) for -_in silico_ prediction of serotypes in _Shigella_ assemblies. It will also differentiate -between _Shigella_, EIEC (Enteroinvasive _E. coli_) and non _Shigella_/EIEC. - - -## Example Usage -``` -bactopia --wf shigapass \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `shigapass` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── shigapass -│ ├── .csv -│ └── logs -│ ├── nf-shigapass.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── shigapass - ├── merged-results - │ ├── logs - │ │ └── shigapass-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── shigapass.csv - └── nf-reports - ├── shigapass-dag.dot - ├── shigapass-report.html - ├── shigapass-timeline.html - └── shigapass-trace.txt - -``` - -:::info[Directory structure might be different] - -`shigapass` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `shigapass` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| shigapass.csv | A merged CSV file with `ShigaPass` results from all samples | - - -#### ShigaPass - -Below is a description of the _per-sample_ results from [ShigaPass](https://github.com/imanyass/ShigaPass). - - -| Extension | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.csv | A CSV file with the predicted Shigella or EIEC serotype | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| shigapass-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| shigapass-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| shigapass-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| shigapass-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files is merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `shigapass` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [shigapass](https://github.com/imanyass/ShigaPass) - Yassine I, Hansen EE, Lefèvre S, Ruckly C, Carle I, Lejay-Collin M, Fabre L, Rafei R, Pardos de la Gandara M, Daboussi F, Shahin A, Weill FX [ShigaPass: an in silico tool predicting Shigella serotypes from whole-genome sequencing assemblies.](https://doi.org/10.1099%2Fmgen.0.000961) _Microb Genomics_ 9(3) (2023) - diff --git a/docs/bactopia-tools/shigatyper.md b/docs/bactopia-tools/shigatyper.md deleted file mode 100644 index 27a0663c..00000000 --- a/docs/bactopia-tools/shigatyper.md +++ /dev/null @@ -1,234 +0,0 @@ ---- -title: shigatyper -description: A Bactopia Tool which uses ShigaTyyper to rapidly determine Shigella serotype using Illumina or Oxford Nanopore reads. ---- -# Bactopia Tool - `shigatyper` -The `shigatyper` module uses [ShigaTyyper](https://github.com/CFSAN-Biostatistics/shigatyper) to rapidly determine -Shigella serotype using Illumina (single or paired-end) or Oxford Nanopore reads. - - -## Example Usage -``` -bactopia --wf shigatyper \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `shigatyper` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── shigatyper -│ ├── -hits.tsv -│ ├── .tsv -│ └── logs -│ ├── nf-shigatyper.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── shigatyper- - ├── merged-results - │ ├── logs - │ │ └── shigatyper-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── shigatyper.tsv - └── nf-reports - ├── shigatyper-dag.dot - ├── shigatyper-report.html - ├── shigatyper-timeline.html - └── shigatyper-trace.txt - -``` - -:::info[Directory structure might be different] - -`shigatyper` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `shigatyper` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| shigatyper.tsv | A merged TSV file with `ShigaTyper` results from all samples | - - -#### ShigaTyper - -Below is a description of the _per-sample_ results from [ShigaTyyper](https://github.com/CFSAN-Biostatistics/shigatyper). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-hits.tsv | Detailed statistics about each individual gene hit | -| <SAMPLE_NAME>.tsv | The final predicted serotype by `ShigaTyper` | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| shigatyper-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| shigatyper-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| shigatyper-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| shigatyper-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `shigatyper` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [ShigaTyper](https://github.com/CFSAN-Biostatistics/shigatyper) - Wu Y, Lau HK, Lee T, Lau DK, Payne J [In Silico Serotyping Based on Whole-Genome Sequencing Improves the Accuracy of Shigella Identification.](https://doi.org/10.1128/AEM.00165-19) *Applied and Environmental Microbiology*, 85(7). (2019) - diff --git a/docs/bactopia-tools/shigeifinder.md b/docs/bactopia-tools/shigeifinder.md deleted file mode 100644 index 7ba3e46b..00000000 --- a/docs/bactopia-tools/shigeifinder.md +++ /dev/null @@ -1,233 +0,0 @@ ---- -title: shigeifinder -description: A Bactopia Tool which uses ShigEiFinder to conduct _in silico_ prediction of serotype for Shigella and EIEC genomes. ---- -# Bactopia Tool - `shigeifinder` -The `shigeifinder` module uses [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) to conduct -_in silico_ prediction of serotype for Shigella and EIEC. It uses the genome assemblies -tp provide basic species identification and the predicted serotype (e.g. O174:H21). - - -## Example Usage -``` -bactopia --wf shigeifinder \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `shigeifinder` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── shigeifinder -│ ├── GCF_016726285.tsv -│ └── logs -│ ├── nf-shigeifinder.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── shigeifinder- - ├── merged-results - │ ├── logs - │ │ └── shigeifinder-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── shigeifinder.tsv - └── nf-reports - ├── shigeifinder-dag.dot - ├── shigeifinder-report.html - ├── shigeifinder-timeline.html - └── shigeifinder-trace.txt - -``` - -:::info[Directory structure might be different] - -`shigeifinder` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `shigeifinder` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| shigeifinder.tsv | A merged TSV file with `ShigEiFinder` results from all samples | - - -#### ShigEiFinder - -Below is a description of the _per-sample_ results from [ShigEiFinder](https://github.com/LanLab/ShigEiFinder). - - -| Extension | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with the predicted Shigella or EIEC serotype | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| shigeifinder-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| shigeifinder-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| shigeifinder-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| shigeifinder-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `shigeifinder` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) - Zhang X, Payne M, Nguyen T, Kaur S, Lan R [Cluster-specific gene markers enhance Shigella and enteroinvasive Escherichia coli in silico serotyping.](https://doi.org/10.1099/mgen.0.000704) Microbial Genomics, 7(12). (2021) - diff --git a/docs/bactopia-tools/sistr.md b/docs/bactopia-tools/sistr.md deleted file mode 100644 index 6f509472..00000000 --- a/docs/bactopia-tools/sistr.md +++ /dev/null @@ -1,244 +0,0 @@ ---- -title: sistr -description: A Bactopia Tool which uses Salmonella In Silico Typing Resource, or SISTR, for serovar prediction of Salmonella assemblies. ---- -# Bactopia Tool - `sistr` -The `sistr` module uses [Salmonella In Silico Typing Resource](https://github.com/phac-nml/sistr_cmd), -or SISTR, for serovar prediction of Salmonella assemblies. - - -## Example Usage -``` -bactopia --wf sistr \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `sistr` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── sistr -│ ├── -allele.fasta.gz -│ ├── -allele.json.gz -│ ├── -cgmlst.csv -│ ├── .tsv -│ └── logs -│ ├── nf-sistr.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── sistr- - ├── merged-results - │ ├── logs - │ │ └── sistr-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── sistr.tsv - └── nf-reports - ├── sistr-dag.dot - ├── sistr-report.html - ├── sistr-timeline.html - └── sistr-trace.txt - -``` - -:::info[Directory structure might be different] - -`sistr` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `sistr` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| sistr.tsv | A merged TSV file with `SISTR` results from all samples | - - -#### SISTR - -Below is a description of the _per-sample_ results from [SISTR](https://github.com/phac-nml/sistr_cmd). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-allele.fasta.gz | A FASTA file of the cgMLST allele search results | -| <SAMPLE_NAME>-allele.json.gz | JSON formated cgMLST allele search results, see [SISTR - cgMLST search results](https://github.com/phac-nml/sistr_cmd#cgmlst-allele-search-results) for more details | -| <SAMPLE_NAME>-cgmlst.csv | A comma-delimited summary of the cgMLST allele search results | -| <SAMPLE_NAME>.tsv | A tab-delimited file with `SISTR` results, see [SISTR - Primary results](https://github.com/phac-nml/sistr_cmd#primary-results-output--o-sistr-results) for more details | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. 
- -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| sistr-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| sistr-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| sistr-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| sistr-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. 
- -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include in the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### SISTR Parameters - - -| Parameter | Description | -|:---|---| -| ` --full_cgmlst` | Use the full set of cgMLST alleles which can include highly similar alleles
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `sistr` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [SISTR](https://github.com/phac-nml/sistr_cmd) - Yoshida CE, Kruczkiewicz P, Laing CR, Lingohr EJ, Gannon VPJ, Nash JHE, Taboada EN [The Salmonella In Silico Typing Resource (SISTR): An Open Web-Accessible Tool for Rapidly Typing and Subtyping Draft Salmonella Genome Assemblies.](https://doi.org/10.1371/journal.pone.0147101) _PloS One_, 11(1), e0147101. (2016) - diff --git a/docs/bactopia-tools/snippy.md b/docs/bactopia-tools/snippy.md deleted file mode 100644 index 87d5226a..00000000 --- a/docs/bactopia-tools/snippy.md +++ /dev/null @@ -1,436 +0,0 @@ ---- -title: snippy -description: A Bactopia Tool which uses Snippy to call SNPs and InDels against a reference and create a core-SNP phylogeny using IQ-Tree and Gubbins. ---- -# Bactopia Tool - `snippy` -The `snippy` subworkflow allows you to call SNPs and InDels against a reference with -[Snippy](https://github.com/tseemann/snippy). With the called SNPs/InDels, [snippy-core](https://github.com/tseemann/snippy#core-snp-phylogeny) -a core-SNP alignment is created. - -A phylogeny, based on the core-SNP alignment, will be created by [IQ-Tree](https://github.com/Cibiv/IQ-TREE). Optionally -a recombination-masked core-SNP alignment can be created with [Gubbins](https://github.com/nickjcroucher/gubbins). - -Finally, the pair-wise SNP distance for each sample is also calculated with -[snp-dists](https://github.com/tseemann/snp-dists). 
- - -## Example Usage -``` -bactopia --wf snippy \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `snippy` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── snippy -│ └── -│ ├── logs -│ │ ├── nf-snippy.{begin,err,log,out,run,sh,trace} -│ │ ├── .log -│ │ └── versions.yml -│ ├── .aligned.fa.gz -│ ├── .annotated.vcf.gz -│ ├── .bam -│ ├── .bam.bai -│ ├── .bed.gz -│ ├── .consensus.fa.gz -│ ├── .consensus.subs.fa.gz -│ ├── .consensus.subs.masked.fa.gz -│ ├── .coverage.txt.gz -│ ├── .csv.gz -│ ├── .filt.vcf.gz -│ ├── .gff.gz -│ ├── .html -│ ├── .raw.vcf.gz -│ ├── .subs.vcf.gz -│ ├── .tab -│ ├── .txt -│ └── .vcf.gz -└── bactopia-runs - └── snippy- - ├── core-snp-clean.full.aln.gz - ├── core-snp.full.aln.gz - ├── .samples.txt - ├── gubbins - │ ├── core-snp.branch_base_reconstruction.embl.gz - │ ├── core-snp.filtered_polymorphic_sites.fasta.gz - │ ├── core-snp.filtered_polymorphic_sites.phylip - │ ├── core-snp.final_tree.tre - │ ├── core-snp.node_labelled.final_tree.tre - │ ├── core-snp.per_branch_statistics.csv - │ ├── core-snp.recombination_predictions.embl.gz - │ ├── core-snp.recombination_predictions.gff.gz - │ ├── core-snp.summary_of_snp_distribution.vcf.gz - │ └── logs - │ ├── core-snp.log - │ ├── nf-gubbins.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - ├── iqtree - │ ├── core-snp.alninfo - │ ├── core-snp.bionj - │ ├── core-snp.ckp.gz - │ ├── core-snp.contree - │ ├── core-snp.iqtree - │ ├── core-snp.mldist - │ ├── core-snp.splits.nex - │ ├── core-snp.treefile - │ ├── core-snp.ufboot - │ └── logs - │ ├── core-snp.log - │ ├── nf-iqtree.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - ├── nf-reports - │ ├── snippy-dag.dot - │ ├── snippy-report.html - │ ├── snippy-timeline.html - │ └── snippy-trace.txt - ├── snippy-core - │ ├── core-snp.aln.gz - │ ├── core-snp.tab.gz - │ ├── core-snp.txt - │ ├── 
core-snp.vcf.gz - │ └── logs - │ ├── nf-snippy-core.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── snpdists - ├── core-snp.distance.tsv - └── logs - ├── nf-snpdists.{begin,err,log,out,run,sh,trace} - └── versions.yml - -``` - - - -### Results - -#### Main Results - -Below are the main results from the `snippy` Bactopia Tool. - - -| Filename | Description | -|-------------------------------|-------------| -| core-snp-clean.full.aln.gz | Same as `core-snp.full.aln.gz` with unusual characters replaced with `N` | -| core-snp.distance.tsv | Core genome Pair-wise SNP distance for each sample | -| core-snp.full.aln.gz | A whole genome SNP alignment (includes invariant sites) | -| core-genome.iqtree | Full result of the IQ-TREE core genome phylogeny | -| core-genome.masked.aln.gz | A core-SNP alignment with the recombination masked | -| <REFERENCE_NAME>.samples.txt | The reference and samples used in the run | - - -#### Gubbins - -Below is a description of the [Gubbins](https://github.com/nickjcroucher/gubbins) results. For more details about -Gubbins outputs see [Gubbins - Outputs](https://github.com/nickjcroucher/gubbins/blob/master/docs/gubbins_manual.md#output-files). 
- - -| Filename | Description | -|-------------------------------|-------------| -| core-snp.branch_base_reconstruction.embl.gz | Base substitution reconstruction in EMBL format | -| core-snp.filtered_polymorphic_sites.fasta.gz | FASTA format alignment of filtered polymorphic sites | -| core-snp.filtered_polymorphic_sites.phylip | Phylip format alignment of filtered polymorphic sites | -| core-snp.final_tree.tre | Final phylogeny in Newick format (_branch lengths are in point mutations_) | -| core-snp.node_labelled.final_tree.tre | Final phylogeny in Newick format but with internal node labels | -| core-snp.per_branch_statistics.csv | Per-branch reporting of the base substitutions inside and outside recombination events | -| core-snp.recombination_predictions.embl.gz | Recombination predictions in EMBL file format | -| core-snp.recombination_predictions.gff.gz | Recombination predictions in GFF file format | -| core-snp.summary_of_snp_distribution.vcf.gz | VCF file summarising the distribution of point mutations | | - - -#### IQ-TREE - -Below is a description of the [IQ-TREE](http://www.iqtree.org/) results. If ClonalFrameML is executed, a fast tree -is created and given the prefix `start-tree`, the final tree has the prefix `core-genome`. For more details about -IQ-TREE outputs see [IQ-TREE - Outputs](https://github.com/Cibiv/IQ-TREE/wiki/Web-Server-Tutorial#analysis-results). 
- - -| Filename | Description | -|-------------------------------|-------------| -| core-snp.alninfo | Alignment site statistics | -| core-snp.bionj | A neighbor joining tree produced by BIONJ | -| core-snp.ckp.gz | IQ-TREE writes a checkpoint file | -| core-snp.contree | Consensus tree with assigned branch supports where branch lengths are optimized on the original alignment; printed if Ultrafast Bootstrap is selected | -| core-snp.mldist | Contains the likelihood distances | -| core-snp.splits.nex | Support values in percentage for all splits (bipartitions), computed as the occurrence frequencies in the bootstrap trees | -| core-snp.treefile | Maximum likelihood tree in NEWICK format, can be visualized with treeviewer programs | -| core-snp.ufboot | Trees created during the bootstrap steps | - - -#### Snippy - -Below is a description of the per-sample [Snippy](https://github.com/tseemann/snippy) results. For more details about -Snippy outputs see [Snippy - Outputs](https://github.com/tseemann/snippy#output-files). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.aligned.fa.gz | A version of the reference but with `-` at position with `depth=0` and `N` for `0 < depth < --mincov` (**does not have variants**) | -| <SAMPLE_NAME>.annotated.vcf.gz | The final variant calls with additional annotations from Reference genome's GenBank file | -| <SAMPLE_NAME>.bam | The alignments in [BAM](http://en.wikipedia.org/wiki/SAMtools) format. Includes unmapped, multimapped reads. 
Excludes duplicates | -| <SAMPLE_NAME>.bam.bai | Index for the .bam file | -| <SAMPLE_NAME>.bed.gz | The variants in [BED](http://genome.ucsc.edu/FAQ/FAQformat.html#format1) format | -| <SAMPLE_NAME>.consensus.fa.gz | A version of the reference genome with *all* variants instantiated | -| <SAMPLE_NAME>.consensus.subs.fa.gz | A version of the reference genome with *only substitution* variants instantiated | -| <SAMPLE_NAME>.consensus.subs.masked.fa.gz | A version of the reference genome with *only substitution* variants instantiated and low-coverage regions masked | -| <SAMPLE_NAME>.coverage.txt.gz | The per-base coverage of each position in the reference genome | -| <SAMPLE_NAME>.csv.gz | A [comma-separated](http://en.wikipedia.org/wiki/Comma-separated_values) version of the .tab file | -| <SAMPLE_NAME>.filt.vcf.gz | The filtered variant calls from Freebayes | -| <SAMPLE_NAME>.gff.gz | The variants in [GFF3](http://www.sequenceontology.org/gff3.shtml) format | -| <SAMPLE_NAME>.html | A [HTML](http://en.wikipedia.org/wiki/HTML) version of the .tab file | -| <SAMPLE_NAME>.raw.vcf.gz | The unfiltered variant calls from Freebayes | -| <SAMPLE_NAME>.subs.vcf.gz | _Only substitution_ variants from the final annotated variants | -| <SAMPLE_NAME>.tab | A simple [tab-separated](http://en.wikipedia.org/wiki/Tab-separated_values) summary of all the variants | -| <SAMPLE_NAME>.txt | A summary of the Snippy run | -| <SAMPLE_NAME>.vcf.gz | The final annotated variants in [VCF](http://en.wikipedia.org/wiki/Variant_Call_Format) format | - - -#### Snippy-Core - -Below is a description of the [Snippy-Core](https://github.com/tseemann/snippy) results. For more details about -Snippy-Core outputs see [Snippy-Core - Outputs](https://github.com/tseemann/snippy#output-files-1). 
- - -| Filename | Description | -|-------------------------------|-------------| -| core-snp.aln.gz | A core SNP alignment in FASTA format | -| core-snp.tab.gz | Tab-separated columnar list of core SNP sites with alleles but **NO** annotations | -| core-snp.txt | Tab-separated columnar list of alignment/core-size statistics | -| core-snp.vcf.gz | Multi-sample VCF file with genotype GT tags for all discovered alleles | | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| snippy-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| snippy-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| snippy-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| snippy-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include in the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### Snippy Parameters - - -| Parameter | Description | -|:---|---| -| ` --reference` | Reference genome in GenBank format
**Type:** `string` | -| ` --mapqual` | Minimum read mapping quality to consider
**Type:** `integer`, **Default:** `60` | -| ` --basequal` | Minimum base quality to consider
**Type:** `integer`, **Default:** `13` | -| ` --mincov` | Minimum site depth for calling alleles
**Type:** `integer`, **Default:** `10` | -| ` --minfrac` | Minimum proportion for variant evidence (0=AUTO)
**Type:** `integer` | -| ` --minqual` | Minimum QUALITY in VCF column 6
**Type:** `integer`, **Default:** `100` | -| ` --maxsoft` | Maximum soft clipping to allow
**Type:** `integer`, **Default:** `10` | -| ` --bwaopt` | Extra BWA MEM options, eg. -x pacbio
**Type:** `string` | -| ` --fbopt` | Extra Freebayes options, eg. --theta 1E-6 --read-snp-limit 2
**Type:** `string` | -| ` --snippy_opts` | Extra options in quotes for Snippy
**Type:** `string` | - -### Snippy-Core Parameters - - -| Parameter | Description | -|:---|---| -| ` --maxhap` | Largest haplotype to decompose
**Type:** `integer`, **Default:** `100` | -| ` --mask` | BED file of sites to mask
**Type:** `string` | -| ` --mask_char` | Masking character
**Type:** `string`, **Default:** `X` | -| ` --snippy_core_opts` | Extra options in quotes for snippy-core
**Type:** `string` | - -### Gubbins Parameters - - -| Parameter | Description | -|:---|---| -| ` --iterations` | Maximum number of iterations
**Type:** `integer`, **Default:** `5` | -| ` --min_snps` | Min SNPs to identify a recombination block
**Type:** `integer`, **Default:** `3` | -| ` --min_window_size` | Minimum window size
**Type:** `integer`, **Default:** `100` | -| ` --max_window_size` | Maximum window size
**Type:** `integer`, **Default:** `10000` | -| ` --filter_percentage` | Filter out taxa with more than this percentage of gaps
**Type:** `number`, **Default:** `25.0` | -| ` --remove_identical_sequences` | Remove identical sequences
**Type:** `boolean` | -| ` --gubbin_opts` | Extra Gubbins options in quotes
**Type:** `string` | -| ` --skip_recombination` | Skip Gubbins execution in subworkflows
**Type:** `boolean` | - -### IQ-TREE Parameters - - -| Parameter | Description | -|:---|---| -| ` --iqtree_model` | Substitution model name
**Type:** `string`, **Default:** `HKY` | -| ` --bb` | Ultrafast bootstrap replicates
**Type:** `integer`, **Default:** `1000` | -| ` --alrt` | SH-like approximate likelihood ratio test replicates
**Type:** `integer`, **Default:** `1000` | -| ` --asr` | Ancestral state reconstruction by empirical Bayes
**Type:** `boolean` | -| ` --iqtree_opts` | Extra IQ-TREE options in quotes.
**Type:** `string` | -| ` --skip_phylogeny` | Skip IQ-TREE execution in subworkflows
**Type:** `boolean` | - -### SNP-Dists Parameters - - -| Parameter | Description | -|:---|---| -| ` --a` | Count all differences not just [AGTC]
**Type:** `boolean` | -| ` --b` | Keep top left corner cell
**Type:** `boolean` | -| ` --csv` | Output CSV instead of TSV
**Type:** `boolean` | -| ` --k` | Keep case, don't uppercase all letters
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `snippy` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Gubbins](https://github.com/nickjcroucher/gubbins) - Croucher NJ, Page AJ, Connor TR, Delaney AJ, Keane JA, Bentley SD, Parkhill J, Harris SR [Rapid phylogenetic analysis of large samples of recombinant bacterial whole genome sequences using Gubbins.](https://doi.org/10.1093/nar/gku1196) _Nucleic Acids Research_ 43(3), e15. (2015) - -- [IQ-TREE](https://github.com/Cibiv/IQ-TREE) - Nguyen L-T, Schmidt HA, von Haeseler A, Minh BQ [IQ-TREE: A fast and effective stochastic algorithm for estimating maximum likelihood phylogenies.](https://doi.org/10.1093/molbev/msu300) _Mol. Biol. Evol._ 32:268-274 (2015) - -- [ModelFinder](https://github.com/Cibiv/IQ-TREE) - Kalyaanamoorthy S, Minh BQ, Wong TKF, von Haeseler A, Jermiin LS [ModelFinder - Fast model selection for accurate phylogenetic estimates.](https://doi.org/10.1038/nmeth.4285) _Nat. Methods_ 14:587-589 (2017) - -- [UFBoot2](https://github.com/Cibiv/IQ-TREE) - Hoang DT, Chernomor O, von Haeseler A, Minh BQ, Vinh LS [UFBoot2: Improving the ultrafast bootstrap approximation.](https://doi.org/10.1093/molbev/msx281) _Mol. Biol. 
Evol._ 35:518–522 (2018) - -- [Snippy](https://github.com/tseemann/snippy) - Seemann T [Snippy: fast bacterial variant calling from NGS reads](https://github.com/tseemann/snippy) (GitHub) - -- [snp-dists](https://github.com/tseemann/snp-dists) - Seemann T [snp-dists - Pairwise SNP distance matrix from a FASTA sequence alignment.](https://github.com/tseemann/snp-dists) (GitHub) - diff --git a/docs/bactopia-tools/spatyper.md b/docs/bactopia-tools/spatyper.md deleted file mode 100644 index 8134bb7c..00000000 --- a/docs/bactopia-tools/spatyper.md +++ /dev/null @@ -1,241 +0,0 @@ ---- -title: spatyper -description: A Bactopia Tool which uses spaTyper to assign _spa_ types to _Staphylococcus aureus_ assemblies. ---- -# Bactopia Tool - `spatyper` -The `spatyper` module uses [spaTyper](https://github.com/HCGB-IGTP/spaTyper) to assign _spa_ types to _Staphylococcus aureus_ assemblies. - -## Example Usage -``` -bactopia --wf spatyper \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `spatyper` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── spatyper -│ ├── .tsv -│ └── logs -│ ├── nf-spatyper.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── spatyper- - ├── merged-results - │ ├── logs - │ │ └── spatyper-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── spatyper.tsv - └── nf-reports - ├── spatyper-dag.dot - ├── spatyper-report.html - ├── spatyper-timeline.html - └── spatyper-trace.txt - -``` - -:::info[Directory structure might be different] - -`spatyper` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `spatyper` directory structure might be different, but the -output descriptions below still apply. 
-::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| spatyper.tsv | A merged TSV file with `spaTyper` results from all samples | - - -#### spaTyper - -Below is a description of the _per-sample_ results from [spaTyper](https://github.com/HCGB-IGTP/spaTyper). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `spaTyper` result | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. 
- -| Filename | Description | -|----------|-------------| -| spatyper-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| spatyper-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| spatyper-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| spatyper-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### spaTyper Parameters - - -| Parameter | Description | -|:---|---| -| ` --repeats` | List of spa repeats
**Type:** `string` | -| ` --repeat_order` | List spa types and order of repeats
**Type:** `string` | -| ` --do_enrich` | Do PCR product enrichment
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `spatyper` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [spaTyper](https://github.com/HCGB-IGTP/spaTyper) - Sanchez-Herrero JF, and Sullivan M [spaTyper: Staphylococcal protein A (spa) characterization pipeline](http://doi.org/10.5281/zenodo.4063625). Zenodo. (2020) - -- [spaTyper Database](https://cge.cbs.dtu.dk/services/spatyper/) - Harmsen D, Claus H, Witte W, Rothgänger J, Claus H, Turnwald D, and Vogel U [Typing of methicillin-resistant _Staphylococcus aureus_ in a university hospital setting using a novel software for spa-repeat determination and database management.](https://doi.org/10.1128/jcm.41.12.5442-5448.2003) _J. Clin. Microbiol._ 41:5442-5448 (2003) - diff --git a/docs/bactopia-tools/ssuissero.md b/docs/bactopia-tools/ssuissero.md deleted file mode 100644 index 9cb425bb..00000000 --- a/docs/bactopia-tools/ssuissero.md +++ /dev/null @@ -1,232 +0,0 @@ ---- -title: ssuissero -description: A Bactopia Tool which uses SsuisSero to predict the serotype of _Streptococcus suis_ assemblies. ---- -# Bactopia Tool - `ssuissero` -The `ssuissero` module uses [SsuisSero](https://github.com/jimmyliu1326/SsuisSero) to predict -the serotype of _Streptococcus suis_ assemblies. - - -## Example Usage -``` -bactopia --wf ssuissero \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `ssuissero` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── ssuissero -│ ├── _serotyping_res.tsv -│ └── logs -│ ├── nf-ssuissero.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── ssuissero- - ├── merged-results - │ ├── logs - │ │ └── ssuissero-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── ssuissero.tsv - └── nf-reports - ├── ssuissero-dag.dot - ├── ssuissero-report.html - ├── ssuissero-timeline.html - └── ssuissero-trace.txt - -``` - -:::info[Directory structure might be different] - -`ssuissero` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `ssuissero` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| ssuissero.tsv | A merged TSV file with `SsuisSero` results from all samples | - - -#### SsuisSero - -Below is a description of the _per-sample_ results from [SsuisSero](https://github.com/jimmyliu1326/SsuisSero). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_serotyping_res.tsv | A tab-delimited file with `SsuisSero` results | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| ssuissero-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| ssuissero-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| ssuissero-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| ssuissero-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `ssuissero` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [SsuisSero](https://github.com/jimmyliu1326/SsuisSero) - Lui J [SsuisSero: Rapid _Streptococcus suis_ serotyping](https://github.com/jimmyliu1326/SsuisSero) (GitHub) - diff --git a/docs/bactopia-tools/staphopiasccmec.md b/docs/bactopia-tools/staphopiasccmec.md deleted file mode 100644 index 0bfba724..00000000 --- a/docs/bactopia-tools/staphopiasccmec.md +++ /dev/null @@ -1,236 +0,0 @@ ---- -title: staphopiasccmec -description: A Bactopia Tool which uses staphopia-sccmec to assign SCCmec types to _Staphylococcus aureus_ assemblies. ---- -# Bactopia Tool - `staphopiasccmec` -The `staphopiasccmec` module uses [staphopia-sccmec](https://github.com/staphopia/staphopia-sccmec) to assign SCCmec types to _Staphylococcus aureus_ assemblies. - -## Example Usage -``` -bactopia --wf staphopiasccmec \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `staphopiasccmec` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── staphopiasccmec -│ ├── .tsv -│ └── logs -│ ├── nf-staphopiasccmec.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── staphopiasccmec- - ├── merged-results - │ ├── logs - │ │ └── staphopiasccmec-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── staphopiasccmec.tsv - └── nf-reports - ├── staphopiasccmec-dag.dot - ├── staphopiasccmec-report.html - ├── staphopiasccmec-timeline.html - └── staphopiasccmec-trace.txt - -``` - -:::info[Directory structure might be different] - -`staphopiasccmec` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `staphopiasccmec` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| staphopiasccmec | A merged TSV file with `staphopia-sccmec` results from all samples | - - -#### staphopia-sccmec - -Below is a description of the _per-sample_ results from [staphopia-sccmec](https://github.com/staphopia/staphopia-sccmec). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `staphopia-sccmec` results | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| staphopiasccmec-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| staphopiasccmec-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| staphopiasccmec-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| staphopiasccmec-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### staphopia-sccmec Parameters - - -| Parameter | Description | -|:---|---| -| ` --hamming` | Report the results as hamming distances
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `staphopiasccmec` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [staphopia-sccmec](https://github.com/staphopia/staphopia-sccmec) - Petit III RA, Read TD [_Staphylococcus aureus_ viewed from the perspective of 40,000+ genomes.](http://dx.doi.org/10.7717/peerj.5261) _PeerJ_ 6, e5261 (2018) - diff --git a/docs/bactopia-tools/staphtyper.md b/docs/bactopia-tools/staphtyper.md deleted file mode 100644 index 70fbd6c5..00000000 --- a/docs/bactopia-tools/staphtyper.md +++ /dev/null @@ -1,319 +0,0 @@ ---- -title: staphtyper -description: A Bactopia Tool which includes multiple tools that are specific for typing certain features of _Staphylococcus aureus_. ---- -# Bactopia Tool - `staphtyper` -The `staphtyper` subworkflow includes multiple tools that are specific for typing certain features -of *Staphylococcus aureus*. Currently `staphtyper` includes - -1. [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) - *agr* locus type and *agr* operon variants. -2. [spaTyper](https://github.com/HCGB-IGTP/spaTyper) - *spa* type -3. [sccmec](https://github.com/rpetit3/sccmec) - SCCmec type - -This tool will evolve with *S. aureus* genomics, so you can expect it to add more typing methods -(maybe even replace current methods) in the future. If a certain typing method for *S. aureus* -please feel free to suggest it be added!~ - - -## Example Usage -``` -bactopia --wf staphtyper \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `staphtyper` tool. 
Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ ├── agrvate -│ │ ├── -agr_gp.tab -│ │ ├── -blastn_log.txt -│ │ ├── -summary.tab -│ │ └── logs -│ │ ├── nf-agrvate.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── spatyper -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-spatyper.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ └── sccmec -│ ├── .tsv -│ └── logs -│ ├── nf-sccmec.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── staphtyper- - ├── merged-results - │ ├── agrvate.tsv - │ ├── logs - │ │ ├── agrvate-concat - │ │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ │ └── versions.yml - │ │ ├── spatyper-concat - │ │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ │ └── versions.yml - │ │ └── sccmec-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ ├── spatyper.tsv - │ └── sccmec.tsv - └── nf-reports - ├── staphtyper-dag.dot - ├── staphtyper-report.html - ├── staphtyper-timeline.html - └── staphtyper-trace.txt - -``` - -:::info[Directory structure might be different] - -`staphtyper` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `staphtyper` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| agrvate.tsv | A merged TSV file with `AgrVATE` results from all samples | -| spatyper.tsv | A merged TSV file with `spaTyper` results from all samples | -| sccmec.tsv | A merged TSV file with `sccmec` results from all samples | - - -#### AgrVATE - -Below is a description of the _per-sample_ results from [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE). 
- - -| Extension | Description | -|-------------------------------|-------------| -| -agr_gp.tab | A detailed report for _agr_ kmer matches | -| -blastn_log.txt | Log files from programs called by `AgrVATE` | -| -summary.tab | A final summary report for _agr_ typing | - - -#### spaTyper - -Below is a description of the _per-sample_ results from [spaTyper](https://github.com/HCGB-IGTP/spaTyper). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `spaTyper` result | - - -#### sccmec - -Below is a description of the _per-sample_ results from [sccmec](https://github.com/rpetit3/sccmec). - - -| Filename | Description | -|-------------------------------|-------------| -| .tsv | A tab-delimited file with the predicted type | -| .targets.blastn.tsv | A tab-delimited file of all target-specific blast hits | -| .targets.details.tsv | A tab-delimited file with details for each type based on targets | -| .regions.blastn.tsv | A tab-delimited file of all full cassette blast hits | -| .regions.details.tsv | A tab-delimited file with details for each type based on full cassettes | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| staphtyper-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| staphtyper-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| staphtyper-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| staphtyper-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data.
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### AgrVATE Parameters - - -| Parameter | Description | -|:---|---| -| ` --typing_only` | agr typing only. Skips agr operon extraction and frameshift detection
**Type:** `boolean` | - -### spaTyper Parameters - - -| Parameter | Description | -|:---|---| -| ` --repeats` | List of spa repeats
**Type:** `string` | -| ` --repeat_order` | List spa types and order of repeats
**Type:** `string` | -| ` --do_enrich` | Do PCR product enrichment
**Type:** `boolean` | - -### sccmec Parameters - - -| Parameter | Description | -|:---|---| -| ` --sccmec_min_targets_pident` | Minimum percent identity to count a target hit
**Type:** `integer`, **Default:** `90` | -| ` --sccmec_min_targets_coverage` | Minimum percent coverage to count a target hit
**Type:** `integer`, **Default:** `80` | -| ` --sccmec_min_regions_pident` | Minimum percent identity to count a region hit
**Type:** `integer`, **Default:** `85` | -| ` --sccmec_min_regions_coverage` | Minimum percent coverage to count a region hit
**Type:** `integer`, **Default:** `93` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `staphtyper` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) - Raghuram V. [AgrVATE: Rapid identification of Staphylococcus aureus agr locus type and agr operon variants.](https://github.com/VishnuRaghuram94/AgrVATE) (GitHub) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [sccmec](https://github.com/rpetit3/sccmec) - Petit III RA, Read TD [sccmec: A tool for typing SCCmec cassettes in assemblies](https://github.com/rpetit3/sccmec) (GitHub) - -- [spaTyper](https://github.com/HCGB-IGTP/spaTyper) - Sanchez-Herrero JF, and Sullivan M [spaTyper: Staphylococcal protein A (spa) characterization pipeline](http://doi.org/10.5281/zenodo.4063625). Zenodo. (2020) - diff --git a/docs/bactopia-tools/stecfinder.md b/docs/bactopia-tools/stecfinder.md deleted file mode 100644 index af6253b0..00000000 --- a/docs/bactopia-tools/stecfinder.md +++ /dev/null @@ -1,243 +0,0 @@ ---- -title: stecfinder -description: A Bactopia Tool which uses STECFinder to identify the serotype of Shigatoxin producing _E. coli_ (STEC). - ---- -# Bactopia Tool - `stecfinder` -The `stecfinder` module uses [STECFinder](https://github.com/LanLab/STECFinder) to identify -the serotype of Shigatoxin producing _E. coli_ (STEC). STECFinder identifies the serotype -as well as the O-antigen and H-antigens. - - -## Example Usage -``` -bactopia --wf stecfinder \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `stecfinder` tool. 
Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── stecfinder -│ ├── .tsv -│ └── logs -│ ├── nf-stecfinder.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── stecfinder- - ├── merged-results - │ ├── logs - │ │ └── stecfinder-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── stecfinder.tsv - └── nf-reports - ├── stecfinder-dag.dot - ├── stecfinder-report.html - ├── stecfinder-timeline.html - └── stecfinder-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| stecfinder.tsv | A summary of the `stecfinder` results for all samples | - - -#### STECFinder - -Below is a description of the _per-sample_ results from [STECFinder](https://github.com/LanLab/STECFinder). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | The `stecfinder` results in tab-delimited format. See [STECFinder Column Descriptions](https://github.com/LanLab/STECFinder#column-descriptions) for more details. | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| stecfinder-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| stecfinder-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| stecfinder-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| stecfinder-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data.
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### STECFinder Parameters - - -| Parameter | Description | -|:---|---| -| ` --stecfinder_use_reads` | Paired-end Illumina reads will be used instead of assemblies
**Type:** `boolean` | -| ` --stecfinder_hits` | Show detailed gene search results
**Type:** `boolean` | -| ` --stecfinder_cutoff` | Minimum read coverage for gene to be called
**Type:** `number`, **Default:** `10.0` | -| ` --stecfinder_length` | Percentage of gene length needed for positive call
**Type:** `number`, **Default:** `50.0` | -| ` --stecfinder_ipah_length` | Percentage of ipaH gene length needed for positive gene call
**Type:** `number`, **Default:** `10.0` | -| ` --stecfinder_ipah_depth` | Minimum depth for positive ipaH gene call (requires --stecfinder_use_reads)
**Type:** `number`, **Default:** `1.0` | -| ` --stecfinder_stx_length` | Percentage of stx gene length needed for positive gene call
**Type:** `number`, **Default:** `10.0` | -| ` --stecfinder_stx_depth` | Minimum depth for positive stx gene call (requires --stecfinder_use_reads)
**Type:** `number`, **Default:** `10.0` | -| ` --stecfinder_o_length` | Percentage of wz_ gene length needed for positive call
**Type:** `number`, **Default:** `60.0` | -| ` --stecfinder_o_depth` | Minimum depth for positive wz_ gene call (requires --stecfinder_use_reads)
**Type:** `number`, **Default:** `1.0` | -| ` --stecfinder_h_length` | Percentage of fliC gene length needed for positive call
**Type:** `number`, **Default:** `60.0` | -| ` --stecfinder_h_depth` | Minimum depth for positive fliC gene call (requires --stecfinder_use_reads)
**Type:** `number`, **Default:** `1.0` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `stecfinder` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [STECFinder](https://github.com/LanLab/STECFinder) - Zhang X, Payne M, Kaur S, and Lan R [Improved Genomic Identification, Clustering, and Serotyping of Shiga Toxin-Producing Escherichia coli Using Cluster/Serotype-Specific Gene Markers.](https://doi.org/10.3389/fcimb.2021.772574) _Frontiers in Cellular and Infection Microbiology_, 11, 772574. (2021) - diff --git a/docs/bactopia-tools/summary.md b/docs/bactopia-tools/summary.md deleted file mode 100644 index a070d6e3..00000000 --- a/docs/bactopia-tools/summary.md +++ /dev/null @@ -1,184 +0,0 @@ -# Bactopia Tools - *summary* -The `summary` tool allows you to quickly aggregate the results of your Bactopia -analysis. For each sample the sequence stats (before and after QC), assembly stats, -and the annotation stats are put into a single tab-delimited file. - -For each sample, the `summary` assigns a rank of *Gold*, *Silver*, *Bronze*, -or *Fail*. The rank is determined by sequence quality and assembly quality. Below -is the default cutoffs for each rank. - -| Rank | Coverage | Mean Per-Read Quality | Mean Read Length | Total Contigs | -|----------|:-------------:|:---:|:---:|:--:| -| Gold | 100x | Q30 | 95bp | 100 | -| Silver | 50x | Q20 | 75bp | 200 | -| Bronze | 20x | Q12 | 49bp | 500 | -| Fail | <20x | 500 | - -Samples that fail to meet all the cutoffs for at least a *Bronze* rank are added -to a *exclude* file. 
This turns out to be a useful feature beacuse all other -Bactopia Tools can read this file and automatically -exclude the samples marked as *Fail* from downstream analysis. - -## Example -``` -bactopia tools summary --bactopia ~/bactopia-tutorial/bactopia -``` - -## Output Overview -``` -bactopia-tools/ -└── summary/ - ├── amrfinder - │   ├── amrfinder-(gene|protein)-detailed-summary.txt - │   └── amrfinder-(gene|protein)-summary.txt - ├── ariba - │   ├── ariba-(card|vfdb|etc...)-detailed-summary.txt - │   └── ariba-(card|vfdb|etc...)-summary.txt - ├── bactopia-exclude.txt - ├── bactopia-info - │   ├── summary-report.html - │   ├── summary-timeline.html - │   └── summary-trace.txt - ├── bactopia-results.txt - └── bactopia-summary.txt -``` - -| Filename | Description | -|----------|-------------| -| bactopia-exclude.txt | A list of samples and the reason they failed quality cutoffs | -| bactopia-results.txt | A tab-delimited file containing sequence, assembly and annotation stats for all samples | -| bactopia-summary.txt | Brief breakdown of ranks and qc-failures | - -### Directory Description - -#### amrfinder -| Filename | Description | -|----------|-------------| -| amrfinder-(gene\|protein)-detailed-summary.txt | Detailed information about each hit against a specific antimicrobial resistance | -| amrfinder-(gene\|protein)-summary.txt | A presence/absence matrix for hits against a specific antimicrobial resistance | - -#### ariba -| Filename | Description | -|----------|-------------| -| ariba-(card\|vfdb\|etc...)-detailed-summary.txt | Detailed information about each hit against a reference Ariba dataset | -| ariba-(card\|vfdb\|etc...)-summary.txt | A presence/absence matrix for hits against a reference Ariba dataset | - -#### bactopia-info -| Filename | Description | -|----------|-------------| -| summary-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| summary-timeline.html | The Nextflow 
[Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| summary-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - -## Usage -``` -Required Parameters: - --bactopia STR Directory containing Bactopia analysis results for all samples. - -Bactopia Summary Parameters: - --gold_coverage FLOAT Minimum amount of coverage required for Gold status - Default: 100 - - --gold_quality INT Minimum per-read mean quality score required for Gold - status - Default: 30 - - --gold_read_length INT Minimum mean read length required for Gold status - Default: 95 - - --gold_contigs INT Maximum contig count required for Gold status - Default: 100 - - --silver_coverage FLOAT Minimum amount of coverage required for Silver status - Default: 50 - - --silver_quality INT Minimum per-read mean quality score required for - Silver status - Default: 20 - - --silver_read_length INT - Minimum mean read length required for Silver status - Default: 75 - - --silver_contigs INT Maximum contig count required for Silver status - Default: 200 - - --min_coverage FLOAT Minimum amount of coverage required to pass - Default: 20 - - --min_quality INT Minimum per-read mean quality score required to pass - Default: 12 - - --min_read_length INT Minimum mean read length required to pass - Default: 49 - - --max_contigs INT Maximum contig count required to pass - Default: 500 - - --min_genome_size INT Minimum assembled genome size. - Default: null - - --max_genome_size INT Maximum assembled genome size. - Default: null - -Ariba Summary Parameters: - --all_hits Include all hits (matches and partials) in the summary - Default: Only report hits that are a match - -AMRFinder+ Summary Parameters: - --subclass Group the report by subclass (ex. Streptomycin). - Default: Group by class (ex. 
Aminoglycoside) - -Optional Parameters: - --prefix STR Prefix to use for final output files - Default: bactopia - - --outdir DIR Directory to write results to - Default: ./ - - --max_time INT The maximum number of minutes a job should run before being halted. - Default: 120 minutes - - --max_memory INT The maximum amount of memory (Gb) allowed to a single process. - Default: 32 Gb - - --cpus INT Number of processors made available to a single - process. - Default: 4 - -Nextflow Related Parameters: - --condadir DIR Directory to Nextflow should use for Conda environments - Default: Bactopia's Nextflow directory - - --publish_mode Set Nextflow's method for publishing output files. Allowed methods are: - 'copy' (default) Copies the output files into the published directory. - - 'copyNoFollow' Copies the output files into the published directory - without following symlinks ie. copies the links themselves. - - 'link' Creates a hard link in the published directory for each - process output file. - - 'rellink' Creates a relative symbolic link in the published directory - for each process output file. - - 'symlink' Creates an absolute symbolic link in the published directory - for each process output file. - - Default: copy - - --force Nextflow will overwrite existing output files. - Default: false - - --conatainerPath Path to Singularity containers to be used by the 'slurm' - profile. - Default: /opt/bactopia/singularity - - --sleep_time After reading datases, the amount of time (seconds) Nextflow - will wait before execution. - Default: 5 seconds -Useful Parameters: - --verbose Increase the verbosity of processes. 
- --version Print workflow version information - --help Show this message and exit -``` diff --git a/docs/bactopia-tools/tblastn.md b/docs/bactopia-tools/tblastn.md deleted file mode 100644 index 39ebb86e..00000000 --- a/docs/bactopia-tools/tblastn.md +++ /dev/null @@ -1,236 +0,0 @@ ---- -title: tblastn -description: A Bactopia Tool which uses TBLASTN to query protein sequences against translated nucleotide databases. - ---- -# Bactopia Tool - `tblastn` -The `tblastn` module uses [TBLASTN](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs) -to query protein sequences against translated nucleotide databases (contigs) for each sample. - - -## Example Usage -``` -bactopia --wf tblastn \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `tblastn` tool. Where possible the -file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── tblastn -│ ├── .tblastn.tsv -│ └── logs -│ ├── nf-tblastn.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── tblastn- - ├── merged-results - │ ├── tblastn.tsv - │ └── logs - │ └── tblastn-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── tblastn-dag.dot - ├── tblastn-report.html - ├── tblastn-timeline.html - └── tblastn-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| tblastn.tsv | A merged TSV file with `tblastn` results from all samples | - - -#### tblastn - -Below is a description of the _per-sample_ results from [TBLASTN](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs). 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tblastn.tsv | A TSV file with `tblastn` results for a single sample | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| tblastn-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| tblastn-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| tblastn-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| tblastn-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. 
- -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### TBLASTN Parameters - - -| Parameter | Description | -|:---|---| -| ` --tblastn_query` | A fasta file containing the query sequences to BLAST against the database
**Type:** `string` | -| ` --tblastn_outfmt` | The columns to include with -outfmt 6
**Type:** `string`, **Default:** `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | -| ` --tblastn_opts` | Additional options to pass to TBLASTN
**Type:** `string` | -| ` --tblastn_qcov_hsp_perc` | Percent query coverage per hsp
**Type:** `integer`, **Default:** `50` | -| ` --tblastn_max_target_seqs` | Maximum number of aligned sequences to keep
**Type:** `integer`, **Default:** `2000` | -| ` --tblastn_use_genes` | Blast against genes sequences instead of contigs
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `tblastn` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) - Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/tblastx.md b/docs/bactopia-tools/tblastx.md deleted file mode 100644 index b5381db4..00000000 --- a/docs/bactopia-tools/tblastx.md +++ /dev/null @@ -1,236 +0,0 @@ ---- -title: tblastx -description: A Bactopia Tool which uses TBLASTX to query translated nucleotide sequences against translated nucleotide databases. - ---- -# Bactopia Tool - `tblastx` -The `tblastx` module uses [TBLASTX](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs) -to query translated nucleotide sequences against translated nucleotide databases for each sample. - - -## Example Usage -``` -bactopia --wf tblastx \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `tblastx` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── tblastx -│ ├── .tblastx.tsv -│ └── logs -│ ├── nf-tblastx.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── tblastx- - ├── merged-results - │ ├── tblastx.tsv - │ └── logs - │ └── tblastx-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── tblastx-dag.dot - ├── tblastx-report.html - ├── tblastx-timeline.html - └── tblastx-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| tblastx.tsv | A merged TSV file with `tblastx` results from all samples | - - -#### tblastx - -Below is a description of the _per-sample_ results from [TBLASTX](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tblastx.tsv | A TSV file with `tblastx` results for a single sample | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| tblastx-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| tblastx-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| tblastx-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| tblastx-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### TBLASTX Parameters - - -| Parameter | Description | -|:---|---| -| ` --tblastx_query` | A fasta file containing the query sequences to BLAST against the database
**Type:** `string` | -| ` --tblastx_outfmt` | The columns to include with -outfmt 6
**Type:** `string`, **Default:** `sseqid qseqid pident qlen slen length nident positive mismatch gapopen gaps qstart qend sstart send evalue bitscore` | -| ` --tblastx_opts` | Additional options to pass to TBLASTX
**Type:** `string` | -| ` --tblastx_qcov_hsp_perc` | Percent query coverage per hsp
**Type:** `integer`, **Default:** `50` | -| ` --tblastx_max_target_seqs` | Maximum number of aligned sequences to keep
**Type:** `integer`, **Default:** `2000` | -| ` --tblastx_use_genes` | Blast against genes sequences instead of contigs
**Type:** `boolean` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `tblastx` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi) - Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL [BLAST+: architecture and applications](http://dx.doi.org/10.1186/1471-2105-10-421). _BMC Bioinformatics_ 10, 421 (2009) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia-tools/tbprofiler.md b/docs/bactopia-tools/tbprofiler.md deleted file mode 100644 index 15724ee4..00000000 --- a/docs/bactopia-tools/tbprofiler.md +++ /dev/null @@ -1,236 +0,0 @@ ---- -title: tbprofiler -description: A Bactopia Tool which uses TBProfiler for profiling reads to determine resistance and _Mycobacterium tuberculosis_ strain type. ---- -# Bactopia Tool - `tbprofiler` -The `tbprofiler` module uses [TBProfiler](https://github.com/jodyphelan/TBProfiler) -for profiling reads to determine resistance and _Mycobacterium tuberculosis_ strain type. - - -## Example Usage -``` -bactopia --wf tbprofiler \ - --bactopia /path/to/your/bactopia/results -``` - -## Output Overview - -Below is the default output structure for the `tbprofiler` tool. Where possible the -file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── tbprofiler -│ ├── bam -│ │ └── .bam -│ ├── logs -│ │ ├── nf-tbprofiler.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── .results.csv -│ ├── .results.json -│ ├── .results.txt -│ └── vcf -│ └── .targets.csq.vcf.gz -└── bactopia-runs - └── tbprofiler- - └── nf-reports - ├── tbprofiler-dag.dot - ├── tbprofiler-report.html - ├── tbprofiler-timeline.html - └── tbprofiler-trace.txt - -``` - -:::info[Directory structure might be different] - -`tbprofiler` is available as a standalone Bactopia Tool, as well as from -the main Bactopia workflow (e.g. through Staphopia or Merlin). If executed -from Bactopia, the `tbprofiler` directory structure might be different, but the -output descriptions below still apply. -::: - - - -### Results - -#### TBProfiler - -Below is a description of the _per-sample_ results from [TBProfiler](https://github.com/jodyphelan/TBProfiler). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.results.csv | A CSV formated `TBProfiler` result file of resistance and strain type | -| <SAMPLE_NAME>.results.json | A JSON formated `TBProfiler` result file of resistance and strain type | -| <SAMPLE_NAME>.results.txt | A text file with `TBProfiler` results | -| <SAMPLE_NAME>.bam | BAM file with alignment details | -| <SAMPLE_NAME>.targets.csq.vcf.gz | VCF with variant info again reference genomes | - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -#### Nextflow Reports - -These Nextflow reports provide great a great summary of your run. These can be used to optimize -resource usage and estimate expected costs if using cloud platforms. - -| Filename | Description | -|----------|-------------| -| tbprofiler-dag.dot | The Nextflow [DAG visualisation](https://www.nextflow.io/docs/latest/tracing.html#dag-visualisation) | -| tbprofiler-report.html | The Nextflow [Execution Report](https://www.nextflow.io/docs/latest/tracing.html#execution-report) | -| tbprofiler-timeline.html | The Nextflow [Timeline Report](https://www.nextflow.io/docs/latest/tracing.html#timeline-report) | -| tbprofiler-trace.txt | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report | - - -#### Program Versions - -At the end of each run, each of the `versions.yml` files are merged into the files below. - -| Filename | Description | -|---------------------------|-------------| -| software_versions.yml | A complete list of programs and versions used by each process | -| software_versions_mqc.yml | A complete list of programs and versions formatted for [MultiQC](https://multiqc.info/) | - -## Parameters - - -### Required Parameters -Define where the pipeline should find input data and save output data. 
- -| Parameter | Description | -|:---|---| -| ` --bactopia` | The path to bactopia results to use as inputs
**Type:** `string` | - -### Filtering Parameters -Use these parameters to specify which samples to include or exclude. - -| Parameter | Description | -|:---|---| -| ` --include` | A text file containing sample names (one per line) to include from the analysis
**Type:** `string` | -| ` --exclude` | A text file containing sample names (one per line) to exclude from the analysis
**Type:** `string` | - - -### TBProfiler Profile Parameters - - -| Parameter | Description | -|:---|---| -| ` --call_whole_genome` | Call whole genome
**Type:** `boolean` | -| ` --mapper` | Mapping tool to use. If you are using nanopore data it will default to minimap2
**Type:** `string`, **Default:** `bwa` | -| ` --caller` | Variant calling tool to use
**Type:** `string`, **Default:** `freebayes` | -| ` --calling_params` | Extra variant caller options in quotes
**Type:** `string` | -| ` --suspect` | Use the suspect suite of tools to add ML predictions
**Type:** `boolean` | -| ` --no_flagstat` | Don't collect flagstats
**Type:** `boolean` | -| ` --no_delly` | Don't run delly
**Type:** `boolean` | -| ` --tbprofiler_opts` | Extra options in quotes for TBProfiler
**Type:** `string` | - - -### Optional Parameters -These optional parameters can be useful in certain settings. - -| Parameter | Description | -|:---|---| -| ` --outdir` | Base directory to write results to
**Type:** `string`, **Default:** `bactopia` | -| ` --skip_compression` | Output files will not be compressed
**Type:** `boolean` | -| ` --datasets` | The path to cache datasets to
**Type:** `string` | -| ` --keep_all_files` | Keeps all analysis files created
**Type:** `boolean` | - -### Max Job Request Parameters -Set the top limit for requested resources for any single job. - -| Parameter | Description | -|:---|---| -| ` --max_retry` | Maximum times to retry a process before allowing it to fail.
**Type:** `integer`, **Default:** `3` | -| ` --max_cpus` | Maximum number of CPUs that can be requested for any single job.
**Type:** `integer`, **Default:** `4` | -| ` --max_memory` | Maximum amount of memory that can be requested for any single job.
**Type:** `string`, **Default:** `128.GB` | -| ` --max_time` | Maximum amount of time that can be requested for any single job.
**Type:** `string`, **Default:** `240.h` | -| ` --max_downloads` | Maximum number of samples to download at a time
**Type:** `integer`, **Default:** `3` | - -### Nextflow Configuration Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --nfconfig` | A Nextflow compatible config file for custom profiles, loaded last and will overwrite existing variables if set.
**Type:** `string` | -| ` --publish_dir_mode` | Method used to save pipeline results to output directory.
**Type:** `string`, **Default:** `copy` | -| ` --infodir` | Directory to keep pipeline Nextflow logs and reports.
**Type:** `string`, **Default:** `${params.outdir}/pipeline_info` | -| ` --force` | Nextflow will overwrite existing output files.
**Type:** `boolean` | -| ` --cleanup_workdir` | After Bactopia is successfully executed, the `work` directory will be deleted.
**Type:** `boolean` | - -### Institutional config options -Parameters used to describe centralized config profiles. These should not be edited. - -| Parameter | Description | -|:---|---| -| ` --custom_config_version` | Git commit id for Institutional configs.
**Type:** `string`, **Default:** `master` | -| ` --custom_config_base` | Base directory for Institutional configs.
**Type:** `string`, **Default:** `https://raw.githubusercontent.com/nf-core/configs/master` | -| ` --config_profile_name` | Institutional config name.
**Type:** `string` | -| ` --config_profile_description` | Institutional config description.
**Type:** `string` | -| ` --config_profile_contact` | Institutional config contact information.
**Type:** `string` | -| ` --config_profile_url` | Institutional config URL link.
**Type:** `string` | - -### Nextflow Profile Parameters -Parameters to fine-tune your Nextflow setup. - -| Parameter | Description | -|:---|---| -| ` --condadir` | Directory Nextflow should use for Conda environments
**Type:** `string` | -| ` --registry` | Docker registry to pull containers from.
**Type:** `string`, **Default:** `dockerhub` | -| ` --datasets_cache` | Directory where downloaded datasets should be stored.
**Type:** `string`, **Default:** `/data/datasets` | -| ` --singularity_cache_dir` | Directory where remote Singularity images are stored.
**Type:** `string` | -| ` --singularity_pull_docker_container` | Instead of directly downloading Singularity images for use with Singularity, force the workflow to pull and convert Docker containers instead.
**Type:** `boolean` | -| ` --force_rebuild` | Force overwrite of existing pre-built environments.
**Type:** `boolean` | -| ` --queue` | Comma-separated name of the queue(s) to be used by a job scheduler (e.g. AWS Batch or SLURM)
**Type:** `string`, **Default:** `general,high-memory` | -| ` --cluster_opts` | Additional options to pass to the executor. (e.g. SLURM: '--account=my_acct_name')
**Type:** `string` | -| ` --container_opts` | Additional options to pass to Apptainer, Docker, or Singularity. (e.g. Singularity: '-D `pwd`')
**Type:** `string` | -| ` --disable_scratch` | All intermediate files created on worker nodes will be transferred to the head node.
**Type:** `boolean` | - -### Helpful Parameters -Uncommonly used parameters that might be useful. - -| Parameter | Description | -|:---|---| -| ` --monochrome_logs` | Do not use coloured log outputs.
**Type:** `boolean` | -| ` --nfdir` | Print directory Nextflow has pulled Bactopia to
**Type:** `boolean` | -| ` --sleep_time` | The amount of time (seconds) Nextflow will wait after setting up datasets before execution.
**Type:** `integer`, **Default:** `5` | -| ` --validate_params` | Boolean whether to validate parameters against the schema at runtime
**Type:** `boolean`, **Default:** `True` | -| ` --help` | Display help text.
**Type:** `boolean` | -| ` --wf` | Specify which workflow or Bactopia Tool to execute
**Type:** `string`, **Default:** `bactopia` | -| ` --list_wfs` | List the available workflows and Bactopia Tools to use with '--wf'
**Type:** `boolean` | -| ` --show_hidden_params` | Show all params when using `--help`
**Type:** `boolean` | -| ` --help_all` | An alias for --help --show_hidden_params
**Type:** `boolean` | -| ` --version` | Display version text.
**Type:** `boolean` | - -## Citations -If you use Bactopia and `tbprofiler` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [TBProfiler](https://github.com/jodyphelan/TBProfiler) - Phelan JE, O’Sullivan DM, Machado D, Ramos J, Oppong YEA, Campino S, O’Grady J, McNerney R, Hibberd ML, Viveiros M, Huggett JF, Clark TG [Integrating informatics tools and portable sequencing technology for rapid detection of resistance to anti-tuberculous drugs.](https://doi.org/10.1186/s13073-019-0650-x) _Genome Med_ 11, 41 (2019) - diff --git a/docs/bactopia-tools/teton.md b/docs/bactopia-tools/teton.md deleted file mode 100644 index 60cb842c..00000000 --- a/docs/bactopia-tools/teton.md +++ /dev/null @@ -1,493 +0,0 @@ ---- -title: Beginner's Guide -description: >- - A beginner's guide to getting started using Bactopia to process your bacterial genomes. ---- - -Bactopia is a complete pipeline for the analysis of bacterial genomes, which includes -more than 150 bioinformatics tools. In this section, we will discuss the most essential -parameters users will need to make use of to get started with Bactopia. We are going -to focus on the parameters associated with processing input samples. - -Looking at the workflow overview below, we are really going to focus on the first step, -the _Gather_ step. The overview also provides a few examples of the accepted inputs, -including: - -- Local Illumina and/or Nanopore Reads -- Local Assemblies -- ENA/SRA Experiment Accessions -- NCBI Assembly Accessions - -Along with these accepted inputs, we'll also take a look at some helpful parameters. If you -are interested in learning more about the full set of parameters available in Bactopia, -please check out the [Full Guide](full-guide.md) section. 
- -![Bactopia Workflow](../assets/bactopia-workflow.png) - -## Bactopia Inputs - -Below is a table of input parameters for you to provide either local or remote samples -to be processed by Bactopia. These are the most essential parameters you will need to -get started using Bactopia. We will start here, with a brief description of each parameter, -then we will go into more detail about each. - - -### Required Parameters -Define where the pipeline should find input data and save output data. - -| Parameter | Description | -|:---|---| -| ` --samples` | A FOFN (via bactopia prepare) with sample names and paths to FASTQ/FASTAs to process -
**Type:** `string` | -| ` --R1` | First set of compressed (gzip) paired-end FASTQ reads (requires --R2 and --sample)
**Type:** `string` | -| ` --R2` | Second set of compressed (gzip) paired-end FASTQ reads (requires --R1 and --sample)
**Type:** `string` | -| ` --SE` | Compressed (gzip) single-end FASTQ reads (requires --sample)
**Type:** `string` | -| ` --ont` | Treat `--SE` or `--accession` as long reads for analysis. (requires --sample if using --SE)
**Type:** `boolean` | -| ` --hybrid` | Treat `--SE` as long reads for hybrid assembly. (requires --R1, --R2, --SE and --sample)
**Type:** `boolean` | -| ` --short_polish` | Treat `--SE` as long reads for long-read assembly and short read polishing. (requires --R1, --R2, --SE and --sample)
**Type:** `boolean` | -| ` --sample` | Sample name to use for the input sequences -
**Type:** `string` | -| ` --accessions` | A file containing ENA/SRA Experiment accessions or NCBI Assembly accessions to be processed
**Type:** `string` | -| ` --accession` | A single ENA/SRA Experiment accession or NCBI Assembly accession to be processed -
**Type:** `string` | -| ` --assembly` | An assembled genome in compressed FASTA format. (requires --sample)
**Type:** `string` | -| ` --check_samples` | Validate the input FOFN provided by --samples
**Type:** `boolean` | - -### Dataset Parameters - - -| Parameter | Description | -|:---|---| -| ` --species` | Name of species for species-specific dataset to use
**Type:** `string` | -| ` --ask_merlin` | Ask Merlin to execute species specific Bactopia tools based on Mash distances
**Type:** `boolean` | -| ` --coverage` | Reduce samples to a given coverage, requires a genome size
**Type:** `integer`, **Default:** `100` | -| ` --genome_size` | Expected genome size (bp) for all samples, required for read error correction and read subsampling
**Type:** `string`, **Default:** `0` | -| ` --use_bakta` | Use Bakta for annotation, instead of Prokka
**Type:** `boolean` | - - - -``` ---------------------------------------------- - _ _ _ - | |__ __ _ ___| |_ ___ _ __ (_) __ _ - | '_ \ / _` |/ __| __/ _ \| '_ \| |/ _` | - | |_) | (_| | (__| || (_) | |_) | | (_| | - |_.__/ \__,_|\___|\__\___/| .__/|_|\__,_| - |_| - bactopia v2.x.x - Bactopia is a flexible pipeline for complete analysis of bacterial genomes. ---------------------------------------------- -Typical pipeline command: - - bactopia --samples samples.txt --datasets datasets/ --species 'Staphylococcus aureus' -profile singularity - -Required Parameters - ### For Procesessing Multiple Samples - --samples [string] A FOFN with sample names and paths to FASTQ/FASTAs to process - - ### For Processing A Single Sample - --R1 [string] First set of compressed (gzip) paired-end FASTQ reads (requires --R2 and --sample) - --R2 [string] Second set of compressed (gzip) paired-end FASTQ reads (requires --R1 and --sample) - --SE [string] Compressed (gzip) single-end FASTQ reads (requires --sample) - --ont [boolean] Treat `--SE` or `--accession` as long reads for analysis. (requires --sample if using --SE) - --hybrid [boolean] Treat `--SE` as long reads for hybrid assembly. (requires --R1, --R2, --SE and --sample) - --sample [string] Sample name to use for the input sequences - - ### For Downloading from SRA/ENA or NCBI Assembly - **Note: Downloaded assemblies will have error free Illumina reads simulated for processing.** - --accessions [string] A file containing ENA/SRA Experiment accessions or NCBI Assembly accessions to processed - --accession [string] Sample name to use for the input sequences - - ### For Processing an Assembly - **Note: Assemblies will have error free Illumina reads simulated for processing.** - --assembly [string] A assembled genome in compressed FASTA format. 
(requires --sample) - -Dataset Parameters - --datasets [string] The path to datasets that have already been set up - --species [string] Name of species for species-specific dataset to use - --ask_merlin [boolean] Ask Merlin to execute species specific Bactopia tools based on Mash distances - --coverage [integer] Reduce samples to a given coverage [default: 100] - --genome_size [string] Expected genome size (bp) for all samples, a value of '0' will disable read error correction and read - subsampling, otherwise estimate with Mash [default: 0] - -Annotate Genome Parameters - --use_bakta [boolean] Use Bakta for genome annotation (requires --bakta_db) - -Optional Parameters - --outdir [string] Base directory to write results to [default: ./] - --run_name [string] Name of the directory to hold results [default: bactopia] - -Helpful Parameters - --wf [string] Specify which workflow or Bactopia Tool to execute [default: bactopia] - --list_wfs [boolean] List the available workflows and Bactopia Tools to use with '--wf' - --help_all [boolean] An alias for --help --show_hidden_params - --version [boolean] Display version text. - -!! Hiding 166 params, use --show_hidden_params (or --help_all) to show them !! --------------------------------------------------------------------- -If you use bactopia for your analysis please cite: - -* Bactopia - https://doi.org/10.1128/mSystems.00190-20 - -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://bactopia.github.io/acknowledgements/ --------------------------------------------------------------------- -``` - -## Inputs -Bactopia has multiple approaches to specify your input sequences. Bactopia can process Illumina and Nanopore FASTQs and assemblies. - -Illumina and Nanopore FASTQs a can be locally available or an Experiment accession to download associated FASTQs from the [European Nucleotide Archive (ENA)](https://www.ebi.ac.uk/ena). 
If you have Illumina and Nanopore reads for a sample you have choose to do a hybrid assembly. - -Likewise assemblies can be local, or a GenBank/RefSeq accession to download from NCBI Assembly. Input assemblies will have Illumina reads simulated so that the complete Bactopia pipeline run. By default, the assembly will not be reassembled. - -Which approach really depends on what you need to achieve! The following sections describe methods to process single samples, multiple samples, downloading samples from the ENA. - -### Local -#### Single Sample -When you only need to process a single sample at a time, Bactopia allows that! You only have to the sample name (`--sample`) and the whether the read set is paired-end (`--R1` and `--R2`), single-end (`--SE`), Illumina paired-end + long reads (`--hybrid`), or an assembly (`--assembly`). - -##### Paired-End -:::info[Use --R1, --R2 for Paired-End FASTQs] -`bactopia --sample my-sample --R1 /path/to/my-sample_R1.fastq.gz --R2 /path/to/my-sample_R2.fastq.gz` -::: - -##### Single-End -:::info[Use --SE for Single-End FASTQs] -`bactopia --sample my-sample --SE /path/to/my-sample.fastq.gz` -::: - -##### Nanopore -:::info[Use --SE and --ont for Oxford Nanopore FASTQs] -`bactopia --sample my-sample --SE /path/to/my-ont-sample.fastq.gz --ont` -::: - -##### Hybrid Assembly -:::info[Use --R1, --R2, --SE, and --hybrid for Paired-End FASTQs with Long Reads] -At the assembly step, Unicycler will be used to create a hybrid assembly using the paired-end reads and the long reads. -``` -bactopia --sample my-sample - --R1 /path/to/my-sample_R1.fastq.gz \ - --R2 /path/to/my-sample_R2.fastq.gz \ - --SE /path/to/my-ont-sample.fastq.gz \ - --hybrid -``` -::: - -##### Assembly -:::info[Use --assembly for an assembled FASTA] -Assemblies will have 2x250bp Illumina reads simulated without insertions or deletions in the sequence and a minimum PHRED score of Q33. By default, the input assembly will be used for all downstream analyses (e.g. 
annotation) which use an assembly. If the `--reassemble` parameter is given, then the a assembly will be created from the simulated reads. -``` -bactopia --sample my-sample --assembly /path/to/my-sample.fna.gz -``` -::: - -#### Multiple Samples -For multiple samples, you must create a file with information about the inputs, a *file of filenames* (FOFN). This file specifies sample names and location of FASTQs/FASTAs to be processed. Using this information, paired-end, single-end, nanopore, hybrid or assembly information can be extracted as well as naming output files. - -While this is an additional step for you, the user, it helps to avoid potential pattern matching errors. - -Most importantly, by taking this approach, you can process hundreds of samples in a single command. There is also the added benefit of knowing which FASTQs were analysed and their location at a later time! - -:::info[Use --samples for Multiple Samples] -`bactopia --samples my-samples.txt` -::: - - -##### The FOFN Format -Here is an example FOFN created by `bactopia prepare`. - -``` -sample runtype r1 r2 extra -SA103113 assembly /example/SA103113.fna.gz -SA110685 hybrid /example/SA110685_R1.fastq.gz /SA110685_R2.fastq.gz /example/SA110685.fastq.gz -SA123186 paired-end /example/SA123186_R1.fastq.gz /example/SA123186_R2.fastq.gz -SA123456 single-end /example/SA12345.fastq.gz -SA123456ONT ont /example/SA12345ONT.fastq.gz -``` - -The expected structure is a **tab-delimited** table with three columns: - -1. `sample`: A unique prefix, or unique name, to be used for naming output files -2. `runtype`: Informs Bactopia what type of input the sample is -3. `r1`: If paired-end, the first pair of reads, else the single-end reads -4. `r2`: If paired-end, the second pair of reads -5. `extra`: Either the assembly or long reads associated with a sample. - -These five columns are used as the header for the file. 
In other words, all input FOFNs require their first line to be: -``` -sample runtype r1 r2 extra -``` - -All lines after the header line, contain unique sample names and location(s) to associated FASTQ file(s). Absolute paths should be used to prevent any *file not found* errors due to the relative path changing. - -In the example above, four samples would be processed by Bactopia. - -1. `SA103113` would have simulated reads crreated from the assembly -2. `SA110685` would have a hybrid assembly created using the paired-end reads and long-reads -3. `SA123186` would be processed as paired-end reads -4. `SA123456` would be processed as single-end reads -5. `SA123456ONT` would be processed as Nanopore reads - -:::info[Use `bactopia prepare` to generate the FOFN] -You can manually create the FOFN, but it is highly recommended to always use `bactopia prepare` to generate the FOFN. By using a FOFN generated from `bactopia prepare` you can be confident your FOFN will work with Bactopia. -::: - -##### Generating A FOFN -`bactopia prepare` has been included to help aid (hopefully!) the process of creating a FOFN for your samples. This script will attempt to find FASTQ files in a given directory and output the expected FOFN format. It will also output any potential issues associated with the pattern matching. - -:::danger[Verify accuracy of FOFN] -This is currently an experimental function. There are likely bugs to be ironed out. Please be sure to give the resulting FOFN a quick look over. 
-::: - -``` -usage: bactopia prepare [-h] [-f STR] [-a STR] [--fastq_separator STR] [--fastq_pattern STR] - [--pe1_pattern STR] [--pe2_pattern STR] [--assembly_pattern STR] [-r] - [--long_reads] [--merge] [--prefix STR] [--version] STR - -bactopia prepare - Read a directory and prepare a FOFN of FASTQs/FASTAs - -positional arguments: - STR Directory where FASTQ files are stored - -optional arguments: - -h, --help show this help message and exit - -f STR, --fastq_ext STR - Extension of the FASTQs. Default: .fastq.gz - -a STR, --assembly_ext STR - Extension of the FASTA assemblies. Default: .fna.gz - --fastq_separator STR - Split FASTQ name on the last occurrence of the separator. Default: _ - --fastq_pattern STR Glob pattern to match FASTQs. Default: *.fastq.gz - --pe1_pattern STR Designates difference first set of paired-end reads. Default: ([Aa]|[Rr]1|1) (R1, r1, 1, A, a) - --pe2_pattern STR Designates difference second set of paired-end reads. Default: ([Bb]|[Rr]2|2) (R2, r2, 2, AB b) - --assembly_pattern STR - Glob pattern to match assembly FASTAs. Default: *.fna.gz - -r, --recursive Directories will be traversed recursively - --long_reads Single-end reads should be treated as long reads - --merge Flag samples with multiple read sets to be merged by Bactopia - --prefix STR Replace the absolute path with a given string. Default: Use absolute path - --version show program's version number and exit -``` - -###### Nanopore -:::info[Use `--long_reads` to tell Bactopia to process as Nanopore reads] -When `--long_reads` is used, any reads that are identified as single-end will be given a `runtype` of `ont`. This will tell Bactopia to process these reads as Nanopore reads. -::: - -##### Validating FOFN -When a FOFN is given, the first thing Bactopia does is verify all FASTQ files are found. If everything checks out, each sample will then be processed, otherwise a list of samples with errors will be output to STDERR. 
- -If you would like to only validate your FOFN (and not run the full pipeline), you can use the `--check_samples` parameter. - -###### Without Errors -``` -N E X T F L O W ~ version 20.01.0 -Launching `/home/rpetit3/repos/bactopia/main.nf` [gigantic_meitner] - revision: 6a0fbfbd9c -Printing what would have been processed. Each line consists of an array of -five elements: [SAMPLE_NAME, RUNTYPE, IS_SINGLE_END, [FASTQ_1, FASTQ_2], EXTRA] - -Found: - -[SA103113, assembly, false, [null, null], /example/SA103113.fna.gz] -[SA110685, hybrid, false, [/example/SA110685_R1.fastq.gz, /example/SA110685_R2.fastq.gz], /example/SA110685.fastq.gz] -[SA123186, paired-end, false, [/example/SA123186_R1.fastq.gz, /example/SA123186_R2.fastq.gz], null] -[SA12345, single-end, true, [/example/SA12345.fastq.gz], null] -[SA12345ONT, ont, true, [/example/SA12345ONT.fastq.gz], null] -``` -Each sample has passed validation and is put into a five element array: - -1. sample - the name for this sample -2. runtype - the type of run (paired, single, ont, etc...) that should be used -3. is_single_end - the reads are single-end (true) or paired-end (false) -4. fastq_array - the fastqs associated with the sample -5. extra - Extra column for reads to be used in hybrid assembly - -This array is then automatically queued up for proccessing by Nextflow. - -###### With errors -``` -N E X T F L O W ~ version 20.01.0 -Launching `/home/rpetit3/repos/bactopia/main.nf` [special_ampere] - revision: 6a0fbfbd9c -LINE 4:ERROR: Please verify /example-bad/SA123186_R1.fastq.gz exists, and try again -LINE 4:ERROR: Please verify /example-bad/SA123186_R2.fastq.gz exists, and try again -LINE 5:ERROR: Please verify /example-bad/SA12345.fastq.gz exists, and try again -Sample name "SA123186" is not unique, please revise sample names -Verify sample names are unique and/or FASTA/FASTQ paths are correct -See "--example_fastqs" for an example -Exiting -``` - -In the above example, there are multiple errors. 
Lines 4 and 5 (`LINE 4:ERROR` or `LINE 5:ERROR`) suggest that based on the given paths the FASTQs do not exist. The sample name `SA123186` has been used multiple times, and must be corrected. - -### ENA & SRA -There are a lot of publicly avilable sequences available from the [European Nucleotide Archive](https://www.ebi.ac.uk/ena) (ENA) and the [Sequence Read Archive](https://www.ncbi.nlm.nih.gov/sra) (SRA). There's a good chance you might want to include some of those sequences in your analysis! If that sounds like you, Bactopia has that built in for you! You can give a single *Experiment* accession (`--accession`) or a file where each line is a single *Experiment* accession (`--accessions`). Bactopia will then query ENA to determine *Run* accession(s) associated with the given Experiment accession and proceed download the corresponding FASTQ files from either the SRA (default) or ENA (`--use_ena`). - -After the download is completed, it will be processed through Bactopia. - -:::info[Use --accession for a Single Experiment Accession] -SRA: `bactopia --accession SRX476958` -ENA: `bactopia --accession SRX476958 --use_ena` -::: - -:::info[Use --accessions for Multiple Experiment Accessions] -SRA: `bactopia --accessions my-accessions.txt` -ENA: `bactopia --accessions my-accessions.txt --use_ena` -::: - -:::info[What happens when an Experiment has multiple Runs?] -In cases where a single Experiment might have multiple Run accessions associated with it, the FASTQ files from each Run are merged into a single set of sequences. -::: - -#### Generating Accession List -`bactopia search` has been made to help assist in generating a list of Experiment accessions to be procesed by Bactopia (via `--accessions`). Users can provide a Taxon ID (e.g. 1280), a binary name (e.g. Staphylococcus aureus), a Study accession (e.g. PRJNA480016), a BioSample accession (e.g. SAMN01737350), or a Run accession (e.g. SRR578340). 
This value is then queried against ENA's [Data Warehouse API](https://www.ebi.ac.uk/ena/browse/search-rest)), and a list of all Experiment accessions associated with the query is returned. - -##### Usage -``` -usage: bactopia search [-h] [--exact_taxon] [--outdir OUTPUT_DIRECTORY] - [--prefix PREFIX] [--limit INT] [--version] - STR - -bactopia search - Search ENA for associated WGS samples - -positional arguments: - STR Taxon ID or Study, BioSample, or Run accession - -optional arguments: - -h, --help show this help message and exit - --exact_taxon Exclude Taxon ID descendents. - --outdir OUTPUT_DIRECTORY - Directory to write output. (Default: .) - --prefix PREFIX Prefix to use for output file names. (Default: ena) - --limit INT Maximum number of results to return. (Default: - 1000000) - --version show program's version number and exit - -example usage: - bactopia search PRJNA480016 --limit 20 - bactopia search 1280 --exact_taxon --limit 20' - bactopia search "staphylococcus aureus" --limit 20 - bactopia search SAMN01737350 - bactopia search SRR578340 -``` - -##### Example -``` -bactopia search PRJNA480016 --limit 5 -``` - -When completed three files are produced: - -1. `ena-accessions.txt` - Contains a list of Experiment accessions to be processed. - ``` - SRX4563686 - SRX4563689 - SRX4563687 - SRX4563690 - SRX4563688 - ``` - -:::info[Input for Bactopia] -This file can be used in conjunction with the `--accessions` parameter for Bactopia processing. -::: - - -2. `ena-results.txt` - Contains the full results of the API query. This includes multiples fields (sample_accession, tax_id, sample_alias, center_name, etc...) - -3. `ena-summary.txt` - Contains a small summary of the completed request - ``` - QUERY: (study_accession=PRJNA480016 OR secondary_study_accession=PRJNA480016) - LIMIT: 5 - RESULTS: 5 (./ena-results.txt) - ILLUMINA ACCESSIONS: 5 (./ena-accessions.txt) - ``` - -## `--cleanup_workdir` -After you run Bactopia, you will notice a directory called `work`. 
This directory is where Nextflow runs all the processes and stores the intermediate files. After a process completes successfully, the appropriate results are pulled out and placed in the sample's result folder. The `work` directory can grow very large very quickly! Please keep this in mind when using Bactopia. To help prevent the build up of the `work` directory you can use `--cleanup_workdir` to delete intermediate files after a successful execution of Bactopia. - -:::info[Bactopia and Bactopia Tools use separate `work` directories] -Inside the `work` directory there will be separate subfolders that correspond to a Bactopia run or a specific Bactopia Tool run. This allows you to more easily identify which are ok to delete. The `work` directory is always ok to delete after a successful run. -::: - -## `--max_cpus` -At execution, Nextflow creates a queue and the number of slots in the queue is determined by the total number of cores on the system. So if you have a 24-core system, that means Nextflow will have a queue with 24-slots available. This feature kind of makes `--max_cpus` a little misleading. Typically when you give `--max_cpus` you are saying *"use this amount of cpus"*. But that is not the case for Nextflow and Bactopia. When you use `--max_cpus` what you are actually saying is *"for any particular task, use this amount of slots"*. Commands within a task processors will use the amount specified by `--max_cpus`. - -:::danger[`--max_cpus` can have a significant effect on the efficiency of Bactopia] -So for example if you have a system with 24-cores. - -This command, `bactopia ... --max_cpus 24`, says *for any particular task, use 24 slots*. Nextflow will give tasks in Bactopia 24 slots out of 24 available (24-core machine). In other words the queue can one have one task running at once because each task occupies 24 slots. - -On the other hand, `bactopia ... --max_cpus 4` says *for any particular task, use 4 slots*. 
Now, for Nextflow will give each task 4 slots out of 24 slots. Which means 6 tasks can be running at once. This can lead to much better efficiency because less jobs are stuck waiting in line. - -There are some tasks in Bactopia that will only ever use a single slot because they are single-core tasks. But for example the `annotation` step will always use the number of slots specified by `--max_cpus`. If the `--max_cpus` is too high, the `annotation` will get bogged down, which causes tasks dependent on `annotation` to also get bogged down. -::: - -:::info[When in doubt `--max_cpus 4` is a safe value.] -This is also the default value for Bactopia. -::: - -## `-qs` -The `-qs` parameter is short for *queue size*. As described above for `--max_cpus`, the default value for `-qs` is set to the total number of cores on the system. This parameter allows you to adjust the maximum number of cores Nextflow can use at any given moment. - -:::danger[`-qs` allows you to play nicely on shared resources] -From the example above, if you have a system with 24-cores. The default queue size if 24 slots. - -`bactopia ... --max_cpus 4` says *for any particular task, use a maximum of 4 slots*. Nextflow will give each task 4 slots out of 24 slots. But there might be other people also using the server. - -`bactopia ... --max_cpus 4 -qs 12` says *for any particular task, use a maximum of 4 slots, but don't use more than 12 slots*. Nextflow will give each task 4 slots out of 12 slots. Now instead of using all the cores on the server, the maximum that can be used in 12. -::: - -:::info[`-qs` might need adjusting for job schedulers.] -The default value for `-qs` is set to 100 when using a job scheduler (e.g. SLURM, AWS Batch). There may be times when you need adjust this to meet your needs. For example, if using AWS Batch you might want to increase the value to have more jobs processed at once (e.g. 100 vs 500). 
-::: - - -## `--genome_size` -Throughout the Bactopia workflow a genome size is used for various tasks. By default, a genome size is estimated using Mash. However, users can provide their own value for genome size, use values based on [Species Specific Datasets](datasets/#species-specific), or completely disable it. - -| Value | Result | -|-------|--------| -| *empty* | Mash is used to estimate the genome size | -| integer | Uses the genome size (e.g. `--genome_size 2800000`) provided by the user | -| 0 | Read error correct and read subsampling will be disabled. | -| min | Requires `--species`, the minimum completed genome size for a species is used | -| median | Requires `--species`, the median completed genome size for a species is used | -| mean | Requires `--species`, the mean completed genome size for a species is used | -| max | Requires `--species`, the maximum completed genome size for a species is used | - -:::danger[Mash may not be the most accurate estimate] -Mash is very convenient to quickly estimate a genome size, but it may not be the most accurate in all cases and will differ between samples. It is recommended that when possible a known genome size or one based off completed genomes should be used. -::: - -## `--nfconfig` -A key feature of Nextflow is you can provide your own config files. What this boils down to you can easily set Bactopia to run on your environment. With `--nfconfig` you can tell Bactopia to import your config file. - -`--nfconfig` has been set up so that it is the last config file to be loaded by Nextflow. This means that if your config file contains variables (e.g. params or profiles) already set they will be overwritten by your values. - -[Nextflow goes into great details on how to create configuration files.](https://www.nextflow.io/docs/latest/config.html) Please check the following links for adjustsments you be interested in making. 
- -| Scope | Description | -|---------|-------------| -| [env](https://www.nextflow.io/docs/latest/config.html#scope-env) | Set any environment variables that might be required | -| [params](https://www.nextflow.io/docs/latest/config.html#scope-params) | Change the default values of command line arguments | -| [process](https://www.nextflow.io/docs/latest/config.html#scope-process) | Adjust per-process configurations such as containers, conda envs, or resource usage | -| [profile](https://www.nextflow.io/docs/latest/config.html#config-profiles) | Create predefined profiles for your [Executor](https://www.nextflow.io/docs/latest/operator.html#filtering-operators) | - -There are [many other scopes](https://www.nextflow.io/docs/latest/config.html#config-scopes) that you might be interested in checking out. - -You are most likely going to want to create a custom profile. By doing so you can specify it at runtime (`-profile myProfile`) and Nextflow will be executed based on that profile. Often times your custom profile will include information on the executor (queues, allocations, paths, etc...). - -If you need help please [reach out](https://github.com/bactopia/bactopia/issues/new/choose)! - -*If you're using the standard profile (did not specify -profile 'xyz') this might not be necessary.* - -## `-resume` -Bactopia relies on [Nextflow's Resume Feature](https://www.nextflow.io/docs/latest/getstarted.html#modify-and-resume) to resume runs. You can tell Bactopia to resume by adding `-resume` to your command line. When `-resume` is used, Nextflow will review the cache and determine if the previous run is resumable. If the previous run is not resumable, execution will -start at the beginning. - -## `--keep_all_files` -In some processes, Bactopia will delete large intermediate files (e.g. multiple uncompressed FASTQs) **only** after a process successfully completes. Since this is a per-process function, it does not affect Nextflow's ability to resume (`-resume`) a workflow.
You can deactivate this feature using `--keep_all_files`. Please, keep in mind the *work* directory is already large, this will make it 2-3 times larger. \ No newline at end of file diff --git a/docs/bactopia-tools/whatsgnu.md b/docs/bactopia-tools/whatsgnu.md deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/bactopia/annotator/bakta.md b/docs/bactopia/annotator/bakta.md deleted file mode 100644 index 03669274..00000000 --- a/docs/bactopia/annotator/bakta.md +++ /dev/null @@ -1,142 +0,0 @@ ---- -title: bakta -description: A Bactopia Tool which uses Bakta to rapidly provide extensive annotations (tRNA, tmRNA, rRNA, ncRNA, CRISPR, CDS, pseudogenes, and sORFs) in a standardized fashion. ---- - -The `bakta` module uses [Bakta](https://github.com/oschwengers/bakta) to rapidly annotate bacterial -genomes and plasmids in a standardized fashion. Bakta makes use of a large database ([40+ GB](https://doi.org/10.5281/zenodo.4247252)) -to provide extensive annotations including: tRNA, tmRNA, rRNA, ncRNA, CRISPR, CDS, and sORFs. - - -## Output Overview - -Below is the default output structure for the `bakta` step in Bactopia. Where -possible the file descriptions below were modified from a tools description. - -```bash - -├── -│ └── main -│ └── annotator -│ └── bakta -│ ├── -blastdb.tar.gz -│ ├── .embl.gz -│ ├── .faa.gz -│ ├── .ffn.gz -│ ├── .fna.gz -│ ├── .gbff.gz -│ ├── .gff3.gz -│ ├── .hypotheticals.faa.gz -│ ├── .hypotheticals.tsv -│ ├── .tsv -│ ├── .txt -│ └── logs -│ ├── nf-bakta.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── bakta- - └── nf-reports - ├── bakta-dag.dot - ├── bakta-report.html - ├── bakta-timeline.html - └── bakta-trace.txt - -``` - - - -### Results - -#### Bakta - -Below is a description of the _per-sample_ results from [Bakta](https://github.com/oschwengers/bakta). 
- - -| Extension | Description | -|-------------------------------|-------------| -| .blastdb.tar.gz | A gzipped tar archive of BLAST+ database of the contigs, genes, and proteins | -| .embl.gz | Annotations & sequences in (multi) EMBL format | -| .faa.gz | CDS/sORF amino acid sequences as FASTA | -| .ffn.gz | Feature nucleotide sequences as FASTA | -| .fna.gz | Replicon/contig DNA sequences as FASTA | -| .gbff.gz | Annotations & sequences in (multi) GenBank format | -| .gff3.gz | Annotations & sequences in GFF3 format | -| .hypotheticals.faa.gz | Hypothetical protein CDS amino acid sequences as FASTA | -| .hypotheticals.tsv | Further information on hypothetical protein CDS as simple human readable tab separated values | -| .tsv | Annotations as simple human readable tab separated values | -| .txt | Broad summary of `Bakta` annotations | - - - - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions -were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -## Parameters - - -### Bakta Download - - -| Parameter | Description | -|:---|---| -| ` --bakta_db` | Tarball or path to the Bakta database
**Type:** `string` | -| ` --bakta_db_type` | Which Bakta DB to download 'full' (~30GB) or 'light' (~2GB)
**Type:** `string`, **Default:** `full` | -| ` --bakta_save_as_tarball` | Save the Bakta database as a tarball
**Type:** `boolean` | -| ` --download_bakta` | Download the Bakta database to the path given by --bakta_db
**Type:** `boolean` | - -### Bakta - - -| Parameter | Description | -|:---|---| -| ` --proteins` | FASTA file of trusted proteins to first annotate from
**Type:** `string` | -| ` --prodigal_tf` | Training file to use for Prodigal
**Type:** `string` | -| ` --replicons` | Replicon information table (tsv/csv)
**Type:** `string` | -| ` --min_contig_length` | Minimum contig size to annotate
**Type:** `integer`, **Default:** `1` | -| ` --keep_contig_headers` | Keep original contig headers
**Type:** `boolean` | -| ` --compliant` | Force GenBank/ENA/DDBJ compliance
**Type:** `boolean` | -| ` --skip_trna` | Skip tRNA detection & annotation
**Type:** `boolean` | -| ` --skip_tmrna` | Skip tmRNA detection & annotation
**Type:** `boolean` | -| ` --skip_rrna` | Skip rRNA detection & annotation
**Type:** `boolean` | -| ` --skip_ncrna` | Skip ncRNA detection & annotation
**Type:** `boolean` | -| ` --skip_ncrna_region` | Skip ncRNA region detection & annotation
**Type:** `boolean` | -| ` --skip_crispr` | Skip CRISPR array detection & annotation
**Type:** `boolean` | -| ` --skip_cds` | Skip CDS detection & annotation
**Type:** `boolean` | -| ` --skip_sorf` | Skip sORF detection & annotation
**Type:** `boolean` | -| ` --skip_gap` | Skip gap detection & annotation
**Type:** `boolean` | -| ` --skip_ori` | Skip oriC/oriT detection & annotation
**Type:** `boolean` | -| ` --bakta_opts` | Extra Bakta options in quotes. Example: '--gram +'
**Type:** `string` | - -## Citations -If you use Bactopia and `bakta` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Bakta](https://github.com/oschwengers/bakta) - Schwengers O, Jelonek L, Dieckmann MA, Beyvers S, Blom J, Goesmann A [Bakta - rapid and standardized annotation of bacterial genomes via alignment-free sequence identification.](https://doi.org/10.1099/mgen.0.000685) _Microbial Genomics_ 7(11) (2021) - diff --git a/docs/bactopia/annotator/prokka.md b/docs/bactopia/annotator/prokka.md deleted file mode 100644 index f9c8ddef..00000000 --- a/docs/bactopia/annotator/prokka.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -title: prokka -description: A Bactopia Tool which uses Prokka to rapidly provide annotations in a standardized fashion. ---- - -The `prokka` module uses [Prokka](https://github.com/tseemann/prokka) to rapidly annotate bacterial -genomes in a standardized fashion. - - -## Output Overview - -Below is the default output structure for the `prokka` step in Bactopia. Where -possible the file descriptions below were modified from a tool's description. - -```bash - -├── -│ └── main -│ └── annotator -│ └── prokka -│ ├── -blastdb.tar.gz -│ ├── .faa.gz -│ ├── .ffn.gz -│ ├── .fna.gz -│ ├── .fsa.gz -│ ├── .gbk.gz -│ ├── .gff.gz -│ ├── .sqn.gz -│ ├── .tbl.gz -│ ├── .tsv -│ ├── .txt -│ └── logs -│ ├── .{err|log} -│ ├── nf-prokka.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── prokka- - └── nf-reports - ├── prokka-dag.dot - ├── prokka-report.html - ├── prokka-timeline.html - └── prokka-trace.txt - -``` - - - -### Results - -#### Prokka - -Below is a description of the _per-sample_ results from [Prokka](https://github.com/tseemann/prokka).
- - -| Extension | Description | -|-------------------------------|-------------| -| .blastdb.tar.gz | A gzipped tar archive of BLAST+ database of the contigs, genes, and proteins | -| .faa.gz | Protein FASTA file of the translated CDS sequences. | -| .ffn.gz | Nucleotide FASTA file of all the prediction transcripts (CDS, rRNA, tRNA, tmRNA, misc_RNA) | -| .fna.gz | Nucleotide FASTA file of the input contig sequences. | -| .gbk.gz | This is a standard GenBank file derived from the master .gff. If the input to prokka was a multi-FASTA, then this will be a multi-GenBank, with one record for each sequence. | -| .gff.gz | This is the master annotation in GFF3 format, containing both sequences and annotations. It can be viewed directly in Artemis or IGV. | -| .sqn.gz | An ASN1 format "Sequin" file for submission to GenBank. It needs to be edited to set the correct taxonomy, authors, related publication etc. | -| .tbl.gz | Feature Table file, used by "tbl2asn" to create the .sqn file. | -| .tsv | Tab-separated file of all features (locus_tag,ftype,len_bp,gene,EC_number,COG,product) | -| .txt | Statistics relating to the annotated features found. | - - - - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions -were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -## Parameters - - -### Prokka - - -| Parameter | Description | -|:---|---| -| ` --proteins` | FASTA file of trusted proteins to first annotate from
**Type:** `string` | -| ` --prodigal_tf` | Training file to use for Prodigal
**Type:** `string` | -| ` --compliant` | Force GenBank/ENA/DDBJ compliance
**Type:** `boolean` | -| ` --centre` | Sequencing centre ID
**Type:** `string`, **Default:** `Bactopia` | -| ` --prokka_coverage` | Minimum coverage on query protein
**Type:** `integer`, **Default:** `80` | -| ` --prokka_evalue` | Similarity e-value cut-off
**Type:** `string`, **Default:** `1e-09` | -| ` --prokka_opts` | Extra Prokka options in quotes.
**Type:** `string` | - -## Citations -If you use Bactopia and `prokka` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Prokka](https://github.com/tseemann/prokka) - Seemann T [Prokka: rapid prokaryotic genome annotation](http://dx.doi.org/10.1093/bioinformatics/btu153) _Bioinformatics_ 30, 2068–2069 (2014) - diff --git a/docs/bactopia/antimicrobial-resistance/amrfinderplus.md b/docs/bactopia/antimicrobial-resistance/amrfinderplus.md deleted file mode 100644 index bf490c68..00000000 --- a/docs/bactopia/antimicrobial-resistance/amrfinderplus.md +++ /dev/null @@ -1,129 +0,0 @@ ---- -title: amrfinderplus -description: A Bactopia Tool which uses AMRFinder+ to screen assemblies and proteins for antimicrobial resistance and virulence genes. ---- - -The `amrfinderplus` module uses [AMRFinder+](https://github.com/ncbi/amr) to screen assemblies and proteins -for antimicrobial resistance and virulence genes. - - -## Output Overview - -Below is the default output structure for the `amrfinderplus` step in Bactopia. Where -possible the file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── tools -│ └── amrfinderplus -│ ├── -genes.tsv -│ ├── -proteins.tsv -│ └── logs -│ ├── nf-amrfinderplus.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── amrfinderplus- - ├── merged-results - │ ├── amrfinderplus-genes.tsv - │ ├── amrfinderplus-proteins.tsv - │ └── logs - │ └── amrfinderplus-{genes|proteins|-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── amrfinderplus-dag.dot - ├── amrfinderplus-report.html - ├── amrfinderplus-timeline.html - └── amrfinderplus-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| amrfinderplus-genes.tsv | A merged TSV file with `AMRFinder+` results using nucleotide inputs | -| amrfinderplus-proteins.tsv | A merged TSV file with `AMRFinder+` results using protein inputs | - - - - - - -#### AMRFinder+ - -Below is a description of the _per-sample_ results from [AMRFinder+](https://github.com/ncbi/amr). - - -| Extension | Description | -|-------------------------------|-------------| -| -genes.tsv | A TSV file with `AMRFinder+` results using nucleotide inputs | -| -proteins.tsv | A TSV file with `AMRFinder+` results using protein inputs | - - - - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions -were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -## Parameters - - -### AMRFinder+ - - -| Parameter | Description | -|:---|---| -| ` --ident_min` | Minimum proportion of identical amino acids in alignment for hit (0..1)
**Type:** `number`, **Default:** `-1` | -| ` --coverage_min` | Minimum coverage of the reference protein (0..1)
**Type:** `number`, **Default:** `0.5` | -| ` --organism` | Taxonomy group to run additional screens against
**Type:** `string` | -| ` --translation_table` | NCBI genetic code for translated BLAST
**Type:** `integer`, **Default:** `11` | -| ` --amrfinder_noplus` | Disable running AMRFinder+ with the --plus option
**Type:** `boolean` | -| ` --report_common` | Report proteins common to a taxonomy group
**Type:** `boolean` | -| ` --report_all_equal` | Report all equally-scoring BLAST and HMM matches
**Type:** `boolean` | -| ` --amrfinder_opts` | Extra AMRFinder+ options in quotes.
**Type:** `string` | -| ` --amrfinder_db` | A custom AMRFinder+ database to use, either a tarball or a folder
**Type:** `string` | - -## Citations -If you use Bactopia and `amrfinderplus` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [AMRFinderPlus](https://github.com/ncbi/amr) - Feldgarden M, Brover V, Haft DH, Prasad AB, Slotta DJ, Tolstoy I, Tyson GH, Zhao S, Hsu C-H, McDermott PF, Tadesse DA, Morales C, Simmons M, Tillman G, Wasilenko J, Folster JP, Klimke W [Validating the NCBI AMRFinder Tool and Resistance Gene Database Using Antimicrobial Resistance Genotype-Phenotype Correlations in a Collection of NARMS Isolates](https://doi.org/10.1128/AAC.00483-19). _Antimicrob. Agents Chemother._ (2019) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - diff --git a/docs/bactopia/assembler.md b/docs/bactopia/assembler.md deleted file mode 100644 index 5daebee3..00000000 --- a/docs/bactopia/assembler.md +++ /dev/null @@ -1,360 +0,0 @@ ---- -title: assembler -description: A Bactopia Tool which uses a variety of assembly tools to create an assembly of Illumina and Oxford Nanopore reads. - ---- - -The `assembler` module uses a variety of assembly tools to create an assembly of -Illumina and Oxford Nanopore reads. 
The tools used are: - -| Tool | Description | -|------|-------------| -| [Dragonflye](https://github.com/rpetit3/dragonflye) | Assembly of Oxford Nanopore reads, as well as hybrid assembly with short-read polishing | -| [Shovill](https://github.com/tseemann/shovill) | Assembly of Illumina paired-end reads | -| [Shovill-SE](https://github.com/rpetit3/shovill) | Assembly of Illumina single-end reads | -| [Unicycler](https://github.com/rrwick/Unicycler) | Hybrid assembly, using short-reads first then long-reads | - -Summary statistics for each assembly are generated using [assembly-scan](https://github.com/rpetit3/assembly-scan). - - -## Output Overview - -Below is the default output structure for the `assembler` step in Bactopia. Where -possible the file descriptions below were modified from a tools description. - -```bash - -├── -│ └── main -│ └── assembler -│ ├── flash.{hist|histogram} -| | flye.info -│ ├── logs -│ │ ├── {dragonflye|shovill|unicycler}.log -│ │ ├── nf-assembler.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── .fna.gz -│ ├── .tsv -│ ├── -assembly-error.txt -│ ├── shovill.corrections -│ ├── {flye|miniasm|raven|unicycler}-unpolished.fasta.gz -│ └── {flye|megahit|miniasm|raven|spades|unicycler|velvet}-unpolished.gfa.gz -└── bactopia-runs - └── bactopia- - ├── merged-results - │ ├── assembly-scan.tsv - │ └── logs - │ └── assembly-scan-concat - │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ └── versions.yml - └── nf-reports - ├── bactopia-dag.dot - ├── bactopia-report.html - ├── bactopia-timeline.html - └── bactopia-trace.txt - -``` - -:::info[Directory structure might be different] - -Depending on the options used at runtime, the `assembler` directory structure might -be different, but the output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. 
- - -| Filename | Description | -|-------------------------------|-------------| -| assembly-scan.tsv | Assembly statistics for all samples | - - - - - - -#### Dragonflye - -Below is a description of the _per-sample_ results for Oxford Nanopore reads using -[Dragonflye](https://github.com/rpetit3/dragonflye). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.fna.gz | The final assembly produced by Dragonflye | -| <SAMPLE_NAME>.tsv | A tab-delimited file containing assembly statistics | -| flye-info.txt | A log file containing information about the Flye assembly | -| \{flye\|miniasm\|raven\}-unpolished.fasta.gz | Raw unprocessed assembly produced by the used assembler | -| \{flye\|miniasm\|raven\}-unpolished.gfa.gz | Raw unprocessed assembly graph produced by the used assembler | - - - - - - -#### Shovill - -Below is a description of the _per-sample_ results for Illumina reads using -[Shovill](https://github.com/tseemann/shovill) or [Shovill-SE](https://github.com/rpetit3/shovill). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.fna.gz | The final assembly produced by Dragonflye | -| <SAMPLE_NAME>.tsv | A tab-delimited file containing assembly statistics | -| flash.hist | (Paired-End Only) Numeric histogram of merged read lengths. | -| flash.histogram | (Paired-End Only) Visual histogram of merged read lengths | -| \{megahit\|spades\|velvet\}-unpolished.gfa.gz | Raw unprocessed assembly graph produced by the used assembler | -| shovill.corrections | List of post-assembly corrections made by Shovill | - - - - - - -#### Hybrid Assembly (Unicycler) - -Below is a description of the _per-sample_ results for a hybrid assembly using -[Unicycler](https://github.com/rrwick/Unicycler) (`--hybrid`). When using Unicycler, -the short-reads are assembled first, then the long-reads are used to polish the -assembly. 
- - -| Filename | Description | -|-------------------------------|-------------| -| &lt;SAMPLE_NAME&gt;.fna.gz | The final assembly produced by Unicycler | -| &lt;SAMPLE_NAME&gt;.tsv | A tab-delimited file containing assembly statistics | -| unicycler-unpolished.fasta.gz | Raw unprocessed assembly produced by Unicycler | -| unicycler-unpolished.gfa.gz | Raw unprocessed assembly graph produced by Unicycler | - - - - - - -#### Hybrid Assembly (Short Read Polishing) - -Below is a description of the _per-sample_ results for a hybrid assembly using -[Dragonflye](https://github.com/rpetit3/dragonflye) (`--short_polish`). When using -Dragonflye, the long-reads are assembled first, then the short-reads are used -to polish the assembly. - -:::tip[Prefer `--short_polish` over `--hybrid` with recent ONT sequencing] -Using [Unicycler](https://github.com/rrwick/Unicycler) (`--hybrid`) to create a hybrid -assembly works great when you have low-coverage noisy long-reads. However, if you are -using recent ONT sequencing, you likely have high coverage and using the `--short_polish` -method is going to yield better results (_and be faster!_) than `--hybrid`. -::: - - -| Filename | Description | -|-------------------------------|-------------| -| &lt;SAMPLE_NAME&gt;.fna.gz | The final assembly produced by Dragonflye | -| &lt;SAMPLE_NAME&gt;.tsv | A tab-delimited file containing assembly statistics | -| flye-info.txt | A log file containing information about the Flye assembly | -| \{flye\|miniasm\|raven\}-unpolished.fasta.gz | Raw unprocessed assembly produced by the used assembler | -| \{flye\|miniasm\|raven\}-unpolished.gfa.gz | Raw unprocessed assembly graph produced by the used assembler | - - - - - - -#### Failed Quality Checks - -Built into Bactopia are a few basic quality checks to help prevent downstream failures. -If a sample fails one of these checks, it will be excluded from further analysis. By -excluding these samples, complete pipeline failures are prevented.
- - -| Extension | Description | -|-------------------------------|-------------| -| -assembly-error.txt | Sample failed read count checks and excluded from further analysis | - - - -:::info[Poor samples are excluded to prevent downstream failures] -Samples that fail any of the QC checks will be excluded from further analysis. -Those samples will generate a `*-error.txt` file with the error message. Excluding -these samples prevents downstream failures that cause the whole workflow to fail. -::: - - - -
-Example Error: Assembled Successfully, but 0 Contigs - -If a sample assembles successfully, but 0 contigs are formed, the sample will be -excluded from further analysis. - -__Example Text from <SAMPLE_NAME>-assembly-error.txt__ -_<SAMPLE_NAME> assembled successfully, but 0 contigs were formed. Please investigate -<SAMPLE_NAME> to determine a cause (e.g. metagenomic, contaminants, etc...) for this -outcome. Further assembly-based analysis of <SAMPLE_NAME> will be discontinued._ - -
- -
-Example Error: Assembled successfully, but poor assembly size - -If your sample assembles successfully, but the assembly size is less than the minimum -allowed genome size, the sample will be excluded from further analysis. You can -adjust this minimum size using the `--min_genome_size` parameter. - -__Example Text from &lt;SAMPLE_NAME&gt;-assembly-error.txt__ -_&lt;SAMPLE_NAME&gt; assembled size (000 bp) is less than the minimum allowed genome -size (000 bp). If this is unexpected, please investigate &lt;SAMPLE_NAME&gt; to -determine a cause (e.g. metagenomic, contaminants, etc...) for the poor assembly. -Otherwise, adjust the `--min_genome_size` parameter to fit your need. Further -assembly based analysis of &lt;SAMPLE_NAME&gt; will be discontinued._ - -
- - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions -were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -## Parameters - - -### Assembler - - -| Parameter | Description | -|:---|---| -| ` --shovill_assembler` | Assembler to be used by Shovill
**Type:** `string`, **Default:** `skesa` | -| ` --dragonflye_assembler` | Assembler to be used by Dragonflye
**Type:** `string`, **Default:** `flye` | -| ` --use_unicycler` | Use unicycler for paired end assembly
**Type:** `boolean` | -| ` --min_contig_len` | Minimum contig length <0=AUTO>
**Type:** `integer`, **Default:** `500` | -| ` --min_contig_cov` | Minimum contig coverage <0=AUTO>
**Type:** `integer`, **Default:** `2` | -| ` --contig_namefmt` | Format of contig FASTA IDs in 'printf' style
**Type:** `string` | -| ` --shovill_opts` | Extra assembler options in quotes for Shovill
**Type:** `string` | -| ` --shovill_kmers` | K-mers to use
**Type:** `string` | -| ` --dragonflye_opts` | Extra assembler options in quotes for Dragonflye
**Type:** `string` | -| ` --trim` | Enable adaptor trimming
**Type:** `boolean` | -| ` --no_stitch` | Disable read stitching for paired-end reads
**Type:** `boolean` | -| ` --no_corr` | Disable post-assembly correction
**Type:** `boolean` | -| ` --unicycler_mode` | Bridging mode used by Unicycler
**Type:** `string`, **Default:** `normal` | -| ` --min_polish_size` | Contigs shorter than this value (bp) will not be polished using Pilon
**Type:** `integer`, **Default:** `10000` | -| ` --min_component_size` | Graph components smaller than this size (bp) will be removed from the final graph
**Type:** `integer`, **Default:** `1000` | -| ` --min_dead_end_size` | Graph dead ends smaller than this size (bp) will be removed from the final graph
**Type:** `integer`, **Default:** `1000` | -| ` --nanohq` | For Flye, use '--nano-hq' instead of --nano-raw
**Type:** `boolean` | -| ` --medaka_model` | The model to use for Medaka polishing
**Type:** `string` | -| ` --medaka_rounds` | The number of Medaka polishing rounds to conduct
**Type:** `integer` | -| ` --racon_rounds` | The number of Racon polishing rounds to conduct
**Type:** `integer`, **Default:** `1` | -| ` --no_polish` | Skip the assembly polishing step
**Type:** `boolean` | -| ` --no_miniasm` | Skip miniasm+Racon bridging
**Type:** `boolean` | -| ` --no_rotate` | Do not rotate completed replicons to start at a standard gene
**Type:** `boolean` | -| ` --reassemble` | If reads were simulated, they will be used to create a new assembly.
**Type:** `boolean` | -| ` --polypolish_rounds` | Number of polishing rounds to conduct with Polypolish for short read polishing
**Type:** `integer`, **Default:** `1` | -| ` --pilon_rounds` | Number of polishing rounds to conduct with Pilon for short read polishing
**Type:** `integer` | - -## Citations -If you use Bactopia and `assembler` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [any2fasta](https://github.com/tseemann/any2fasta) - Seemann T [any2fasta: Convert various sequence formats to FASTA](https://github.com/tseemann/any2fasta) (GitHub) - -- [assembly-scan](https://github.com/rpetit3/assembly-scan) - Petit III RA [assembly-scan: generate basic stats for an assembly](https://github.com/rpetit3/assembly-scan) (GitHub) - -- [BWA](https://github.com/lh3/bwa/) - Li H [Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM](http://arxiv.org/abs/1303.3997). _arXiv_ [q-bio.GN] (2013) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [Dragonflye](https://github.com/rpetit3/dragonflye) - Petit III RA [Dragonflye: Assemble bacterial isolate genomes from Nanopore reads.](https://github.com/rpetit3/dragonflye) (GitHub) - -- [FLASH](https://ccb.jhu.edu/software/FLASH/) - Magoč T, Salzberg SL [FLASH: fast length adjustment of short reads to improve genome assemblies.](https://doi.org/10.1093/bioinformatics/btr507) _Bioinformatics_ 27.21 2957-2963 (2011) - -- [Flye](https://github.com/fenderglass/Flye) - Kolmogorov M, Yuan J, Lin Y, Pevzner P [Assembly of Long Error-Prone Reads Using Repeat Graphs](https://doi.org/10.1038/s41587-019-0072-8) _Nature Biotechnology_ (2019) - -- [Medaka](https://github.com/nanoporetech/medaka) - ONT Research [Medaka: Sequence correction provided by ONT Research](https://github.com/nanoporetech/medaka) (GitHub) - -- [MEGAHIT](https://github.com/voutcn/megahit) - Li D, Liu C-M, Luo R, Sadakane K, Lam T-W [MEGAHIT: an ultra-fast single-node 
solution for large and complex metagenomics assembly via succinct de Bruijn graph.](https://doi.org/10.1093/bioinformatics/btv033) _Bioinformatics_ 31.10 1674-1676 (2015) - -- [Miniasm](https://github.com/lh3/miniasm) - Li H [Miniasm: Ultrafast de novo assembly for long noisy reads](https://github.com/lh3/miniasm) (GitHub) - -- [Minimap2](https://github.com/lh3/minimap2) - Li H [Minimap2: pairwise alignment for nucleotide sequences.](https://doi.org/10.1093/bioinformatics/bty191) _Bioinformatics_ 34:3094-3100 (2018) - -- [Nanoq](https://github.com/esteinig/nanoq) - Steinig E [Nanoq: Minimal but speedy quality control for nanopore reads in Rust](https://github.com/esteinig/nanoq) (GitHub) - -- [Pigz](https://zlib.net/pigz/) - Adler M. [pigz: A parallel implementation of gzip for modern multi-processor, multi-core machines.](https://zlib.net/pigz/) _Jet Propulsion Laboratory_ (2015) - -- [Pilon](https://github.com/broadinstitute/pilon/) - Walker BJ, Abeel T, Shea T, Priest M, Abouelliel A, Sakthikumar S, Cuomo CA, Zeng Q, Wortman J, Young SK, Earl AM [Pilon: an integrated tool for comprehensive microbial variant detection and genome assembly improvement.](https://doi.org/10.1371/journal.pone.0112963) _PloS one_ 9.11 e112963 (2014) - -- [Racon](https://github.com/lbcb-sci/racon) - Vaser R, Sović I, Nagarajan N, Šikić M [Fast and accurate de novo genome assembly from long uncorrected reads.](http://dx.doi.org/10.1101/gr.214270.116) _Genome Res_ 27, 737–746 (2017) - -- [Rasusa](https://github.com/mbhall88/rasusa) - Hall MB [Rasusa: Randomly subsample sequencing reads to a specified coverage.](https://doi.org/10.5281/zenodo.3731394) (2019). 
- -- [Raven](https://github.com/lbcb-sci/raven) - Vaser R, Šikić M [Time- and memory-efficient genome assembly with Raven.](https://doi.org/10.1038/s43588-021-00073-4) _Nat Comput Sci_ 1, 332–336 (2021) - -- [samclip](https://github.com/tseemann/samclip) - Seemann T [Samclip: Filter SAM file for soft and hard clipped alignments](https://github.com/tseemann/samclip) (GitHub) - -- [Samtools](https://github.com/samtools/samtools) - Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R [The Sequence Alignment/Map format and SAMtools](http://dx.doi.org/10.1093/bioinformatics/btp352). _Bioinformatics_ 25, 2078–2079 (2009) - -- [Shovill](https://github.com/tseemann/shovill) - Seemann T [Shovill: De novo assembly pipeline for Illumina paired reads](https://github.com/tseemann/shovill) (GitHub) - -- [Shovill-SE](https://github.com/rpetit3/shovill) - Petit III RA [Shovill-SE: A fork of Shovill that includes support for single end reads.](https://github.com/rpetit3/shovill) (GitHub) - -- [SKESA](https://github.com/ncbi/SKESA) - Souvorov A, Agarwala R, Lipman DJ [SKESA: strategic k-mer extension for scrupulous assemblies.](https://doi.org/10.1186/s13059-018-1540-z) _Genome Biology_ 19:153 (2018) - -- [SPAdes](https://github.com/ablab/spades) - Bankevich A, Nurk S, Antipov D, Gurevich AA, Dvorkin M, Kulikov AS, Lesin VM, Nikolenko SI, Pham S, Prjibelski AD, Pyshkin AV, Sirotkin AV, Vyahhi N, Tesler G, Alekseyev MA, Pevzner PA [SPAdes: a new genome assembly algorithm and its applications to single-cell sequencing.](https://doi.org/10.1089/cmb.2012.0021) _Journal of computational biology_ 19.5 455-477 (2012) - -- [Unicycler](https://github.com/rrwick/Unicycler) - Wick RR, Judd LM, Gorrie CL, Holt KE [Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads.](http://dx.doi.org/10.1371/journal.pcbi.1005595) _PLoS Comput. 
Biol._ 13, e1005595 (2017) - -- [Velvet](https://github.com/dzerbino/velvet) - Zerbino DR, Birney E [Velvet: algorithms for de novo short read assembly using de Bruijn graphs.](http://www.genome.org/cgi/doi/10.1101/gr.074492.107) _Genome research_ 18.5 821-829 (2008) - diff --git a/docs/bactopia/gather.md b/docs/bactopia/gather.md deleted file mode 100644 index 3444ce8e..00000000 --- a/docs/bactopia/gather.md +++ /dev/null @@ -1,226 +0,0 @@ ---- -title: gather -description: A Bactopia Tool which gathers all the input samples into a single place, including downloading samples from ENA/SRA or NCBI Assembly. - ---- - -The main purpose of the `gather` step is to get all the samples into a single place. This -includes downloading samples from ENA/SRA or NCBI Assembly. The tools used are: - -| Tool | Description | -|------|-------------| -| [art](https://www.niehs.nih.gov/research/resources/software/biostatistics/art/index.cfm) | For simulating error-free reads for an input assembly | -| [fastq-dl](https://github.com/rpetit3/fastq-dl) | Downloading FASTQ files from ENA/SRA | -| [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) | Downloading FASTA files from NCBI Assembly | - -This `gather` step also does basic QC checks to help prevent downstream failures. - - -## Output Overview - -Below is the default output structure for the `gather` step in Bactopia. Where -possible the file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── main -│ └── gather -│ ├── logs -│ │ ├── nf-gather.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── -gzip-error.txt -│ ├── -low-basepair-proportion-error.txt -│ ├── -low-read-count-error.txt -│ ├── -low-sequence-depth-error.txt -│ └── -meta.tsv -└── bactopia-runs - └── bactopia- - ├── merged-results - │ ├── logs - │ │ └── meta-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── meta.tsv - └── nf-reports - ├── bactopia-dag.dot - ├── bactopia-report.html - ├── bactopia-timeline.html - └── bactopia-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| meta.tsv | A tab-delimited file with bactopia metadata for all samples | - - - - - - -#### gather - -Below is a description of the _per-sample_ results from the `gather` subworkflow. - - -| Extension | Description | -|-------------------------------|-------------| -| -meta.tsv | A tab-delimited file with bactopia metadata for each sample | - - - - - - -#### Failed Quality Checks - -Built into Bactopia are few basic quality checks to help prevent downstream failures. -If a sample fails one of these checks, it will be excluded from further analysis. By -excluding these samples, complete pipeline failures are prevented. 
- - -| Extension | Description | -|-------------------------------|-------------| -| -gzip-error.txt | Sample failed Gzip checks and excluded from further analysis | -| -low-basepair-proportion-error.txt | Sample failed basepair proportion checks and excluded from further analysis | -| -low-read-count-error.txt | Sample failed read count checks and excluded from further analysis | -| -low-sequence-depth-error.txt | Sample failed sequenced basepair checks and excluded from further analysis | - - - -:::info[Poor samples are excluded to prevent downstream failures] -Samples that fail any of the QC checks will be excluded from further analysis. -Those samples will generate a `*-error.txt` file with the error message. Excluding -these samples prevents downstream failures that cause the whole workflow to fail. -::: - - - -
-Example Error: Input FASTQ(s) failed Gzip checks - -If input FASTQ(s) fail to pass Gzip test, the sample will be excluded from -further analysis. - -__Example Text from <SAMPLE_NAME>-gzip-error.txt__ -_<SAMPLE_NAME> FASTQs failed Gzip tests. Please check the input FASTQs. Further -analysis is discontinued._ - -
- -
-Example Error: Input FASTQs have disproportionate number of reads - -If input FASTQ(s) for a sample have disproportionately different number of reads -between the two pairs, the sample will be excluded from further analysis. You can -adjust this minimum proportion using the `--min_proportion` parameter. - -__Example Text from <SAMPLE_NAME>-low-basepair-proportion-error.txt__ -_<SAMPLE_NAME> FASTQs failed to meet the minimum shared basepairs (`X`). They -shared `Y` basepairs, with R1 having `A` bp and R2 having `B` bp. Further -analysis is discontinued._ - -
- -
-Example Error: Input FASTQ(s) has too few reads - -If input FASTQ(s) for a sample have less than the minimum required reads, the -sample will be excluded from further analysis. You can adjust this minimum read -count using the `--min_reads` parameter. - -__Example Text from <SAMPLE_NAME>-low-read-count-error.txt__ -_<SAMPLE_NAME> FASTQ(s) contain `X` total reads. This does not exceed the required -minimum `Y` read count. Further analysis is discontinued._ - -
- -
-Example Error: Input FASTQ(s) have too few sequenced basepairs - -If input FASTQ(s) for a sample fails to meet the minimum number of sequenced -basepairs, the sample will be excluded from further analysis. You can -adjust this minimum basepair count using the `--min_basepairs` parameter. - -__Example Text from <SAMPLE_NAME>-low-sequence-depth-error.txt__ -_<SAMPLE_NAME> FASTQ(s) contain `X` total basepairs. This does not exceed the -required minimum `Y` bp. Further analysis is discontinued._ - -
- - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions -were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -## Parameters - - -### Gather - - -| Parameter | Description | -|:---|---| -| ` --skip_fastq_check` | Skip minimum requirement checks for input FASTQs
**Type:** `boolean` | -| ` --min_basepairs` | The minimum amount of basepairs required to continue downstream analyses.
**Type:** `integer`, **Default:** `2241820` | -| ` --min_reads` | The minimum amount of reads required to continue downstream analyses.
**Type:** `integer`, **Default:** `7472` | -| ` --min_coverage` | The minimum amount of coverage required to continue downstream analyses.
**Type:** `integer`, **Default:** `10` | -| ` --min_proportion` | The minimum proportion of basepairs for paired-end reads to continue downstream analyses.
**Type:** `number`, **Default:** `0.5` | -| ` --min_genome_size` | The minimum estimated genome size allowed for the input sequence to continue downstream analyses.
**Type:** `integer`, **Default:** `100000` | -| ` --max_genome_size` | The maximum estimated genome size allowed for the input sequence to continue downstream analyses.
**Type:** `integer`, **Default:** `18040666` | -| ` --attempts` | Maximum times to attempt downloads
**Type:** `integer`, **Default:** `3` | -| ` --use_ena` | Download FASTQs from ENA
**Type:** `boolean` | -| ` --no_cache` | Skip caching the assembly summary file from ncbi-genome-download
**Type:** `boolean` | - -## Citations -If you use Bactopia and `gather` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [ART](https://www.niehs.nih.gov/research/resources/software/biostatistics/art/index.cfm) - Huang W, Li L, Myers JR, Marth GT [ART: a next-generation sequencing read simulator.](http://dx.doi.org/10.1093/bioinformatics/btr708) _Bioinformatics_ 28, 593–594 (2012) - -- [fastq-dl](https://github.com/rpetit3/fastq-dl) - Petit III RA [fastq-dl: Download FASTQ files from SRA or ENA repositories.](https://github.com/rpetit3/fastq-dl) (GitHub) - -- [fastq-scan](https://github.com/rpetit3/fastq-scan) - Petit III RA [fastq-scan: generate summary statistics of input FASTQ sequences.](https://github.com/rpetit3/fastq-scan) (GitHub) - -- [ncbi-genome-download](https://github.com/kblin/ncbi-genome-download) - Blin K [ncbi-genome-download: Scripts to download genomes from the NCBI FTP servers](https://github.com/kblin/ncbi-genome-download) (GitHub) - -- [Pigz](https://zlib.net/pigz/) - Adler M. [pigz: A parallel implementation of gzip for modern multi-processor, multi-core machines.](https://zlib.net/pigz/) _Jet Propulsion Laboratory_ (2015) - diff --git a/docs/bactopia/merlin.md b/docs/bactopia/merlin.md deleted file mode 100644 index 596a559b..00000000 --- a/docs/bactopia/merlin.md +++ /dev/null @@ -1,936 +0,0 @@ ---- -title: merlin -description: A Bactopia Tool for the automatic selection and executions of species-specific tools. ---- - -_MinmER assisted species-specific bactopia tool seLectIoN_, or Merlin, uses distances based -on the RefSeq sketch downloaded by `bactopia datasets` to automatically run species-specific tools. 
- -Currently Merlin knows 16 spells which cover the following: - -| Genus/Species | Tools | -|---------------|-------| -| Escherichia / Shigella | [ECTyper](../bactopia-tools/ectyper.md), [ShigaTyper](../bactopia-tools/shigatyper.md), [ShigEiFinder](../bactopia-tools/shigeifinder.md), [ShigaPass](../bactopia-tools/shigapass.md) | -| Haemophilus | [hicap](../bactopia-tools/hicap.md), [HpsuisSero](../bactopia-tools/ssuissero.md) | -| Klebsiella | [Kleborate](../bactopia-tools/kleborate.md) | -| Legionella | [legsta](../bactopia-tools/legsta.md) | -| Listeria | [LisSero](../bactopia-tools/lissero.md) | -| Mycobacterium | [TBProfiler](../bactopia-tools/tbprofiler.md) | -| Neisseria | [meningotype](../bactopia-tools/meningotype.md), [ngmaster](../bactopia-tools/ngmaster.md) | -| Pseudomonas | [pasty](../bactopia-tools/pasty.md) | -| Salmonella | [SeqSero2](../bactopia-tools/seqsero2.md), [SISTR](../bactopia-tools/sistr.md) | -| Staphylococcus | [AgrVATE](../bactopia-tools/agrvate.md), [spaTyper](../bactopia-tools/spatyper.md), [staphopia-sccmec](../bactopia-tools/staphopiasccmec.md) | -| Streptococcus | [emmtyper](../bactopia-tools/emmtyper.md), [pbptyper](../bactopia-tools/pbptyper.md), [SsuisSero](../bactopia-tools/ssuissero.md) | - -Merlin is available as an independent Bactopia Tool, or in Bactopia with the `--ask_merlin` parameter. Even better, -if you want to force Merlin to execute all species-specific tools (no matter the distance), you can use `--full_merlin`. -Then all the spells will be unleashed! - - -## Output Overview - -Below is the default output structure for the `merlin` step in Bactopia. Where -possible the file descriptions below were modified from a tool's description. 
- -```bash - -├── -│ └── tools -│ ├── agrvate -│ │ ├── -agr_gp.tab -│ │ ├── -blastn_log.txt -│ │ ├── -hmm-log.txt -│ │ ├── -hmm.tab -│ │ ├── -summary.tab -│ │ └── logs -│ │ ├── nf-agrvate.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── ectyper -│ │ ├── .tsv -│ │ ├── blast_output_alleles.txt -│ │ └── logs -│ │ ├── ectyper.log -│ │ ├── nf-ectyper.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── emmtyper -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-emmtyper.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── genotyphi -│ │ ├── .csv -│ │ ├── .json -│ │ ├── .tsv -│ │ └── logs -│ │ ├── genotyphi -│ │ │ ├── nf-genotyphi.{begin,err,log,out,run,sh,trace} -│ │ │ └── versions.yml -│ │ └── mykrobe -│ │ ├── nf-genotyphi.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── hicap -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-hicap.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── hpsuissero -│ │ ├── _serotyping_res.tsv -│ │ └── logs -│ │ ├── nf-hpsuissero.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── kleborate -│ │ ├── .results.txt -│ │ └── logs -│ │ ├── nf-kleborate.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── legsta -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-legsta.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── mashdist -│ │ └── merlin -│ │ ├── -dist.txt -│ │ └── logs -│ │ ├── nf-mashdist.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── meningotype -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-meningotype.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── ngmaster -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-ngmaster.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── pasty -│ │ ├── .blastn.tsv -│ │ ├── .details.tsv -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-pasty.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── pbptyper -│ │ ├── -1A.tblastn.tsv -│ │ ├── -2B.tblastn.tsv -│ │ ├── -2X.tblastn.tsv -│ │ ├── .tsv -│ │ └── logs -│ │ ├── 
nf-pbptyper.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── seqsero2 -│ │ ├── _log.txt -│ │ ├── _result.tsv -│ │ ├── _result.txt -│ │ └── logs -│ │ ├── nf-seqsero2.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── seroba -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-seroba.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── shigatyper -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-shigatyper.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── shigeifinder -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-shigeifinder.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── sistr -│ │ ├── -allele.fasta.gz -│ │ ├── -allele.json.gz -│ │ ├── -cgmlst.csv -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-sistr.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── spatyper -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-spatyper.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── ssuissero -│ │ ├── _serotyping_res.tsv -│ │ └── logs -│ │ ├── nf-ssuissero.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── staphopiasccmec -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-staphopiasccmec.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── stecfinder -│ │ ├── .tsv -│ │ └── logs -│ │ ├── nf-stecfinder.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ └── tbprofiler -│ ├── .results.csv -│ ├── .results.json -│ ├── .results.txt -│ ├── bam -│ │ └── .bam -│ ├── logs -│ │ ├── nf-tbprofiler.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ └── vcf -│ └── .targets.csq.vcf.gz -└── bactopia-runs - └── merlin- - ├── merged-results - │ ├── agrvate.tsv - │ ├── ectyper.tsv - │ ├── emmtyper.tsv - │ ├── genotyphi.tsv - │ ├── hicap.tsv - │ ├── hpsuissero.tsv - │ ├── kleborate.tsv - │ ├── legsta.tsv - │ ├── logs - │ │ └── -concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ ├── meningotype.tsv - │ ├── ngmaster.tsv - │ ├── pasty.tsv - │ ├── pbptyper.tsv - │ ├── seqsero2.tsv - │ ├── seroba.tsv - │ ├── 
shigatyper.tsv - │ ├── shigeifinder.tsv - │ ├── sistr.tsv - │ ├── spatyper.tsv - │ ├── ssuissero.tsv - │ ├── staphopiasccmec.tsv - │ └── stecfinder.tsv - └── nf-reports - ├── merlin-dag.dot - ├── merlin-report.html - ├── merlin-timeline.html - └── merlin-trace.txt - -``` - -:::info[Directory structure might be different] - -Depending on the options used at runtime, the `merlin` directory structure might -be different, but the output descriptions below still apply. -::: - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. - - -| Filename | Description | -|-------------------------------|-------------| -| agrvate.tsv | A merged TSV file with `AgrVATE` results from all samples | -| clermontyping.csv | A merged TSV file with `ClermonTyping` results from all samples | -| ectyper.tsv | A merged TSV file with `ECTyper` results from all samples | -| emmtyper.tsv | A merged TSV file with `emmtyper` results from all samples | -| genotyphi.tsv | A merged TSV file with `genotyphi` results from all samples | -| hicap.tsv | A merged TSV file with `hicap` results from all samples | -| hpsuissero.tsv | A merged TSV file with `HpsuisSero` results from all samples | -| kleborate.tsv | A merged TSV file with `Kleborate` results from all samples | -| legsta.tsv | A merged TSV file with `legsta` results from all samples | -| lissero.tsv | A merged TSV file with `LisSero` results from all samples | -| meningotype.tsv | A merged TSV file with `meningotype` results from all samples | -| ngmaster.tsv | A merged TSV file with `ngmaster` results from all samples | -| pasty.tsv | A merged TSV file with `pasty` results from all samples | -| pbptyper.tsv | A merged TSV file with `pbptyper` results from all samples | -| seqsero2.tsv | A merged TSV file with `seqsero2` results from all samples | -| seroba.tsv | A merged TSV file with `seroba` results from all samples | -| shigapass.csv | A merged CSV file with `ShigaPass` results from all 
samples | -| shigatyper.tsv | A merged TSV file with `ShigaTyper` results from all samples | -| shigeifinder.tsv | A merged TSV file with `ShigEiFinder` results from all samples | -| sistr.tsv | A merged TSV file with `SISTR` results from all samples | -| spatyper.tsv | A merged TSV file with `spaTyper` results from all samples | -| ssuissero.tsv | A merged TSV file with `SsuisSero` results from all samples | -| staphopiasccmec.tsv | A merged TSV file with `staphopia-sccmec` results from all samples | -| stecfinder.tsv | A merged TSV file with `stecfinder` results from all samples | - - - - - - -#### AgrVATE - -Below is a description of the _per-sample_ results from [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE). - - -| Extension | Description | -|-------------------------------|-------------| -| -agr_gp.tab | A detailed report for _agr_ kmer matches | -| -blastn_log.txt | Log files from programs called by `AgrVATE` | -| -summary.tab | A final summary report for _agr_ typing | - - - - - - -#### ClermonTyping - -Below is a description of the _per-sample_ results from [ClermonTyping](https://github.com/happykhan/ClermonTyping). - - -| Extension | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.blast.xml | A BLAST XML file with the results of the ClermonTyping analysis | -| <SAMPLE_NAME>.html | A HTML file with the results of the ClermonTyping analysis | -| <SAMPLE_NAME>.mash.tsv | A TSV file with the Mash distances | -| <SAMPLE_NAME>.phylogroups.txt | A TSV file with the final phylogroup assignments | - - - - - - -#### ECTyper - -Below is a description of the _per-sample_ results from [ECTyper](https://github.com/phac-nml/ecoli_serotyping). 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `ECTyper` result, see [ECTyper - Report format](https://github.com/phac-nml/ecoli_serotyping#report-format) for details | -| blast_output_alleles.txt | Allele report generated from BLAST results | - - - - - - -#### emmtyper - -Below is a description of the _per-sample_ results from [emmtyper](https://github.com/MDU-PHL/emmtyper). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `emmtyper` result, see [emmtyper - Result format](https://github.com/MDU-PHL/emmtyper#result-format) for details | - - - - - - -#### hicap - -Below is a description of the _per-sample_ results from [hicap](https://github.com/scwatts/hicap). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.gbk | GenBank file and cap locus annotations | -| <SAMPLE_NAME>.svg | Visualization of annotated cap locus | -| <SAMPLE_NAME>.tsv | A tab-delimited file with `hicap` results | - - - - - - -#### HpsuisSero - -Below is a description of the _per-sample_ results from [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_serotyping_res.tsv | A tab-delimited file with `HpsuisSero` result | - - - - - - -#### GenoTyphi - -Below is a description of the _per-sample_ results from [GenoTyphi](https://github.com/katholt/genotyphi). 
A -full description of the GenoTyphi output is available at [GenoTyphi - Output](https://github.com/katholt/genotyphi/blob/main/README.md#explanation-of-columns-in-the-output) - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_predictResults.tsv | A tab-delimited file with `GenoTyphi` results | -| <SAMPLE_NAME>.csv | The output of `mykrobe predict` in comma-separated format | -| <SAMPLE_NAME>.json | The output of `mykrobe predict` in JSON format | - - - - - - -#### Kleborate - -Below is a description of the _per-sample_ results from [Kleborate](https://github.com/katholt/Kleborate). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.results.txt | A tab-delimited file with `Kleborate` result, see [Kleborate - Example output](https://github.com/katholt/Kleborate/wiki/Tests-and-example-outputs#example-output) for more details. | - - - - - - -#### legsta - -Below is a description of the _per-sample_ results from [legsta](https://github.com/tseemann/legsta). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `legsta` result, see [legsta - Output](https://github.com/tseemann/legsta#output) for more details | - - - - - - -#### LisSero - -Below is a description of the _per-sample_ results from [LisSero](https://github.com/MDU-PHL/LisSero). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `LisSero` results | - - - - - - -#### Mash - -Below is a description of the _per-sample_ results from [Mash](https://github.com/marbl/Mash). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-dist.txt | A tab-delimited file with `mash dist` results | - - - - - - -#### meningotype - -Below is a description of the _per-sample_ results from [meningotype](https://github.com/MDU-PHL/meningotype) . 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `meningotype` result | - - - - - - -#### ngmaster - -Below is a description of the _per-sample_ results from [ngmaster](https://github.com/MDU-PHL/ngmaster). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `ngmaster` results | - - - - - - -#### pasty - -Below is a description of the _per-sample_ results from [pasty](https://github.com/rpetit3/pasty). - - -| Extension | Description | -|-------------------------------|-------------| -| .blastn.tsv | A tab-delimited file of all blast hits | -| .details.tsv | A tab-delimited file with details for each serogroup | -| .tsv | A tab-delimited file with the predicted serogroup | - - - - - - -#### pbptyper - -Below is a description of the _per-sample_ results from [pbptyper](https://github.com/rpetit3/pbptyper). - - -| Extension | Description | -|-------------------------------|-------------| -| .tblastn.tsv | A tab-delimited file of all blast hits | -| .tsv | A tab-delimited file with the predicted PBP type | - - - - - - -#### SeqSero2 - -Below is a description of the _per-sample_ results from [SeqSero2](https://github.com/denglab/SeqSero2). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_result.tsv | A tab-delimited file with `SeqSero2` results | -| <SAMPLE_NAME>_result.txt | A text file with key-value pairs of `SeqSero2` results | - - - - - - -#### Seroba - -Below is a description of the _per-sample_ results from [Seroba](https://github.com/sanger-pathogens/seroba). -More details about the outputs are available from [Seroba - Output](https://sanger-pathogens.github.io/seroba/#output). 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with the predicted serotype | -| detailed_serogroup_info.txt | Detailed information about the predicted results | - - - - - - -#### ShigaPass - -Below is a description of the _per-sample_ results from [ShigaPass](https://github.com/imanyass/ShigaPass). - - -| Extension | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.csv | A CSV file with the predicted Shigella or EIEC serotype | - - - - - - -#### ShigaTyper - -Below is a description of the _per-sample_ results from [ShigaTyyper](https://github.com/CFSAN-Biostatistics/shigatyper). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-hits.tsv | Detailed statistics about each individual gene hit | -| <SAMPLE_NAME>.tsv | The final predicted serotype by `ShigaTyper` | - - - - - - -#### ShigEiFinder - -Below is a description of the _per-sample_ results from [ShigEiFinder](https://github.com/LanLab/ShigEiFinder). - - -| Extension | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with the predicted Shigella or EIEC serotype | - - - - - - -#### SISTR - -Below is a description of the _per-sample_ results from [SISTR](https://github.com/phac-nml/sistr_cmd). 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-allele.fasta.gz | A FASTA file of the cgMLST allele search results | -| <SAMPLE_NAME>-allele.json.gz | JSON formated cgMLST allele search results, see [SISTR - cgMLST search results](https://github.com/phac-nml/sistr_cmd#cgmlst-allele-search-results) for more details | -| <SAMPLE_NAME>-cgmlst.csv | A comma-delimited summary of the cgMLST allele search results | -| <SAMPLE_NAME>.tsv | A tab-delimited file with `SISTR` results, see [SISTR - Primary results](https://github.com/phac-nml/sistr_cmd#primary-results-output--o-sistr-results) for more details | - - - - - - -#### spaTyper - -Below is a description of the _per-sample_ results from [spaTyper](https://github.com/HCGB-IGTP/spaTyper). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `spaTyper` result | - - - - - - -#### SsuisSero - -Below is a description of the _per-sample_ results from [SsuisSero](https://github.com/jimmyliu1326/SsuisSero). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>_serotyping_res.tsv | A tab-delimited file with `SsuisSero` results | - - - - - - -#### staphopia-sccmec - -Below is a description of the _per-sample_ results from [staphopia-sccmec](https://github.com/staphopia/staphopia-sccmec). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `staphopia-sccmec` results | - - - - - - -#### TBProfiler - -Below is a description of the _per-sample_ results from [TBProfiler](https://github.com/jodyphelan/TBProfiler). 
- - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.results.csv | A CSV formated `TBProfiler` result file of resistance and strain type | -| <SAMPLE_NAME>.results.json | A JSON formated `TBProfiler` result file of resistance and strain type | -| <SAMPLE_NAME>.results.txt | A text file with `TBProfiler` results | -| <SAMPLE_NAME>.bam | BAM file with alignment details | -| <SAMPLE_NAME>.targets.csq.vcf.gz | VCF with variant info again reference genomes | - - - - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions -were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -## Parameters - - -### mashdist - - -| Parameter | Description | -|:---|---| -| ` --mash_sketch` | The reference sequence as a Mash Sketch (.msh file)
**Type:** `string` | -| ` --mash_seed` | Seed to provide to the hash function
**Type:** `integer`, **Default:** `42` | -| ` --mash_table` | Table output (fields will be blank if they do not meet the p-value threshold)
**Type:** `boolean` | -| ` --mash_m` | Minimum copies of each k-mer required to pass noise filter for reads
**Type:** `integer`, **Default:** `1` | -| ` --mash_w` | Probability threshold for warning about low k-mer size.
**Type:** `number`, **Default:** `0.01` | -| ` --max_p` | Maximum p-value to report.
**Type:** `number`, **Default:** `1.0` | -| ` --max_dist` | Maximum distance to report.
**Type:** `number`, **Default:** `1.0` | -| ` --merlin_dist` | Maximum distance to report when using Merlin.
**Type:** `number`, **Default:** `0.1` | -| ` --full_merlin` | Go full Merlin and run all species-specific tools, no matter the Mash distance
**Type:** `boolean` | -| ` --use_fastqs` | Query with FASTQs instead of the assemblies
**Type:** `boolean` | - -### AgrVATE - - -| Parameter | Description | -|:---|---| -| ` --typing_only` | agr typing only. Skips agr operon extraction and frameshift detection
**Type:** `boolean` | - -### ClermonTyping - - -| Parameter | Description | -|:---|---| -| ` --clermon_threshold` | Do not use contigs under this size
**Type:** `number` | - -### ECTyper - - -| Parameter | Description | -|:---|---| -| ` --opid` | Percent identity required for an O antigen allele match
**Type:** `integer`, **Default:** `90` | -| ` --opcov` | Minimum percent coverage required for an O antigen allele match
**Type:** `integer`, **Default:** `90` | -| ` --hpid` | Percent identity required for an H antigen allele match
**Type:** `integer`, **Default:** `95` | -| ` --hpcov` | Minimum percent coverage required for an H antigen allele match
**Type:** `integer`, **Default:** `50` | -| ` --verify` | Enable E. coli species verification
**Type:** `boolean` | -| ` --print_alleles` | Prints the allele sequences if enabled as the final column
**Type:** `boolean` | - -### emmtyper - - -| Parameter | Description | -|:---|---| -| ` --emmtyper_wf` | Workflow for emmtyper to use.
**Type:** `string`, **Default:** `blast` | -| ` --cluster_distance` | Distance between cluster of matches to consider as different clusters
**Type:** `integer`, **Default:** `500` | -| ` --percid` | Minimal percent identity of sequence
**Type:** `integer`, **Default:** `95` | -| ` --culling_limit` | Total hits to return in a position
**Type:** `integer`, **Default:** `5` | -| ` --mismatch` | Threshold for number of mismatch to allow in BLAST hit
**Type:** `integer`, **Default:** `5` | -| ` --align_diff` | Threshold for difference between alignment length and subject length in BLAST
**Type:** `integer`, **Default:** `5` | -| ` --gap` | Threshold gap to allow in BLAST hit
**Type:** `integer`, **Default:** `2` | -| ` --min_perfect` | Minimum size of perfect match at 3' primer end
**Type:** `integer`, **Default:** `15` | -| ` --min_good` | Minimum size where there must be 2 matches for each mismatch
**Type:** `integer`, **Default:** `15` | -| ` --max_size` | Maximum size of PCR product
**Type:** `integer`, **Default:** `2000` | - -### hicap - - -| Parameter | Description | -|:---|---| -| ` --database_dir` | Directory containing locus database
**Type:** `string` | -| ` --model_fp` | Path to prodigal model
**Type:** `string` | -| ` --full_sequence` | Write the full input sequence out to the genbank file rather than just the region surrounding and including the locus
**Type:** `boolean` | -| ` --hicap_debug` | hicap will print debug messages
**Type:** `boolean` | -| ` --gene_coverage` | Minimum percentage coverage to consider a single gene complete
**Type:** `number`, **Default:** `0.8` | -| ` --gene_identity` | Minimum percentage identity to consider a single gene complete
**Type:** `number`, **Default:** `0.7` | -| ` --broken_gene_length` | Minimum length to consider a broken gene
**Type:** `integer`, **Default:** `60` | -| ` --broken_gene_identity` | Minimum percentage identity to consider a broken gene
**Type:** `number`, **Default:** `0.8` | - -### GenoTyphi - - -| Parameter | Description | -|:---|---| -| ` --kmer` | K-mer length
**Type:** `integer`, **Default:** `21` | -| ` --min_depth` | Minimum depth
**Type:** `integer`, **Default:** `1` | -| ` --model` | Genotype model used.
**Type:** `string`, **Default:** `kmer_count` | -| ` --report_all_calls` | Report all calls
**Type:** `boolean` | -| ` --mykrobe_opts` | Extra Mykrobe options in quotes
**Type:** `string` | - -### Kleborate - - -| Parameter | Description | -|:---|---| -| ` --kleborate_preset` | Preset module to use for Kleborate
**Type:** `string`, **Default:** `kpsc` | -| ` --kleborate_opts` | Extra options in quotes for Kleborate
**Type:** `string` | - -### legsta - - -| Parameter | Description | -|:---|---| -| ` --noheader` | Don't print header row
**Type:** `boolean` | - -### LisSero - - -| Parameter | Description | -|:---|---| -| ` --min_id` | Minimum percent identity to accept a match
**Type:** `number`, **Default:** `95.0` | -| ` --min_cov` | Minimum coverage of the gene to accept a match
**Type:** `number`, **Default:** `95.0` | - -### meningotype -You can use these parameters to fine-tune your meningotype analysis - -| Parameter | Description | -|:---|---| -| ` --finetype` | perform porA and fetA fine typing
**Type:** `boolean` | -| ` --porB` | perform porB sequence typing (NEIS2020)
**Type:** `boolean` | -| ` --bast` | perform Bexsero antigen sequence typing (BAST)
**Type:** `boolean` | -| ` --mlst` | perform MLST
**Type:** `boolean` | -| ` --all` | perform MLST, porA, fetA, porB, BAST typing
**Type:** `boolean` | - -### ngmaster - - -| Parameter | Description | -|:---|---| -| ` --csv` | output comma-separated format (CSV) rather than tab-separated
**Type:** `boolean` | - -### pasty - - -| Parameter | Description | -|:---|---| -| ` --pasty_min_pident` | Minimum percent identity to count a hit
**Type:** `integer`, **Default:** `95` | -| ` --pasty_min_coverage` | Minimum percent coverage to count a hit
**Type:** `integer`, **Default:** `95` | - -### pbptyper - - -| Parameter | Description | -|:---|---| -| ` --pbptyper_min_pident` | Minimum percent identity to count a hit
**Type:** `integer`, **Default:** `95` | -| ` --pbptyper_min_coverage` | Minimum percent coverage to count a hit
**Type:** `integer`, **Default:** `95` | - -### SeqSero2 - - -| Parameter | Description | -|:---|---| -| ` --run_mode` | Workflow to run. 'a' allele mode, or 'k' k-mer mode
**Type:** `string`, **Default:** `k` | -| ` --input_type` | Input format to analyze. 'assembly' or 'fastq'
**Type:** `string`, **Default:** `assembly` | -| ` --bwa_mode` | Algorithms for bwa mapping for allele mode
**Type:** `string`, **Default:** `mem` | - -### SISTR - - -| Parameter | Description | -|:---|---| -| ` --full_cgmlst` | Use the full set of cgMLST alleles which can include highly similar alleles
**Type:** `boolean` | - -### spaTyper - - -| Parameter | Description | -|:---|---| -| ` --repeats` | List of spa repeats
**Type:** `string` | -| ` --repeat_order` | List spa types and order of repeats
**Type:** `string` | -| ` --do_enrich` | Do PCR product enrichment
**Type:** `boolean` | - -### staphopia-sccmec - - -| Parameter | Description | -|:---|---| -| ` --hamming` | Report the results as hamming distances
**Type:** `boolean` | - -### TBProfiler Profile - - -| Parameter | Description | -|:---|---| -| ` --call_whole_genome` | Call whole genome
**Type:** `boolean` | -| ` --mapper` | Mapping tool to use. If you are using nanopore data it will default to minimap2
**Type:** `string`, **Default:** `bwa` | -| ` --caller` | Variant calling tool to use
**Type:** `string`, **Default:** `freebayes` | -| ` --calling_params` | Extra variant caller options in quotes
**Type:** `string` | -| ` --suspect` | Use the suspect suite of tools to add ML predictions
**Type:** `boolean` | -| ` --no_flagstat` | Don't collect flagstats
**Type:** `boolean` | -| ` --no_delly` | Don't run delly
**Type:** `boolean` | -| ` --tbprofiler_opts` | Extra options in quotes for TBProfiler
**Type:** `string` | - -## Citations -If you use Bactopia and `merlin` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [AgrVATE](https://github.com/VishnuRaghuram94/AgrVATE) - Raghuram V. [AgrVATE: Rapid identification of Staphylococcus aureus agr locus type and agr operon variants.](https://github.com/VishnuRaghuram94/AgrVATE) (GitHub) - -- [ClermontTyping](https://github.com/happykhan/ClermonTyping) - Beghain J, Bridier-Nahmias A, Le Nagard H, Denamur E, Clermont O. [ClermonTyping: an easy-to-use and accurate in silico method for Escherichia genus strain phylotyping.](https://doi.org/10.1099/mgen.0.000192) Microbial Genomics, 4(7), e000192. (2018) - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [ECTyper](https://github.com/phac-nml/ecoli_serotyping) - Laing C, Bessonov K, Sung S, La Rose C [ECTyper - In silico prediction of _Escherichia coli_ serotype](https://github.com/phac-nml/ecoli_serotyping) (GitHub) -- [emmtyper](https://github.com/MDU-PHL/emmtyper) - Tan A, Seemann T, Lacey D, Davies M, Mcintyre L, Frost H, Williamson D, Gonçalves da Silva A [emmtyper - emm Automatic Isolate Labeller](https://github.com/MDU-PHL/emmtyper) (GitHub) - -- [GenoTyphi](https://github.com/katholt/genotyphi) - Wong VK, Baker S, Connor TR, Pickard D, Page AJ, Dave J, Murphy N, Holliman R, Sefton A, Millar M, Dyson ZA, Dougan G, Holt KE, & International Typhoid Consortium. [An extended genotyping framework for Salmonella enterica serovar Typhi, the cause of human typhoid](https://doi.org/10.1038/ncomms12827) _Nature Communications_ 7, 12827. 
(2016) - -- [hicap](https://github.com/scwatts/hicap) - Watts SC, Holt KE [hicap: in silico serotyping of the Haemophilus influenzae capsule locus.](https://doi.org/10.1128/JCM.00190-19) _Journal of Clinical Microbiology_ JCM.00190-19 (2019) - -- [HpsuisSero](https://github.com/jimmyliu1326/HpsuisSero) - Lui J [HpsuisSero: Rapid _Haemophilus parasuis_ serotyping](https://github.com/jimmyliu1326/HpsuisSero) (GitHub) - -- [Kleborate](https://github.com/katholt/Kleborate) - Lam MMC, Wick RR, Watts, SC, Cerdeira LT, Wyres KL, Holt KE [A genomic surveillance framework and genotyping tool for Klebsiella pneumoniae and its related species complex.](https://doi.org/10.1038/s41467-021-24448-3) _Nat Commun_ 12, 4188 (2021) - -- [legsta](https://github.com/tseemann/legsta) - Seemann T [legsta: In silico Legionella pneumophila Sequence Based Typing](https://github.com/tseemann/legsta) (GitHub) - -- [LisSero](https://github.com/MDU-PHL/LisSero) - Kwong J, Zhang J, Seeman T, Horan, K, Gonçalves da Silva A [LisSero - _In silico_ serotype prediction for _Listeria monocytogenes_](https://github.com/MDU-PHL/LisSero) (GitHub) - -- [Mash](https://github.com/marbl/Mash) - Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). 
_Genome Biol_ 17, 132 (2016) - -- [meningotype](https://github.com/MDU-PHL/meningotype) - Kwong JC, Gonçalves da Silva A, Stinear TP, Howden BP, & Seemann T [meningotype: in silico typing for _Neisseria meningitidis_.](https://github.com/MDU-PHL/meningotype) (GitHub) - -- [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) - Hunt M, Bradley P, Lapierre SG, Heys S, Thomsit M, Hall MB, Malone KM, Wintringer P, Walker TM, Cirillo DM, Comas I, Farhat MR, Fowler P, Gardy J, Ismail N, Kohl TA, Mathys V, Merker M, Niemann S, Omar SV, Sintchenko V, Smith G, Supply P, Tahseen S, Wilcox M, Arandjelovic I, Peto TEA, Crook, DW, Iqbal Z [Antibiotic resistance prediction for Mycobacterium tuberculosis from genome sequence data with Mykrobe](https://doi.org/10.12688/wellcomeopenres.15603.1) _Wellcome Open Research_ 4, 191. (2019) - -- [ngmaster](https://github.com/MDU-PHL/ngmaster) - Kwong J, Gonçalves da Silva A, Schultz M, Seeman T [ngmaster - _In silico_ multi-antigen sequence typing for _Neisseria gonorrhoeae_ (NG-MAST)](https://github.com/MDU-PHL/ngmaster) (GitHub) - -- [pasty](https://github.com/rpetit3/pasty) - Petit III RA [pasty: in silico serogrouping of _Pseudomonas aeruginosa_ isolates](https://github.com/rpetit3/pasty) (GitHub) - -- [pbptyper](https://github.com/rpetit3/pbptyper) - Petit III RA [pbptyper: In silico Penicillin Binding Protein (PBP) typer for _Streptococcus pneumoniae_ assemblies](https://github.com/rpetit3/pbptyper) (GitHub) - -- [SeqSero2](https://github.com/denglab/SeqSero2) - Zhang S, Den-Bakker HC, Li S, Dinsmore BA, Lane C, Lauer AC, Fields PI, Deng X. 
[SeqSero2: rapid and improved Salmonella serotype determination using whole genome sequencing data.](https://doi.org/10.1128/AEM.01746-19) _Appl Environ Microbiology_ 85(23):e01746-19 (2019) - -- [shigapass](https://github.com/imanyass/ShigaPass) - Yassine I, Hansen EE, Lefèvre S, Ruckly C, Carle I, Lejay-Collin M, Fabre L, Rafei R, Pardos de la Gandara M, Daboussi F, Shahin A, Weill FX [ShigaPass: an in silico tool predicting Shigella serotypes from whole-genome sequencing assemblies.](https://doi.org/10.1099%2Fmgen.0.000961) _Microb Genomics_ 9(3) (2023) - -- [ShigaTyper](https://github.com/CFSAN-Biostatistics/shigatyper) - Wu Y, Lau HK, Lee T, Lau DK, Payne J [In Silico Serotyping Based on Whole-Genome Sequencing Improves the Accuracy of Shigella Identification.](https://doi.org/10.1128/AEM.00165-19) *Applied and Environmental Microbiology*, 85(7). (2019) - -- [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) - Zhang X, Payne M, Nguyen T, Kaur S, Lan R [Cluster-specific gene markers enhance Shigella and enteroinvasive Escherichia coli in silico serotyping.](https://doi.org/10.1099/mgen.0.000704) Microbial Genomics, 7(12). (2021) - -- [SISTR](https://github.com/phac-nml/sistr_cmd) - Yoshida CE, Kruczkiewicz P, Laing CR, Lingohr EJ, Gannon VPJ, Nash JHE, Taboada EN [The Salmonella In Silico Typing Resource (SISTR): An Open Web-Accessible Tool for Rapidly Typing and Subtyping Draft Salmonella Genome Assemblies.](https://doi.org/10.1371/journal.pone.0147101) _PloS One_, 11(1), e0147101. (2016) - -- [spaTyper](https://github.com/HCGB-IGTP/spaTyper) - Sanchez-Herrero JF, and Sullivan M [spaTyper: Staphylococcal protein A (spa) characterization pipeline](http://doi.org/10.5281/zenodo.4063625). Zenodo. 
(2020) - -- [SsuisSero](https://github.com/jimmyliu1326/SsuisSero) - Lui J [SsuisSero: Rapid _Streptococcus suis_ serotyping](https://github.com/jimmyliu1326/SsuisSero) (GitHub) - -- [staphopia-sccmec](https://github.com/staphopia/staphopia-sccmec) - Petit III RA, Read TD [_Staphylococcus aureus_ viewed from the perspective of 40,000+ genomes.](http://dx.doi.org/10.7717/peerj.5261) _PeerJ_ 6, e5261 (2018) - -- [TBProfiler](https://github.com/jodyphelan/TBProfiler) - Phelan JE, O’Sullivan DM, Machado D, Ramos J, Oppong YEA, Campino S, O’Grady J, McNerney R, Hibberd ML, Viveiros M, Huggett JF, Clark TG [Integrating informatics tools and portable sequencing technology for rapid detection of resistance to anti-tuberculous drugs.](https://doi.org/10.1186/s13073-019-0650-x) _Genome Med_ 11, 41 (2019) - diff --git a/docs/bactopia/qc.md b/docs/bactopia/qc.md deleted file mode 100644 index 9a3da69b..00000000 --- a/docs/bactopia/qc.md +++ /dev/null @@ -1,255 +0,0 @@ ---- -title: qc -description: A Bactopia Tool which uses a variety of tools to perform quality control on Illumina and Oxford Nanopore reads. - ---- - -The `qc` module uses a variety of tools to perform quality control on Illumina and -Oxford Nanopore reads. 
The tools used are: - -| Tool | Technology | Description | -|------|------------|-------------| -| [bbtools](https://jgi.doe.gov/data-and-tools/bbtools/) | Illumina | A suite of tools for manipulating reads | -| [fastp](https://github.com/OpenGene/fastp) | Illumina | A tool designed to provide fast all-in-one preprocessing for FastQ files | -| [fastqc](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) | Illumina | A quality control tool for high throughput sequence data | -| [fastq_scan](https://github.com/rpetit3/fastq-scan) | Nanopore | A tool for quickly scanning FASTQ files | -| [lighter](https://github.com/mourisl/Lighter) | Illumina | A tool for correcting sequencing errors in Illumina reads | -| [NanoPlot](https://github.com/wdecoster/NanoPlot) | Nanopore | A tool for plotting long read sequencing data | -| [nanoq](https://github.com/esteinig/nanoq) | Nanopore | A tool for calculating quality metrics for Oxford Nanopore reads | -| [porechop](https://github.com/rrwick/Porechop) | Nanopore | A tool for removing adapters from Oxford Nanopore reads | -| [rasusa](https://github.com/mbhall88/rasusa) | Nanopore | Randomly subsample sequencing reads to a specified coverage | - -Similar to the `gather` step, the `qc` step will also stop samples that fail to meet -basic QC checks from continuing downstream. - - -## Output Overview - -Below is the default output structure for the `qc` step in Bactopia. Where -possible the file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── main -│ └── qc -│ ├── extra -│ ├── logs -│ │ ├── nf-qc.{begin,err,log,out,run,sh,trace} -│ │ ├── output-fastp.log -│ │ └── versions.yml -│ ├── -low-read-count-error.txt -│ ├── -low-sequence-coverage-error.txt -│ ├── -low-sequence-depth-error.txt -│ ├── {|_R1|_R2}.error-fastq.gz -│ ├── {|_R1|_R2}.fastq.gz -│ └── summary -│ ├── .fastp.{html|json} -│ ├── {|_R1|_R2}-{final|original}.json -│ └── {|_R1|_R2}-{final|original}_fastqc.{html|zip} -│ └── {|_R1|_R2}-{final|original}_NanoPlot-report.html -│ └── {|_R1|_R2}-{final|original}_NanoPlot.tar.gz -└── bactopia-runs - └── bactopia- - └── nf-reports - ├── bactopia-dag.dot - ├── bactopia-report.html - ├── bactopia-timeline.html - └── bactopia-trace.txt - -``` - - - -### Results - -#### Quality Control - -Below is a description of the _per-sample_ results from `qc` subworkflow. - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.fastq.gz | A gzipped FASTQ file containing the cleaned Illumina single-end, or Oxford Nanopore reads | -| <SAMPLE_NAME>_R{1\|2}.fastq.gz | A gzipped FASTQ file containing the cleaned Illumina paired-end reads | -| <SAMPLE_NAME>-{final\|original}.json | A JSON file containing the QC results generated by [fastq-scan](https://github.com/rpetit3/fastq-scan) | -| <SAMPLE_NAME>-{final\|original}_fastqc.html | (Illumina Only) A HTML report of the QC results generated by [fastqc](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) | -| <SAMPLE_NAME>-{final\|original}_fastqc.zip | (Illumina Only) A zip file containing the complete set of [fastqc](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) results | -| <SAMPLE_NAME>-{final\|original}_fastp.json | (Illumina Only) A JSON file containing the QC results generated by [fastp](https://github.com/OpenGene/fastp) | -| <SAMPLE_NAME>-{final\|original}_fastp.html | (Illumina Only) A HTML report of the QC results generated by [fastp](https://github.com/OpenGene/fastp) | -| 
<SAMPLE_NAME>-{final\|original}_NanoPlot-report.html | (ONT Only) A HTML report of the QC results generated by [NanoPlot](https://github.com/wdecoster/NanoPlot) | -| <SAMPLE_NAME>-{final\|original}_NanoPlot.tar.gz | (ONT Only) A tarball containing the complete set of [NanoPlot](https://github.com/wdecoster/NanoPlot) results | - - - - - - -#### Failed Quality Checks - -Built into Bactopia are a few basic quality checks to help prevent downstream failures. -If a sample fails one of these checks, it will be excluded from further analysis. By -excluding these samples, complete pipeline failures are prevented. - - -| Extension | Description | -|-------------------------------|-------------| -| .error-fastq.gz | A gzipped FASTQ file of Illumina Single-End or Oxford Nanopore reads that failed QC | -| _R{1\|2}.error-fastq.gz | A gzipped FASTQ file of Illumina Paired-End reads that failed QC | -| -low-read-count-error.txt | Sample failed read count checks and excluded from further analysis | -| -low-sequence-coverage-error.txt | Sample failed sequence coverage checks and excluded from further analysis | -| -low-sequence-depth-error.txt | Sample failed sequenced basepair checks and excluded from further analysis | - - - -:::info[Poor samples are excluded to prevent downstream failures] -Samples that fail any of the QC checks will be excluded from further analysis. -Those samples will generate a `*-error.txt` file with the error message. Excluding -these samples prevents downstream failures that cause the whole workflow to fail. -::: - - - -
-Example Error: After QC, too few reads remain - -If after cleaning reads, a sample has less than the minimum required reads, the -sample will be excluded from further analysis. You can adjust this minimum read -count using the `--min_reads` parameter. - -__Example Text from <SAMPLE_NAME>-low-read-count-error.txt__ -_<SAMPLE_NAME> FASTQ(s) contain `X` total reads. This does not exceed the required -minimum `Y` read count. Further analysis is discontinued._ - -
- -
-Example Error: After QC, too little sequence coverage remains - -If after cleaning reads, a sample has failed to meet the minimum sequence -coverage required, the sample will be excluded from further analysis. You can -adjust this minimum sequence coverage using the `--min_coverage` parameter. - -__Note:__ This check is only performed when a genome size is available. - -__Example Text from <SAMPLE_NAME>-low-sequence-coverage-error.txt__ -_After QC, <SAMPLE_NAME> FASTQ(s) contain `X` total basepairs. This does not -exceed the required minimum `Y` bp (`Z`x coverage). Further analysis is -discontinued._ - -
- -
-Example Error: After QC, too few sequenced basepairs remain - -If after cleaning reads, a sample has failed to meet the minimum number of -sequenced basepairs, the sample will be excluded from further analysis. You can -adjust this minimum basepair count using the `--min_basepairs` parameter. - -__Example Text from <SAMPLE_NAME>-low-sequence-depth-error.txt__ -_<SAMPLE_NAME> FASTQ(s) contain `X` total basepairs. This does not exceed the -required minimum `Y` bp. Further analysis is discontinued._ - -
- - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions -were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -## Parameters - - -### QC - - -| Parameter | Description | -|:---|---| -| ` --use_bbmap` | Illumina reads will be QC'd using BBMap
**Type:** `boolean` | -| ` --use_porechop` | Use Porechop to remove adapters from ONT reads
**Type:** `boolean` | -| ` --skip_qc` | The QC step will be skipped and it will be assumed the inputs sequences have already been QCed.
**Type:** `boolean` | -| ` --skip_qc_plots` | QC Plot creation by FastQC or Nanoplot will be skipped
**Type:** `boolean` | -| ` --skip_error_correction` | FLASH error correction of reads will be skipped.
**Type:** `boolean` | -| ` --adapters` | A FASTA file containing adapters to remove
**Type:** `string`, **Default:** `/home/robert_petit/bactopia/data/EMPTY_ADAPTERS` | -| ` --adapter_k` | Kmer length used for finding adapters.
**Type:** `integer`, **Default:** `23` | -| ` --phix` | phiX174 reference genome to remove
**Type:** `string`, **Default:** `/home/robert_petit/bactopia/data/EMPTY_PHIX` | -| ` --phix_k` | Kmer length used for finding phiX174.
**Type:** `integer`, **Default:** `31` | -| ` --ktrim` | Trim reads to remove bases matching reference kmers
**Type:** `string`, **Default:** `r` | -| ` --mink` | Look for shorter kmers at read tips down to this length, when k-trimming or masking.
**Type:** `integer`, **Default:** `11` | -| ` --hdist` | Maximum Hamming distance for ref kmers (subs only)
**Type:** `integer`, **Default:** `1` | -| ` --tpe` | When kmer right-trimming, trim both reads to the minimum length of either
**Type:** `string`, **Default:** `t` | -| ` --tbo` | Trim adapters based on where paired reads overlap
**Type:** `string`, **Default:** `t` | -| ` --qtrim` | Trim read ends to remove bases with quality below trimq.
**Type:** `string`, **Default:** `rl` | -| ` --trimq` | Regions with average quality BELOW this will be trimmed if qtrim is set to something other than f
**Type:** `integer`, **Default:** `6` | -| ` --maq` | Reads with average quality (after trimming) below this will be discarded
**Type:** `integer`, **Default:** `10` | -| ` --minlength` | Reads shorter than this after trimming will be discarded
**Type:** `integer`, **Default:** `35` | -| ` --ftm` | If positive, right-trim length to be equal to zero, modulo this number
**Type:** `integer`, **Default:** `5` | -| ` --tossjunk` | Discard reads with invalid characters as bases
**Type:** `string`, **Default:** `t` | -| ` --ain` | When detecting pair names, allow identical names
**Type:** `string`, **Default:** `f` | -| ` --qout` | PHRED offset to use for output FASTQs
**Type:** `string`, **Default:** `33` | -| ` --maxcor` | Max number of corrections within a 20bp window
**Type:** `integer`, **Default:** `1` | -| ` --sampleseed` | Set to a positive number to use as the random number generator seed for sampling
**Type:** `integer`, **Default:** `42` | -| ` --ont_minlength` | ONT Reads shorter than this will be discarded
**Type:** `integer`, **Default:** `1000` | -| ` --ont_minqual` | Minimum average read quality filter of ONT reads
**Type:** `integer` | -| ` --porechop_opts` | Extra Porechop options in quotes
**Type:** `string` | -| ` --nanoplot_opts` | Extra NanoPlot options in quotes
**Type:** `string` | -| ` --bbduk_opts` | Extra BBDuk options in quotes
**Type:** `string` | -| ` --fastp_opts` | Extra fastp options in quotes
**Type:** `string` | - -## Citations -If you use Bactopia and `qc` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [BBTools](https://jgi.doe.gov/data-and-tools/bbtools/) - Bushnell B [BBMap short read aligner, and other bioinformatic tools.](http://sourceforge.net/projects/bbmap/) (Link) - -- [fastp](https://github.com/OpenGene/fastp) - Chen S, Zhou Y, Chen Y, and Gu J [fastp: an ultra-fast all-in-one FASTQ preprocessor.](https://doi.org/10.1093/bioinformatics/bty560) _Bioinformatics_, 34(17), i884–i890. (2018) - -- [fastq-scan](https://github.com/rpetit3/fastq-scan) - Petit III RA [fastq-scan: generate summary statistics of input FASTQ sequences.](https://github.com/rpetit3/fastq-scan) (GitHub) - -- [FastQC](https://github.com/s-andrews/FastQC) - Andrews S [FastQC: a quality control tool for high throughput sequence data.](http://www.bioinformatics.babraham.ac.uk/projects/fastqc) (WebLink) - -- [Lighter](https://github.com/mourisl/Lighter) - Song L, Florea L, Langmead B [Lighter: Fast and Memory-efficient Sequencing Error Correction without Counting](https://doi.org/10.1186/s13059-014-0509-9). _Genome Biol._ 15(11):509 (2014) - -- [NanoPlot](https://github.com/wdecoster/NanoPlot) - De Coster W, D’Hert S, Schultz DT, Cruts M, Van Broeckhoven C [NanoPack: visualizing and processing long-read sequencing data](https://doi.org/10.1093/bioinformatics/bty149) _Bioinformatics_ Volume 34, Issue 15 (2018) - -- [Nanoq](https://github.com/esteinig/nanoq) - Steinig E [Nanoq: Minimal but speedy quality control for nanopore reads in Rust](https://github.com/esteinig/nanoq) (GitHub) - -- [Pigz](https://zlib.net/pigz/) - Adler M. 
[pigz: A parallel implementation of gzip for modern multi-processor, multi-core machines.](https://zlib.net/pigz/) _Jet Propulsion Laboratory_ (2015) - -- [Porechop](https://github.com/rrwick/Porechop) - Wick RR, Judd LM, Gorrie CL, Holt KE. [Completing bacterial genome assemblies with multiplex MinION sequencing.](https://doi.org/10.1099/mgen.0.000132) _Microb Genom._ 3(10):e000132 (2017) - -- [Rasusa](https://github.com/mbhall88/rasusa) - Hall MB [Rasusa: Randomly subsample sequencing reads to a specified coverage.](https://doi.org/10.5281/zenodo.3731394) (2019). - diff --git a/docs/bactopia/sequence-typing/mlst.md b/docs/bactopia/sequence-typing/mlst.md deleted file mode 100644 index f9666981..00000000 --- a/docs/bactopia/sequence-typing/mlst.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -title: mlst -description: A Bactopia Tool which uses mlst scan assemblies and determine the sequence type based on PubMLST schemas. ---- - -The `mlst` module uses [mlst](https://github.com/tseemann/mlst) scan assemblies and determine the sequence type. -It makes use of [PubMLST](https://pubmlst.org/) schemes and by default automatically scans each schema. To specify -a specific scheme to scan, you can provide it with `--scheme`. - - -## Output Overview - -Below is the default output structure for the `mlst` step in Bactopia. Where -possible the file descriptions below were modified from a tools description. - -```bash - -├── -│ └── tools -│ └── mlst -│ ├── .tsv -│ └── logs -│ ├── nf-mlst.{begin,err,log,out,run,sh,trace} -│ └── versions.yml -└── bactopia-runs - └── mlst- - ├── merged-results - │ ├── logs - │ │ └── mlst-concat - │ │ ├── nf-merged-results.{begin,err,log,out,run,sh,trace} - │ │ └── versions.yml - │ └── mlst.tsv - └── nf-reports - ├── mlst-dag.dot - ├── mlst-report.html - ├── mlst-timeline.html - └── mlst-trace.txt - -``` - - - -### Results - -#### Merged Results - -Below are results that are concatenated into a single file. 
- - -| Filename | Description | -|-------------------------------|-------------| -| mlst.tsv | A merged TSV file with `mlst` results from all samples | - - - - - - -#### mlst - -Below is a description of the _per-sample_ results from [mlst](https://github.com/tseemann/mlst). - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>.tsv | A tab-delimited file with `mlst` result, see [mlst - Usage](https://github.com/tseemann/mlst#usage) for more details | - - - - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions -were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. - -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -## Parameters - - -### MLST - - -| Parameter | Description | -|:---|---| -| ` --scheme` | Don't autodetect, force this scheme on all inputs
**Type:** `string` | -| ` --minid` | Minimum DNA percent identity of full allelle to consider 'similar'
**Type:** `integer`, **Default:** `95` | -| ` --mincov` | Minimum DNA percent coverage to report partial allele at all
**Type:** `integer`, **Default:** `10` | -| ` --minscore` | Minimum score out of 100 to match a scheme
**Type:** `integer`, **Default:** `50` | -| ` --nopath` | Strip filename paths from FILE column
**Type:** `boolean` | -| ` --mlst_db` | A custom MLST database to use, either a tarball or a directory
**Type:** `string` | - -## Citations -If you use Bactopia and `mlst` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [csvtk](https://bioinf.shenwei.me/csvtk/) - Shen, W [csvtk: A cross-platform, efficient and practical CSV/TSV toolkit in Golang.](https://github.com/shenwei356/csvtk/) (GitHub) - -- [mlst](https://github.com/tseemann/mlst) - Seemann T [mlst: scan contig files against PubMLST typing schemes](https://github.com/tseemann/mlst) (GitHub) - -- [PubMLST.org](https://pubmlst.org/) - Jolley KA, Bray JE, Maiden MCJ [Open-access bacterial population genomics: BIGSdb software, the PubMLST.org website and their applications.](http://dx.doi.org/10.12688/wellcomeopenres.14826.1) _Wellcome Open Res_ 3, 124 (2018) - diff --git a/docs/bactopia/sketcher.md b/docs/bactopia/sketcher.md deleted file mode 100644 index 61a1a1f7..00000000 --- a/docs/bactopia/sketcher.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: sketcher -description: A Bactopia Tool which uses Mash and Sourmash to create sketches and query RefSeq and GTDB. - ---- - -The `sketcher` module uses [Mash](https://github.com/marbl/Mash) and -[Sourmash](https://github.com/dib-lab/sourmash) to create sketches and query -[RefSeq](https://www.ncbi.nlm.nih.gov/refseq/) and [GTDB](https://gtdb.ecogenomic.org/). - - -## Output Overview - -Below is the default output structure for the `sketcher` step in Bactopia. Where -possible the file descriptions below were modified from a tools description. 
- -```bash - -├── -│ └── main -│ └── sketcher -│ ├── logs -│ │ ├── nf-sketcher.{begin,err,log,out,run,sh,trace} -│ │ └── versions.yml -│ ├── -k{21|31}.msh -│ ├── -mash-refseq88-k21.txt -│ ├── -sourmash-gtdb-rs207-k31.txt -│ └── .sig -└── bactopia-runs - └── bactopia- - └── nf-reports - ├── bactopia-dag.dot - ├── bactopia-report.html - ├── bactopia-timeline.html - └── bactopia-trace.txt - -``` - - - -### Results - -#### sketcher - -Below is a description of the _per-sample_ results from the `sketcher` subworkflow. - - -| Filename | Description | -|-------------------------------|-------------| -| <SAMPLE_NAME>-k{21\|31}.msh | A Mash sketch of the input assembly for k=21 and k=31 | -| <SAMPLE_NAME>-mash-refseq88-k21.txt | The results of querying the Mash sketch against RefSeq88 | -| <SAMPLE_NAME>-sourmash-gtdb-rs207-k31.txt | The results of querying the Sourmash sketch against GTDB-rs207 | -| <SAMPLE_NAME>.sig | A Sourmash sketch of the input assembly for k=21, k=31, and k=51 | - - - - - - - - -### Audit Trail - -Below are files that can assist you in understanding which parameters and program versions -were used. - -#### Logs - -Each process that is executed will have a folder named `logs`. In this folder are helpful -files for you to review if the need ever arises. 
- -| Extension | Description | -|--------------|-------------| -| .begin | An empty file used to designate the process started | -| .err | Contains STDERR outputs from the process | -| .log | Contains both STDERR and STDOUT outputs from the process | -| .out | Contains STDOUT outputs from the process | -| .run | The script Nextflow uses to stage/unstage files and queue processes based on given profile | -| .sh | The script executed by bash for the process | -| .trace | The Nextflow [Trace](https://www.nextflow.io/docs/latest/tracing.html#trace-report) report for the process | -| versions.yml | A YAML formatted file with program versions | - -## Parameters - - -### Sketcher - - -| Parameter | Description | -|:---|---| -| ` --sketch_size` | Sketch size. Each sketch will have at most this many non-redundant min-hashes.
**Type:** `integer`, **Default:** `10000` | -| ` --sourmash_scale` | Choose number of hashes as 1 in FRACTION of input k-mers
**Type:** `integer`, **Default:** `10000` | -| ` --no_winner_take_all` | Disable winner-takes-all strategy for identity estimates
**Type:** `boolean` | -| ` --screen_i` | Minimum identity to report.
**Type:** `number`, **Default:** `0.8` | - -## Citations -If you use Bactopia and `sketcher` in your analysis, please cite the following. - -- [Bactopia](https://bactopia.github.io/) - Petit III RA, Read TD [Bactopia - a flexible pipeline for complete analysis of bacterial genomes.](https://doi.org/10.1128/mSystems.00190-20) _mSystems_ 5 (2020) - - -- [Genome Taxonomy Database](https://gtdb.ecogenomic.org/) - Parks DH, Chuvochina M, Rinke C, Mussig AJ, Chaumeil P-A, Hugenholtz P [GTDB: an ongoing census of bacterial and archaeal diversity through a phylogenetically consistent, rank normalized and complete genome-based taxonomy](https://doi.org/10.1093/nar/gkab776) _Nucleic Acids Research_ gkab776 (2021) - -- [Mash](https://github.com/marbl/Mash) - Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM [Mash: fast genome and metagenome distance estimation using MinHash](http://dx.doi.org/10.1186/s13059-016-0997-x). _Genome Biol_ 17, 132 (2016) - -- [Mash](https://github.com/marbl/Mash) - Ondov BD, Starrett GJ, Sappington A, Kostic A, Koren S, Buck CB, Phillippy AM [Mash Screen: high-throughput sequence containment estimation for genome discovery](https://doi.org/10.1186/s13059-019-1841-x) _Genome Biol_ 20, 232 (2019) - -- [NCBI RefSeq Database](https://www.ncbi.nlm.nih.gov/refseq/) - O'Leary NA, Wright MW, Brister JR, Ciufo S, Haddad D, McVeigh R, Rajput B, Robbertse B, Smith-White B, Ako-Adjei D, Astashyn A, Badretdin A, Bao Y, Blinkova O0, Brover V, Chetvernin V, Choi J, Cox E, Ermolaeva O, Farrell CM, Goldfarb T, Gupta T, Haft D, Hatcher E, Hlavina W, Joardar VS, Kodali VK, Li W, Maglott D, Masterson P, McGarvey KM, Murphy MR, O'Neill K, Pujar S, Rangwala SH, Rausch D, Riddick LD, Schoch C, Shkeda A, Storz SS, Sun H, Thibaud-Nissen F, Tolstoy I, Tully RE, Vatsan AR, Wallin C, Webb D, Wu W, Landrum MJ, Kimchi A, Tatusova T, DiCuccio M, Kitts P, Murphy TD, Pruitt KD [Reference sequence (RefSeq) database at NCBI: current status, taxonomic 
expansion, and functional annotation.](https://doi.org/10.1093/nar/gkv1189) _Nucleic Acids Res._ 44, D733–45 (2016) - -- [Sourmash](https://github.com/dib-lab/sourmash) - Brown CT, Irber L [sourmash: a library for MinHash sketching of DNA](http://dx.doi.org/10.21105/joss.00027). _JOSS_ 1, 27 (2016) - diff --git a/docs/beginners-guide.md b/docs/beginners-guide.md index 683f44f0..31a2b275 100644 --- a/docs/beginners-guide.md +++ b/docs/beginners-guide.md @@ -2,6 +2,7 @@ title: Beginner's Guide description: >- A beginner's guide to getting started using Bactopia to process your bacterial genomes. +sidebar_position: 3 --- Bactopia is a complete pipeline for the analysis of bacterial genomes, which includes @@ -23,7 +24,7 @@ Towards the end of this guide, we'll also take a look at some helpful parameters are interested in learning more about the full set of parameters available in Bactopia, please check out the [Full Guide](full-guide.md) section. -![Bactopia Workflow](assets/bactopia-workflow.png) +![Bactopia Workflow](/img/bactopia-workflow.png) ## Gathering Inputs @@ -172,7 +173,7 @@ bactopia \ Using [Unicycler](https://github.com/rrwick/Unicycler) (`--hybrid`) to create a hybrid assembly works great when you have low-coverage noisy long-reads. However, if you are using recent ONT sequencing, you likely have high-coverage and using the `--short_polish` -method is going to yeild better results (_and be faster!_) than `--hybrid`. +method is going to yield better results (_and be faster!_) than `--hybrid`. ::: @@ -225,7 +226,7 @@ bactopia \ :::info[Why only Experiment accessions?] In the grand scheme of accession hierarchies, Experiment accessions are really the only unique ones. For example, a multiple Run accessions can be associated with a single -Experiment accession. Or, multiple Exeriment accessions can be associated with a single +Experiment accession. Or, multiple Experiment accessions can be associated with a single BioSample accession. 
So, by using Experiment accessions, you can be confident you are getting only the sequences associated with that "_unique_" Experiment. ::: @@ -342,7 +343,7 @@ While manually creating the necessary FOFN is possible, it's not recommended. It bit tedious and error-prone, so please avoid manually creating your FOFN. Instead, use `bactopia prepare` to help accurately generate a FOFN for your samples. -When Bactopia recieves a FOFN, the first thing Bactopia does is verify all input files are +When Bactopia receives a FOFN, the first thing Bactopia does is verify all input files are found and compressed using Gzip. If everything checks out, each sample will then be processed, otherwise a list of samples with errors will be output to STDERR. @@ -516,7 +517,7 @@ by [Submitting and Issue on GitHub](https://github.com/bactopia/bactopia/issues/ ### Accessions -If you started from the top, and made it this far I commend you! Eitherway, above you +If you started from the top, and made it this far I commend you! Either way, above you learned you could use `--accession` to download FASTQs from ENA/SRA or assemblies from NCBI Assembly. Then you just learned you could use `--samples` to process as many samples as you want. So, it only makes sense that there would be a complement to `--samples` for @@ -540,7 +541,7 @@ Let's take a look at `bactopia search` and how it can help you. #### bactopia search `bactopia search` has been made to help assist in generating a list of Experiment accessions -to be procesed by Bactopia (via `--accessions`). You can provide a Taxon ID (e.g. 1280), a +to be processed by Bactopia (via `--accessions`). You can provide a Taxon ID (e.g. 1280), a organism name (e.g. Staphylococcus aureus), a Study accession (e.g. PRJNA480016), a BioSample accession (e.g. SAMN01737350), or a Run accession (e.g. SRR578340). 
This value is then queried against ENA's [Data Warehouse API](https://www.ebi.ac.uk/ena/browse/search-rest)), @@ -567,7 +568,7 @@ to just 5 Experiment accessions. Then multiple files are produced: | Extension | Description | |-------------------|--------------------------------------------------------------------------------| -| `-metadata.txt` | A tab-delimted file of all results from the query | +| `-metadata.txt` | A tab-delimited file of all results from the query | | `-accessions.txt` | A list of Experiment accessions to be processed | | `-filtered.txt` | A list of any Experiment accessions that were filtered out, otherwise an empty | | `-search.txt` | A summary of the completed request | @@ -790,14 +791,14 @@ Please check the following links for adjustments you be interested in making. |---------|-------------| | [env](https://www.nextflow.io/docs/latest/config.html#scope-env) | Set any environment variables that might be required | | [params](https://www.nextflow.io/docs/latest/config.html#scope-params) | Change the default values of command line arguments | -| [process](https://www.nextflow.io/docs/latest/config.html#scope-process) | Adjust perprocess configurations such as containers, conda envs, or resource usage | +| [process](https://www.nextflow.io/docs/latest/config.html#scope-process) | Adjust per-process configurations such as containers, conda envs, or resource usage | | [profile](https://www.nextflow.io/docs/latest/config.html#config-profiles) | Create predefined profiles for your [Executor](https://www.nextflow.io/docs/latest/operator.html#filtering-operators) | There are [many other scopes](https://www.nextflow.io/docs/latest/config.html#config-scopes) that you might be interested in checking out. You are most like going to want to create a custom profile. By doing so you can specify it -at runtime (`-profile myProfile`) and Nextflow will be excuted based on that profile. 
Often +at runtime (`-profile myProfile`) and Nextflow will be executed based on that profile. Often times your custom profile will include information on the executor (queues, allocations, paths, etc...). If you need help please [reach out](https://github.com/bactopia/bactopia/issues/new/choose)! @@ -812,4 +813,4 @@ successfully, the appropriate results are pulled out and placed in the sample's The `work` directory can grow very large very quickly! Please keep this in mind when using Bactopia (_and other Nextflow pipelines_). To help prevent the build up of the `work` directory you can use `--cleanup_workdir` to automatically delete the `work` directory after -a successful run. \ No newline at end of file +a successful run. diff --git a/docs/blog/.authors.yml b/docs/blog/.authors.yml deleted file mode 100644 index 2d27790e..00000000 --- a/docs/blog/.authors.yml +++ /dev/null @@ -1,7 +0,0 @@ -authors: - rpetit3: - name: Robert A. Petit III - description: The primary developer of Bactopia - avatar: https://avatars.githubusercontent.com/u/5334269 - slug: rpetit3 - url: https://www.robertpetit.com/ diff --git a/docs/blog/index.md b/docs/blog/index.md deleted file mode 100644 index a0f2ec00..00000000 --- a/docs/blog/index.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: Bactopia Blog -description: A blog about Bactopia, including tutorials, updates, paper reviews, and more. ---- diff --git a/docs/changelog.md b/docs/changelog.md index 6a879bff..5d1d9a97 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,8 +1,8 @@ --- -title: changelog +title: Changelog description: A full list of Bactopia releases and a description of the changes. 
+sidebar_position: 50 --- -# Changelog ## v3.1.0 bactopia/bactopia "Dance Powder" 2024/09/22 diff --git a/docs/custom/extra.css b/docs/custom/extra.css deleted file mode 100644 index 4616d516..00000000 --- a/docs/custom/extra.css +++ /dev/null @@ -1,58 +0,0 @@ -.zoom { - cursor: zoom-in !important; -} - -.fa-xl { - padding-right: 10px !important; -} - -.fa-lg { - padding-right: 10px !important; -} - -.mfp-img { - cursor: zoom-out; -} - -/* overlay at start */ -.mfp-fade.mfp-bg { - opacity: 0; - - -webkit-transition: all 0.15s ease-out; - -moz-transition: all 0.15s ease-out; - transition: all 0.15s ease-out; - } -/* overlay animate in */ -.mfp-fade.mfp-bg.mfp-ready { -opacity: 0.8; -} -/* overlay animate out */ -.mfp-fade.mfp-bg.mfp-removing { -opacity: 0; -} - -/* content at start */ -.mfp-fade.mfp-wrap .mfp-content { -opacity: 0; - --webkit-transition: all 0.15s ease-out; --moz-transition: all 0.15s ease-out; -transition: all 0.15s ease-out; -} -/* content animate it */ -.mfp-fade.mfp-wrap.mfp-ready .mfp-content { -opacity: 1; -} -/* content animate out */ -.mfp-fade.mfp-wrap.mfp-removing .mfp-content { -opacity: 0; -} - -/* make table 100% width */ -.md-typeset__table { - min-width: 100%; -} - -.md-typeset table:not([class]) { - display: table; -} diff --git a/docs/custom/extra.js b/docs/custom/extra.js deleted file mode 100644 index ef7baf43..00000000 --- a/docs/custom/extra.js +++ /dev/null @@ -1,9 +0,0 @@ -$(document).ready(function() { - $('.zoom').magnificPopup({ - type:'image', - removalDelay: 600, - mainClass: 'mfp-fade', - closeOnContentClick: true, - showCloseBtn: false - }); -}); diff --git a/docs/custom/fontawesome.js b/docs/custom/fontawesome.js deleted file mode 100644 index 49713de6..00000000 --- a/docs/custom/fontawesome.js +++ /dev/null @@ -1,2 +0,0 @@ -window.FontAwesomeKitConfig = 
{"asyncLoading":{"enabled":false},"autoA11y":{"enabled":true},"baseUrl":"https://ka-f.fontawesome.com","baseUrlKit":"https://kit.fontawesome.com","detectConflictsUntil":null,"iconUploads":{},"id":51334793,"license":"free","method":"css","minify":{"enabled":true},"token":"a28b3e0d7d","v4FontFaceShim":{"enabled":true},"v4shim":{"enabled":true},"v5FontFaceShim":{"enabled":true},"version":"6.4.2"}; -!function(t){"function"==typeof define&&define.amd?define("kit-loader",t):t()}((function(){"use strict";function t(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function e(e){for(var n=1;nt.length)&&(e=t.length);for(var n=0,r=new Array(e);n-1}(t)&&("URLSearchParams"in window?(i=new URL(t)).searchParams.set("token",o):i=i+"?token="+encodeURIComponent(o)),i=i.toString(),new E((function(t,e){if("function"==typeof n)n(i,{mode:"cors",cache:"default"}).then((function(t){if(t.ok)return t.text();throw new Error("")})).then((function(e){t(e)})).catch(e);else if("function"==typeof r){var o=new r;o.addEventListener("loadend",(function(){this.responseText?t(this.responseText):e(new Error(""))}));["abort","error","timeout"].map((function(t){o.addEventListener(t,(function(){e(new Error(""))}))})),o.open("GET",i),o.send()}else{e(new Error(""))}}))}function _(t,e,n){var r=t;return[[/(url\("?)\.\.\/\.\.\/\.\./g,function(t,n){return"".concat(n).concat(e)}],[/(url\("?)\.\.\/webfonts/g,function(t,r){return"".concat(r).concat(e,"/releases/v").concat(n,"/webfonts")}],[/(url\("?)https:\/\/kit-free([^.])*\.fontawesome\.com/g,function(t,n){return"".concat(n).concat(e)}]].forEach((function(t){var e=o(t,2),n=e[0],i=e[1];r=r.replace(n,i)})),r}function F(t,n){var r=arguments.length>2&&void 0!==arguments[2]?arguments[2]:function(){},o=n.document||o,i=a.bind(a,o,["fa","fab","fas","far","fal","fad","fak"]);t.autoA11y.enabled&&r(i);var 
u=t.subsetPath&&t.baseUrl+"/"+t.subsetPath,f=[{id:"fa-main",addOn:void 0,url:u}];if(t.v4shim&&t.v4shim.enabled&&f.push({id:"fa-v4-shims",addOn:"-v4-shims"}),t.v5FontFaceShim&&t.v5FontFaceShim.enabled&&f.push({id:"fa-v5-font-face",addOn:"-v5-font-face"}),t.v4FontFaceShim&&t.v4FontFaceShim.enabled&&f.push({id:"fa-v4-font-face",addOn:"-v4-font-face"}),!u&&t.customIconsCssPath){var s=t.customIconsCssPath.indexOf("kit-upload.css")>-1?t.baseUrlKit:t.baseUrl,l=s+"/"+t.customIconsCssPath;f.push({id:"fa-kit-upload",url:l})}var d=f.map((function(r){return new E((function(o,i){var a=r.url||c(t,{addOn:r.addOn,minify:t.minify.enabled}),u={id:r.id},f=t.subset?u:e(e(e({},n),u),{},{baseUrl:t.baseUrl,version:t.version,id:r.id,contentFilter:function(t,e){return _(t,e.baseUrl,e.version)}});P(a,n).then((function(t){o(C(t,f))})).catch(i)}))}));return E.all(d)}function C(t,e){var n=e.contentFilter||function(t,e){return t},r=document.createElement("style"),o=document.createTextNode(n(t,e));return r.appendChild(o),r.media="all",e.id&&r.setAttribute("id",e.id),e&&e.detectingConflicts&&e.detectionIgnoreAttr&&r.setAttributeNode(document.createAttribute(e.detectionIgnoreAttr)),r}function I(t,n){n.autoA11y=t.autoA11y.enabled,"pro"===t.license&&(n.autoFetchSvg=!0,n.fetchSvgFrom=t.baseUrl+"/releases/"+("latest"===t.version?"latest":"v".concat(t.version))+"/svgs",n.fetchUploadedSvgFrom=t.uploadsUrl);var r=[];return t.v4shim.enabled&&r.push(new E((function(r,o){P(c(t,{addOn:"-v4-shims",minify:t.minify.enabled}),n).then((function(t){r(U(t,e(e({},n),{},{id:"fa-v4-shims"})))})).catch(o)}))),r.push(new E((function(r,o){P(t.subsetPath&&t.baseUrl+"/"+t.subsetPath||c(t,{minify:t.minify.enabled}),n).then((function(t){var o=U(t,e(e({},n),{},{id:"fa-main"}));r(function(t,e){var n=e&&void 0!==e.autoFetchSvg?e.autoFetchSvg:void 0,r=e&&void 0!==e.autoA11y?e.autoA11y:void 0;void 
0!==r&&t.setAttribute("data-auto-a11y",r?"true":"false");n&&(t.setAttributeNode(document.createAttribute("data-auto-fetch-svg")),t.setAttribute("data-fetch-svg-from",e.fetchSvgFrom),t.setAttribute("data-fetch-uploaded-svg-from",e.fetchUploadedSvgFrom));return t}(o,n))})).catch(o)}))),E.all(r)}function U(t,e){var n=document.createElement("SCRIPT"),r=document.createTextNode(t);return n.appendChild(r),n.referrerPolicy="strict-origin",e.id&&n.setAttribute("id",e.id),e&&e.detectingConflicts&&e.detectionIgnoreAttr&&n.setAttributeNode(document.createAttribute(e.detectionIgnoreAttr)),n}function T(t){var e,n=[],r=document,o=r.documentElement.doScroll,i=(o?/^loaded|^c/:/^loaded|^i|^c/).test(r.readyState);i||r.addEventListener("DOMContentLoaded",e=function(){for(r.removeEventListener("DOMContentLoaded",e),i=1;e=n.shift();)e()}),i?setTimeout(t,0):n.push(t)}function L(t){"undefined"!=typeof MutationObserver&&new MutationObserver(t).observe(document,{childList:!0,subtree:!0})}try{if(window.FontAwesomeKitConfig){var k=window.FontAwesomeKitConfig,x={detectingConflicts:k.detectConflictsUntil&&new Date<=new Date(k.detectConflictsUntil),detectionIgnoreAttr:"data-fa-detection-ignore",fetch:window.fetch,token:k.token,XMLHttpRequest:window.XMLHttpRequest,document:document},M=document.currentScript,N=M?M.parentElement:document.head;(function(){var t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},e=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};return"js"===t.method?I(t,e):"css"===t.method?F(t,e,(function(t){T(t),L(t)})):void 0})(k,x).then((function(t){t.map((function(t){try{N.insertBefore(t,M?M.nextSibling:null)}catch(e){N.appendChild(t)}})),x.detectingConflicts&&M&&T((function(){M.setAttributeNode(document.createAttribute(x.detectionIgnoreAttr));var t=function(t,e){var n=document.createElement("script");return 
e&&e.detectionIgnoreAttr&&n.setAttributeNode(document.createAttribute(e.detectionIgnoreAttr)),n.src=c(t,{baseFilename:"conflict-detection",fileSuffix:"js",subdir:"js",minify:t.minify.enabled}),n}(k,x);document.body.appendChild(t)}))})).catch((function(t){console.error("".concat("Font Awesome Kit:"," ").concat(t))}))}}catch(t){console.error("".concat("Font Awesome Kit:"," ").concat(t))}})); diff --git a/docs/custom/jquery.magnific-popup.min.js b/docs/custom/jquery.magnific-popup.min.js deleted file mode 100644 index 6ee3a3bd..00000000 --- a/docs/custom/jquery.magnific-popup.min.js +++ /dev/null @@ -1,4 +0,0 @@ -/*! Magnific Popup - v1.1.0 - 2016-02-20 -* http://dimsemenov.com/plugins/magnific-popup/ -* Copyright (c) 2016 Dmitry Semenov; */ -!function(a){"function"==typeof define&&define.amd?define(["jquery"],a):a("object"==typeof exports?require("jquery"):window.jQuery||window.Zepto)}(function(a){var b,c,d,e,f,g,h="Close",i="BeforeClose",j="AfterClose",k="BeforeAppend",l="MarkupParse",m="Open",n="Change",o="mfp",p="."+o,q="mfp-ready",r="mfp-removing",s="mfp-prevent-close",t=function(){},u=!!window.jQuery,v=a(window),w=function(a,c){b.ev.on(o+a+p,c)},x=function(b,c,d,e){var f=document.createElement("div");return f.className="mfp-"+b,d&&(f.innerHTML=d),e?c&&c.appendChild(f):(f=a(f),c&&f.appendTo(c)),f},y=function(c,d){b.ev.triggerHandler(o+c,d),b.st.callbacks&&(c=c.charAt(0).toLowerCase()+c.slice(1),b.st.callbacks[c]&&b.st.callbacks[c].apply(b,a.isArray(d)?d:[d]))},z=function(c){return c===g&&b.currTemplate.closeBtn||(b.currTemplate.closeBtn=a(b.st.closeMarkup.replace("%title%",b.st.tClose)),g=c),b.currTemplate.closeBtn},A=function(){a.magnificPopup.instance||(b=new t,b.init(),a.magnificPopup.instance=b)},B=function(){var a=document.createElement("p").style,b=["ms","O","Moz","Webkit"];if(void 0!==a.transition)return!0;for(;b.length;)if(b.pop()+"Transition"in a)return!0;return!1};t.prototype={constructor:t,init:function(){var 
c=navigator.appVersion;b.isLowIE=b.isIE8=document.all&&!document.addEventListener,b.isAndroid=/android/gi.test(c),b.isIOS=/iphone|ipad|ipod/gi.test(c),b.supportsTransition=B(),b.probablyMobile=b.isAndroid||b.isIOS||/(Opera Mini)|Kindle|webOS|BlackBerry|(Opera Mobi)|(Windows Phone)|IEMobile/i.test(navigator.userAgent),d=a(document),b.popupsCache={}},open:function(c){var e;if(c.isObj===!1){b.items=c.items.toArray(),b.index=0;var g,h=c.items;for(e=0;e(a||v.height())},_setFocus:function(){(b.st.focus?b.content.find(b.st.focus).eq(0):b.wrap).focus()},_onFocusIn:function(c){return c.target===b.wrap[0]||a.contains(b.wrap[0],c.target)?void 0:(b._setFocus(),!1)},_parseMarkup:function(b,c,d){var e;d.data&&(c=a.extend(d.data,c)),y(l,[b,c,d]),a.each(c,function(c,d){if(void 0===d||d===!1)return!0;if(e=c.split("_"),e.length>1){var f=b.find(p+"-"+e[0]);if(f.length>0){var g=e[1];"replaceWith"===g?f[0]!==d[0]&&f.replaceWith(d):"img"===g?f.is("img")?f.attr("src",d):f.replaceWith(a("").attr("src",d).attr("class",f.attr("class"))):f.attr(e[1],d)}}else b.find(p+"-"+c).html(d)})},_getScrollbarSize:function(){if(void 0===b.scrollbarSize){var a=document.createElement("div");a.style.cssText="width: 99px; height: 99px; overflow: scroll; position: absolute; top: -9999px;",document.body.appendChild(a),b.scrollbarSize=a.offsetWidth-a.clientWidth,document.body.removeChild(a)}return b.scrollbarSize}},a.magnificPopup={instance:null,proto:t.prototype,modules:[],open:function(b,c){return A(),b=b?a.extend(!0,{},b):{},b.isObj=!0,b.index=c||0,this.instance.open(b)},close:function(){return 
a.magnificPopup.instance&&a.magnificPopup.instance.close()},registerModule:function(b,c){c.options&&(a.magnificPopup.defaults[b]=c.options),a.extend(this.proto,c.proto),this.modules.push(b)},defaults:{disableOn:0,key:null,midClick:!1,mainClass:"",preloader:!0,focus:"",closeOnContentClick:!1,closeOnBgClick:!0,closeBtnInside:!0,showCloseBtn:!0,enableEscapeKey:!0,modal:!1,alignTop:!1,removalDelay:0,prependTo:null,fixedContentPos:"auto",fixedBgPos:"auto",overflowY:"auto",closeMarkup:'',tClose:"Close (Esc)",tLoading:"Loading...",autoFocusLast:!0}},a.fn.magnificPopup=function(c){A();var d=a(this);if("string"==typeof c)if("open"===c){var e,f=u?d.data("magnificPopup"):d[0].magnificPopup,g=parseInt(arguments[1],10)||0;f.items?e=f.items[g]:(e=d,f.delegate&&(e=e.find(f.delegate)),e=e.eq(g)),b._openClick({mfpEl:e},d,f)}else b.isOpen&&b[c].apply(b,Array.prototype.slice.call(arguments,1));else c=a.extend(!0,{},c),u?d.data("magnificPopup",c):d[0].magnificPopup=c,b.addGroup(d,c);return d};var C,D,E,F="inline",G=function(){E&&(D.after(E.addClass(C)).detach(),E=null)};a.magnificPopup.registerModule(F,{options:{hiddenClass:"hide",markup:"",tNotFound:"Content not found"},proto:{initInline:function(){b.types.push(F),w(h+"."+F,function(){G()})},getInline:function(c,d){if(G(),c.src){var e=b.st.inline,f=a(c.src);if(f.length){var g=f[0].parentNode;g&&g.tagName&&(D||(C=e.hiddenClass,D=x(C),C="mfp-"+C),E=f.after(D).detach().removeClass(C)),b.updateStatus("ready")}else b.updateStatus("error",e.tNotFound),f=a("
");return c.inlineElement=f,f}return b.updateStatus("ready"),b._parseMarkup(d,{},c),d}}});var H,I="ajax",J=function(){H&&a(document.body).removeClass(H)},K=function(){J(),b.req&&b.req.abort()};a.magnificPopup.registerModule(I,{options:{settings:null,cursor:"mfp-ajax-cur",tError:'The content could not be loaded.'},proto:{initAjax:function(){b.types.push(I),H=b.st.ajax.cursor,w(h+"."+I,K),w("BeforeChange."+I,K)},getAjax:function(c){H&&a(document.body).addClass(H),b.updateStatus("loading");var d=a.extend({url:c.src,success:function(d,e,f){var g={data:d,xhr:f};y("ParseAjax",g),b.appendContent(a(g.data),I),c.finished=!0,J(),b._setFocus(),setTimeout(function(){b.wrap.addClass(q)},16),b.updateStatus("ready"),y("AjaxContentAdded")},error:function(){J(),c.finished=c.loadError=!0,b.updateStatus("error",b.st.ajax.tError.replace("%url%",c.src))}},b.st.ajax.settings);return b.req=a.ajax(d),""}}});var L,M=function(c){if(c.data&&void 0!==c.data.title)return c.data.title;var d=b.st.image.titleSrc;if(d){if(a.isFunction(d))return d.call(b,c);if(c.el)return c.el.attr(d)||""}return""};a.magnificPopup.registerModule("image",{options:{markup:'
',cursor:"mfp-zoom-out-cur",titleSrc:"title",verticalFit:!0,tError:'The image could not be loaded.'},proto:{initImage:function(){var c=b.st.image,d=".image";b.types.push("image"),w(m+d,function(){"image"===b.currItem.type&&c.cursor&&a(document.body).addClass(c.cursor)}),w(h+d,function(){c.cursor&&a(document.body).removeClass(c.cursor),v.off("resize"+p)}),w("Resize"+d,b.resizeImage),b.isLowIE&&w("AfterChange",b.resizeImage)},resizeImage:function(){var a=b.currItem;if(a&&a.img&&b.st.image.verticalFit){var c=0;b.isLowIE&&(c=parseInt(a.img.css("padding-top"),10)+parseInt(a.img.css("padding-bottom"),10)),a.img.css("max-height",b.wH-c)}},_onImageHasSize:function(a){a.img&&(a.hasSize=!0,L&&clearInterval(L),a.isCheckingImgSize=!1,y("ImageHasSize",a),a.imgHidden&&(b.content&&b.content.removeClass("mfp-loading"),a.imgHidden=!1))},findImageSize:function(a){var c=0,d=a.img[0],e=function(f){L&&clearInterval(L),L=setInterval(function(){return d.naturalWidth>0?void b._onImageHasSize(a):(c>200&&clearInterval(L),c++,void(3===c?e(10):40===c?e(50):100===c&&e(500)))},f)};e(1)},getImage:function(c,d){var e=0,f=function(){c&&(c.img[0].complete?(c.img.off(".mfploader"),c===b.currItem&&(b._onImageHasSize(c),b.updateStatus("ready")),c.hasSize=!0,c.loaded=!0,y("ImageLoadComplete")):(e++,200>e?setTimeout(f,100):g()))},g=function(){c&&(c.img.off(".mfploader"),c===b.currItem&&(b._onImageHasSize(c),b.updateStatus("error",h.tError.replace("%url%",c.src))),c.hasSize=!0,c.loaded=!0,c.loadError=!0)},h=b.st.image,i=d.find(".mfp-img");if(i.length){var j=document.createElement("img");j.className="mfp-img",c.el&&c.el.find("img").length&&(j.alt=c.el.find("img").attr("alt")),c.img=a(j).on("load.mfploader",f).on("error.mfploader",g),j.src=c.src,i.is("img")&&(c.img=c.img.clone()),j=c.img[0],j.naturalWidth>0?c.hasSize=!0:j.width||(c.hasSize=!1)}return 
b._parseMarkup(d,{title:M(c),img_replaceWith:c.img},c),b.resizeImage(),c.hasSize?(L&&clearInterval(L),c.loadError?(d.addClass("mfp-loading"),b.updateStatus("error",h.tError.replace("%url%",c.src))):(d.removeClass("mfp-loading"),b.updateStatus("ready")),d):(b.updateStatus("loading"),c.loading=!0,c.hasSize||(c.imgHidden=!0,d.addClass("mfp-loading"),b.findImageSize(c)),d)}}});var N,O=function(){return void 0===N&&(N=void 0!==document.createElement("p").style.MozTransform),N};a.magnificPopup.registerModule("zoom",{options:{enabled:!1,easing:"ease-in-out",duration:300,opener:function(a){return a.is("img")?a:a.find("img")}},proto:{initZoom:function(){var a,c=b.st.zoom,d=".zoom";if(c.enabled&&b.supportsTransition){var e,f,g=c.duration,j=function(a){var b=a.clone().removeAttr("style").removeAttr("class").addClass("mfp-animated-image"),d="all "+c.duration/1e3+"s "+c.easing,e={position:"fixed",zIndex:9999,left:0,top:0,"-webkit-backface-visibility":"hidden"},f="transition";return e["-webkit-"+f]=e["-moz-"+f]=e["-o-"+f]=e[f]=d,b.css(e),b},k=function(){b.content.css("visibility","visible")};w("BuildControls"+d,function(){if(b._allowZoom()){if(clearTimeout(e),b.content.css("visibility","hidden"),a=b._getItemToZoom(),!a)return void k();f=j(a),f.css(b._getOffset()),b.wrap.append(f),e=setTimeout(function(){f.css(b._getOffset(!0)),e=setTimeout(function(){k(),setTimeout(function(){f.remove(),a=f=null,y("ZoomAnimationEnded")},16)},g)},16)}}),w(i+d,function(){if(b._allowZoom()){if(clearTimeout(e),b.st.removalDelay=g,!a){if(a=b._getItemToZoom(),!a)return;f=j(a)}f.css(b._getOffset(!0)),b.wrap.append(f),b.content.css("visibility","hidden"),setTimeout(function(){f.css(b._getOffset())},16)}}),w(h+d,function(){b._allowZoom()&&(k(),f&&f.remove(),a=null)})}},_allowZoom:function(){return"image"===b.currItem.type},_getItemToZoom:function(){return b.currItem.hasSize?b.currItem.img:!1},_getOffset:function(c){var d;d=c?b.currItem.img:b.st.zoom.opener(b.currItem.el||b.currItem);var 
e=d.offset(),f=parseInt(d.css("padding-top"),10),g=parseInt(d.css("padding-bottom"),10);e.top-=a(window).scrollTop()-f;var h={width:d.width(),height:(u?d.innerHeight():d[0].offsetHeight)-g-f};return O()?h["-moz-transform"]=h.transform="translate("+e.left+"px,"+e.top+"px)":(h.left=e.left,h.top=e.top),h}}});var P="iframe",Q="//about:blank",R=function(a){if(b.currTemplate[P]){var c=b.currTemplate[P].find("iframe");c.length&&(a||(c[0].src=Q),b.isIE8&&c.css("display",a?"block":"none"))}};a.magnificPopup.registerModule(P,{options:{markup:'
',srcAction:"iframe_src",patterns:{youtube:{index:"youtube.com",id:"v=",src:"//www.youtube.com/embed/%id%?autoplay=1"},vimeo:{index:"vimeo.com/",id:"/",src:"//player.vimeo.com/video/%id%?autoplay=1"},gmaps:{index:"//maps.google.",src:"%id%&output=embed"}}},proto:{initIframe:function(){b.types.push(P),w("BeforeChange",function(a,b,c){b!==c&&(b===P?R():c===P&&R(!0))}),w(h+"."+P,function(){R()})},getIframe:function(c,d){var e=c.src,f=b.st.iframe;a.each(f.patterns,function(){return e.indexOf(this.index)>-1?(this.id&&(e="string"==typeof this.id?e.substr(e.lastIndexOf(this.id)+this.id.length,e.length):this.id.call(this,e)),e=this.src.replace("%id%",e),!1):void 0});var g={};return f.srcAction&&(g[f.srcAction]=e),b._parseMarkup(d,g,c),b.updateStatus("ready"),d}}});var S=function(a){var c=b.items.length;return a>c-1?a-c:0>a?c+a:a},T=function(a,b,c){return a.replace(/%curr%/gi,b+1).replace(/%total%/gi,c)};a.magnificPopup.registerModule("gallery",{options:{enabled:!1,arrowMarkup:'',preload:[0,2],navigateByImgClick:!0,arrows:!0,tPrev:"Previous (Left arrow key)",tNext:"Next (Right arrow key)",tCounter:"%curr% of %total%"},proto:{initGallery:function(){var c=b.st.gallery,e=".mfp-gallery";return b.direction=!0,c&&c.enabled?(f+=" mfp-gallery",w(m+e,function(){c.navigateByImgClick&&b.wrap.on("click"+e,".mfp-img",function(){return b.items.length>1?(b.next(),!1):void 0}),d.on("keydown"+e,function(a){37===a.keyCode?b.prev():39===a.keyCode&&b.next()})}),w("UpdateStatus"+e,function(a,c){c.text&&(c.text=T(c.text,b.currItem.index,b.items.length))}),w(l+e,function(a,d,e,f){var g=b.items.length;e.counter=g>1?T(c.tCounter,f.index,g):""}),w("BuildControls"+e,function(){if(b.items.length>1&&c.arrows&&!b.arrowLeft){var 
d=c.arrowMarkup,e=b.arrowLeft=a(d.replace(/%title%/gi,c.tPrev).replace(/%dir%/gi,"left")).addClass(s),f=b.arrowRight=a(d.replace(/%title%/gi,c.tNext).replace(/%dir%/gi,"right")).addClass(s);e.click(function(){b.prev()}),f.click(function(){b.next()}),b.container.append(e.add(f))}}),w(n+e,function(){b._preloadTimeout&&clearTimeout(b._preloadTimeout),b._preloadTimeout=setTimeout(function(){b.preloadNearbyImages(),b._preloadTimeout=null},16)}),void w(h+e,function(){d.off(e),b.wrap.off("click"+e),b.arrowRight=b.arrowLeft=null})):!1},next:function(){b.direction=!0,b.index=S(b.index+1),b.updateItemHTML()},prev:function(){b.direction=!1,b.index=S(b.index-1),b.updateItemHTML()},goTo:function(a){b.direction=a>=b.index,b.index=a,b.updateItemHTML()},preloadNearbyImages:function(){var a,c=b.st.gallery.preload,d=Math.min(c[0],b.items.length),e=Math.min(c[1],b.items.length);for(a=1;a<=(b.direction?e:d);a++)b._preloadItem(b.index+a);for(a=1;a<=(b.direction?d:e);a++)b._preloadItem(b.index-a)},_preloadItem:function(c){if(c=S(c),!b.items[c].preloaded){var d=b.items[c];d.parsed||(d=b.parseEl(c)),y("LazyLoad",d),"image"===d.type&&(d.img=a('').on("load.mfploader",function(){d.hasSize=!0}).on("error.mfploader",function(){d.hasSize=!0,d.loadError=!0,y("LazyLoadError",d)}).attr("src",d.src)),d.preloaded=!0}}}});var U="retina";a.magnificPopup.registerModule(U,{options:{replaceSrc:function(a){return a.src.replace(/\.\w+$/,function(a){return"@2x"+a})},ratio:1},proto:{initRetina:function(){if(window.devicePixelRatio>1){var a=b.st.retina,c=a.ratio;c=isNaN(c)?c():c,c>1&&(w("ImageHasSize."+U,function(a,b){b.img.css({"max-width":b.img[0].naturalWidth/c,width:"100%"})}),w("ElementParse."+U,function(b,d){d.src=a.replaceSrc(d,c)}))}}}}),A()}); \ No newline at end of file diff --git a/docs/custom/jquery.min.js b/docs/custom/jquery.min.js deleted file mode 100644 index 006e9531..00000000 --- a/docs/custom/jquery.min.js +++ /dev/null @@ -1,5 +0,0 @@ -/*! 
jQuery v1.9.1 | (c) 2005, 2012 jQuery Foundation, Inc. | jquery.org/license -//@ sourceMappingURL=jquery.min.map -*/(function(e,t){var n,r,i=typeof t,o=e.document,a=e.location,s=e.jQuery,u=e.$,l={},c=[],p="1.9.1",f=c.concat,d=c.push,h=c.slice,g=c.indexOf,m=l.toString,y=l.hasOwnProperty,v=p.trim,b=function(e,t){return new b.fn.init(e,t,r)},x=/[+-]?(?:\d*\.|)\d+(?:[eE][+-]?\d+|)/.source,w=/\S+/g,T=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,N=/^(?:(<[\w\W]+>)[^>]*|#([\w-]*))$/,C=/^<(\w+)\s*\/?>(?:<\/\1>|)$/,k=/^[\],:{}\s]*$/,E=/(?:^|:|,)(?:\s*\[)+/g,S=/\\(?:["\\\/bfnrt]|u[\da-fA-F]{4})/g,A=/"[^"\\\r\n]*"|true|false|null|-?(?:\d+\.|)\d+(?:[eE][+-]?\d+|)/g,j=/^-ms-/,D=/-([\da-z])/gi,L=function(e,t){return t.toUpperCase()},H=function(e){(o.addEventListener||"load"===e.type||"complete"===o.readyState)&&(q(),b.ready())},q=function(){o.addEventListener?(o.removeEventListener("DOMContentLoaded",H,!1),e.removeEventListener("load",H,!1)):(o.detachEvent("onreadystatechange",H),e.detachEvent("onload",H))};b.fn=b.prototype={jquery:p,constructor:b,init:function(e,n,r){var i,a;if(!e)return this;if("string"==typeof e){if(i="<"===e.charAt(0)&&">"===e.charAt(e.length-1)&&e.length>=3?[null,e,null]:N.exec(e),!i||!i[1]&&n)return!n||n.jquery?(n||r).find(e):this.constructor(n).find(e);if(i[1]){if(n=n instanceof b?n[0]:n,b.merge(this,b.parseHTML(i[1],n&&n.nodeType?n.ownerDocument||n:o,!0)),C.test(i[1])&&b.isPlainObject(n))for(i in n)b.isFunction(this[i])?this[i](n[i]):this.attr(i,n[i]);return this}if(a=o.getElementById(i[2]),a&&a.parentNode){if(a.id!==i[2])return r.find(e);this.length=1,this[0]=a}return this.context=o,this.selector=e,this}return e.nodeType?(this.context=this[0]=e,this.length=1,this):b.isFunction(e)?r.ready(e):(e.selector!==t&&(this.selector=e.selector,this.context=e.context),b.makeArray(e,this))},selector:"",length:0,size:function(){return this.length},toArray:function(){return h.call(this)},get:function(e){return 
null==e?this.toArray():0>e?this[this.length+e]:this[e]},pushStack:function(e){var t=b.merge(this.constructor(),e);return t.prevObject=this,t.context=this.context,t},each:function(e,t){return b.each(this,e,t)},ready:function(e){return b.ready.promise().done(e),this},slice:function(){return this.pushStack(h.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(e){var t=this.length,n=+e+(0>e?t:0);return this.pushStack(n>=0&&t>n?[this[n]]:[])},map:function(e){return this.pushStack(b.map(this,function(t,n){return e.call(t,n,t)}))},end:function(){return this.prevObject||this.constructor(null)},push:d,sort:[].sort,splice:[].splice},b.fn.init.prototype=b.fn,b.extend=b.fn.extend=function(){var e,n,r,i,o,a,s=arguments[0]||{},u=1,l=arguments.length,c=!1;for("boolean"==typeof s&&(c=s,s=arguments[1]||{},u=2),"object"==typeof s||b.isFunction(s)||(s={}),l===u&&(s=this,--u);l>u;u++)if(null!=(o=arguments[u]))for(i in o)e=s[i],r=o[i],s!==r&&(c&&r&&(b.isPlainObject(r)||(n=b.isArray(r)))?(n?(n=!1,a=e&&b.isArray(e)?e:[]):a=e&&b.isPlainObject(e)?e:{},s[i]=b.extend(c,a,r)):r!==t&&(s[i]=r));return s},b.extend({noConflict:function(t){return e.$===b&&(e.$=u),t&&e.jQuery===b&&(e.jQuery=s),b},isReady:!1,readyWait:1,holdReady:function(e){e?b.readyWait++:b.ready(!0)},ready:function(e){if(e===!0?!--b.readyWait:!b.isReady){if(!o.body)return setTimeout(b.ready);b.isReady=!0,e!==!0&&--b.readyWait>0||(n.resolveWith(o,[b]),b.fn.trigger&&b(o).trigger("ready").off("ready"))}},isFunction:function(e){return"function"===b.type(e)},isArray:Array.isArray||function(e){return"array"===b.type(e)},isWindow:function(e){return null!=e&&e==e.window},isNumeric:function(e){return!isNaN(parseFloat(e))&&isFinite(e)},type:function(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?l[m.call(e)]||"object":typeof 
e},isPlainObject:function(e){if(!e||"object"!==b.type(e)||e.nodeType||b.isWindow(e))return!1;try{if(e.constructor&&!y.call(e,"constructor")&&!y.call(e.constructor.prototype,"isPrototypeOf"))return!1}catch(n){return!1}var r;for(r in e);return r===t||y.call(e,r)},isEmptyObject:function(e){var t;for(t in e)return!1;return!0},error:function(e){throw Error(e)},parseHTML:function(e,t,n){if(!e||"string"!=typeof e)return null;"boolean"==typeof t&&(n=t,t=!1),t=t||o;var r=C.exec(e),i=!n&&[];return r?[t.createElement(r[1])]:(r=b.buildFragment([e],t,i),i&&b(i).remove(),b.merge([],r.childNodes))},parseJSON:function(n){return e.JSON&&e.JSON.parse?e.JSON.parse(n):null===n?n:"string"==typeof n&&(n=b.trim(n),n&&k.test(n.replace(S,"@").replace(A,"]").replace(E,"")))?Function("return "+n)():(b.error("Invalid JSON: "+n),t)},parseXML:function(n){var r,i;if(!n||"string"!=typeof n)return null;try{e.DOMParser?(i=new DOMParser,r=i.parseFromString(n,"text/xml")):(r=new ActiveXObject("Microsoft.XMLDOM"),r.async="false",r.loadXML(n))}catch(o){r=t}return r&&r.documentElement&&!r.getElementsByTagName("parsererror").length||b.error("Invalid XML: "+n),r},noop:function(){},globalEval:function(t){t&&b.trim(t)&&(e.execScript||function(t){e.eval.call(e,t)})(t)},camelCase:function(e){return e.replace(j,"ms-").replace(D,L)},nodeName:function(e,t){return e.nodeName&&e.nodeName.toLowerCase()===t.toLowerCase()},each:function(e,t,n){var r,i=0,o=e.length,a=M(e);if(n){if(a){for(;o>i;i++)if(r=t.apply(e[i],n),r===!1)break}else for(i in e)if(r=t.apply(e[i],n),r===!1)break}else if(a){for(;o>i;i++)if(r=t.call(e[i],i,e[i]),r===!1)break}else for(i in e)if(r=t.call(e[i],i,e[i]),r===!1)break;return e},trim:v&&!v.call("\ufeff\u00a0")?function(e){return null==e?"":v.call(e)}:function(e){return null==e?"":(e+"").replace(T,"")},makeArray:function(e,t){var n=t||[];return null!=e&&(M(Object(e))?b.merge(n,"string"==typeof e?[e]:e):d.call(n,e)),n},inArray:function(e,t,n){var r;if(t){if(g)return 
g.call(t,e,n);for(r=t.length,n=n?0>n?Math.max(0,r+n):n:0;r>n;n++)if(n in t&&t[n]===e)return n}return-1},merge:function(e,n){var r=n.length,i=e.length,o=0;if("number"==typeof r)for(;r>o;o++)e[i++]=n[o];else while(n[o]!==t)e[i++]=n[o++];return e.length=i,e},grep:function(e,t,n){var r,i=[],o=0,a=e.length;for(n=!!n;a>o;o++)r=!!t(e[o],o),n!==r&&i.push(e[o]);return i},map:function(e,t,n){var r,i=0,o=e.length,a=M(e),s=[];if(a)for(;o>i;i++)r=t(e[i],i,n),null!=r&&(s[s.length]=r);else for(i in e)r=t(e[i],i,n),null!=r&&(s[s.length]=r);return f.apply([],s)},guid:1,proxy:function(e,n){var r,i,o;return"string"==typeof n&&(o=e[n],n=e,e=o),b.isFunction(e)?(r=h.call(arguments,2),i=function(){return e.apply(n||this,r.concat(h.call(arguments)))},i.guid=e.guid=e.guid||b.guid++,i):t},access:function(e,n,r,i,o,a,s){var u=0,l=e.length,c=null==r;if("object"===b.type(r)){o=!0;for(u in r)b.access(e,n,u,r[u],!0,a,s)}else if(i!==t&&(o=!0,b.isFunction(i)||(s=!0),c&&(s?(n.call(e,i),n=null):(c=n,n=function(e,t,n){return c.call(b(e),n)})),n))for(;l>u;u++)n(e[u],r,s?i:i.call(e[u],u,n(e[u],r)));return o?e:c?n.call(e):l?n(e[0],r):a},now:function(){return(new Date).getTime()}}),b.ready.promise=function(t){if(!n)if(n=b.Deferred(),"complete"===o.readyState)setTimeout(b.ready);else if(o.addEventListener)o.addEventListener("DOMContentLoaded",H,!1),e.addEventListener("load",H,!1);else{o.attachEvent("onreadystatechange",H),e.attachEvent("onload",H);var r=!1;try{r=null==e.frameElement&&o.documentElement}catch(i){}r&&r.doScroll&&function a(){if(!b.isReady){try{r.doScroll("left")}catch(e){return setTimeout(a,50)}q(),b.ready()}}()}return n.promise(t)},b.each("Boolean Number String Function Array Date RegExp Object Error".split(" "),function(e,t){l["[object "+t+"]"]=t.toLowerCase()});function M(e){var t=e.length,n=b.type(e);return b.isWindow(e)?!1:1===e.nodeType&&t?!0:"array"===n||"function"!==n&&(0===t||"number"==typeof t&&t>0&&t-1 in e)}r=b(o);var _={};function F(e){var t=_[e]={};return 
b.each(e.match(w)||[],function(e,n){t[n]=!0}),t}b.Callbacks=function(e){e="string"==typeof e?_[e]||F(e):b.extend({},e);var n,r,i,o,a,s,u=[],l=!e.once&&[],c=function(t){for(r=e.memory&&t,i=!0,a=s||0,s=0,o=u.length,n=!0;u&&o>a;a++)if(u[a].apply(t[0],t[1])===!1&&e.stopOnFalse){r=!1;break}n=!1,u&&(l?l.length&&c(l.shift()):r?u=[]:p.disable())},p={add:function(){if(u){var t=u.length;(function i(t){b.each(t,function(t,n){var r=b.type(n);"function"===r?e.unique&&p.has(n)||u.push(n):n&&n.length&&"string"!==r&&i(n)})})(arguments),n?o=u.length:r&&(s=t,c(r))}return this},remove:function(){return u&&b.each(arguments,function(e,t){var r;while((r=b.inArray(t,u,r))>-1)u.splice(r,1),n&&(o>=r&&o--,a>=r&&a--)}),this},has:function(e){return e?b.inArray(e,u)>-1:!(!u||!u.length)},empty:function(){return u=[],this},disable:function(){return u=l=r=t,this},disabled:function(){return!u},lock:function(){return l=t,r||p.disable(),this},locked:function(){return!l},fireWith:function(e,t){return t=t||[],t=[e,t.slice?t.slice():t],!u||i&&!l||(n?l.push(t):c(t)),this},fire:function(){return p.fireWith(this,arguments),this},fired:function(){return!!i}};return p},b.extend({Deferred:function(e){var t=[["resolve","done",b.Callbacks("once memory"),"resolved"],["reject","fail",b.Callbacks("once memory"),"rejected"],["notify","progress",b.Callbacks("memory")]],n="pending",r={state:function(){return n},always:function(){return i.done(arguments).fail(arguments),this},then:function(){var e=arguments;return b.Deferred(function(n){b.each(t,function(t,o){var a=o[0],s=b.isFunction(e[t])&&e[t];i[o[1]](function(){var e=s&&s.apply(this,arguments);e&&b.isFunction(e.promise)?e.promise().done(n.resolve).fail(n.reject).progress(n.notify):n[a+"With"](this===r?n.promise():this,s?[e]:arguments)})}),e=null}).promise()},promise:function(e){return null!=e?b.extend(e,r):r}},i={};return r.pipe=r.then,b.each(t,function(e,o){var 
a=o[2],s=o[3];r[o[1]]=a.add,s&&a.add(function(){n=s},t[1^e][2].disable,t[2][2].lock),i[o[0]]=function(){return i[o[0]+"With"](this===i?r:this,arguments),this},i[o[0]+"With"]=a.fireWith}),r.promise(i),e&&e.call(i,i),i},when:function(e){var t=0,n=h.call(arguments),r=n.length,i=1!==r||e&&b.isFunction(e.promise)?r:0,o=1===i?e:b.Deferred(),a=function(e,t,n){return function(r){t[e]=this,n[e]=arguments.length>1?h.call(arguments):r,n===s?o.notifyWith(t,n):--i||o.resolveWith(t,n)}},s,u,l;if(r>1)for(s=Array(r),u=Array(r),l=Array(r);r>t;t++)n[t]&&b.isFunction(n[t].promise)?n[t].promise().done(a(t,l,n)).fail(o.reject).progress(a(t,u,s)):--i;return i||o.resolveWith(l,n),o.promise()}}),b.support=function(){var t,n,r,a,s,u,l,c,p,f,d=o.createElement("div");if(d.setAttribute("className","t"),d.innerHTML="
a",n=d.getElementsByTagName("*"),r=d.getElementsByTagName("a")[0],!n||!r||!n.length)return{};s=o.createElement("select"),l=s.appendChild(o.createElement("option")),a=d.getElementsByTagName("input")[0],r.style.cssText="top:1px;float:left;opacity:.5",t={getSetAttribute:"t"!==d.className,leadingWhitespace:3===d.firstChild.nodeType,tbody:!d.getElementsByTagName("tbody").length,htmlSerialize:!!d.getElementsByTagName("link").length,style:/top/.test(r.getAttribute("style")),hrefNormalized:"/a"===r.getAttribute("href"),opacity:/^0.5/.test(r.style.opacity),cssFloat:!!r.style.cssFloat,checkOn:!!a.value,optSelected:l.selected,enctype:!!o.createElement("form").enctype,html5Clone:"<:nav>"!==o.createElement("nav").cloneNode(!0).outerHTML,boxModel:"CSS1Compat"===o.compatMode,deleteExpando:!0,noCloneEvent:!0,inlineBlockNeedsLayout:!1,shrinkWrapBlocks:!1,reliableMarginRight:!0,boxSizingReliable:!0,pixelPosition:!1},a.checked=!0,t.noCloneChecked=a.cloneNode(!0).checked,s.disabled=!0,t.optDisabled=!l.disabled;try{delete d.test}catch(h){t.deleteExpando=!1}a=o.createElement("input"),a.setAttribute("value",""),t.input=""===a.getAttribute("value"),a.value="t",a.setAttribute("type","radio"),t.radioValue="t"===a.value,a.setAttribute("checked","t"),a.setAttribute("name","t"),u=o.createDocumentFragment(),u.appendChild(a),t.appendChecked=a.checked,t.checkClone=u.cloneNode(!0).cloneNode(!0).lastChild.checked,d.attachEvent&&(d.attachEvent("onclick",function(){t.noCloneEvent=!1}),d.cloneNode(!0).click());for(f in{submit:!0,change:!0,focusin:!0})d.setAttribute(c="on"+f,"t"),t[f+"Bubbles"]=c in e||d.attributes[c].expando===!1;return d.style.backgroundClip="content-box",d.cloneNode(!0).style.backgroundClip="",t.clearCloneStyle="content-box"===d.style.backgroundClip,b(function(){var 
n,r,a,s="padding:0;margin:0;border:0;display:block;box-sizing:content-box;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;",u=o.getElementsByTagName("body")[0];u&&(n=o.createElement("div"),n.style.cssText="border:0;width:0;height:0;position:absolute;top:0;left:-9999px;margin-top:1px",u.appendChild(n).appendChild(d),d.innerHTML="
t
",a=d.getElementsByTagName("td"),a[0].style.cssText="padding:0;margin:0;border:0;display:none",p=0===a[0].offsetHeight,a[0].style.display="",a[1].style.display="none",t.reliableHiddenOffsets=p&&0===a[0].offsetHeight,d.innerHTML="",d.style.cssText="box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;padding:1px;border:1px;display:block;width:4px;margin-top:1%;position:absolute;top:1%;",t.boxSizing=4===d.offsetWidth,t.doesNotIncludeMarginInBodyOffset=1!==u.offsetTop,e.getComputedStyle&&(t.pixelPosition="1%"!==(e.getComputedStyle(d,null)||{}).top,t.boxSizingReliable="4px"===(e.getComputedStyle(d,null)||{width:"4px"}).width,r=d.appendChild(o.createElement("div")),r.style.cssText=d.style.cssText=s,r.style.marginRight=r.style.width="0",d.style.width="1px",t.reliableMarginRight=!parseFloat((e.getComputedStyle(r,null)||{}).marginRight)),typeof d.style.zoom!==i&&(d.innerHTML="",d.style.cssText=s+"width:1px;padding:1px;display:inline;zoom:1",t.inlineBlockNeedsLayout=3===d.offsetWidth,d.style.display="block",d.innerHTML="
",d.firstChild.style.width="5px",t.shrinkWrapBlocks=3!==d.offsetWidth,t.inlineBlockNeedsLayout&&(u.style.zoom=1)),u.removeChild(n),n=d=a=r=null)}),n=s=u=l=r=a=null,t}();var O=/(?:\{[\s\S]*\}|\[[\s\S]*\])$/,B=/([A-Z])/g;function P(e,n,r,i){if(b.acceptData(e)){var o,a,s=b.expando,u="string"==typeof n,l=e.nodeType,p=l?b.cache:e,f=l?e[s]:e[s]&&s;if(f&&p[f]&&(i||p[f].data)||!u||r!==t)return f||(l?e[s]=f=c.pop()||b.guid++:f=s),p[f]||(p[f]={},l||(p[f].toJSON=b.noop)),("object"==typeof n||"function"==typeof n)&&(i?p[f]=b.extend(p[f],n):p[f].data=b.extend(p[f].data,n)),o=p[f],i||(o.data||(o.data={}),o=o.data),r!==t&&(o[b.camelCase(n)]=r),u?(a=o[n],null==a&&(a=o[b.camelCase(n)])):a=o,a}}function R(e,t,n){if(b.acceptData(e)){var r,i,o,a=e.nodeType,s=a?b.cache:e,u=a?e[b.expando]:b.expando;if(s[u]){if(t&&(o=n?s[u]:s[u].data)){b.isArray(t)?t=t.concat(b.map(t,b.camelCase)):t in o?t=[t]:(t=b.camelCase(t),t=t in o?[t]:t.split(" "));for(r=0,i=t.length;i>r;r++)delete o[t[r]];if(!(n?$:b.isEmptyObject)(o))return}(n||(delete s[u].data,$(s[u])))&&(a?b.cleanData([e],!0):b.support.deleteExpando||s!=s.window?delete s[u]:s[u]=null)}}}b.extend({cache:{},expando:"jQuery"+(p+Math.random()).replace(/\D/g,""),noData:{embed:!0,object:"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000",applet:!0},hasData:function(e){return e=e.nodeType?b.cache[e[b.expando]]:e[b.expando],!!e&&!$(e)},data:function(e,t,n){return P(e,t,n)},removeData:function(e,t){return R(e,t)},_data:function(e,t,n){return P(e,t,n,!0)},_removeData:function(e,t){return R(e,t,!0)},acceptData:function(e){if(e.nodeType&&1!==e.nodeType&&9!==e.nodeType)return!1;var t=e.nodeName&&b.noData[e.nodeName.toLowerCase()];return!t||t!==!0&&e.getAttribute("classid")===t}}),b.fn.extend({data:function(e,n){var r,i,o=this[0],a=0,s=null;if(e===t){if(this.length&&(s=b.data(o),1===o.nodeType&&!b._data(o,"parsedAttrs"))){for(r=o.attributes;r.length>a;a++)i=r[a].name,i.indexOf("data-")||(i=b.camelCase(i.slice(5)),W(o,i,s[i]));b._data(o,"parsedAttrs",!0)}return 
s}return"object"==typeof e?this.each(function(){b.data(this,e)}):b.access(this,function(n){return n===t?o?W(o,e,b.data(o,e)):null:(this.each(function(){b.data(this,e,n)}),t)},null,n,arguments.length>1,null,!0)},removeData:function(e){return this.each(function(){b.removeData(this,e)})}});function W(e,n,r){if(r===t&&1===e.nodeType){var i="data-"+n.replace(B,"-$1").toLowerCase();if(r=e.getAttribute(i),"string"==typeof r){try{r="true"===r?!0:"false"===r?!1:"null"===r?null:+r+""===r?+r:O.test(r)?b.parseJSON(r):r}catch(o){}b.data(e,n,r)}else r=t}return r}function $(e){var t;for(t in e)if(("data"!==t||!b.isEmptyObject(e[t]))&&"toJSON"!==t)return!1;return!0}b.extend({queue:function(e,n,r){var i;return e?(n=(n||"fx")+"queue",i=b._data(e,n),r&&(!i||b.isArray(r)?i=b._data(e,n,b.makeArray(r)):i.push(r)),i||[]):t},dequeue:function(e,t){t=t||"fx";var n=b.queue(e,t),r=n.length,i=n.shift(),o=b._queueHooks(e,t),a=function(){b.dequeue(e,t)};"inprogress"===i&&(i=n.shift(),r--),o.cur=i,i&&("fx"===t&&n.unshift("inprogress"),delete o.stop,i.call(e,a,o)),!r&&o&&o.empty.fire()},_queueHooks:function(e,t){var n=t+"queueHooks";return b._data(e,n)||b._data(e,n,{empty:b.Callbacks("once memory").add(function(){b._removeData(e,t+"queue"),b._removeData(e,n)})})}}),b.fn.extend({queue:function(e,n){var r=2;return"string"!=typeof e&&(n=e,e="fx",r--),r>arguments.length?b.queue(this[0],e):n===t?this:this.each(function(){var t=b.queue(this,e,n);b._queueHooks(this,e),"fx"===e&&"inprogress"!==t[0]&&b.dequeue(this,e)})},dequeue:function(e){return this.each(function(){b.dequeue(this,e)})},delay:function(e,t){return e=b.fx?b.fx.speeds[e]||e:e,t=t||"fx",this.queue(t,function(t,n){var r=setTimeout(t,e);n.stop=function(){clearTimeout(r)}})},clearQueue:function(e){return this.queue(e||"fx",[])},promise:function(e,n){var r,i=1,o=b.Deferred(),a=this,s=this.length,u=function(){--i||o.resolveWith(a,[a])};"string"!=typeof 
e&&(n=e,e=t),e=e||"fx";while(s--)r=b._data(a[s],e+"queueHooks"),r&&r.empty&&(i++,r.empty.add(u));return u(),o.promise(n)}});var I,z,X=/[\t\r\n]/g,U=/\r/g,V=/^(?:input|select|textarea|button|object)$/i,Y=/^(?:a|area)$/i,J=/^(?:checked|selected|autofocus|autoplay|async|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped)$/i,G=/^(?:checked|selected)$/i,Q=b.support.getSetAttribute,K=b.support.input;b.fn.extend({attr:function(e,t){return b.access(this,b.attr,e,t,arguments.length>1)},removeAttr:function(e){return this.each(function(){b.removeAttr(this,e)})},prop:function(e,t){return b.access(this,b.prop,e,t,arguments.length>1)},removeProp:function(e){return e=b.propFix[e]||e,this.each(function(){try{this[e]=t,delete this[e]}catch(n){}})},addClass:function(e){var t,n,r,i,o,a=0,s=this.length,u="string"==typeof e&&e;if(b.isFunction(e))return this.each(function(t){b(this).addClass(e.call(this,t,this.className))});if(u)for(t=(e||"").match(w)||[];s>a;a++)if(n=this[a],r=1===n.nodeType&&(n.className?(" "+n.className+" ").replace(X," "):" ")){o=0;while(i=t[o++])0>r.indexOf(" "+i+" ")&&(r+=i+" ");n.className=b.trim(r)}return this},removeClass:function(e){var t,n,r,i,o,a=0,s=this.length,u=0===arguments.length||"string"==typeof e&&e;if(b.isFunction(e))return this.each(function(t){b(this).removeClass(e.call(this,t,this.className))});if(u)for(t=(e||"").match(w)||[];s>a;a++)if(n=this[a],r=1===n.nodeType&&(n.className?(" "+n.className+" ").replace(X," "):"")){o=0;while(i=t[o++])while(r.indexOf(" "+i+" ")>=0)r=r.replace(" "+i+" "," ");n.className=e?b.trim(r):""}return this},toggleClass:function(e,t){var n=typeof e,r="boolean"==typeof t;return b.isFunction(e)?this.each(function(n){b(this).toggleClass(e.call(this,n,this.className,t),t)}):this.each(function(){if("string"===n){var 
o,a=0,s=b(this),u=t,l=e.match(w)||[];while(o=l[a++])u=r?u:!s.hasClass(o),s[u?"addClass":"removeClass"](o)}else(n===i||"boolean"===n)&&(this.className&&b._data(this,"__className__",this.className),this.className=this.className||e===!1?"":b._data(this,"__className__")||"")})},hasClass:function(e){var t=" "+e+" ",n=0,r=this.length;for(;r>n;n++)if(1===this[n].nodeType&&(" "+this[n].className+" ").replace(X," ").indexOf(t)>=0)return!0;return!1},val:function(e){var n,r,i,o=this[0];{if(arguments.length)return i=b.isFunction(e),this.each(function(n){var o,a=b(this);1===this.nodeType&&(o=i?e.call(this,n,a.val()):e,null==o?o="":"number"==typeof o?o+="":b.isArray(o)&&(o=b.map(o,function(e){return null==e?"":e+""})),r=b.valHooks[this.type]||b.valHooks[this.nodeName.toLowerCase()],r&&"set"in r&&r.set(this,o,"value")!==t||(this.value=o))});if(o)return r=b.valHooks[o.type]||b.valHooks[o.nodeName.toLowerCase()],r&&"get"in r&&(n=r.get(o,"value"))!==t?n:(n=o.value,"string"==typeof n?n.replace(U,""):null==n?"":n)}}}),b.extend({valHooks:{option:{get:function(e){var t=e.attributes.value;return!t||t.specified?e.value:e.text}},select:{get:function(e){var t,n,r=e.options,i=e.selectedIndex,o="select-one"===e.type||0>i,a=o?null:[],s=o?i+1:r.length,u=0>i?s:o?i:0;for(;s>u;u++)if(n=r[u],!(!n.selected&&u!==i||(b.support.optDisabled?n.disabled:null!==n.getAttribute("disabled"))||n.parentNode.disabled&&b.nodeName(n.parentNode,"optgroup"))){if(t=b(n).val(),o)return t;a.push(t)}return a},set:function(e,t){var n=b.makeArray(t);return b(e).find("option").each(function(){this.selected=b.inArray(b(this).val(),n)>=0}),n.length||(e.selectedIndex=-1),n}}},attr:function(e,n,r){var o,a,s,u=e.nodeType;if(e&&3!==u&&8!==u&&2!==u)return typeof e.getAttribute===i?b.prop(e,n,r):(a=1!==u||!b.isXMLDoc(e),a&&(n=n.toLowerCase(),o=b.attrHooks[n]||(J.test(n)?z:I)),r===t?o&&a&&"get"in o&&null!==(s=o.get(e,n))?s:(typeof e.getAttribute!==i&&(s=e.getAttribute(n)),null==s?t:s):null!==r?o&&a&&"set"in 
o&&(s=o.set(e,r,n))!==t?s:(e.setAttribute(n,r+""),r):(b.removeAttr(e,n),t))},removeAttr:function(e,t){var n,r,i=0,o=t&&t.match(w);if(o&&1===e.nodeType)while(n=o[i++])r=b.propFix[n]||n,J.test(n)?!Q&&G.test(n)?e[b.camelCase("default-"+n)]=e[r]=!1:e[r]=!1:b.attr(e,n,""),e.removeAttribute(Q?n:r)},attrHooks:{type:{set:function(e,t){if(!b.support.radioValue&&"radio"===t&&b.nodeName(e,"input")){var n=e.value;return e.setAttribute("type",t),n&&(e.value=n),t}}}},propFix:{tabindex:"tabIndex",readonly:"readOnly","for":"htmlFor","class":"className",maxlength:"maxLength",cellspacing:"cellSpacing",cellpadding:"cellPadding",rowspan:"rowSpan",colspan:"colSpan",usemap:"useMap",frameborder:"frameBorder",contenteditable:"contentEditable"},prop:function(e,n,r){var i,o,a,s=e.nodeType;if(e&&3!==s&&8!==s&&2!==s)return a=1!==s||!b.isXMLDoc(e),a&&(n=b.propFix[n]||n,o=b.propHooks[n]),r!==t?o&&"set"in o&&(i=o.set(e,r,n))!==t?i:e[n]=r:o&&"get"in o&&null!==(i=o.get(e,n))?i:e[n]},propHooks:{tabIndex:{get:function(e){var n=e.getAttributeNode("tabindex");return n&&n.specified?parseInt(n.value,10):V.test(e.nodeName)||Y.test(e.nodeName)&&e.href?0:t}}}}),z={get:function(e,n){var r=b.prop(e,n),i="boolean"==typeof r&&e.getAttribute(n),o="boolean"==typeof r?K&&Q?null!=i:G.test(n)?e[b.camelCase("default-"+n)]:!!i:e.getAttributeNode(n);return o&&o.value!==!1?n.toLowerCase():t},set:function(e,t,n){return t===!1?b.removeAttr(e,n):K&&Q||!G.test(n)?e.setAttribute(!Q&&b.propFix[n]||n,n):e[b.camelCase("default-"+n)]=e[n]=!0,n}},K&&Q||(b.attrHooks.value={get:function(e,n){var r=e.getAttributeNode(n);return b.nodeName(e,"input")?e.defaultValue:r&&r.specified?r.value:t},set:function(e,n,r){return b.nodeName(e,"input")?(e.defaultValue=n,t):I&&I.set(e,n,r)}}),Q||(I=b.valHooks.button={get:function(e,n){var r=e.getAttributeNode(n);return r&&("id"===n||"name"===n||"coords"===n?""!==r.value:r.specified)?r.value:t},set:function(e,n,r){var i=e.getAttributeNode(r);return 
i||e.setAttributeNode(i=e.ownerDocument.createAttribute(r)),i.value=n+="","value"===r||n===e.getAttribute(r)?n:t}},b.attrHooks.contenteditable={get:I.get,set:function(e,t,n){I.set(e,""===t?!1:t,n)}},b.each(["width","height"],function(e,n){b.attrHooks[n]=b.extend(b.attrHooks[n],{set:function(e,r){return""===r?(e.setAttribute(n,"auto"),r):t}})})),b.support.hrefNormalized||(b.each(["href","src","width","height"],function(e,n){b.attrHooks[n]=b.extend(b.attrHooks[n],{get:function(e){var r=e.getAttribute(n,2);return null==r?t:r}})}),b.each(["href","src"],function(e,t){b.propHooks[t]={get:function(e){return e.getAttribute(t,4)}}})),b.support.style||(b.attrHooks.style={get:function(e){return e.style.cssText||t},set:function(e,t){return e.style.cssText=t+""}}),b.support.optSelected||(b.propHooks.selected=b.extend(b.propHooks.selected,{get:function(e){var t=e.parentNode;return t&&(t.selectedIndex,t.parentNode&&t.parentNode.selectedIndex),null}})),b.support.enctype||(b.propFix.enctype="encoding"),b.support.checkOn||b.each(["radio","checkbox"],function(){b.valHooks[this]={get:function(e){return null===e.getAttribute("value")?"on":e.value}}}),b.each(["radio","checkbox"],function(){b.valHooks[this]=b.extend(b.valHooks[this],{set:function(e,n){return b.isArray(n)?e.checked=b.inArray(b(e).val(),n)>=0:t}})});var Z=/^(?:input|select|textarea)$/i,et=/^key/,tt=/^(?:mouse|contextmenu)|click/,nt=/^(?:focusinfocus|focusoutblur)$/,rt=/^([^.]*)(?:\.(.+)|)$/;function it(){return!0}function ot(){return!1}b.event={global:{},add:function(e,n,r,o,a){var s,u,l,c,p,f,d,h,g,m,y,v=b._data(e);if(v){r.handler&&(c=r,r=c.handler,a=c.selector),r.guid||(r.guid=b.guid++),(u=v.events)||(u=v.events={}),(f=v.handle)||(f=v.handle=function(e){return typeof 
b===i||e&&b.event.triggered===e.type?t:b.event.dispatch.apply(f.elem,arguments)},f.elem=e),n=(n||"").match(w)||[""],l=n.length;while(l--)s=rt.exec(n[l])||[],g=y=s[1],m=(s[2]||"").split(".").sort(),p=b.event.special[g]||{},g=(a?p.delegateType:p.bindType)||g,p=b.event.special[g]||{},d=b.extend({type:g,origType:y,data:o,handler:r,guid:r.guid,selector:a,needsContext:a&&b.expr.match.needsContext.test(a),namespace:m.join(".")},c),(h=u[g])||(h=u[g]=[],h.delegateCount=0,p.setup&&p.setup.call(e,o,m,f)!==!1||(e.addEventListener?e.addEventListener(g,f,!1):e.attachEvent&&e.attachEvent("on"+g,f))),p.add&&(p.add.call(e,d),d.handler.guid||(d.handler.guid=r.guid)),a?h.splice(h.delegateCount++,0,d):h.push(d),b.event.global[g]=!0;e=null}},remove:function(e,t,n,r,i){var o,a,s,u,l,c,p,f,d,h,g,m=b.hasData(e)&&b._data(e);if(m&&(c=m.events)){t=(t||"").match(w)||[""],l=t.length;while(l--)if(s=rt.exec(t[l])||[],d=g=s[1],h=(s[2]||"").split(".").sort(),d){p=b.event.special[d]||{},d=(r?p.delegateType:p.bindType)||d,f=c[d]||[],s=s[2]&&RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"),u=o=f.length;while(o--)a=f[o],!i&&g!==a.origType||n&&n.guid!==a.guid||s&&!s.test(a.namespace)||r&&r!==a.selector&&("**"!==r||!a.selector)||(f.splice(o,1),a.selector&&f.delegateCount--,p.remove&&p.remove.call(e,a));u&&!f.length&&(p.teardown&&p.teardown.call(e,h,m.handle)!==!1||b.removeEvent(e,d,m.handle),delete c[d])}else for(d in c)b.event.remove(e,d+t[l],n,r,!0);b.isEmptyObject(c)&&(delete m.handle,b._removeData(e,"events"))}},trigger:function(n,r,i,a){var s,u,l,c,p,f,d,h=[i||o],g=y.call(n,"type")?n.type:n,m=y.call(n,"namespace")?n.namespace.split("."):[];if(l=f=i=i||o,3!==i.nodeType&&8!==i.nodeType&&!nt.test(g+b.event.triggered)&&(g.indexOf(".")>=0&&(m=g.split("."),g=m.shift(),m.sort()),u=0>g.indexOf(":")&&"on"+g,n=n[b.expando]?n:new b.Event(g,"object"==typeof 
n&&n),n.isTrigger=!0,n.namespace=m.join("."),n.namespace_re=n.namespace?RegExp("(^|\\.)"+m.join("\\.(?:.*\\.|)")+"(\\.|$)"):null,n.result=t,n.target||(n.target=i),r=null==r?[n]:b.makeArray(r,[n]),p=b.event.special[g]||{},a||!p.trigger||p.trigger.apply(i,r)!==!1)){if(!a&&!p.noBubble&&!b.isWindow(i)){for(c=p.delegateType||g,nt.test(c+g)||(l=l.parentNode);l;l=l.parentNode)h.push(l),f=l;f===(i.ownerDocument||o)&&h.push(f.defaultView||f.parentWindow||e)}d=0;while((l=h[d++])&&!n.isPropagationStopped())n.type=d>1?c:p.bindType||g,s=(b._data(l,"events")||{})[n.type]&&b._data(l,"handle"),s&&s.apply(l,r),s=u&&l[u],s&&b.acceptData(l)&&s.apply&&s.apply(l,r)===!1&&n.preventDefault();if(n.type=g,!(a||n.isDefaultPrevented()||p._default&&p._default.apply(i.ownerDocument,r)!==!1||"click"===g&&b.nodeName(i,"a")||!b.acceptData(i)||!u||!i[g]||b.isWindow(i))){f=i[u],f&&(i[u]=null),b.event.triggered=g;try{i[g]()}catch(v){}b.event.triggered=t,f&&(i[u]=f)}return n.result}},dispatch:function(e){e=b.event.fix(e);var n,r,i,o,a,s=[],u=h.call(arguments),l=(b._data(this,"events")||{})[e.type]||[],c=b.event.special[e.type]||{};if(u[0]=e,e.delegateTarget=this,!c.preDispatch||c.preDispatch.call(this,e)!==!1){s=b.event.handlers.call(this,e,l),n=0;while((o=s[n++])&&!e.isPropagationStopped()){e.currentTarget=o.elem,a=0;while((i=o.handlers[a++])&&!e.isImmediatePropagationStopped())(!e.namespace_re||e.namespace_re.test(i.namespace))&&(e.handleObj=i,e.data=i.data,r=((b.event.special[i.origType]||{}).handle||i.handler).apply(o.elem,u),r!==t&&(e.result=r)===!1&&(e.preventDefault(),e.stopPropagation()))}return c.postDispatch&&c.postDispatch.call(this,e),e.result}},handlers:function(e,n){var r,i,o,a,s=[],u=n.delegateCount,l=e.target;if(u&&l.nodeType&&(!e.button||"click"!==e.type))for(;l!=this;l=l.parentNode||this)if(1===l.nodeType&&(l.disabled!==!0||"click"!==e.type)){for(o=[],a=0;u>a;a++)i=n[a],r=i.selector+" 
",o[r]===t&&(o[r]=i.needsContext?b(r,this).index(l)>=0:b.find(r,this,null,[l]).length),o[r]&&o.push(i);o.length&&s.push({elem:l,handlers:o})}return n.length>u&&s.push({elem:this,handlers:n.slice(u)}),s},fix:function(e){if(e[b.expando])return e;var t,n,r,i=e.type,a=e,s=this.fixHooks[i];s||(this.fixHooks[i]=s=tt.test(i)?this.mouseHooks:et.test(i)?this.keyHooks:{}),r=s.props?this.props.concat(s.props):this.props,e=new b.Event(a),t=r.length;while(t--)n=r[t],e[n]=a[n];return e.target||(e.target=a.srcElement||o),3===e.target.nodeType&&(e.target=e.target.parentNode),e.metaKey=!!e.metaKey,s.filter?s.filter(e,a):e},props:"altKey bubbles cancelable ctrlKey currentTarget eventPhase metaKey relatedTarget shiftKey target timeStamp view which".split(" "),fixHooks:{},keyHooks:{props:"char charCode key keyCode".split(" "),filter:function(e,t){return null==e.which&&(e.which=null!=t.charCode?t.charCode:t.keyCode),e}},mouseHooks:{props:"button buttons clientX clientY fromElement offsetX offsetY pageX pageY screenX screenY toElement".split(" "),filter:function(e,n){var r,i,a,s=n.button,u=n.fromElement;return null==e.pageX&&null!=n.clientX&&(i=e.target.ownerDocument||o,a=i.documentElement,r=i.body,e.pageX=n.clientX+(a&&a.scrollLeft||r&&r.scrollLeft||0)-(a&&a.clientLeft||r&&r.clientLeft||0),e.pageY=n.clientY+(a&&a.scrollTop||r&&r.scrollTop||0)-(a&&a.clientTop||r&&r.clientTop||0)),!e.relatedTarget&&u&&(e.relatedTarget=u===e.target?n.toElement:u),e.which||s===t||(e.which=1&s?1:2&s?3:4&s?2:0),e}},special:{load:{noBubble:!0},click:{trigger:function(){return b.nodeName(this,"input")&&"checkbox"===this.type&&this.click?(this.click(),!1):t}},focus:{trigger:function(){if(this!==o.activeElement&&this.focus)try{return this.focus(),!1}catch(e){}},delegateType:"focusin"},blur:{trigger:function(){return 
this===o.activeElement&&this.blur?(this.blur(),!1):t},delegateType:"focusout"},beforeunload:{postDispatch:function(e){e.result!==t&&(e.originalEvent.returnValue=e.result)}}},simulate:function(e,t,n,r){var i=b.extend(new b.Event,n,{type:e,isSimulated:!0,originalEvent:{}});r?b.event.trigger(i,null,t):b.event.dispatch.call(t,i),i.isDefaultPrevented()&&n.preventDefault()}},b.removeEvent=o.removeEventListener?function(e,t,n){e.removeEventListener&&e.removeEventListener(t,n,!1)}:function(e,t,n){var r="on"+t;e.detachEvent&&(typeof e[r]===i&&(e[r]=null),e.detachEvent(r,n))},b.Event=function(e,n){return this instanceof b.Event?(e&&e.type?(this.originalEvent=e,this.type=e.type,this.isDefaultPrevented=e.defaultPrevented||e.returnValue===!1||e.getPreventDefault&&e.getPreventDefault()?it:ot):this.type=e,n&&b.extend(this,n),this.timeStamp=e&&e.timeStamp||b.now(),this[b.expando]=!0,t):new b.Event(e,n)},b.Event.prototype={isDefaultPrevented:ot,isPropagationStopped:ot,isImmediatePropagationStopped:ot,preventDefault:function(){var e=this.originalEvent;this.isDefaultPrevented=it,e&&(e.preventDefault?e.preventDefault():e.returnValue=!1)},stopPropagation:function(){var e=this.originalEvent;this.isPropagationStopped=it,e&&(e.stopPropagation&&e.stopPropagation(),e.cancelBubble=!0)},stopImmediatePropagation:function(){this.isImmediatePropagationStopped=it,this.stopPropagation()}},b.each({mouseenter:"mouseover",mouseleave:"mouseout"},function(e,t){b.event.special[e]={delegateType:t,bindType:t,handle:function(e){var n,r=this,i=e.relatedTarget,o=e.handleObj; -return(!i||i!==r&&!b.contains(r,i))&&(e.type=o.origType,n=o.handler.apply(this,arguments),e.type=t),n}}}),b.support.submitBubbles||(b.event.special.submit={setup:function(){return b.nodeName(this,"form")?!1:(b.event.add(this,"click._submit keypress._submit",function(e){var 
n=e.target,r=b.nodeName(n,"input")||b.nodeName(n,"button")?n.form:t;r&&!b._data(r,"submitBubbles")&&(b.event.add(r,"submit._submit",function(e){e._submit_bubble=!0}),b._data(r,"submitBubbles",!0))}),t)},postDispatch:function(e){e._submit_bubble&&(delete e._submit_bubble,this.parentNode&&!e.isTrigger&&b.event.simulate("submit",this.parentNode,e,!0))},teardown:function(){return b.nodeName(this,"form")?!1:(b.event.remove(this,"._submit"),t)}}),b.support.changeBubbles||(b.event.special.change={setup:function(){return Z.test(this.nodeName)?(("checkbox"===this.type||"radio"===this.type)&&(b.event.add(this,"propertychange._change",function(e){"checked"===e.originalEvent.propertyName&&(this._just_changed=!0)}),b.event.add(this,"click._change",function(e){this._just_changed&&!e.isTrigger&&(this._just_changed=!1),b.event.simulate("change",this,e,!0)})),!1):(b.event.add(this,"beforeactivate._change",function(e){var t=e.target;Z.test(t.nodeName)&&!b._data(t,"changeBubbles")&&(b.event.add(t,"change._change",function(e){!this.parentNode||e.isSimulated||e.isTrigger||b.event.simulate("change",this.parentNode,e,!0)}),b._data(t,"changeBubbles",!0))}),t)},handle:function(e){var n=e.target;return this!==n||e.isSimulated||e.isTrigger||"radio"!==n.type&&"checkbox"!==n.type?e.handleObj.handler.apply(this,arguments):t},teardown:function(){return b.event.remove(this,"._change"),!Z.test(this.nodeName)}}),b.support.focusinBubbles||b.each({focus:"focusin",blur:"focusout"},function(e,t){var n=0,r=function(e){b.event.simulate(t,e.target,b.event.fix(e),!0)};b.event.special[t]={setup:function(){0===n++&&o.addEventListener(e,r,!0)},teardown:function(){0===--n&&o.removeEventListener(e,r,!0)}}}),b.fn.extend({on:function(e,n,r,i,o){var a,s;if("object"==typeof e){"string"!=typeof n&&(r=r||n,n=t);for(a in e)this.on(a,n,r,e[a],o);return this}if(null==r&&null==i?(i=n,r=n=t):null==i&&("string"==typeof n?(i=r,r=t):(i=r,r=n,n=t)),i===!1)i=ot;else if(!i)return this;return 1===o&&(s=i,i=function(e){return 
b().off(e),s.apply(this,arguments)},i.guid=s.guid||(s.guid=b.guid++)),this.each(function(){b.event.add(this,e,i,r,n)})},one:function(e,t,n,r){return this.on(e,t,n,r,1)},off:function(e,n,r){var i,o;if(e&&e.preventDefault&&e.handleObj)return i=e.handleObj,b(e.delegateTarget).off(i.namespace?i.origType+"."+i.namespace:i.origType,i.selector,i.handler),this;if("object"==typeof e){for(o in e)this.off(o,n,e[o]);return this}return(n===!1||"function"==typeof n)&&(r=n,n=t),r===!1&&(r=ot),this.each(function(){b.event.remove(this,e,r,n)})},bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)},trigger:function(e,t){return this.each(function(){b.event.trigger(e,t,this)})},triggerHandler:function(e,n){var r=this[0];return r?b.event.trigger(e,n,r,!0):t}}),function(e,t){var n,r,i,o,a,s,u,l,c,p,f,d,h,g,m,y,v,x="sizzle"+-new Date,w=e.document,T={},N=0,C=0,k=it(),E=it(),S=it(),A=typeof t,j=1<<31,D=[],L=D.pop,H=D.push,q=D.slice,M=D.indexOf||function(e){var t=0,n=this.length;for(;n>t;t++)if(this[t]===e)return 
t;return-1},_="[\\x20\\t\\r\\n\\f]",F="(?:\\\\.|[\\w-]|[^\\x00-\\xa0])+",O=F.replace("w","w#"),B="([*^$|!~]?=)",P="\\["+_+"*("+F+")"+_+"*(?:"+B+_+"*(?:(['\"])((?:\\\\.|[^\\\\])*?)\\3|("+O+")|)|)"+_+"*\\]",R=":("+F+")(?:\\(((['\"])((?:\\\\.|[^\\\\])*?)\\3|((?:\\\\.|[^\\\\()[\\]]|"+P.replace(3,8)+")*)|.*)\\)|)",W=RegExp("^"+_+"+|((?:^|[^\\\\])(?:\\\\.)*)"+_+"+$","g"),$=RegExp("^"+_+"*,"+_+"*"),I=RegExp("^"+_+"*([\\x20\\t\\r\\n\\f>+~])"+_+"*"),z=RegExp(R),X=RegExp("^"+O+"$"),U={ID:RegExp("^#("+F+")"),CLASS:RegExp("^\\.("+F+")"),NAME:RegExp("^\\[name=['\"]?("+F+")['\"]?\\]"),TAG:RegExp("^("+F.replace("w","w*")+")"),ATTR:RegExp("^"+P),PSEUDO:RegExp("^"+R),CHILD:RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+_+"*(even|odd|(([+-]|)(\\d*)n|)"+_+"*(?:([+-]|)"+_+"*(\\d+)|))"+_+"*\\)|)","i"),needsContext:RegExp("^"+_+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+_+"*((?:-\\d)?\\d*)"+_+"*\\)|)(?=[^-]|$)","i")},V=/[\x20\t\r\n\f]*[+~]/,Y=/^[^{]+\{\s*\[native code/,J=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,G=/^(?:input|select|textarea|button)$/i,Q=/^h\d$/i,K=/'|\\/g,Z=/\=[\x20\t\r\n\f]*([^'"\]]*)[\x20\t\r\n\f]*\]/g,et=/\\([\da-fA-F]{1,6}[\x20\t\r\n\f]?|.)/g,tt=function(e,t){var n="0x"+t-65536;return n!==n?t:0>n?String.fromCharCode(n+65536):String.fromCharCode(55296|n>>10,56320|1023&n)};try{q.call(w.documentElement.childNodes,0)[0].nodeType}catch(nt){q=function(e){var t,n=[];while(t=this[e++])n.push(t);return n}}function rt(e){return Y.test(e+"")}function it(){var e,t=[];return e=function(n,r){return t.push(n+=" ")>i.cacheLength&&delete e[t.shift()],e[n]=r}}function ot(e){return e[x]=!0,e}function at(e){var t=p.createElement("div");try{return e(t)}catch(n){return!1}finally{t=null}}function st(e,t,n,r){var i,o,a,s,u,l,f,g,m,v;if((t?t.ownerDocument||t:w)!==p&&c(t),t=t||p,n=n||[],!e||"string"!=typeof e)return n;if(1!==(s=t.nodeType)&&9!==s)return[];if(!d&&!r){if(i=J.exec(e))if(a=i[1]){if(9===s){if(o=t.getElementById(a),!o||!o.parentNode)return n;if(o.id===a)return 
n.push(o),n}else if(t.ownerDocument&&(o=t.ownerDocument.getElementById(a))&&y(t,o)&&o.id===a)return n.push(o),n}else{if(i[2])return H.apply(n,q.call(t.getElementsByTagName(e),0)),n;if((a=i[3])&&T.getByClassName&&t.getElementsByClassName)return H.apply(n,q.call(t.getElementsByClassName(a),0)),n}if(T.qsa&&!h.test(e)){if(f=!0,g=x,m=t,v=9===s&&e,1===s&&"object"!==t.nodeName.toLowerCase()){l=ft(e),(f=t.getAttribute("id"))?g=f.replace(K,"\\$&"):t.setAttribute("id",g),g="[id='"+g+"'] ",u=l.length;while(u--)l[u]=g+dt(l[u]);m=V.test(e)&&t.parentNode||t,v=l.join(",")}if(v)try{return H.apply(n,q.call(m.querySelectorAll(v),0)),n}catch(b){}finally{f||t.removeAttribute("id")}}}return wt(e.replace(W,"$1"),t,n,r)}a=st.isXML=function(e){var t=e&&(e.ownerDocument||e).documentElement;return t?"HTML"!==t.nodeName:!1},c=st.setDocument=function(e){var n=e?e.ownerDocument||e:w;return n!==p&&9===n.nodeType&&n.documentElement?(p=n,f=n.documentElement,d=a(n),T.tagNameNoComments=at(function(e){return e.appendChild(n.createComment("")),!e.getElementsByTagName("*").length}),T.attributes=at(function(e){e.innerHTML="";var t=typeof e.lastChild.getAttribute("multiple");return"boolean"!==t&&"string"!==t}),T.getByClassName=at(function(e){return e.innerHTML="",e.getElementsByClassName&&e.getElementsByClassName("e").length?(e.lastChild.className="e",2===e.getElementsByClassName("e").length):!1}),T.getByName=at(function(e){e.id=x+0,e.innerHTML="
",f.insertBefore(e,f.firstChild);var t=n.getElementsByName&&n.getElementsByName(x).length===2+n.getElementsByName(x+0).length;return T.getIdNotName=!n.getElementById(x),f.removeChild(e),t}),i.attrHandle=at(function(e){return e.innerHTML="",e.firstChild&&typeof e.firstChild.getAttribute!==A&&"#"===e.firstChild.getAttribute("href")})?{}:{href:function(e){return e.getAttribute("href",2)},type:function(e){return e.getAttribute("type")}},T.getIdNotName?(i.find.ID=function(e,t){if(typeof t.getElementById!==A&&!d){var n=t.getElementById(e);return n&&n.parentNode?[n]:[]}},i.filter.ID=function(e){var t=e.replace(et,tt);return function(e){return e.getAttribute("id")===t}}):(i.find.ID=function(e,n){if(typeof n.getElementById!==A&&!d){var r=n.getElementById(e);return r?r.id===e||typeof r.getAttributeNode!==A&&r.getAttributeNode("id").value===e?[r]:t:[]}},i.filter.ID=function(e){var t=e.replace(et,tt);return function(e){var n=typeof e.getAttributeNode!==A&&e.getAttributeNode("id");return n&&n.value===t}}),i.find.TAG=T.tagNameNoComments?function(e,n){return typeof n.getElementsByTagName!==A?n.getElementsByTagName(e):t}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},i.find.NAME=T.getByName&&function(e,n){return typeof n.getElementsByName!==A?n.getElementsByName(name):t},i.find.CLASS=T.getByClassName&&function(e,n){return typeof 
n.getElementsByClassName===A||d?t:n.getElementsByClassName(e)},g=[],h=[":focus"],(T.qsa=rt(n.querySelectorAll))&&(at(function(e){e.innerHTML="",e.querySelectorAll("[selected]").length||h.push("\\["+_+"*(?:checked|disabled|ismap|multiple|readonly|selected|value)"),e.querySelectorAll(":checked").length||h.push(":checked")}),at(function(e){e.innerHTML="",e.querySelectorAll("[i^='']").length&&h.push("[*^$]="+_+"*(?:\"\"|'')"),e.querySelectorAll(":enabled").length||h.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),h.push(",.*:")})),(T.matchesSelector=rt(m=f.matchesSelector||f.mozMatchesSelector||f.webkitMatchesSelector||f.oMatchesSelector||f.msMatchesSelector))&&at(function(e){T.disconnectedMatch=m.call(e,"div"),m.call(e,"[s!='']:x"),g.push("!=",R)}),h=RegExp(h.join("|")),g=RegExp(g.join("|")),y=rt(f.contains)||f.compareDocumentPosition?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},v=f.compareDocumentPosition?function(e,t){var r;return e===t?(u=!0,0):(r=t.compareDocumentPosition&&e.compareDocumentPosition&&e.compareDocumentPosition(t))?1&r||e.parentNode&&11===e.parentNode.nodeType?e===n||y(w,e)?-1:t===n||y(w,t)?1:0:4&r?-1:1:e.compareDocumentPosition?-1:1}:function(e,t){var r,i=0,o=e.parentNode,a=t.parentNode,s=[e],l=[t];if(e===t)return u=!0,0;if(!o||!a)return e===n?-1:t===n?1:o?-1:a?1:0;if(o===a)return ut(e,t);r=e;while(r=r.parentNode)s.unshift(r);r=t;while(r=r.parentNode)l.unshift(r);while(s[i]===l[i])i++;return i?ut(s[i],l[i]):s[i]===w?-1:l[i]===w?1:0},u=!1,[0,0].sort(v),T.detectDuplicates=u,p):p},st.matches=function(e,t){return st(e,null,null,t)},st.matchesSelector=function(e,t){if((e.ownerDocument||e)!==p&&c(e),t=t.replace(Z,"='$1']"),!(!T.matchesSelector||d||g&&g.test(t)||h.test(t)))try{var 
n=m.call(e,t);if(n||T.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(r){}return st(t,p,null,[e]).length>0},st.contains=function(e,t){return(e.ownerDocument||e)!==p&&c(e),y(e,t)},st.attr=function(e,t){var n;return(e.ownerDocument||e)!==p&&c(e),d||(t=t.toLowerCase()),(n=i.attrHandle[t])?n(e):d||T.attributes?e.getAttribute(t):((n=e.getAttributeNode(t))||e.getAttribute(t))&&e[t]===!0?t:n&&n.specified?n.value:null},st.error=function(e){throw Error("Syntax error, unrecognized expression: "+e)},st.uniqueSort=function(e){var t,n=[],r=1,i=0;if(u=!T.detectDuplicates,e.sort(v),u){for(;t=e[r];r++)t===e[r-1]&&(i=n.push(r));while(i--)e.splice(n[i],1)}return e};function ut(e,t){var n=t&&e,r=n&&(~t.sourceIndex||j)-(~e.sourceIndex||j);if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function lt(e){return function(t){var n=t.nodeName.toLowerCase();return"input"===n&&t.type===e}}function ct(e){return function(t){var n=t.nodeName.toLowerCase();return("input"===n||"button"===n)&&t.type===e}}function pt(e){return ot(function(t){return t=+t,ot(function(n,r){var i,o=e([],n.length,t),a=o.length;while(a--)n[i=o[a]]&&(n[i]=!(r[i]=n[i]))})})}o=st.getText=function(e){var t,n="",r=0,i=e.nodeType;if(i){if(1===i||9===i||11===i){if("string"==typeof e.textContent)return e.textContent;for(e=e.firstChild;e;e=e.nextSibling)n+=o(e)}else if(3===i||4===i)return e.nodeValue}else for(;t=e[r];r++)n+=o(t);return n},i=st.selectors={cacheLength:50,createPseudo:ot,match:U,find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(et,tt),e[3]=(e[4]||e[5]||"").replace(et,tt),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return 
e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||st.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&st.error(e[0]),e},PSEUDO:function(e){var t,n=!e[5]&&e[2];return U.CHILD.test(e[0])?null:(e[4]?e[2]=e[4]:n&&z.test(n)&&(t=ft(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){return"*"===e?function(){return!0}:(e=e.replace(et,tt).toLowerCase(),function(t){return t.nodeName&&t.nodeName.toLowerCase()===e})},CLASS:function(e){var t=k[e+" "];return t||(t=RegExp("(^|"+_+")"+e+"("+_+"|$)"))&&k(e,function(e){return t.test(e.className||typeof e.getAttribute!==A&&e.getAttribute("class")||"")})},ATTR:function(e,t,n){return function(r){var i=st.attr(r,e);return null==i?"!="===t:t?(i+="","="===t?i===n:"!="===t?i!==n:"^="===t?n&&0===i.indexOf(n):"*="===t?n&&i.indexOf(n)>-1:"$="===t?n&&i.slice(-n.length)===n:"~="===t?(" "+i+" ").indexOf(n)>-1:"|="===t?i===n||i.slice(0,n.length+1)===n+"-":!1):!0}},CHILD:function(e,t,n,r,i){var o="nth"!==e.slice(0,3),a="last"!==e.slice(-4),s="of-type"===t;return 1===r&&0===i?function(e){return!!e.parentNode}:function(t,n,u){var l,c,p,f,d,h,g=o!==a?"nextSibling":"previousSibling",m=t.parentNode,y=s&&t.nodeName.toLowerCase(),v=!u&&!s;if(m){if(o){while(g){p=t;while(p=p[g])if(s?p.nodeName.toLowerCase()===y:1===p.nodeType)return!1;h=g="only"===e&&!h&&"nextSibling"}return!0}if(h=[a?m.firstChild:m.lastChild],a&&v){c=m[x]||(m[x]={}),l=c[e]||[],d=l[0]===N&&l[1],f=l[0]===N&&l[2],p=d&&m.childNodes[d];while(p=++d&&p&&p[g]||(f=d=0)||h.pop())if(1===p.nodeType&&++f&&p===t){c[e]=[N,d,f];break}}else if(v&&(l=(t[x]||(t[x]={}))[e])&&l[0]===N)f=l[1];else while(p=++d&&p&&p[g]||(f=d=0)||h.pop())if((s?p.nodeName.toLowerCase()===y:1===p.nodeType)&&++f&&(v&&((p[x]||(p[x]={}))[e]=[N,f]),p===t))break;return f-=i,f===r||0===f%r&&f/r>=0}}},PSEUDO:function(e,t){var n,r=i.pseudos[e]||i.setFilters[e.toLowerCase()]||st.error("unsupported 
pseudo: "+e);return r[x]?r(t):r.length>1?(n=[e,e,"",t],i.setFilters.hasOwnProperty(e.toLowerCase())?ot(function(e,n){var i,o=r(e,t),a=o.length;while(a--)i=M.call(e,o[a]),e[i]=!(n[i]=o[a])}):function(e){return r(e,0,n)}):r}},pseudos:{not:ot(function(e){var t=[],n=[],r=s(e.replace(W,"$1"));return r[x]?ot(function(e,t,n,i){var o,a=r(e,null,i,[]),s=e.length;while(s--)(o=a[s])&&(e[s]=!(t[s]=o))}):function(e,i,o){return t[0]=e,r(t,null,o,n),!n.pop()}}),has:ot(function(e){return function(t){return st(e,t).length>0}}),contains:ot(function(e){return function(t){return(t.textContent||t.innerText||o(t)).indexOf(e)>-1}}),lang:ot(function(e){return X.test(e||"")||st.error("unsupported lang: "+e),e=e.replace(et,tt).toLowerCase(),function(t){var n;do if(n=d?t.getAttribute("xml:lang")||t.getAttribute("lang"):t.lang)return n=n.toLowerCase(),n===e||0===n.indexOf(e+"-");while((t=t.parentNode)&&1===t.nodeType);return!1}}),target:function(t){var n=e.location&&e.location.hash;return n&&n.slice(1)===t.id},root:function(e){return e===f},focus:function(e){return e===p.activeElement&&(!p.hasFocus||p.hasFocus())&&!!(e.type||e.href||~e.tabIndex)},enabled:function(e){return e.disabled===!1},disabled:function(e){return e.disabled===!0},checked:function(e){var t=e.nodeName.toLowerCase();return"input"===t&&!!e.checked||"option"===t&&!!e.selected},selected:function(e){return e.parentNode&&e.parentNode.selectedIndex,e.selected===!0},empty:function(e){for(e=e.firstChild;e;e=e.nextSibling)if(e.nodeName>"@"||3===e.nodeType||4===e.nodeType)return!1;return!0},parent:function(e){return!i.pseudos.empty(e)},header:function(e){return Q.test(e.nodeName)},input:function(e){return G.test(e.nodeName)},button:function(e){var t=e.nodeName.toLowerCase();return"input"===t&&"button"===e.type||"button"===t},text:function(e){var 
t;return"input"===e.nodeName.toLowerCase()&&"text"===e.type&&(null==(t=e.getAttribute("type"))||t.toLowerCase()===e.type)},first:pt(function(){return[0]}),last:pt(function(e,t){return[t-1]}),eq:pt(function(e,t,n){return[0>n?n+t:n]}),even:pt(function(e,t){var n=0;for(;t>n;n+=2)e.push(n);return e}),odd:pt(function(e,t){var n=1;for(;t>n;n+=2)e.push(n);return e}),lt:pt(function(e,t,n){var r=0>n?n+t:n;for(;--r>=0;)e.push(r);return e}),gt:pt(function(e,t,n){var r=0>n?n+t:n;for(;t>++r;)e.push(r);return e})}};for(n in{radio:!0,checkbox:!0,file:!0,password:!0,image:!0})i.pseudos[n]=lt(n);for(n in{submit:!0,reset:!0})i.pseudos[n]=ct(n);function ft(e,t){var n,r,o,a,s,u,l,c=E[e+" "];if(c)return t?0:c.slice(0);s=e,u=[],l=i.preFilter;while(s){(!n||(r=$.exec(s)))&&(r&&(s=s.slice(r[0].length)||s),u.push(o=[])),n=!1,(r=I.exec(s))&&(n=r.shift(),o.push({value:n,type:r[0].replace(W," ")}),s=s.slice(n.length));for(a in i.filter)!(r=U[a].exec(s))||l[a]&&!(r=l[a](r))||(n=r.shift(),o.push({value:n,type:a,matches:r}),s=s.slice(n.length));if(!n)break}return t?s.length:s?st.error(e):E(e,u).slice(0)}function dt(e){var t=0,n=e.length,r="";for(;n>t;t++)r+=e[t].value;return r}function ht(e,t,n){var i=t.dir,o=n&&"parentNode"===i,a=C++;return t.first?function(t,n,r){while(t=t[i])if(1===t.nodeType||o)return e(t,n,r)}:function(t,n,s){var u,l,c,p=N+" "+a;if(s){while(t=t[i])if((1===t.nodeType||o)&&e(t,n,s))return!0}else while(t=t[i])if(1===t.nodeType||o)if(c=t[x]||(t[x]={}),(l=c[i])&&l[0]===p){if((u=l[1])===!0||u===r)return u===!0}else if(l=c[i]=[p],l[1]=e(t,n,s)||r,l[1]===!0)return!0}}function gt(e){return e.length>1?function(t,n,r){var i=e.length;while(i--)if(!e[i](t,n,r))return!1;return!0}:e[0]}function mt(e,t,n,r,i){var o,a=[],s=0,u=e.length,l=null!=t;for(;u>s;s++)(o=e[s])&&(!n||n(o,r,i))&&(a.push(o),l&&t.push(s));return a}function yt(e,t,n,r,i,o){return r&&!r[x]&&(r=yt(r)),i&&!i[x]&&(i=yt(i,o)),ot(function(o,a,s,u){var 
l,c,p,f=[],d=[],h=a.length,g=o||xt(t||"*",s.nodeType?[s]:s,[]),m=!e||!o&&t?g:mt(g,f,e,s,u),y=n?i||(o?e:h||r)?[]:a:m;if(n&&n(m,y,s,u),r){l=mt(y,d),r(l,[],s,u),c=l.length;while(c--)(p=l[c])&&(y[d[c]]=!(m[d[c]]=p))}if(o){if(i||e){if(i){l=[],c=y.length;while(c--)(p=y[c])&&l.push(m[c]=p);i(null,y=[],l,u)}c=y.length;while(c--)(p=y[c])&&(l=i?M.call(o,p):f[c])>-1&&(o[l]=!(a[l]=p))}}else y=mt(y===a?y.splice(h,y.length):y),i?i(null,a,y,u):H.apply(a,y)})}function vt(e){var t,n,r,o=e.length,a=i.relative[e[0].type],s=a||i.relative[" "],u=a?1:0,c=ht(function(e){return e===t},s,!0),p=ht(function(e){return M.call(t,e)>-1},s,!0),f=[function(e,n,r){return!a&&(r||n!==l)||((t=n).nodeType?c(e,n,r):p(e,n,r))}];for(;o>u;u++)if(n=i.relative[e[u].type])f=[ht(gt(f),n)];else{if(n=i.filter[e[u].type].apply(null,e[u].matches),n[x]){for(r=++u;o>r;r++)if(i.relative[e[r].type])break;return yt(u>1&>(f),u>1&&dt(e.slice(0,u-1)).replace(W,"$1"),n,r>u&&vt(e.slice(u,r)),o>r&&vt(e=e.slice(r)),o>r&&dt(e))}f.push(n)}return gt(f)}function bt(e,t){var n=0,o=t.length>0,a=e.length>0,s=function(s,u,c,f,d){var h,g,m,y=[],v=0,b="0",x=s&&[],w=null!=d,T=l,C=s||a&&i.find.TAG("*",d&&u.parentNode||u),k=N+=null==T?1:Math.random()||.1;for(w&&(l=u!==p&&u,r=n);null!=(h=C[b]);b++){if(a&&h){g=0;while(m=e[g++])if(m(h,u,c)){f.push(h);break}w&&(N=k,r=++n)}o&&((h=!m&&h)&&v--,s&&x.push(h))}if(v+=b,o&&b!==v){g=0;while(m=t[g++])m(x,y,u,c);if(s){if(v>0)while(b--)x[b]||y[b]||(y[b]=L.call(f));y=mt(y)}H.apply(f,y),w&&!s&&y.length>0&&v+t.length>1&&st.uniqueSort(f)}return w&&(N=k,l=T),x};return o?ot(s):s}s=st.compile=function(e,t){var n,r=[],i=[],o=S[e+" "];if(!o){t||(t=ft(e)),n=t.length;while(n--)o=vt(t[n]),o[x]?r.push(o):i.push(o);o=S(e,bt(i,r))}return o};function xt(e,t,n){var r=0,i=t.length;for(;i>r;r++)st(e,t[r],n);return n}function wt(e,t,n,r){var 
o,a,u,l,c,p=ft(e);if(!r&&1===p.length){if(a=p[0]=p[0].slice(0),a.length>2&&"ID"===(u=a[0]).type&&9===t.nodeType&&!d&&i.relative[a[1].type]){if(t=i.find.ID(u.matches[0].replace(et,tt),t)[0],!t)return n;e=e.slice(a.shift().value.length)}o=U.needsContext.test(e)?0:a.length;while(o--){if(u=a[o],i.relative[l=u.type])break;if((c=i.find[l])&&(r=c(u.matches[0].replace(et,tt),V.test(a[0].type)&&t.parentNode||t))){if(a.splice(o,1),e=r.length&&dt(a),!e)return H.apply(n,q.call(r,0)),n;break}}}return s(e,p)(r,t,d,n,V.test(e)),n}i.pseudos.nth=i.pseudos.eq;function Tt(){}i.filters=Tt.prototype=i.pseudos,i.setFilters=new Tt,c(),st.attr=b.attr,b.find=st,b.expr=st.selectors,b.expr[":"]=b.expr.pseudos,b.unique=st.uniqueSort,b.text=st.getText,b.isXMLDoc=st.isXML,b.contains=st.contains}(e);var at=/Until$/,st=/^(?:parents|prev(?:Until|All))/,ut=/^.[^:#\[\.,]*$/,lt=b.expr.match.needsContext,ct={children:!0,contents:!0,next:!0,prev:!0};b.fn.extend({find:function(e){var t,n,r,i=this.length;if("string"!=typeof e)return r=this,this.pushStack(b(e).filter(function(){for(t=0;i>t;t++)if(b.contains(r[t],this))return!0}));for(n=[],t=0;i>t;t++)b.find(e,this[t],n);return n=this.pushStack(i>1?b.unique(n):n),n.selector=(this.selector?this.selector+" ":"")+e,n},has:function(e){var t,n=b(e,this),r=n.length;return this.filter(function(){for(t=0;r>t;t++)if(b.contains(this,n[t]))return!0})},not:function(e){return this.pushStack(ft(this,e,!1))},filter:function(e){return this.pushStack(ft(this,e,!0))},is:function(e){return!!e&&("string"==typeof e?lt.test(e)?b(e,this.context).index(this[0])>=0:b.filter(e,this).length>0:this.filter(e).length>0)},closest:function(e,t){var n,r=0,i=this.length,o=[],a=lt.test(e)||"string"!=typeof e?b(e,t||this.context):0;for(;i>r;r++){n=this[r];while(n&&n.ownerDocument&&n!==t&&11!==n.nodeType){if(a?a.index(n)>-1:b.find.matchesSelector(n,e)){o.push(n);break}n=n.parentNode}}return this.pushStack(o.length>1?b.unique(o):o)},index:function(e){return e?"string"==typeof 
e?b.inArray(this[0],b(e)):b.inArray(e.jquery?e[0]:e,this):this[0]&&this[0].parentNode?this.first().prevAll().length:-1},add:function(e,t){var n="string"==typeof e?b(e,t):b.makeArray(e&&e.nodeType?[e]:e),r=b.merge(this.get(),n);return this.pushStack(b.unique(r))},addBack:function(e){return this.add(null==e?this.prevObject:this.prevObject.filter(e))}}),b.fn.andSelf=b.fn.addBack;function pt(e,t){do e=e[t];while(e&&1!==e.nodeType);return e}b.each({parent:function(e){var t=e.parentNode;return t&&11!==t.nodeType?t:null},parents:function(e){return b.dir(e,"parentNode")},parentsUntil:function(e,t,n){return b.dir(e,"parentNode",n)},next:function(e){return pt(e,"nextSibling")},prev:function(e){return pt(e,"previousSibling")},nextAll:function(e){return b.dir(e,"nextSibling")},prevAll:function(e){return b.dir(e,"previousSibling")},nextUntil:function(e,t,n){return b.dir(e,"nextSibling",n)},prevUntil:function(e,t,n){return b.dir(e,"previousSibling",n)},siblings:function(e){return b.sibling((e.parentNode||{}).firstChild,e)},children:function(e){return b.sibling(e.firstChild)},contents:function(e){return b.nodeName(e,"iframe")?e.contentDocument||e.contentWindow.document:b.merge([],e.childNodes)}},function(e,t){b.fn[e]=function(n,r){var i=b.map(this,t,n);return at.test(e)||(r=n),r&&"string"==typeof r&&(i=b.filter(r,i)),i=this.length>1&&!ct[e]?b.unique(i):i,this.length>1&&st.test(e)&&(i=i.reverse()),this.pushStack(i)}}),b.extend({filter:function(e,t,n){return n&&(e=":not("+e+")"),1===t.length?b.find.matchesSelector(t[0],e)?[t[0]]:[]:b.find.matches(e,t)},dir:function(e,n,r){var i=[],o=e[n];while(o&&9!==o.nodeType&&(r===t||1!==o.nodeType||!b(o).is(r)))1===o.nodeType&&i.push(o),o=o[n];return i},sibling:function(e,t){var n=[];for(;e;e=e.nextSibling)1===e.nodeType&&e!==t&&n.push(e);return n}});function ft(e,t,n){if(t=t||0,b.isFunction(t))return b.grep(e,function(e,r){var i=!!t.call(e,r,e);return i===n});if(t.nodeType)return b.grep(e,function(e){return e===t===n});if("string"==typeof 
t){var r=b.grep(e,function(e){return 1===e.nodeType});if(ut.test(t))return b.filter(t,r,!n);t=b.filter(t,r)}return b.grep(e,function(e){return b.inArray(e,t)>=0===n})}function dt(e){var t=ht.split("|"),n=e.createDocumentFragment();if(n.createElement)while(t.length)n.createElement(t.pop());return n}var ht="abbr|article|aside|audio|bdi|canvas|data|datalist|details|figcaption|figure|footer|header|hgroup|mark|meter|nav|output|progress|section|summary|time|video",gt=/ jQuery\d+="(?:null|\d+)"/g,mt=RegExp("<(?:"+ht+")[\\s/>]","i"),yt=/^\s+/,vt=/<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/gi,bt=/<([\w:]+)/,xt=/\s*$/g,At={option:[1,""],legend:[1,"
","
"],area:[1,"",""],param:[1,"",""],thead:[1,"","
"],tr:[2,"","
"],col:[2,"","
"],td:[3,"","
"],_default:b.support.htmlSerialize?[0,"",""]:[1,"X
","
"]},jt=dt(o),Dt=jt.appendChild(o.createElement("div"));At.optgroup=At.option,At.tbody=At.tfoot=At.colgroup=At.caption=At.thead,At.th=At.td,b.fn.extend({text:function(e){return b.access(this,function(e){return e===t?b.text(this):this.empty().append((this[0]&&this[0].ownerDocument||o).createTextNode(e))},null,e,arguments.length)},wrapAll:function(e){if(b.isFunction(e))return this.each(function(t){b(this).wrapAll(e.call(this,t))});if(this[0]){var t=b(e,this[0].ownerDocument).eq(0).clone(!0);this[0].parentNode&&t.insertBefore(this[0]),t.map(function(){var e=this;while(e.firstChild&&1===e.firstChild.nodeType)e=e.firstChild;return e}).append(this)}return this},wrapInner:function(e){return b.isFunction(e)?this.each(function(t){b(this).wrapInner(e.call(this,t))}):this.each(function(){var t=b(this),n=t.contents();n.length?n.wrapAll(e):t.append(e)})},wrap:function(e){var t=b.isFunction(e);return this.each(function(n){b(this).wrapAll(t?e.call(this,n):e)})},unwrap:function(){return this.parent().each(function(){b.nodeName(this,"body")||b(this).replaceWith(this.childNodes)}).end()},append:function(){return this.domManip(arguments,!0,function(e){(1===this.nodeType||11===this.nodeType||9===this.nodeType)&&this.appendChild(e)})},prepend:function(){return this.domManip(arguments,!0,function(e){(1===this.nodeType||11===this.nodeType||9===this.nodeType)&&this.insertBefore(e,this.firstChild)})},before:function(){return this.domManip(arguments,!1,function(e){this.parentNode&&this.parentNode.insertBefore(e,this)})},after:function(){return this.domManip(arguments,!1,function(e){this.parentNode&&this.parentNode.insertBefore(e,this.nextSibling)})},remove:function(e,t){var n,r=0;for(;null!=(n=this[r]);r++)(!e||b.filter(e,[n]).length>0)&&(t||1!==n.nodeType||b.cleanData(Ot(n)),n.parentNode&&(t&&b.contains(n.ownerDocument,n)&&Mt(Ot(n,"script")),n.parentNode.removeChild(n)));return this},empty:function(){var 
e,t=0;for(;null!=(e=this[t]);t++){1===e.nodeType&&b.cleanData(Ot(e,!1));while(e.firstChild)e.removeChild(e.firstChild);e.options&&b.nodeName(e,"select")&&(e.options.length=0)}return this},clone:function(e,t){return e=null==e?!1:e,t=null==t?e:t,this.map(function(){return b.clone(this,e,t)})},html:function(e){return b.access(this,function(e){var n=this[0]||{},r=0,i=this.length;if(e===t)return 1===n.nodeType?n.innerHTML.replace(gt,""):t;if(!("string"!=typeof e||Tt.test(e)||!b.support.htmlSerialize&&mt.test(e)||!b.support.leadingWhitespace&&yt.test(e)||At[(bt.exec(e)||["",""])[1].toLowerCase()])){e=e.replace(vt,"<$1>");try{for(;i>r;r++)n=this[r]||{},1===n.nodeType&&(b.cleanData(Ot(n,!1)),n.innerHTML=e);n=0}catch(o){}}n&&this.empty().append(e)},null,e,arguments.length)},replaceWith:function(e){var t=b.isFunction(e);return t||"string"==typeof e||(e=b(e).not(this).detach()),this.domManip([e],!0,function(e){var t=this.nextSibling,n=this.parentNode;n&&(b(this).remove(),n.insertBefore(e,t))})},detach:function(e){return this.remove(e,!0)},domManip:function(e,n,r){e=f.apply([],e);var i,o,a,s,u,l,c=0,p=this.length,d=this,h=p-1,g=e[0],m=b.isFunction(g);if(m||!(1>=p||"string"!=typeof g||b.support.checkClone)&&Ct.test(g))return this.each(function(i){var o=d.eq(i);m&&(e[0]=g.call(this,i,n?o.html():t)),o.domManip(e,n,r)});if(p&&(l=b.buildFragment(e,this[0].ownerDocument,!1,this),i=l.firstChild,1===l.childNodes.length&&(l=i),i)){for(n=n&&b.nodeName(i,"tr"),s=b.map(Ot(l,"script"),Ht),a=s.length;p>c;c++)o=l,c!==h&&(o=b.clone(o,!0,!0),a&&b.merge(s,Ot(o,"script"))),r.call(n&&b.nodeName(this[c],"table")?Lt(this[c],"tbody"):this[c],o,c);if(a)for(u=s[s.length-1].ownerDocument,b.map(s,qt),c=0;a>c;c++)o=s[c],kt.test(o.type||"")&&!b._data(o,"globalEval")&&b.contains(u,o)&&(o.src?b.ajax({url:o.src,type:"GET",dataType:"script",async:!1,global:!1,"throws":!0}):b.globalEval((o.text||o.textContent||o.innerHTML||"").replace(St,"")));l=i=null}return this}});function Lt(e,t){return 
e.getElementsByTagName(t)[0]||e.appendChild(e.ownerDocument.createElement(t))}function Ht(e){var t=e.getAttributeNode("type");return e.type=(t&&t.specified)+"/"+e.type,e}function qt(e){var t=Et.exec(e.type);return t?e.type=t[1]:e.removeAttribute("type"),e}function Mt(e,t){var n,r=0;for(;null!=(n=e[r]);r++)b._data(n,"globalEval",!t||b._data(t[r],"globalEval"))}function _t(e,t){if(1===t.nodeType&&b.hasData(e)){var n,r,i,o=b._data(e),a=b._data(t,o),s=o.events;if(s){delete a.handle,a.events={};for(n in s)for(r=0,i=s[n].length;i>r;r++)b.event.add(t,n,s[n][r])}a.data&&(a.data=b.extend({},a.data))}}function Ft(e,t){var n,r,i;if(1===t.nodeType){if(n=t.nodeName.toLowerCase(),!b.support.noCloneEvent&&t[b.expando]){i=b._data(t);for(r in i.events)b.removeEvent(t,r,i.handle);t.removeAttribute(b.expando)}"script"===n&&t.text!==e.text?(Ht(t).text=e.text,qt(t)):"object"===n?(t.parentNode&&(t.outerHTML=e.outerHTML),b.support.html5Clone&&e.innerHTML&&!b.trim(t.innerHTML)&&(t.innerHTML=e.innerHTML)):"input"===n&&Nt.test(e.type)?(t.defaultChecked=t.checked=e.checked,t.value!==e.value&&(t.value=e.value)):"option"===n?t.defaultSelected=t.selected=e.defaultSelected:("input"===n||"textarea"===n)&&(t.defaultValue=e.defaultValue)}}b.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(e,t){b.fn[e]=function(e){var n,r=0,i=[],o=b(e),a=o.length-1;for(;a>=r;r++)n=r===a?this:this.clone(!0),b(o[r])[t](n),d.apply(i,n.get());return this.pushStack(i)}});function Ot(e,n){var r,o,a=0,s=typeof e.getElementsByTagName!==i?e.getElementsByTagName(n||"*"):typeof e.querySelectorAll!==i?e.querySelectorAll(n||"*"):t;if(!s)for(s=[],r=e.childNodes||e;null!=(o=r[a]);a++)!n||b.nodeName(o,n)?s.push(o):b.merge(s,Ot(o,n));return n===t||n&&b.nodeName(e,n)?b.merge([e],s):s}function Bt(e){Nt.test(e.type)&&(e.defaultChecked=e.checked)}b.extend({clone:function(e,t,n){var 
r,i,o,a,s,u=b.contains(e.ownerDocument,e);if(b.support.html5Clone||b.isXMLDoc(e)||!mt.test("<"+e.nodeName+">")?o=e.cloneNode(!0):(Dt.innerHTML=e.outerHTML,Dt.removeChild(o=Dt.firstChild)),!(b.support.noCloneEvent&&b.support.noCloneChecked||1!==e.nodeType&&11!==e.nodeType||b.isXMLDoc(e)))for(r=Ot(o),s=Ot(e),a=0;null!=(i=s[a]);++a)r[a]&&Ft(i,r[a]);if(t)if(n)for(s=s||Ot(e),r=r||Ot(o),a=0;null!=(i=s[a]);a++)_t(i,r[a]);else _t(e,o);return r=Ot(o,"script"),r.length>0&&Mt(r,!u&&Ot(e,"script")),r=s=i=null,o},buildFragment:function(e,t,n,r){var i,o,a,s,u,l,c,p=e.length,f=dt(t),d=[],h=0;for(;p>h;h++)if(o=e[h],o||0===o)if("object"===b.type(o))b.merge(d,o.nodeType?[o]:o);else if(wt.test(o)){s=s||f.appendChild(t.createElement("div")),u=(bt.exec(o)||["",""])[1].toLowerCase(),c=At[u]||At._default,s.innerHTML=c[1]+o.replace(vt,"<$1>")+c[2],i=c[0];while(i--)s=s.lastChild;if(!b.support.leadingWhitespace&&yt.test(o)&&d.push(t.createTextNode(yt.exec(o)[0])),!b.support.tbody){o="table"!==u||xt.test(o)?""!==c[1]||xt.test(o)?0:s:s.firstChild,i=o&&o.childNodes.length;while(i--)b.nodeName(l=o.childNodes[i],"tbody")&&!l.childNodes.length&&o.removeChild(l) -}b.merge(d,s.childNodes),s.textContent="";while(s.firstChild)s.removeChild(s.firstChild);s=f.lastChild}else d.push(t.createTextNode(o));s&&f.removeChild(s),b.support.appendChecked||b.grep(Ot(d,"input"),Bt),h=0;while(o=d[h++])if((!r||-1===b.inArray(o,r))&&(a=b.contains(o.ownerDocument,o),s=Ot(f.appendChild(o),"script"),a&&Mt(s),n)){i=0;while(o=s[i++])kt.test(o.type||"")&&n.push(o)}return s=null,f},cleanData:function(e,t){var n,r,o,a,s=0,u=b.expando,l=b.cache,p=b.support.deleteExpando,f=b.event.special;for(;null!=(n=e[s]);s++)if((t||b.acceptData(n))&&(o=n[u],a=o&&l[o])){if(a.events)for(r in a.events)f[r]?b.event.remove(n,r):b.removeEvent(n,r,a.handle);l[o]&&(delete l[o],p?delete n[u]:typeof n.removeAttribute!==i?n.removeAttribute(u):n[u]=null,c.push(o))}}});var 
Pt,Rt,Wt,$t=/alpha\([^)]*\)/i,It=/opacity\s*=\s*([^)]*)/,zt=/^(top|right|bottom|left)$/,Xt=/^(none|table(?!-c[ea]).+)/,Ut=/^margin/,Vt=RegExp("^("+x+")(.*)$","i"),Yt=RegExp("^("+x+")(?!px)[a-z%]+$","i"),Jt=RegExp("^([+-])=("+x+")","i"),Gt={BODY:"block"},Qt={position:"absolute",visibility:"hidden",display:"block"},Kt={letterSpacing:0,fontWeight:400},Zt=["Top","Right","Bottom","Left"],en=["Webkit","O","Moz","ms"];function tn(e,t){if(t in e)return t;var n=t.charAt(0).toUpperCase()+t.slice(1),r=t,i=en.length;while(i--)if(t=en[i]+n,t in e)return t;return r}function nn(e,t){return e=t||e,"none"===b.css(e,"display")||!b.contains(e.ownerDocument,e)}function rn(e,t){var n,r,i,o=[],a=0,s=e.length;for(;s>a;a++)r=e[a],r.style&&(o[a]=b._data(r,"olddisplay"),n=r.style.display,t?(o[a]||"none"!==n||(r.style.display=""),""===r.style.display&&nn(r)&&(o[a]=b._data(r,"olddisplay",un(r.nodeName)))):o[a]||(i=nn(r),(n&&"none"!==n||!i)&&b._data(r,"olddisplay",i?n:b.css(r,"display"))));for(a=0;s>a;a++)r=e[a],r.style&&(t&&"none"!==r.style.display&&""!==r.style.display||(r.style.display=t?o[a]||"":"none"));return e}b.fn.extend({css:function(e,n){return b.access(this,function(e,n,r){var i,o,a={},s=0;if(b.isArray(n)){for(o=Rt(e),i=n.length;i>s;s++)a[n[s]]=b.css(e,n[s],!1,o);return a}return r!==t?b.style(e,n,r):b.css(e,n)},e,n,arguments.length>1)},show:function(){return rn(this,!0)},hide:function(){return rn(this)},toggle:function(e){var t="boolean"==typeof e;return this.each(function(){(t?e:nn(this))?b(this).show():b(this).hide()})}}),b.extend({cssHooks:{opacity:{get:function(e,t){if(t){var n=Wt(e,"opacity");return""===n?"1":n}}}},cssNumber:{columnCount:!0,fillOpacity:!0,fontWeight:!0,lineHeight:!0,opacity:!0,orphans:!0,widows:!0,zIndex:!0,zoom:!0},cssProps:{"float":b.support.cssFloat?"cssFloat":"styleFloat"},style:function(e,n,r,i){if(e&&3!==e.nodeType&&8!==e.nodeType&&e.style){var 
o,a,s,u=b.camelCase(n),l=e.style;if(n=b.cssProps[u]||(b.cssProps[u]=tn(l,u)),s=b.cssHooks[n]||b.cssHooks[u],r===t)return s&&"get"in s&&(o=s.get(e,!1,i))!==t?o:l[n];if(a=typeof r,"string"===a&&(o=Jt.exec(r))&&(r=(o[1]+1)*o[2]+parseFloat(b.css(e,n)),a="number"),!(null==r||"number"===a&&isNaN(r)||("number"!==a||b.cssNumber[u]||(r+="px"),b.support.clearCloneStyle||""!==r||0!==n.indexOf("background")||(l[n]="inherit"),s&&"set"in s&&(r=s.set(e,r,i))===t)))try{l[n]=r}catch(c){}}},css:function(e,n,r,i){var o,a,s,u=b.camelCase(n);return n=b.cssProps[u]||(b.cssProps[u]=tn(e.style,u)),s=b.cssHooks[n]||b.cssHooks[u],s&&"get"in s&&(a=s.get(e,!0,r)),a===t&&(a=Wt(e,n,i)),"normal"===a&&n in Kt&&(a=Kt[n]),""===r||r?(o=parseFloat(a),r===!0||b.isNumeric(o)?o||0:a):a},swap:function(e,t,n,r){var i,o,a={};for(o in t)a[o]=e.style[o],e.style[o]=t[o];i=n.apply(e,r||[]);for(o in t)e.style[o]=a[o];return i}}),e.getComputedStyle?(Rt=function(t){return e.getComputedStyle(t,null)},Wt=function(e,n,r){var i,o,a,s=r||Rt(e),u=s?s.getPropertyValue(n)||s[n]:t,l=e.style;return s&&(""!==u||b.contains(e.ownerDocument,e)||(u=b.style(e,n)),Yt.test(u)&&Ut.test(n)&&(i=l.width,o=l.minWidth,a=l.maxWidth,l.minWidth=l.maxWidth=l.width=u,u=s.width,l.width=i,l.minWidth=o,l.maxWidth=a)),u}):o.documentElement.currentStyle&&(Rt=function(e){return e.currentStyle},Wt=function(e,n,r){var i,o,a,s=r||Rt(e),u=s?s[n]:t,l=e.style;return null==u&&l&&l[n]&&(u=l[n]),Yt.test(u)&&!zt.test(n)&&(i=l.left,o=e.runtimeStyle,a=o&&o.left,a&&(o.left=e.currentStyle.left),l.left="fontSize"===n?"1em":u,u=l.pixelLeft+"px",l.left=i,a&&(o.left=a)),""===u?"auto":u});function on(e,t,n){var r=Vt.exec(t);return r?Math.max(0,r[1]-(n||0))+(r[2]||"px"):t}function an(e,t,n,r,i){var 
o=n===(r?"border":"content")?4:"width"===t?1:0,a=0;for(;4>o;o+=2)"margin"===n&&(a+=b.css(e,n+Zt[o],!0,i)),r?("content"===n&&(a-=b.css(e,"padding"+Zt[o],!0,i)),"margin"!==n&&(a-=b.css(e,"border"+Zt[o]+"Width",!0,i))):(a+=b.css(e,"padding"+Zt[o],!0,i),"padding"!==n&&(a+=b.css(e,"border"+Zt[o]+"Width",!0,i)));return a}function sn(e,t,n){var r=!0,i="width"===t?e.offsetWidth:e.offsetHeight,o=Rt(e),a=b.support.boxSizing&&"border-box"===b.css(e,"boxSizing",!1,o);if(0>=i||null==i){if(i=Wt(e,t,o),(0>i||null==i)&&(i=e.style[t]),Yt.test(i))return i;r=a&&(b.support.boxSizingReliable||i===e.style[t]),i=parseFloat(i)||0}return i+an(e,t,n||(a?"border":"content"),r,o)+"px"}function un(e){var t=o,n=Gt[e];return n||(n=ln(e,t),"none"!==n&&n||(Pt=(Pt||b("