Added step documentation. Fixed typo in ROH calling document.

bihealth · Oct 14, 2021 · 2580076 · 2580076
1 parent 903e1b6
commit 2580076
Show file tree

Hide file tree

Showing 4 changed files with 87 additions and 1 deletion.
diff --git a/docs/index.rst b/docs/index.rst
@@ -52,6 +52,7 @@ Project Info
     step/ngs_mapping
     step/ngs_sanity_checking
     step/roh_calling
+    step/repeat_analysis
     step/somatic_gene_fusion_calling
     step/somatic_neoepitope_prediction
     step/somatic_ngs_sanity_checking

diff --git a/docs/step/repeat_analysis.rst b/docs/step/repeat_analysis.rst
@@ -0,0 +1,7 @@
+.. _step_repeat_analysis:
+
+==================================
+Germline Repeat Expansion Analysis
+==================================
+
+.. automodule:: snappy_pipeline.workflows.repeat_expansion
diff --git a/docs/step/roh_calling.rst b/docs/step/roh_calling.rst
@@ -1,7 +1,7 @@
 .. _step_roh_calling:
 
 ====================
-Germlien RoH Calling
+Germline RoH Calling
 ====================
 
 .. automodule:: snappy_pipeline.workflows.roh_calling

diff --git a/snappy_pipeline/workflows/repeat_expansion/__init__.py b/snappy_pipeline/workflows/repeat_expansion/__init__.py
@@ -1,4 +1,82 @@
 # -*- coding: utf-8 -*
+"""Implementation of the ``repeat_analysis`` step
+
+The ``repeat_analysis`` step takes as the input the results of the ``ngs_mapping`` step
+(aligned reads in BAM format) and performs repeat expansion analysis.  The results are variant
+files (VCF) with the repeat expansion definitions, and associated annotations (JSON).
+
+==========
+Stability
+==========
+
+This step is considered experimental; use it at your own discretion.
+
+==========
+Step Input
+==========
+
+The repeat analysis step uses Snakemake sub workflows to use the results of the ``ngs_mapping``
+step.
+
+===========
+Step Output
+===========
+
+For all samples, repeat analysis will be performed on the primary DNA NGS libraries separately for
+each configured read mapper and repeat analysis tool. The name of the primary DNA NGS library will
+be used as an identification token in the output file.
+
+For each read mapper, repeat analysis tool, and sample, the following files will be generated:
+
+- ``{mapper}.{repeat_tool}.{lib_name}.vcf``
+- ``{mapper}.{repeat_tool}.{lib_name}.vcf.md5``
+- ``{mapper}.{repeat_tool}_annotated.{lib_name}.json``
+- ``{mapper}.{repeat_tool}_annotated.{lib_name}.json.md5``
+
+For example, the output directory might look as follows:
+
+::
+
+    output/
+    +-- bwa.expansionhunter.P001-N1-DNA1-WES1
+    |   `-- out
+    |       |-- bwa.expansionhunter.P001-N1-DNA1-WES1.vcf
+    |       `-- bwa.expansionhunter.P001-N1-DNA1-WES1.vcf.md5
+    +-- bwa.expansionhunter_annotated.P001-N1-DNA1-WES1
+    |   `-- out
+    |       |-- bwa.expansionhunter_annotated.P001-N1-DNA1-WES1.json
+    |       `-- bwa.expansionhunter_annotated.P001-N1-DNA1-WES1.json.md5
+    [...]
+
+====================
+Global Configuration
+====================
+
+Not applicable.
+
+=====================
+Default Configuration
+=====================
+
+The default configuration is as follows:
+
+.. include:: DEFAULT_CONFIG_repeat_expansion.rst
+
+===============================
+Available Repeat Analysis Tools
+===============================
+
+The following germline repeat analysis tool is currently available:
+
+- ``"ExpansionHunter"``
+
+
+==================
+Parallel Execution
+==================
+
+Not available.
+"""
 from collections import OrderedDict
 import os