Update to version 1.1.0 (#229)

Major changes included in this update are: * adapt to new snakemake version (#208) * Speedup CI (use micromamba, #218) * Add HTML cutoff for results (#217) * Separate modules (#209) * Update documentation (#228) On top of this many small bug-fixes are included: * #175 #192 #198 #231 * Simplify result reporting Co-authored-by: Vicente Yepez <30469316+vyepez88@users.noreply.github.com> Co-authored-by: Vicente <yepez@in.tum.de> Co-authored-by: Michaela Mueller <51025211+mumichae@users.noreply.github.com> Co-authored-by: Michaela Mueller <mumichae@in.tum.de> Co-authored-by: Smith Nicholas <smith@in.tum.de> Co-authored-by: nickhsmith <smithnickh@gmail.com> Co-authored-by: Christian Mertes <mertes@in.tum.de>
gagneurlab · Jul 21, 2021 · bf2eb91 · bf2eb91
1 parent 1527d24
commit bf2eb91
Show file tree

Hide file tree

Showing 48 changed files with 621 additions and 409 deletions.
diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
@@ -5,33 +5,39 @@ on: [push]
 jobs:
   build-linux:
     runs-on: ubuntu-latest
-    strategy:
-      max-parallel: 5
+#    container:
+#      image: mambaorg/micromamba:latest
 
     steps:
     - uses: actions/checkout@v2
-    - name: Set up Python 3.8
-      uses: actions/setup-python@v2
-      with:
-        python-version: 3.8
-    - name: Add conda to system path
-      run: |
-        # $CONDA is an environment variable pointing to the root of the miniconda directory
-        echo $CONDA/bin >> $GITHUB_PATH
-    - name: Install dependencies
+
+    - name: Setup environment with micromamba
+      uses: mamba-org/provision-with-micromamba@v10
+
+    - name: Check micromamba install
+      shell: bash -l {0}
       run: |
-        #conda env update --file environment.yml --name base
-        conda install -c conda-forge -c bioconda drop 'r-dplyr=1.0.5' 
-        pip install .
+        micromamba --version
+        micromamba env list
+
+#    - name: Install dependencies
+#      run: mamba env update -f environment.yml
+
     - name: Lint with flake8
+      shell: bash -l {0}
       run: |
-        conda install flake8
         # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --builtins="snakemake" --select=E9,F63,F7,F82 --show-source --statistics 
+        flake8 . --count --builtins="snakemake" --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+    - name: Install DROP
+      shell: bash -l {0}
+      run: pip install -e .
+
     - name: Test with pytest
+      shell: bash -l {0}
       run: |
-        conda install pytest
+        micromamba install pytest
         pip install -r tests/requirements.txt
         pytest
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -23,7 +23,7 @@
 author = 'Michaela Müller'
 
 # The full version, including alpha/beta/rc tags
-release_ = '1.0.5'
+release_ = '1.1.0'
 
 
 # -- General configuration ---------------------------------------------------

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -38,7 +38,7 @@ The pipeline can be run using `snakemake <https://snakemake.readthedocs.io/>`_ c
 
 .. code-block:: bash
 
-    snakemake -n # dryrun
+    snakemake --cores 1 -n # dryrun
     snakemake --cores 1
 
 Expected runtime: 25 min

diff --git a/docs/source/installation.rst b/docs/source/installation.rst
@@ -30,7 +30,7 @@ The pipeline can be run using `snakemake <snakemake.readthedocs.io/>`_ commands
 
 .. code-block:: bash
 
-    snakemake -n # dryrun
+    snakemake --cores 1 -n # dryrun
     snakemake --cores 1
 
 Initialize a project

diff --git a/docs/source/pipeline.rst b/docs/source/pipeline.rst
@@ -10,21 +10,21 @@ Open a terminal in your project repository. Execute
 
 .. code-block:: bash
     
-    snakemake -n 
+    snakemake --cores 1 -n 
 
 This will perform a *dry-run*, which means it will display all the steps (or rules) that need to be executed. To also display the reason why those rules need to be executed, run 
 
 .. code-block:: bash
 
-    snakemake -nr
+    snakemake --cores 1 -nr
 
 Finally, a simplified dry-run can be achieved by executing
 
 .. code-block:: bash
 
-    snakemake -nq
+    snakemake --cores 1 -nq
     
-Calling ``snakemake`` without any parameters will execute the whole workflow. 
+Calling ``snakemake --cores 1`` without any additional parameters will execute the whole workflow. Snakemake requires you to designate the number of cores when running the ``snakemake`` command.
 
 
 Parallelizing jobs
@@ -36,8 +36,6 @@ DROP's steps are computationally heavy, therefore it is a good idea to run them
 
     snakemake --cores 10
 
-If the ``--cores`` flag is not specified, snakemake will use a single core by default.
-
 
 Executing subworkflows
 ----------------------

diff --git a/docs/source/prepare.rst b/docs/source/prepare.rst
@@ -75,6 +75,7 @@ Aberrant expression dictionary
 ============================  =========  =================================================================================================================================  ======
 Parameter                     Type       Description                                                                                                                        Default/Examples
 ============================  =========  =================================================================================================================================  ======
+run                           boolean    If true, the module will be run. If false, it will be ignored.                                                                     ``true``
 groups                        list       DROP groups that should be executed in this module. If not specified or ``null`` all groups are used.                              ``- group1``
 
                                                                                                                                                                             ``- group2``
@@ -92,6 +93,7 @@ Aberrant splicing dictionary
 ============================  =========  ============================================================================================  ======
 Parameter                     Type       Description                                                                                   Default/Examples
 ============================  =========  ============================================================================================  ======
+run                           boolean    If true, the module will be run. If false, it will be ignored.                                ``true``
 groups                        list       Same as in aberrant expression.                                                               ``# see aberrant expression example``
 minIds                        numeric    Same as in aberrant expression.                                                               ``1``
 recount                       boolean    If true, it forces samples to be recounted.                                                   ``false``
@@ -113,6 +115,7 @@ Mono-allelic expression dictionary
 =====================  =========  ========================================================================================================================  ======
 Parameter              Type       Description                                                                                                               Default/Examples
 =====================  =========  ========================================================================================================================  ======
+run                    boolean    If true, the module will be run. If false, it will be ignored.                                                            ``true``
 groups                 list       Same as in aberrant expression.                                                                                           ``# see aberrant expression example``
 gatkIgnoreHeaderCheck  boolean    If true (recommended), it ignores the header warnings of a VCF file when performing the allelic counts                    ``true``
 padjCutoff             numeric    Same as in aberrant expression.                                                                                           ``0.05``
@@ -124,6 +127,11 @@ qcVcf                  character  Full path to the vcf file used for VCF-BAM mat
 qcGroups               list       Same as “groups”, but for the VCF-BAM matching                                                                            ``# see aberrant expression example``
 =====================  =========  ========================================================================================================================  ======
 
+Modularization of DROP
+-----------------------------------
+DROP allows you to control which modules are run via the ``run`` variable in the config file. By default, each module is set to ``run: true``. Setting this value to ``false`` stops a particular module from being run. This will be noted as a warning at the beginning of the ``snakemake`` run, and the corresponding module will be renamed in the ``Scripts/`` directory.
+
+For example, if ``run`` is set to ``false`` for the AberrantExpression module, the ``Scripts/AberrantExpression/`` directory will be renamed to ``Scripts/_AberrantExpression/``, which tells DROP not to execute this module.
 
 Creating the sample annotation table
 ------------------------------------

diff --git a/drop/__init__.py b/drop/__init__.py
@@ -4,4 +4,4 @@
 from . import utils
 from . import demo
 
-__version__ = "1.0.5"
+__version__ = "1.1.0"
diff --git a/drop/__init__.pyc b/drop/__init__.pyc
diff --git a/drop/cli.py b/drop/cli.py
@@ -1,9 +1,8 @@
 import wbuild
 import drop
-import yaml
 from pathlib import Path
-from shutil import copy2
-from distutils.dir_util import mkpath, copy_tree, remove_tree
+from shutil import copy2, rmtree
+from distutils.dir_util import copy_tree, remove_tree
 import subprocess
 import click
 import click_log
@@ -15,73 +14,97 @@
 logger = logging.getLogger(__name__)
 click_log.basic_config(logger)
 
+
 @click.group()
 @click_log.simple_verbosity_option(logger)
-@click.version_option('1.0.5',prog_name='drop')
+@click.version_option('1.1.0',prog_name='drop')
+
 def main():
     pass
 
 
-def overwrite(base_repo,local_proj):
-    fc.clear_cache() # clear file compare cache to avoid mistakes
-    compare_obj = fc.dircmp(base_repo,local_proj)
+def overwrite(base_repo, local_proj):
+    fc.clear_cache()  # clear file compare cache to avoid mistakes
+    compare_obj = fc.dircmp(base_repo, local_proj)
 
-    #remove all things not in the base_repo
+    # remove all things not in the base_repo
     for i in compare_obj.right_only:
         logger.info(f"removing local file {i} it is not in the base drop")
         if os.path.isfile(local_proj / i):
-            removeFile(local_proj / i,warn = False)   
+            removeFile(local_proj / i, warn=False)
         else:
             remove_tree(local_proj / i)
 
     # for all dirs and files in base_dir
     for i in compare_obj.left_list:
-        #files
+        # files
         if os.path.isfile(base_repo / i):
             # filename is the same in both
             if i in compare_obj.common_files:
 
                 # if file is diff copy original over. otherwise do nothing
                 if i in compare_obj.diff_files:
                     logger.info(f"overwriting {local_proj / i} with {base_repo / i})")
-                    copy2(base_repo / i,local_proj / i)
+                    copy2(base_repo / i, local_proj / i)
 
 
             # file not present in local project. Copy it
             else:
                 logger.info(f"overwriting {local_proj / i} with {base_repo / i})")
-                copy2(base_repo / i,local_proj / i)
+                copy2(base_repo / i, local_proj / i)
 
-        #dirs
+        # dirs
         elif os.path.isdir(base_repo / i):
             if i in compare_obj.common_dirs:
                 overwrite(base_repo / i, local_proj / i)
             else:
-                logger.info(f"the directory {str(base_repo / i)} does not exist locally. copying here: {str(local_proj)}")
+                logger.info(
+                    f"the directory {str(base_repo / i)} does not exist locally. copying here: {str(local_proj)}")
                 copy_tree(str(base_repo / i), str(local_proj / i))
 
         # other?
         else:
             logger.info(i, "is something other than file or dir. Ignoring")
 
 
-def copyModuleCode(repoPaths,projectPaths):
+def copyModuleCode(repoPaths, projectPaths):
     repo_map = {
         "aberrant-expression-pipeline": "AberrantExpression",
         "aberrant-splicing-pipeline": "AberrantSplicing",
         "mae-pipeline": "MonoallelicExpression"
     }
 
+    import sys
     for repo, analysis_dir in repo_map.items():
-        fc.clear_cache() # clear file compare cache to avoid mistakes
+        fc.clear_cache()  # clear file compare cache to avoid mistakes
+
         base_repo = repoPaths["modules"] / repo
-        local_proj = projectPaths["Scripts"] / analysis_dir / "pipeline"
-        if not local_proj.is_dir(): # module directory does not exist. copy it
+        local_proj = projectPaths["Scripts"] / analysis_dir
+
+        #look for analysis_dir hidden from wbuild with "_" prefix and remove dir
+        wbuild_hidden_path = projectPaths["Scripts"] / ("_" + analysis_dir)
+
+        #if both hidden and local exist. Delete the hidden
+        if wbuild_hidden_path.is_dir() and local_proj.is_dir():
+            logger.info(f"removing the hidden wBuild path: {analysis_dir}")
+            rmtree(wbuild_hidden_path,ignore_errors=True)
+            logger.info("done")
+        # if only hidden exists. rename and run normally
+        elif wbuild_hidden_path.is_dir() and not local_proj.is_dir():
+            logger.info(f"renaming the hidden wBuild path: {analysis_dir}")
+            os.rename(wbuild_hidden_path,local_proj)
+            logger.info("done")
+
+        local_proj = local_proj / "pipeline"
+        if not local_proj.is_dir():  # module directory does not exist. copy it
             logger.info(f"{local_proj} is not a directory, copy over from drop base")
             copy_tree(str(base_repo), str(local_proj))
-        else: #module dir does exist. Do a safe-overwrite
-            overwrite(base_repo,local_proj)
-
+        else:  # module dir does exist. Do a safe-overwrite
+            logger.info(f"rewriting the module {analysis_dir} from the base DROP path")
+            overwrite(base_repo, local_proj)
+            logger.info("done")
+
+
 def removeFile(filePath, warn=True):
     filePath = Path(filePath)
     if filePath.is_file():
@@ -99,7 +122,6 @@ def setFiles(projectDir=None):
         if not path.is_dir():
             path.mkdir(parents=True)
             logger.info(f"create {str(path)}")
-
     # hidden files
     copy_tree(str(wbuildPath), str(projectPaths["projectDir"] / ".wBuild"))
     copy_tree(str(repoPaths["modules"] / "helpers"), str(projectPaths["dropDir"] / "helpers"))
@@ -109,7 +131,6 @@ def setFiles(projectDir=None):
     copy2(repoPaths["template"] / "Snakefile", projectPaths["projectDir"] / "Snakefile")
     copy_tree(str(repoPaths["Scripts"]), str(projectPaths["Scripts"]))
     copyModuleCode(repoPaths, projectPaths)
-    #copyModuleCode(repoPaths, projectPaths)
 
     config_file = projectPaths["projectDir"] / "config.yaml"
     if not config_file.is_file():
@@ -131,9 +152,8 @@ def init():
 
 @main.command()
 def update():
-    drop.checkDropVersion(Path().cwd().resolve(), force=True)
     logger.info("updating local Scripts if necessary")
-    setFiles()
+    drop.checkDropVersion(Path().cwd().resolve(), force=True)
     logger.info("update...done")
 
 

diff --git a/drop/config/DropConfig.py b/drop/config/DropConfig.py
@@ -19,12 +19,13 @@ class DropConfig:
         "aberrantExpression", "aberrantSplicing", "mae"
     ]
 
-    def __init__(self, wbuildConfig):
+    def __init__(self, wbuildConfig, workDir):
         """
         Parse wbuild/snakemake config object for DROP-specific content
         :param wbuildConfig: wBuild config object
+        :param workDir: path to project working directory
         """
-
+        self.workDir = Path(workDir)
         self.wBuildConfig = wbuildConfig
         self.config_dict = self.setDefaults(wbuildConfig.getConfig())
 
@@ -56,21 +57,23 @@ def __init__(self, wbuildConfig):
             config=self.get("aberrantExpression"),
             sampleAnnotation=self.sampleAnnotation,
             processedDataDir=self.processedDataDir,
-            processedResultsDir=self.processedResultsDir
+            processedResultsDir=self.processedResultsDir,
+            workDir=workDir
         )
 
-
         self.AS = AS(
             config=self.get("aberrantSplicing"),
             sampleAnnotation=self.sampleAnnotation,
             processedDataDir=self.processedDataDir,
-            processedResultsDir=self.processedResultsDir
+            processedResultsDir=self.processedResultsDir,
+            workDir=workDir
         )
         self.MAE = MAE(
             config=self.get("mae"),
             sampleAnnotation=self.sampleAnnotation,
             processedDataDir=self.processedDataDir,
             processedResultsDir=self.processedResultsDir,
+            workDir=workDir,
             genome=self.genome
         )
 
@@ -86,10 +89,11 @@ def __init__(self, wbuildConfig):
 
         # write sample params for each module AS not currently supported
         sampleParams = SampleParams(
-            self.AE, 
-            self.MAE, 
+            self.AE,
+            self.AS,
+            self.MAE,
             self.get("geneAnnotation"),
-            self.processedDataDir, 
+            self.processedDataDir,
             self.sampleAnnotation
         )