reformat, update PipestatManager configuration
stolarczyk committed May 4, 2021
1 parent 0916e3e commit 7f0a682
Showing 35 changed files with 2,570 additions and 1,653 deletions.
214 changes: 117 additions & 97 deletions docs/conf.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions example_pipelines/basic.py
@@ -8,13 +8,13 @@
 # First, make sure you can import the pypiper package

 import os
+
 import pypiper

 # Create a PipelineManager instance (don't forget to name it!)
 # This starts the pipeline.

-pm = pypiper.PipelineManager(name="BASIC",
-                             outfolder="pipeline_output/")
+pm = pypiper.PipelineManager(name="BASIC", outfolder="pipeline_output/")

 # Now just build shell command strings, and use the run function
 # to execute them in order. run needs 2 things: a command, and the
@@ -57,5 +57,5 @@
 # Now, stop the pipeline to complete gracefully.
 pm.stop_pipeline()

-# Observe your outputs in the pipeline_output folder
+# Observe your outputs in the pipeline_output folder
 # to see what you've created.
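
For orientation, the run function referenced in the comments above pairs a shell command with a target file and skips the command when the target already exists. A minimal sketch of that pattern (the command and file names here are illustrative, not taken from this diff):

    import pypiper

    pm = pypiper.PipelineManager(name="BASIC", outfolder="pipeline_output/")

    # run() re-executes a command only when its target file is missing,
    # which is what makes the pipeline restartable.
    target = "pipeline_output/result.txt"
    pm.run("echo 'hello world' > " + target, target=target)

    pm.stop_pipeline()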
55 changes: 32 additions & 23 deletions example_pipelines/count_reads.py
@@ -9,25 +9,32 @@
 __license__ = "GPL3"
 __version__ = "0.1"

-from argparse import ArgumentParser
-import os, re
-import sys
+import os
+import re
 import subprocess
+import sys
+from argparse import ArgumentParser
+
 import yaml
+
 import pypiper

 parser = ArgumentParser(
     description="A pipeline to count the number of reads and file size. Accepts"
-                " BAM, fastq, or fastq.gz files.")
+    " BAM, fastq, or fastq.gz files."
+)

 # First, add standard arguments from Pypiper.
 # groups="pypiper" will add all the arguments that pypiper uses,
 # and adding "common" adds arguments for --input and --sample-name
 # and "output_parent". You can read more about your options for standard
 # arguments in the pypiper docs (section "command-line arguments")
-parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "common", "ngs"],
-                                  args=["output-parent", "config"],
-                                  required=['sample-name', 'output-parent'])
+parser = pypiper.add_pypiper_args(
+    parser,
+    groups=["pypiper", "common", "ngs"],
+    args=["output-parent", "config"],
+    required=["sample-name", "output-parent"],
+)

 # Add any pipeline-specific arguments if you like here.

@@ -42,16 +49,14 @@
 else:
     args.paired_end = False

-# args for `output_parent` and `sample_name` were added by the standard
-# `add_pypiper_args` function.
+# args for `output_parent` and `sample_name` were added by the standard
+# `add_pypiper_args` function.
 # A good practice is to make an output folder for each sample, housed under
 # the parent output folder, like this:
 outfolder = os.path.abspath(os.path.join(args.output_parent, args.sample_name))

 # Create a PipelineManager object and start the pipeline
-pm = pypiper.PipelineManager(name="count",
-                             outfolder=outfolder,
-                             args=args)
+pm = pypiper.PipelineManager(name="count", outfolder=outfolder, args=args)

 # NGSTk is a "toolkit" that comes with pypiper, providing some functions
 # for dealing with genome sequence data. You can read more about toolkits in the
@@ -75,15 +80,12 @@
 # and convert these to fastq files.

 local_input_files = ngstk.merge_or_link(
-    [args.input, args.input2],
-    raw_folder,
-    args.sample_name)
+    [args.input, args.input2], raw_folder, args.sample_name
+)

 cmd, out_fastq_pre, unaligned_fastq = ngstk.input_to_fastq(
-    local_input_files,
-    args.sample_name,
-    args.paired_end,
-    fastq_folder)
+    local_input_files, args.sample_name, args.paired_end, fastq_folder
+)


 # Now we'll use another NGSTk function to grab the file size from the input files
@@ -95,10 +97,17 @@

 n_input_files = len(list(filter(bool, local_input_files)))

-raw_reads = sum([int(ngstk.count_reads(input_file, args.paired_end))
-                for input_file in local_input_files]) / n_input_files
-
-# Finally, we use the report_result() function to print the output and
+raw_reads = (
+    sum(
+        [
+            int(ngstk.count_reads(input_file, args.paired_end))
+            for input_file in local_input_files
+        ]
+    )
+    / n_input_files
+)
+
+# Finally, we use the report_result() function to print the output and
 # log the key-value pair in the standard stats.tsv file
 pm.report_result("Raw_reads", str(raw_reads))

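As the comments in this pipeline note, report_result() logs each key-value pair to the standard stats.tsv file in the pipeline's output folder. A rough sketch of reading those results back (the sample output path is hypothetical, and the exact column layout of stats.tsv is an assumption):

    import csv
    import os

    outfolder = "results/test_sample"  # hypothetical sample output folder

    # Each report_result() call appends one tab-separated row.
    with open(os.path.join(outfolder, "stats.tsv")) as f:
        for row in csv.reader(f, delimiter="\t"):
            print(row)
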
3 changes: 2 additions & 1 deletion example_pipelines/hello_pypiper.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python

 import pypiper
-outfolder = "hello_pypiper_results" # Choose a folder for your results
+
+outfolder = "hello_pypiper_results"  # Choose a folder for your results

 # Create a PipelineManager, the workhorse of pypiper
 pm = pypiper.PipelineManager(name="hello_pypiper", outfolder=outfolder)
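The diff view truncates the file here. For orientation, the hello-world tutorial pattern this script follows typically continues with a timestamped message and a single run() call; the lines below are an illustrative sketch, not the file's elided contents:

    pm.timestamp("Hello!")
    target_file = outfolder + "/output.txt"
    pm.run("echo 'Hello, Pypiper!' > " + target_file, target=target_file)
    pm.stop_pipeline()
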
63 changes: 35 additions & 28 deletions example_pipelines/logmuse_example.py
@@ -9,52 +9,57 @@
 __license__ = "GPL3"
 __version__ = "0.1"

-from argparse import ArgumentParser
-import os, re
-import sys
+import os
+import re
 import subprocess
+import sys
+from argparse import ArgumentParser

 import yaml
-import pypiper
+
+import pypiper


 def build_argparser():

     parser = ArgumentParser(
         description="A pipeline to count the number of reads and file size. Accepts"
-                    " BAM, fastq, or fastq.gz files.")
+        " BAM, fastq, or fastq.gz files."
+    )

     # First, add standard arguments from Pypiper.
     # groups="pypiper" will add all the arguments that pypiper uses,
     # and adding "common" adds arguments for --input and --sample-name
     # and "output_parent". You can read more about your options for standard
     # arguments in the pypiper docs (section "command-line arguments")
-    parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "common", "ngs", "logmuse"],
-                                      args=["output-parent", "config"],
-                                      required=['sample-name', 'output-parent'])
+    parser = pypiper.add_pypiper_args(
+        parser,
+        groups=["pypiper", "common", "ngs", "logmuse"],
+        args=["output-parent", "config"],
+        required=["sample-name", "output-parent"],
+    )

     # Add any pipeline-specific arguments if you like here.

-    # args for `output_parent` and `sample_name` were added by the standard
-    # `add_pypiper_args` function.
+    # args for `output_parent` and `sample_name` were added by the standard
+    # `add_pypiper_args` function.

     return parser


 def run_pipeline():
     # A good practice is to make an output folder for each sample, housed under
     # the parent output folder, like this:
     outfolder = os.path.abspath(os.path.join(args.output_parent, args.sample_name))

     # Create a PipelineManager object and start the pipeline
-    pm = pypiper.PipelineManager(name="logmuse-test",
-                                 outfolder=outfolder,
-                                 args=args)
+    pm = pypiper.PipelineManager(name="logmuse-test", outfolder=outfolder, args=args)
     pm.info("Getting started!")
     # NGSTk is a "toolkit" that comes with pypiper, providing some functions
     # for dealing with genome sequence data. You can read more about toolkits in the
     # documentation

-    files = [str(x) + ".tmp" for x in range(1,20)]
+    files = [str(x) + ".tmp" for x in range(1, 20)]

     pm.run("touch " + " ".join(files), target=files, clean=True)

@@ -76,38 +81,40 @@ def run_pipeline():
     # and convert these to fastq files.

     local_input_files = ngstk.merge_or_link(
-        [args.input, args.input2],
-        raw_folder,
-        args.sample_name)
+        [args.input, args.input2], raw_folder, args.sample_name
+    )

     cmd, out_fastq_pre, unaligned_fastq = ngstk.input_to_fastq(
-        local_input_files,
-        args.sample_name,
-        args.paired_end,
-        fastq_folder)
-
+        local_input_files, args.sample_name, args.paired_end, fastq_folder
+    )

     # Now we'll use another NGSTk function to grab the file size from the input files
     #
     pm.report_result("File_mb", ngstk.get_file_size(local_input_files))


     # And then count the number of reads in the file

     n_input_files = len(list(filter(bool, local_input_files)))

-    raw_reads = sum([int(ngstk.count_reads(input_file, args.paired_end))
-                    for input_file in local_input_files]) / n_input_files
-
-    # Finally, we use the report_result() function to print the output and
+    raw_reads = (
+        sum(
+            [
+                int(ngstk.count_reads(input_file, args.paired_end))
+                for input_file in local_input_files
+            ]
+        )
+        / n_input_files
+    )
+
+    # Finally, we use the report_result() function to print the output and
     # log the key-value pair in the standard stats.tsv file
     pm.report_result("Raw_reads", str(raw_reads))

     # Cleanup
     pm.stop_pipeline()


-if __name__ == '__main__':
+if __name__ == "__main__":
     try:
         parser = build_argparser()
         args = parser.parse_args()
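The entry point is truncated by the diff view. A conventional pypiper entry point of this shape completes the try/except along these lines (a sketch, not the file's elided contents):

    if __name__ == "__main__":
        try:
            parser = build_argparser()
            args = parser.parse_args()
            run_pipeline()
        except KeyboardInterrupt:
            print("Pipeline aborted.")
            sys.exit(1)
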
4 changes: 1 addition & 3 deletions init_interactive.py
@@ -1,14 +1,12 @@
""" Create dummy PipelineManager and NGSTk instance for interactive session. """

import os
from pypiper import PipelineManager
from pypiper import NGSTk

from pypiper import NGSTk, PipelineManager

__author__ = "Vince Reuter"
__email__ = "vreuter@virginia.edu"



pm = PipelineManager(name="interactive", outfolder=os.path.expanduser("~"))
tk = NGSTk(pm=pm)
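
This helper is meant for exploratory use: running python -i init_interactive.py drops into an interactive session with pm and tk already constructed, so NGSTk functions can be tried out directly.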
10 changes: 5 additions & 5 deletions pypiper/__init__.py
@@ -1,10 +1,10 @@
+# Implicitly re-export so logmuse usage by pipeline author routes through here.
+from logmuse import add_logging_options
+
 from ._version import __version__
+from .exceptions import *
 from .manager import *
 from .ngstk import *
-from .utils import *
 from .pipeline import *
-from .exceptions import *
 from .stage import *
-
-# Implicitly re-export so logmuse usage by pipeline author routes through here.
-from logmuse import add_logging_options
+from .utils import *
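
The re-export noted in the comment means a pipeline author can import the logmuse hook from pypiper itself instead of depending on logmuse directly; a minimal sketch (the parser description is illustrative):

    from argparse import ArgumentParser

    from pypiper import add_logging_options  # re-exported from logmuse

    parser = ArgumentParser(description="my pipeline")
    add_logging_options(parser)  # registers logmuse's logging-related CLI options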
2 changes: 1 addition & 1 deletion pypiper/const.py
@@ -4,4 +4,4 @@
 CHECKPOINT_EXTENSION = ".checkpoint"
 PIPELINE_CHECKPOINT_DELIMITER = "_"
 STAGE_NAME_SPACE_REPLACEMENT = "-"
-PROFILE_COLNAMES = ['pid', 'hash', 'cid', 'runtime', 'mem', 'cmd', 'lock']
+PROFILE_COLNAMES = ["pid", "hash", "cid", "runtime", "mem", "cmd", "lock"]
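
PROFILE_COLNAMES names the columns of the profiling table that a pipeline run writes alongside its other outputs. A rough sketch of loading that table (the <name>_profile.tsv filename and the headerless, tab-separated layout are assumptions here):

    import pandas as pd

    from pypiper.const import PROFILE_COLNAMES

    # hypothetical path to a run's profiling output
    profile = pd.read_csv(
        "pipeline_output/BASIC_profile.tsv", sep="\t", names=PROFILE_COLNAMES
    )
    print(profile.head())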
