reformat, update PipestatManager configuration
stolarczyk committed May 4, 2021
1 parent 0916e3e commit 7f0a682
Showing 35 changed files with 2,570 additions and 1,653 deletions.
214 changes: 117 additions & 97 deletions docs/conf.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions example_pipelines/basic.py
@@ -8,13 +8,13 @@
 # First, make sure you can import the pypiper package

 import os
+
 import pypiper

 # Create a PipelineManager instance (don't forget to name it!)
 # This starts the pipeline.

-pm = pypiper.PipelineManager(name="BASIC",
-                             outfolder="pipeline_output/")
+pm = pypiper.PipelineManager(name="BASIC", outfolder="pipeline_output/")

 # Now just build shell command strings, and use the run function
 # to execute them in order. run needs 2 things: a command, and the
@@ -57,5 +57,5 @@
 # Now, stop the pipeline to complete gracefully.
 pm.stop_pipeline()

-# Observe your outputs in the pipeline_output folder
+# Observe your outputs in the pipeline_output folder
 # to see what you've created.
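
For orientation, the run function referenced in the comments above pairs a shell command with a target file and skips the command when the target already exists. A minimal sketch of that pattern (the command and file names here are illustrative, not taken from this diff):

    import pypiper

    pm = pypiper.PipelineManager(name="BASIC", outfolder="pipeline_output/")

    # run() re-executes a command only when its target file is missing,
    # which is what makes the pipeline restartable.
    target = "pipeline_output/result.txt"
    pm.run("echo 'hello world' > " + target, target=target)

    pm.stop_pipeline()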
55 changes: 32 additions & 23 deletions example_pipelines/count_reads.py
@@ -9,25 +9,32 @@
 __license__ = "GPL3"
 __version__ = "0.1"

-from argparse import ArgumentParser
-import os, re
-import sys
+import os
+import re
 import subprocess
+import sys
+from argparse import ArgumentParser
+
 import yaml
+
 import pypiper

 parser = ArgumentParser(
     description="A pipeline to count the number of reads and file size. Accepts"
-                " BAM, fastq, or fastq.gz files.")
+    " BAM, fastq, or fastq.gz files."
+)

 # First, add standard arguments from Pypiper.
 # groups="pypiper" will add all the arguments that pypiper uses,
 # and adding "common" adds arguments for --input and --sample-name
 # and "output_parent". You can read more about your options for standard
 # arguments in the pypiper docs (section "command-line arguments")
-parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "common", "ngs"],
-                                  args=["output-parent", "config"],
-                                  required=['sample-name', 'output-parent'])
+parser = pypiper.add_pypiper_args(
+    parser,
+    groups=["pypiper", "common", "ngs"],
+    args=["output-parent", "config"],
+    required=["sample-name", "output-parent"],
+)

 # Add any pipeline-specific arguments if you like here.

@@ -42,16 +49,14 @@
 else:
     args.paired_end = False

-# args for `output_parent` and `sample_name` were added by the standard
-# `add_pypiper_args` function.
+# args for `output_parent` and `sample_name` were added by the standard
+# `add_pypiper_args` function.
 # A good practice is to make an output folder for each sample, housed under
 # the parent output folder, like this:
 outfolder = os.path.abspath(os.path.join(args.output_parent, args.sample_name))

 # Create a PipelineManager object and start the pipeline
-pm = pypiper.PipelineManager(name="count",
-                             outfolder=outfolder,
-                             args=args)
+pm = pypiper.PipelineManager(name="count", outfolder=outfolder, args=args)

 # NGSTk is a "toolkit" that comes with pypiper, providing some functions
 # for dealing with genome sequence data. You can read more about toolkits in the
@@ -75,15 +80,12 @@
 # and convert these to fastq files.

 local_input_files = ngstk.merge_or_link(
-    [args.input, args.input2],
-    raw_folder,
-    args.sample_name)
+    [args.input, args.input2], raw_folder, args.sample_name
+)

 cmd, out_fastq_pre, unaligned_fastq = ngstk.input_to_fastq(
-    local_input_files,
-    args.sample_name,
-    args.paired_end,
-    fastq_folder)
+    local_input_files, args.sample_name, args.paired_end, fastq_folder
+)


 # Now we'll use another NGSTk function to grab the file size from the input files
@@ -95,10 +97,17 @@

 n_input_files = len(list(filter(bool, local_input_files)))

-raw_reads = sum([int(ngstk.count_reads(input_file, args.paired_end))
-                for input_file in local_input_files]) / n_input_files
-
-# Finally, we use the report_result() function to print the output and
+raw_reads = (
+    sum(
+        [
+            int(ngstk.count_reads(input_file, args.paired_end))
+            for input_file in local_input_files
+        ]
+    )
+    / n_input_files
+)
+
+# Finally, we use the report_result() function to print the output and
 # log the key-value pair in the standard stats.tsv file
 pm.report_result("Raw_reads", str(raw_reads))

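As the comments in this pipeline note, report_result() logs each key-value pair to the standard stats.tsv file in the pipeline's output folder. A rough sketch of reading those results back (the sample output path is hypothetical, and the exact column layout of stats.tsv is an assumption):

    import csv
    import os

    outfolder = "results/test_sample"  # hypothetical sample output folder

    # Each report_result() call appends one tab-separated row.
    with open(os.path.join(outfolder, "stats.tsv")) as f:
        for row in csv.reader(f, delimiter="\t"):
            print(row)
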
3 changes: 2 additions & 1 deletion example_pipelines/hello_pypiper.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python

 import pypiper
-outfolder = "hello_pypiper_results" # Choose a folder for your results
+
+outfolder = "hello_pypiper_results"  # Choose a folder for your results

 # Create a PipelineManager, the workhorse of pypiper
 pm = pypiper.PipelineManager(name="hello_pypiper", outfolder=outfolder)
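The diff view truncates the file here. For orientation, the hello-world tutorial pattern this script follows typically continues with a timestamped message and a single run() call; the lines below are an illustrative sketch, not the file's elided contents:

    pm.timestamp("Hello!")
    target_file = outfolder + "/output.txt"
    pm.run("echo 'Hello, Pypiper!' > " + target_file, target=target_file)
    pm.stop_pipeline()
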
63 changes: 35 additions & 28 deletions example_pipelines/logmuse_example.py
@@ -9,52 +9,57 @@
 __license__ = "GPL3"
 __version__ = "0.1"

-from argparse import ArgumentParser
-import os, re
-import sys
+import os
+import re
 import subprocess
+import sys
+from argparse import ArgumentParser

 import yaml
-import pypiper
+
+import pypiper


 def build_argparser():

     parser = ArgumentParser(
         description="A pipeline to count the number of reads and file size. Accepts"
-                    " BAM, fastq, or fastq.gz files.")
+        " BAM, fastq, or fastq.gz files."
+    )

     # First, add standard arguments from Pypiper.
     # groups="pypiper" will add all the arguments that pypiper uses,
     # and adding "common" adds arguments for --input and --sample-name
     # and "output_parent". You can read more about your options for standard
     # arguments in the pypiper docs (section "command-line arguments")
-    parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "common", "ngs", "logmuse"],
-                                      args=["output-parent", "config"],
-                                      required=['sample-name', 'output-parent'])
+    parser = pypiper.add_pypiper_args(
+        parser,
+        groups=["pypiper", "common", "ngs", "logmuse"],
+        args=["output-parent", "config"],
+        required=["sample-name", "output-parent"],
+    )

     # Add any pipeline-specific arguments if you like here.

-    # args for `output_parent` and `sample_name` were added by the standard
-    # `add_pypiper_args` function.
+    # args for `output_parent` and `sample_name` were added by the standard
+    # `add_pypiper_args` function.

     return parser


 def run_pipeline():
     # A good practice is to make an output folder for each sample, housed under
     # the parent output folder, like this:
     outfolder = os.path.abspath(os.path.join(args.output_parent, args.sample_name))

     # Create a PipelineManager object and start the pipeline
-    pm = pypiper.PipelineManager(name="logmuse-test",
-                                 outfolder=outfolder,
-                                 args=args)
+    pm = pypiper.PipelineManager(name="logmuse-test", outfolder=outfolder, args=args)
     pm.info("Getting started!")
     # NGSTk is a "toolkit" that comes with pypiper, providing some functions
     # for dealing with genome sequence data. You can read more about toolkits in the
     # documentation

-    files = [str(x) + ".tmp" for x in range(1,20)]
+    files = [str(x) + ".tmp" for x in range(1, 20)]

     pm.run("touch " + " ".join(files), target=files, clean=True)

@@ -76,38 +81,40 @@ def run_pipeline():
     # and convert these to fastq files.

     local_input_files = ngstk.merge_or_link(
-        [args.input, args.input2],
-        raw_folder,
-        args.sample_name)
+        [args.input, args.input2], raw_folder, args.sample_name
+    )

     cmd, out_fastq_pre, unaligned_fastq = ngstk.input_to_fastq(
-        local_input_files,
-        args.sample_name,
-        args.paired_end,
-        fastq_folder)
-
+        local_input_files, args.sample_name, args.paired_end, fastq_folder
+    )

     # Now we'll use another NGSTk function to grab the file size from the input files
     #
     pm.report_result("File_mb", ngstk.get_file_size(local_input_files))


     # And then count the number of reads in the file

     n_input_files = len(list(filter(bool, local_input_files)))

-    raw_reads = sum([int(ngstk.count_reads(input_file, args.paired_end))
-                    for input_file in local_input_files]) / n_input_files
-
-    # Finally, we use the report_result() function to print the output and
+    raw_reads = (
+        sum(
+            [
+                int(ngstk.count_reads(input_file, args.paired_end))
+                for input_file in local_input_files
+            ]
+        )
+        / n_input_files
+    )
+
+    # Finally, we use the report_result() function to print the output and
     # log the key-value pair in the standard stats.tsv file
     pm.report_result("Raw_reads", str(raw_reads))

     # Cleanup
     pm.stop_pipeline()


-if __name__ == '__main__':
+if __name__ == "__main__":
     try:
         parser = build_argparser()
         args = parser.parse_args()
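The entry point is truncated by the diff view. A conventional pypiper entry point of this shape completes the try/except along these lines (a sketch, not the file's elided contents):

    if __name__ == "__main__":
        try:
            parser = build_argparser()
            args = parser.parse_args()
            run_pipeline()
        except KeyboardInterrupt:
            print("Pipeline aborted.")
            sys.exit(1)
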
4 changes: 1 addition & 3 deletions init_interactive.py
@@ -1,14 +1,12 @@
""" Create dummy PipelineManager and NGSTk instance for interactive session. """

import os
from pypiper import PipelineManager
from pypiper import NGSTk

from pypiper import NGSTk, PipelineManager

__author__ = "Vince Reuter"
__email__ = "vreuter@virginia.edu"



pm = PipelineManager(name="interactive", outfolder=os.path.expanduser("~"))
tk = NGSTk(pm=pm)
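
This helper is meant for exploratory use: running python -i init_interactive.py drops into an interactive session with pm and tk already constructed, so NGSTk functions can be tried out directly.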
10 changes: 5 additions & 5 deletions pypiper/__init__.py
@@ -1,10 +1,10 @@
+# Implicitly re-export so logmuse usage by pipeline author routes through here.
+from logmuse import add_logging_options
+
 from ._version import __version__
+from .exceptions import *
 from .manager import *
 from .ngstk import *
-from .utils import *
 from .pipeline import *
-from .exceptions import *
 from .stage import *
-
-# Implicitly re-export so logmuse usage by pipeline author routes through here.
-from logmuse import add_logging_options
+from .utils import *
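
The re-export noted in the comment means a pipeline author can import the logmuse hook from pypiper itself instead of depending on logmuse directly; a minimal sketch (the parser description is illustrative):

    from argparse import ArgumentParser

    from pypiper import add_logging_options  # re-exported from logmuse

    parser = ArgumentParser(description="my pipeline")
    add_logging_options(parser)  # registers logmuse's logging-related CLI options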
2 changes: 1 addition & 1 deletion pypiper/const.py
@@ -4,4 +4,4 @@
 CHECKPOINT_EXTENSION = ".checkpoint"
 PIPELINE_CHECKPOINT_DELIMITER = "_"
 STAGE_NAME_SPACE_REPLACEMENT = "-"
-PROFILE_COLNAMES = ['pid', 'hash', 'cid', 'runtime', 'mem', 'cmd', 'lock']
+PROFILE_COLNAMES = ["pid", "hash", "cid", "runtime", "mem", "cmd", "lock"]
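
PROFILE_COLNAMES names the columns of the profiling table that a pipeline run writes alongside its other outputs. A rough sketch of loading that table (the <name>_profile.tsv filename and the headerless, tab-separated layout are assumptions here):

    import pandas as pd

    from pypiper.const import PROFILE_COLNAMES

    # hypothetical path to a run's profiling output
    profile = pd.read_csv(
        "pipeline_output/BASIC_profile.tsv", sep="\t", names=PROFILE_COLNAMES
    )
    print(profile.head())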
