Skip to content

Commit

Permalink
Merge dfda31b into 870870e
Browse files Browse the repository at this point in the history
  • Loading branch information
mlin committed Nov 22, 2018
2 parents 870870e + dfda31b commit d20df0a
Show file tree
Hide file tree
Showing 3 changed files with 232 additions and 87 deletions.
15 changes: 1 addition & 14 deletions WDL/CLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,7 @@ def check(args):
args.path = []
doc = WDL.load(args.uri, args.path)

# Mark up the AST
WDL.Walker.SetParents()(doc)
WDL.Walker.MarkCalled()(doc)

linters = [
WDL.Lint.StringCoercion(),
WDL.Lint.ArrayCoercion(),
WDL.Lint.OptionalCoercion(),
WDL.Lint.IncompleteCall(),
WDL.Lint.CallImportNameCollision(),
WDL.Lint.UnusedImport()
]
for linter in linters:
linter(doc)
WDL.Lint.lint(doc)

# Print an outline
print(os.path.basename(args.uri))
Expand Down
89 changes: 88 additions & 1 deletion WDL/Lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,101 @@

class Linter(WDL.Walker.Base):
"""
Linters are Walkers which annotate each tree node with
Linters are Walkers which annotate each Tree node with
``lint : List[Tuple[SourceNode,str,str]]``
providing lint warnings with a node (possibly more-specific than the
node it's attached to), short codename, and message.
"""

def add(self, obj: WDL.SourceNode, message: str,
subnode: Optional[WDL.SourceNode] = None):
"""
Used by subclasses to attach lint to a node.
Note, lint attaches to Tree nodes (Decl, Task, Workflow, Scatter,
Conditional, Doucemnt). Warnings about individual expressinos should
attach to their parent Tree node.
"""
assert not isinstance(obj, WDL.Expr.Base)
if not hasattr(obj, 'lint'):
obj.lint = []
obj.lint.append((subnode or obj, self.__class__.__name__, message))


_all_linters = []


def a_linter(cls):
"""
Decorator for subclasses of ``Linter`` to register them for use
"""
_all_linters.append(cls)


def lint(doc):
"""
Apply all linters to the document
"""

# Add additional markups to the AST for use by the linters
WDL.Walker.SetParents()(doc)
WDL.Walker.MarkCalled()(doc)

for linter in _all_linters:
linter()(doc)

return doc


class _Collector(WDL.Walker.Base):
def __init__(self):
super().__init__()
self.lint = []

def _collect(self, obj):
if hasattr(obj, 'lint'):
self.lint.extend(getattr(obj, 'lint'))

def document(self, obj) -> Any:
self._collect(obj)
super().document(obj)

def workflow(self, obj) -> Any:
self._collect(obj)
super().workflow(obj)

def call(self, obj) -> Any:
self._collect(obj)
super().call(obj)

def scatter(self, obj) -> Any:
self._collect(obj)
super().scatter(obj)

def conditional(self, obj) -> Any:
self._collect(obj)
super().conditional(obj)

def decl(self, obj) -> Any:
self._collect(obj)
super().decl(obj)

def task(self, obj) -> Any:
self._collect(obj)
super().task(obj)


def collect(doc):
"""
Recursively traverse the already-linted document and collect a flat list of
(tree node, linter_class, message)
"""
collector = _Collector()
collector(doc)
return collector.lint


@a_linter
class StringCoercion(Linter):
# String declaration with non-String rhs expression
def decl(self, obj: WDL.Decl) -> Any:
Expand Down Expand Up @@ -107,6 +189,7 @@ def _is_array_coercion(value_type: WDL.Type.Base, expr_type: WDL.Type.Base):
value_type) > _array_levels(expr_type)


@a_linter
class ArrayCoercion(Linter):

def decl(self, obj: WDL.Decl) -> Any:
Expand Down Expand Up @@ -148,6 +231,7 @@ def call(self, obj: WDL.Tree.Call) -> Any:
self.add(obj, msg, inp_expr)


@a_linter
class OptionalCoercion(Linter):
# Expressions that could blow up at runtime with empty optional values
def expr(self, obj: WDL.Expr.Base) -> Any:
Expand Down Expand Up @@ -175,6 +259,7 @@ def expr(self, obj: WDL.Expr.Base) -> Any:
super().expr(obj)


@a_linter
class IncompleteCall(Linter):
# Call without all required inputs (allowed for top-level workflow)
def call(self, obj: WDL.Call) -> Any:
Expand All @@ -190,6 +275,7 @@ def call(self, obj: WDL.Call) -> Any:
super().call(obj)


@a_linter
class CallImportNameCollision(Linter):
# A call name collides with the namespace of an imported document; allowed
# but potentially confusing.
Expand All @@ -204,6 +290,7 @@ def call(self, obj: WDL.Call) -> Any:
super().call(obj)


@a_linter
class UnusedImport(Linter):
# Nothing used from an imported document
def document(self, obj: WDL.Document) -> Any:
Expand Down
215 changes: 143 additions & 72 deletions tests/test_3corpi.py
Original file line number Diff line number Diff line change
@@ -1,91 +1,162 @@
import unittest, inspect, subprocess, tempfile, os, glob
from .context import WDL

def test_corpus(test_klass, prefix, dir, path=[], blacklist=[]):
files = glob.glob(os.path.join(*(dir + ['*.wdl'])), recursive=True)
assert len(files) > 0, "{} test corpus missing from {}; please `git submodule update --init --recursive`".format(prefix, os.path.join(*dir))
gpath = []
for p in path:
gpath = gpath + glob.glob(os.path.join(*p), recursive=True)
for fn in files:
name = os.path.split(fn)[1]
name = name[:-4]
if name not in blacklist:
name = "test_" + prefix + "_" + name.replace('.', '_')
def t(self, fn=fn):
cmd = ['check']
for dn in gpath:
cmd.append('--path')
cmd.append(dn)
cmd.append(fn)
print()
WDL.CLI.main(cmd)
setattr(test_klass, name, t)

# download and extract a zip file with a corpus of WDL documents; load each one
def test_corpus_zip(test_klass, prefix, zip_url, dir=['**'], path=[], blacklist=[]):
tdn = tempfile.mkdtemp(prefix='miniwdl_test_'+prefix+"_")
subprocess.check_call(['wget', '-q', '-O', 'corpus.zip', zip_url], cwd=tdn)
subprocess.check_call(['unzip', '-q', 'corpus.zip'], cwd=tdn)
return test_corpus(test_klass, prefix, [tdn] + dir, [[tdn] + p for p in path], blacklist)

class TestHCAskylab(unittest.TestCase):
def test_corpus(dir, path=[], blacklist=[], expected_lint={}):
def decorator(test_klass):

test_klass._lint_count = {}
test_klass._expected_lint = expected_lint
test_klass.tearDownClass = classmethod(check_lint)

prefix = test_klass.__name__
files = glob.glob(os.path.join(*(dir + ['*.wdl'])), recursive=True)
assert len(files) > 0, "{} test corpus missing from {}; please `git submodule update --init --recursive`".format(prefix, os.path.join(*dir))
gpath = []
for p in path:
gpath = gpath + glob.glob(os.path.join(*p), recursive=True)
for fn in files:
name = os.path.split(fn)[1]
name = name[:-4]
if name not in blacklist:
name = "test_" + prefix + "_" + name.replace('.', '_')
def t(self, fn=fn):
# run the 'miniwd check' command-line tool
cmd = ['check']
for dn in gpath:
cmd.append('--path')
cmd.append(dn)
cmd.append(fn)
print()
WDL.CLI.main(cmd)

# also load & lint the document to verify the lint count
doc = WDL.load(fn, path=gpath)
WDL.Lint.lint(doc)
for _, linter, _ in WDL.Lint.collect(doc):
test_klass._lint_count[linter] = 1 + test_klass._lint_count.get(linter, 0)
setattr(test_klass, name, t)

return test_klass
return decorator

def check_lint(cls):
if cls._lint_count != cls._expected_lint:
raise Exception("Lint results changed for {}; expected: {} got: {}".format(cls.__name__, str(cls._expected_lint), str(cls._lint_count)))

@test_corpus(
["test_corpi/HumanCellAtlas/skylab/library/tasks/**"],
expected_lint={'StringCoercion': 3}
)
class HCAskylab_task(unittest.TestCase):
pass

@test_corpus(
["test_corpi/HumanCellAtlas/skylab/pipelines/**"],
path=[["test_corpi/HumanCellAtlas/skylab/library/tasks"]],
expected_lint={'CallImportNameCollision': 1, 'StringCoercion': 3}
)
class HCAskylab_workflow(unittest.TestCase):
pass
test_corpus(TestHCAskylab, "HCAskylab_task", ["test_corpi/HumanCellAtlas/skylab/library/tasks/**"])
test_corpus(TestHCAskylab, "HCAskylab_workflow", ["test_corpi/HumanCellAtlas/skylab/pipelines/**"],
path=[["test_corpi/HumanCellAtlas/skylab/library/tasks"]])

class TestGATK(unittest.TestCase):
@test_corpus(
["test_corpi/gatk-workflows/five-dollar-genome-analysis-pipeline/**"],
path=[["test_corpi/gatk-workflows/five-dollar-genome-analysis-pipeline"]],
blacklist=['fc_germline_single_sample_workflow'],
expected_lint={'CallImportNameCollision': 2, 'ArrayCoercion': 4, 'StringCoercion': 5}
)
class GATK_five_dollar(unittest.TestCase):
pass
test_corpus(TestGATK, "GATK_five_dollar", ["test_corpi/gatk-workflows/five-dollar-genome-analysis-pipeline/**"],
path=[["test_corpi/gatk-workflows/five-dollar-genome-analysis-pipeline"]],
blacklist=['fc_germline_single_sample_workflow'])
test_corpus(TestGATK, "gatk4_germline_snps_indels", ["test_corpi/gatk-workflows/gatk4-germline-snps-indels/**"],
# TODO: support pre-1.0 style of workflow outputs (identifiers and wildcards)
# https://github.com/gatk-workflows/gatk4-germline-snps-indels/blob/b9bbbdcfca7ece0d011ac1225ce6818b33720f48/joint-discovery-gatk4-local.wdl#L345
# also needed for the CNN variant filter repo.
blacklist=['joint-discovery-gatk4-local', 'joint-discovery-gatk4'])
test_corpus(TestGATK, "broad_prod_wgs", ["test_corpi/gatk-workflows/broad-prod-wgs-germline-snps-indels/**"],
blacklist=['JointGenotypingWf'])

# TODO: support out-of-order use of artifact_modes in https://github.com/gatk-workflows/gatk4-somatic-snvs-indels/blob/0a82bedcedd2a2176ccced7cc2ed700e37a025f5/mutect2.wdl#L90
#test_corpus_zip(TestGATK, "gatk4_somatic_snvs_indels",
# 'https://github.com/gatk-workflows/gatk4-somatic-snvs-indels/archive/0a82bed.zip')

class TestGTEx(unittest.TestCase):
@test_corpus(
["test_corpi/gatk-workflows/gatk4-germline-snps-indels/**"],
# TODO: support pre-1.0 style of workflow outputs (identifiers and wildcards)
# https://github.com/gatk-workflows/gatk4-germline-snps-indels/blob/b9bbbdcfca7ece0d011ac1225ce6818b33720f48/joint-discovery-gatk4-local.wdl#L345
# also needed for the CNN variant filter repo.
blacklist=['joint-discovery-gatk4-local', 'joint-discovery-gatk4'],
expected_lint={'StringCoercion': 2}
)
class gatk4_germline_snps_indels(unittest.TestCase):
pass

@test_corpus(
["test_corpi/gatk-workflows/broad-prod-wgs-germline-snps-indels/**"],
blacklist=['JointGenotypingWf'],
expected_lint={'ArrayCoercion': 4, 'StringCoercion': 6}
)
class broad_prod_wgs(unittest.TestCase):
pass

@test_corpus(
["test_corpi/broadinstitute/gtex-pipeline/**"],
# need URI import
blacklist=["rnaseq_pipeline_bam","rnaseq_pipeline_fastq"],
expected_lint={'IncompleteCall': 30}
)
class GTEx(unittest.TestCase):
pass

@test_corpus(
["test_corpi/DataBiosphere/topmed-workflows/**"],
# need URI import
blacklist=['CRAM_md5sum_checker_wrapper', 'checker-workflow-wrapping-alignment-workflow',
'topmed_freeze3_calling', 'topmed_freeze3_calling_checker', 'u_of_michigan_aligner_checker'],
expected_lint={'StringCoercion': 2}
)
class TOPMed(unittest.TestCase):
pass

@test_corpus(
["test_corpi/broadinstitute/viral-ngs/pipes/WDL/workflows"],
path=[["test_corpi/broadinstitute/viral-ngs/pipes/WDL/workflows/tasks"]],
expected_lint={'IncompleteCall': 44, 'UnusedImport': 1}
)
class ViralNGS(unittest.TestCase):
pass

@test_corpus(
["test_corpi/ENCODE-DCC/chip-seq-pipeline2/**"],
expected_lint={'StringCoercion': 48, 'ArrayCoercion': 64}
)
class ENCODE_ChIPseq(unittest.TestCase):
pass
test_corpus(TestGTEx, "GTEx", ["test_corpi/broadinstitute/gtex-pipeline/**"],
# need URI import
blacklist=["rnaseq_pipeline_bam","rnaseq_pipeline_fastq"])

class TestTOPMed(unittest.TestCase):
@test_corpus(
["test_corpi/ENCODE-DCC/atac-seq-pipeline/**"],
expected_lint={'StringCoercion': 52, 'ArrayCoercion': 41}
)
class ENCODE_ATACseq(unittest.TestCase):
pass
test_corpus(TestTOPMed, "TOPMed", ["test_corpi/DataBiosphere/topmed-workflows/**"],
# need URI import
blacklist=['CRAM_md5sum_checker_wrapper', 'checker-workflow-wrapping-alignment-workflow',
'topmed_freeze3_calling', 'topmed_freeze3_calling_checker', 'u_of_michigan_aligner_checker'])

class TestViralNGS(unittest.TestCase):
@test_corpus(
["test_corpi/ENCODE-DCC/rna-seq-pipeline/**"],
expected_lint={'IncompleteCall': 3}
)
class ENCODE_RNAseq(unittest.TestCase):
pass
test_corpus(TestViralNGS, "ViralNGS", ["test_corpi/broadinstitute/viral-ngs/pipes/WDL/workflows"],
path=[["test_corpi/broadinstitute/viral-ngs/pipes/WDL/workflows/tasks"]])

class TestENCODE(unittest.TestCase):
@test_corpus(
["test_corpi/ENCODE-DCC/wgbs-pipeline/**"],
expected_lint={'StringCoercion': 1}
)
class ENCODE_WGBS(unittest.TestCase):
pass
test_corpus(TestENCODE, "ENCODE_ChIPseq", ["test_corpi/ENCODE-DCC/chip-seq-pipeline2/**"])
test_corpus(TestENCODE, "ENCODE_ATACseq", ["test_corpi/ENCODE-DCC/atac-seq-pipeline/**"])
test_corpus(TestENCODE, "ENCODE_RNAseq", ["test_corpi/ENCODE-DCC/rna-seq-pipeline/**"])
test_corpus(TestENCODE, "ENCODE_WGBS", ["test_corpi/ENCODE-DCC/wgbs-pipeline/**"])

class TestDxWDL(unittest.TestCase):
@test_corpus(
["test_corpi/dnanexus/dxWDL/test/**"],
blacklist=[
# library_math and docs that import it use Object
"cast","complex","decl_mid_wf","dict","library_math","math","math2","optionals","toplevel_calls","trivial","trivial2",
# use dnanexus extensions
"call_native", "call_native_app",
# pre-1.0 style outputs
"movie", "foo_toplevel", "foo_if_flag", "foo",
# double quantifier
"conditionals_base"
]
)
class dxWDL(unittest.TestCase):
pass
test_corpus(TestDxWDL, "dxWDL", ["test_corpi/dnanexus/dxWDL/test/**"],
blacklist=[
# library_math and docs that import it use Object
"cast","complex","decl_mid_wf","dict","library_math","math","math2","optionals","toplevel_calls","trivial","trivial2",
# use dnanexus extensions
"call_native", "call_native_app",
# pre-1.0 style outputs
"movie", "foo_toplevel", "foo_if_flag", "foo",
# double quantifier
"conditionals_base"
])

0 comments on commit d20df0a

Please sign in to comment.