Skip to content

Commit

Permalink
Merge 1d87d94 into dd80156
Browse files Browse the repository at this point in the history
  • Loading branch information
mlin committed Oct 30, 2018
2 parents dd80156 + 1d87d94 commit 082c1aa
Show file tree
Hide file tree
Showing 9 changed files with 131 additions and 42 deletions.
16 changes: 10 additions & 6 deletions WDL/Error.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# pyre-strict
from abc import ABC
from typing import Any, List, Optional, Dict, Callable, NamedTuple, TypeVar
from typing import Any, List, Optional, Dict, Callable, NamedTuple, TypeVar, Union
import WDL.Type as T
from WDL.Expr import TVApply, TVIdent

Expand All @@ -23,10 +23,14 @@ def __init__(self, pos : SourcePosition) -> None:
self.pos = pos

class Base(Exception):
    """
    Base class for the errors in this module. The exception message is prefixed
    with the source position (filename, line, column) the error pertains to.

    :param node: either a ``SourceNode``, whose ``pos`` supplies the position, or
                 a bare ``SourcePosition`` when no node is available
    :param message: human-readable description; the position prefix is added here
    """
    # the offending node, or None if only a position was supplied
    node : Optional[SourceNode]
    # position used to build the message prefix
    pos : SourcePosition

    def __init__(self, node : Union[SourceNode,SourcePosition], message : str) -> None:
        if isinstance(node, SourceNode):
            self.node = node
            self.pos = node.pos
        else:
            # Only a position was given. Set ``node`` explicitly so that reading
            # it yields None rather than raising AttributeError.
            self.node = None
            self.pos = node
        message = "({} Ln {}, Col {}) {}".format(self.pos.filename, self.pos.line, self.pos.column, message)
        super().__init__(message)

class NoSuchFunction(Base):
Expand Down Expand Up @@ -79,5 +83,5 @@ def __init__(self, node : SourceNode) -> None:
super().__init__(node, "Null value")

class MultipleDefinitions(Base):
    """
    Error type for multiple definitions. It adds nothing to ``Base``: the caller
    supplies the complete message and either a ``SourceNode`` or a
    ``SourcePosition`` to locate it.
    """
    def __init__(self, node : Union[SourceNode,SourcePosition], message : str) -> None:
        # plain pass-through to Base, which adds the position prefix
        super().__init__(node, message)
35 changes: 35 additions & 0 deletions WDL/StdLib.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def __call__(self, expr : E.Apply, env : E.Env) -> V.Base:
("stdout", [], T.String(), lambda: exec('raise NotImplementedError()')),
("size", [T.File(), T.String()], T.Float(), lambda file: exec('raise NotImplementedError()')),
("ceil", [T.Float()], T.Int(), lambda x: exec('raise NotImplementedError()')),
("round", [T.Float()], T.Int(), lambda x: exec('raise NotImplementedError()')),
("glob", [T.String()], T.Array(T.File()), lambda pattern: exec('raise NotImplementedError()')),
("read_int", [T.String()], T.Int(), lambda pattern: exec('raise NotImplementedError()')),
("read_boolean", [T.String()], T.Boolean(), lambda pattern: exec('raise NotImplementedError()')),
Expand All @@ -132,8 +133,10 @@ def __call__(self, expr : E.Apply, env : E.Env) -> V.Base:
("read_map", [T.String()], T.Map(None), lambda pattern: exec('raise NotImplementedError()')),
("read_lines", [T.String()], T.Array(None), lambda pattern: exec('raise NotImplementedError()')),
("read_tsv", [T.String()], T.Array(T.Array(T.String())), lambda pattern: exec('raise NotImplementedError()')),
("write_lines", [T.Array(T.String())], T.String(), lambda pattern: exec('raise NotImplementedError()')),
("write_map", [T.Map(None)], T.String(), lambda pattern: exec('raise NotImplementedError()')),
("range", [T.Int()], T.Array(T.Int()), lambda high: exec('raise NotImplementedError()')),
("sub", [T.String(), T.String(), T.String()], T.String(), lambda high: exec('raise NotImplementedError()')),
]
for name, argument_types, return_type, F in _static_functions:
E._stdlib[name] = _StaticFunction(name, argument_types, return_type, F)
Expand Down Expand Up @@ -283,6 +286,7 @@ def infer_type(self, expr : E.Apply) -> T.Base:
def __call__(self, expr : E.Apply, env : Env.Values) -> V.Base:
raise NotImplementedError()
E._stdlib["select_first"] = _SelectFirst()
E._stdlib["select_all"] = _SelectFirst() # TODO

class _Zip(E._Function):
# 'a array -> 'b array -> ('a,'b) array
Expand All @@ -302,3 +306,34 @@ def infer_type(self, expr : E.Apply) -> T.Base:
def __call__(self, expr : E.Apply, env : Env.Values) -> V.Base:
raise NotImplementedError()
E._stdlib["zip"] = _Zip()
E._stdlib["cross"] = _Zip() # TODO

class _Basename(E._Function):
    # basename: takes one or two String arguments and yields a String
    def infer_type(self, expr : E.Apply) -> T.Base:
        nargs = len(expr.arguments)
        if nargs < 1 or nargs > 2:
            raise Error.WrongArity(expr, 2)
        # every supplied argument (the first, and the second if present) must be a String
        for arg in expr.arguments:
            arg.typecheck(T.String())
        return T.String()

    def __call__(self, expr : E.Apply, env : Env.Values) -> V.Base:
        # evaluation is not implemented; only static typing is provided
        raise NotImplementedError()
E._stdlib["basename"] = _Basename()

class _Flatten(E._Function):
    # t array array -> t array
    def infer_type(self, expr : E.Apply) -> T.Base:
        if len(expr.arguments) != 1:
            raise Error.WrongArity(expr, 1)
        arg = expr.arguments[0]
        arg.typecheck(T.Array(None))
        # TODO: won't handle implicit coercion from T to Array[T]
        assert isinstance(arg.type, T.Array)
        item_type = arg.type.item_type
        if item_type is None:
            # item type of the argument is unknown, so the result's is too
            return T.Array(None)
        if not isinstance(item_type, T.Array):
            # the argument is an array, but not an array of arrays
            raise Error.StaticTypeMismatch(arg, T.Array(T.Array(None)), arg.type)
        return T.Array(arg.type.item_type.item_type) #pyre-fixme

    def __call__(self, expr : E.Apply, env : Env.Values) -> V.Base:
        # evaluation is not implemented; only static typing is provided
        raise NotImplementedError()
E._stdlib["flatten"] = _Flatten()
2 changes: 2 additions & 0 deletions WDL/Type.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class Base(ABC):

def coerces(self, rhs : TVBase) -> bool:
    """
    True if this type is the same as ``rhs``, or can be coerced to ``rhs``:
    any type coerces to String, and T coerces to Array[T].
    Optional/nonempty quantifiers are disregarded for this purpose.
    """
    if isinstance(rhs, Array):
        if rhs.item_type == self: # coerce T to Array[T]
            return True
    if self == rhs:
        return True
    return isinstance(rhs, String)

def __str__(self) -> str:
Expand Down
1 change: 1 addition & 0 deletions WDL/Value.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def coerce(self, desired_type : Optional[T.Base] = None) -> BaseT:
:raises: ReferenceError for a null value and non-optional type
"""
# TODO: coerce T to Array[T] (x to [x])
return self
def expect(self, desired_type : Optional[T.Base] = None) -> BaseT:
"""Alias for coerce"""
Expand Down
13 changes: 9 additions & 4 deletions WDL/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,9 @@ def runtime_kv(self, items, meta):
def runtime_section(self, items, meta):
    # Collect the (key, value) pairs of a runtime section into a dict, returned
    # under the "runtime" key. If a key repeats, the last value wins.
    # TODO: restore duplicate check (raising
    #   Err.MultipleDefinitions(sp(self.filename, meta), "duplicate keys in runtime section")
    # when a key is already present), cf. https://github.com/gatk-workflows/five-dollar-genome-analysis-pipeline/blob/89f11befc13abae97ab8fb1b457731f390c8728d/tasks_pipelines/qc.wdl#L288
    runtime = {key: value for key, value in items}
    return {"runtime": runtime}
def task(self, items, meta):
Expand Down Expand Up @@ -318,7 +319,10 @@ def parse_expr(txt : str) -> E.Base:
"""
Parse an isolated WDL expression text into an abstract syntax tree
"""
return _ExprTransformer('').transform(WDL._parser.parse(txt, "expr")) # pyre-fixme
try:
return _ExprTransformer(txt).transform(WDL._parser.parse(txt, "expr"))
except lark.exceptions.UnexpectedToken as exn:
raise Err.ParserError(txt) from exn

def parse_tasks(txt : str) -> List[D.Task]:
    """
    Parse WDL text using the parser's "tasks" start rule and transform the
    resulting tree into a list of tasks
    """
    transformer = _DocTransformer('')
    tree = WDL._parser.parse(txt, "tasks")
    return transformer.transform(tree) # pyre-fixme
Expand Down Expand Up @@ -350,8 +354,9 @@ def load(uri : str, path : List[str] = []) -> D.Document:
doc = parse_document(infile.read(), uri)
# recursively descend into document's imports, and store the imported
# documents into doc.imports
# TODO: limit recursion; prevent mutual recursion
for i in range(len(doc.imports)):
subdoc = load(doc.imports[i][0], path)
subdoc = load(doc.imports[i][0], [os.path.dirname(fn)]+path)
doc.imports[i] = (doc.imports[i][0], doc.imports[i][1], subdoc)
doc.typecheck()
return doc
Expand Down
16 changes: 11 additions & 5 deletions WDL/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,18 @@
| ident
| CNAME "(" [expr ("," expr)*] ")" -> apply
?literal: "true" -> boolean_true
| "false" -> boolean_false
?literal: _TRUE-> boolean_true
| _FALSE -> boolean_false
| INT -> int
| SIGNED_INT -> int
| FLOAT -> float
| SIGNED_FLOAT -> float
_TRUE.2: "true"
_FALSE.2: "false"
_LEFT.2: "left"
_RIGHT.2: "right"
// string (single-quoted)
STRING1_CHAR: "\\'" | /[^'$]/ | /\$[^{']/
STRING1_END: STRING1_CHAR* "$"? "'"
Expand All @@ -78,8 +83,6 @@
ESCAPED_STRING1: "'" STRING_INNER1* "'"
string_literal: ESCAPED_STRING | ESCAPED_STRING1
_LEFT.2: "left"
_RIGHT.2: "right"
ident: [CNAME ("." CNAME)*]
?map_key: literal | string
Expand Down Expand Up @@ -111,7 +114,10 @@
// WDL task commands: with {} and <<< >>> command and ${} and ~{} placeholder styles
!?placeholder_key: "default" | "false" | "true" | "sep"
placeholder_option: placeholder_key "=" string_literal
?placeholder_value: string_literal
| INT -> int
| FLOAT -> float
placeholder_option: placeholder_key "=" placeholder_value
placeholder: placeholder_option* expr
COMMAND1_CHAR: /[^~$}]/ | /\$[^{]/ | /~[^{]/
Expand Down
27 changes: 0 additions & 27 deletions tests/test_HCAskylab.py

This file was deleted.

60 changes: 60 additions & 0 deletions tests/test_corpi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import unittest, inspect, subprocess, tempfile, os, glob
from .context import WDL

# download and extract a zip file with a corpus of WDL documents; load each one
def test_corpus_zip(test_klass, prefix, zip_url, dir=None, path=None, blacklist=None):
    """
    Download ``zip_url`` into a fresh temporary directory, unzip it, and attach
    one test method to ``test_klass`` for each ``*.wdl`` file found. Each method
    calls ``WDL.load`` on its file.

    :param test_klass: class that receives the generated ``test_<prefix>_<name>`` methods
    :param prefix: used in the temporary directory name and in each method name
    :param zip_url: URL of the zip archive, fetched with ``wget``
    :param dir: path components (glob patterns allowed) under the extraction
                directory in which to look for ``*.wdl`` files; default ``['**']``
    :param path: list of path-component lists; each is globbed under the
                 extraction directory and the matches are passed to ``WDL.load``
                 as ``path``
    :param blacklist: file base names (without ``.wdl``) to skip

    The download and extraction happen when this function is called, and the
    temporary directory is not removed afterwards. If two files share a base
    name, the later one replaces the earlier one's test method.
    """
    # None sentinels instead of mutable list defaults shared across calls
    dir = ['**'] if dir is None else dir
    path = [] if path is None else path
    blacklist = [] if blacklist is None else blacklist

    tdn = tempfile.mkdtemp(prefix='miniwdl_test_'+prefix+"_")
    subprocess.check_call(['wget', '-q', '-O', 'corpus.zip', zip_url], cwd=tdn)
    subprocess.check_call(['unzip', '-q', 'corpus.zip'], cwd=tdn)
    files = glob.glob(os.path.join(tdn, *dir, '*.wdl'), recursive=True)
    gpath = []
    for p in path:
        gpath.extend(glob.glob(os.path.join(tdn, *p), recursive=True))
    for fn in files:
        # base file name without the .wdl extension
        name = os.path.splitext(os.path.basename(fn))[0]
        if name not in blacklist:
            name = "test_" + prefix + "_" + name.replace('.', '_')
            # fn is bound as a default argument so each method keeps its own file
            def t(self, fn=fn):
                WDL.load(fn, path=gpath)
            setattr(test_klass, name, t)

# Empty test case; its test methods are attached by the test_corpus_zip() calls below.
class TestHCAskylab(unittest.TestCase):
    pass
# documents under skylab-*/library/tasks
test_corpus_zip(TestHCAskylab, "HCAskylab_task",
    'https://github.com/HumanCellAtlas/skylab/archive/face906.zip', ['skylab-*', 'library', 'tasks'])
# documents under skylab-*/pipelines, with skylab-*/library/tasks given as the `path` argument
test_corpus_zip(TestHCAskylab, "HCAskylab_workflow",
    'https://github.com/HumanCellAtlas/skylab/archive/face906.zip', ['skylab-*', 'pipelines', '**'], [['skylab-*', 'library', 'tasks']])

# Empty test case; its test methods are attached by the test_corpus_zip() calls below.
class TestGATK(unittest.TestCase):
    pass
test_corpus_zip(TestGATK, "GATK_five_dollar",
    'https://github.com/gatk-workflows/five-dollar-genome-analysis-pipeline/archive/89f11be.zip',
    blacklist=['fc_germline_single_sample_workflow', 'split_large_readgroup', 'unmapped_bam_to_aligned_bam'])
test_corpus_zip(TestGATK, "gatk4_germline_snps_indels",
    'https://github.com/gatk-workflows/gatk4-germline-snps-indels/archive/b9bbbdc.zip',
    # TODO: support pre-1.0 style of workflow outputs (identifiers and wildcards)
    # https://github.com/gatk-workflows/gatk4-germline-snps-indels/blob/b9bbbdcfca7ece0d011ac1225ce6818b33720f48/joint-discovery-gatk4-local.wdl#L345
    # also needed for the CNN variant filter repo.
    blacklist=['joint-discovery-gatk4-local', 'joint-discovery-gatk4'])
# TODO: support out-of-order use of artifact_modes in https://github.com/gatk-workflows/gatk4-somatic-snvs-indels/blob/0a82bedcedd2a2176ccced7cc2ed700e37a025f5/mutect2.wdl#L90
#test_corpus_zip(TestGATK, "gatk4_somatic_snvs_indels",
# 'https://github.com/gatk-workflows/gatk4-somatic-snvs-indels/archive/0a82bed.zip')
test_corpus_zip(TestGATK, "broad_prod_wgs",
    'https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/archive/5585cdf.zip', blacklist=['JointGenotypingWf'])

# Empty test case; its test methods are attached by the test_corpus_zip() call below.
class TestGTEx(unittest.TestCase):
    pass
test_corpus_zip(TestGTEx, "GTEx",
    'https://github.com/broadinstitute/gtex-pipeline/archive/a228198.zip',
    # need URI import
    # something weird (in lark) with metasoft, possibly a lark bug -- try lark 0.7
    blacklist=["rnaseq_pipeline_bam","rnaseq_pipeline_fastq","metasoft"])

# Empty test case; its test methods are attached by the test_corpus_zip() call below.
class TestTOPMed(unittest.TestCase):
    pass
test_corpus_zip(TestTOPMed, "TOPMed",
    'https://github.com/DataBiosphere/topmed-workflows/archive/31ba8a7.zip',
    # need URI import
    blacklist=['CRAM_md5sum_checker_wrapper', 'checker-workflow-wrapping-alignment-workflow',
    'topmed_freeze3_calling', 'topmed_freeze3_calling_checker', 'u_of_michigan_aligner_checker'])
3 changes: 3 additions & 0 deletions tests/test_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,9 @@ def test_scatter_conditional(self):
output {
Int z = stdout()
}
meta {
foo: "bar"
}
}
workflow contrived {
Array[Int] xs = [1, 2, 3]
Expand Down

0 comments on commit 082c1aa

Please sign in to comment.