Skip to content

Commit

Permalink
Merge 7db1690 into fd8ab70
Browse files Browse the repository at this point in the history
  • Loading branch information
mlin committed Apr 9, 2020
2 parents fd8ab70 + 7db1690 commit 2ab7c4b
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 64 deletions.
3 changes: 2 additions & 1 deletion WDL/Error.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class SourcePosition(
"""
Source position attached to AST nodes and exceptions; NamedTuple of ``uri`` the filename/URI
passed to :func:`WDL.load` or a WDL import statement, which may be relative; ``abspath`` the
absolute filename/URI; and int positions ``line`` ``end_line`` ``column`` ``end_column``
absolute filename/URI; and one-based int positions ``line`` ``end_line`` ``column``
``end_column``
"""


Expand Down
35 changes: 33 additions & 2 deletions WDL/Tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1135,6 +1135,12 @@ def visit(node: SourceNode) -> None:
return self._nodes_by_id[workflow_node_id]


SourceComment = NamedTuple("SourceComment", [("pos", Error.SourcePosition), ("text", str)])
"""
Position and text of a comment. The text includes the ``#`` and any preceding or trailing
spaces/tabs.
"""

DocImport = NamedTuple(
"DocImport",
[
Expand All @@ -1146,7 +1152,8 @@ def visit(node: SourceNode) -> None:
],
)
"""
Represents one imported document, with position of the import statement, import URI, namespace, struct type aliases, and (after typechecking) the ``Document`` object.
Represents one imported document, with position of the import statement, import URI, namespace,
struct type aliases, and (after typechecking) the ``Document`` object.
"""


Expand All @@ -1163,6 +1170,23 @@ class Document(SourceNode):
Original WDL source code text
"""

source_lines: List[str]
"""
:type: List[str]
Original WDL source code text split by newlines. ``SourcePosition`` line numbers are
one-based, so line number ``L`` corresponds to ``source_lines[L-1]``.
"""

source_comments: List[Optional[SourceComment]]
"""
:type: List[Optional[SourceComment]]
Lookup table for source code comments. ``source_comments`` has the same length as
``source_lines``, and each entry is the :class:`WDL.Tree.SourceComment` found on the
corresponding source line, or ``None`` if the line has no comment.
"""

imports: List[DocImport]
"""
:type: List[DocImport]
Expand All @@ -1183,15 +1207,22 @@ def __init__(
struct_typedefs: Dict[str, StructTypeDef],
tasks: List[Task],
workflow: Optional[Workflow],
comments: List[SourceComment],
) -> None:
super().__init__(pos)
self.source_text = source_text
self.imports = imports
self.struct_typedefs = Env.Bindings()
for name, struct_typedef in struct_typedefs.items():
self.struct_typedefs = self.struct_typedefs.bind(name, struct_typedef)
self.tasks = tasks
self.workflow = workflow
self.source_text = source_text
self.source_lines = source_text.split("\n")
self.source_comments = [None for _ in self.source_lines]
for comment in comments:
assert self.source_comments[comment.pos.line - 1] is None
assert self.source_lines[comment.pos.line - 1].endswith(comment.text)
self.source_comments[comment.pos.line - 1] = comment

@property
def children(self) -> Iterable[SourceNode]:
Expand Down
1 change: 1 addition & 0 deletions WDL/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
Document,
WorkflowNode,
WorkflowSection,
SourceComment,
)
from . import runtime

Expand Down
16 changes: 9 additions & 7 deletions WDL/_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,17 @@
CNAME: /[a-zA-Z][a-zA-Z0-9_]*/
COMMENT: "#" /[^\r\n]*/ NEWLINE
COMMENT: /[ \t]*/ "#" /[^\r\n]*/
SPACE: /[ \t]+/
%import common.INT
%import common.SIGNED_INT
%import common.FLOAT
%import common.SIGNED_FLOAT
%import common.ESCAPED_STRING
%import common.WS
%import common.NEWLINE
%ignore WS
%ignore SPACE
%ignore NEWLINE
%ignore COMMENT
///////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -475,11 +476,12 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
%import common.NEWLINE
COMMENT: "#" /[^\r\n]*/ NEWLINE
%ignore COMMENT
SPACE: /[ \t]+/
COMMENT: /[ \t]*/ "#" /[^\r\n]*/
%import common.WS
%ignore WS
%ignore SPACE
%ignore NEWLINE
%ignore COMMENT
"""
keywords["development"] = set(
"Array Float Int Map None Pair String alias as call command else false if import input left meta object output parameter_meta right runtime scatter struct task then true workflow".split(
Expand Down
68 changes: 51 additions & 17 deletions WDL/_parser.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,31 @@
# pylint: skip-file
import inspect
from typing import List, Optional, Set
import threading
from typing import List, Optional, Set, Tuple
import lark
from .Error import SourcePosition
from . import Error, Tree, Type, Expr, _grammar

# memoize Lark parsers constructed for version & start symbol
_lark_cache = {}


def parse(grammar: str, txt: str, start: str) -> lark.Tree:
if (grammar, start) not in _lark_cache:
_lark_cache[(grammar, start)] = lark.Lark(
grammar, start=start, parser="lalr", propagate_positions=True
)
return _lark_cache[(grammar, start)].parse(txt + ("\n" if not txt.endswith("\n") else ""))
_lark_comments_buffer = []
_lark_lock = threading.Lock()


def parse(grammar: str, txt: str, start: str) -> Tuple[lark.Tree, List[lark.Token]]:
    """
    Parse WDL source text using a (memoized) lark parser for the given grammar & start symbol.

    :returns: the parse tree, plus the COMMENT tokens collected by the lexer callback during
              this parse (for attaching source comments to the Document)
    """
    with _lark_lock:
        if (grammar, start) not in _lark_cache:
            _lark_cache[(grammar, start)] = lark.Lark(
                grammar,
                start=start,
                parser="lalr",
                propagate_positions=True,
                # lexer callback appends each COMMENT token to the shared module buffer
                lexer_callbacks={"COMMENT": _lark_comments_buffer.append},
            )
        try:
            # ensure text ends with newline, which the grammar's terminals expect
            tree = _lark_cache[(grammar, start)].parse(
                txt + ("\n" if not txt.endswith("\n") else "")
            )
            comments = _lark_comments_buffer.copy()
        finally:
            # clear the buffer even if parsing raised, so comment tokens from a failed parse
            # can't leak into the next parse() call (which would corrupt its comment list)
            _lark_comments_buffer.clear()
    return (tree, comments)


def to_int(x):
Expand Down Expand Up @@ -228,11 +239,15 @@ class _DocTransformer(_ExprTransformer, _TypeTransformer):

_keywords: Set[str]
_source_text: str
_comments: List[lark.Token]

def __init__(
    self, source_text: str, keywords: Set[str], comments: List[lark.Token], *args, **kwargs
):
    """Record the original source text, reserved keywords, and lexed comment tokens."""
    super().__init__(*args, **kwargs)
    self._comments = comments
    self._keywords = keywords
    self._source_text = source_text

def _check_keyword(self, pos, name):
if name in self._keywords:
Expand Down Expand Up @@ -499,7 +514,24 @@ def document(self, items, meta):
imports.append(item)
else:
assert False
return Tree.Document(self._source_text, self._sp(meta), imports, structs, tasks, workflow)
comments = [
Tree.SourceComment(
SourcePosition(
uri=self.uri,
abspath=self.abspath,
line=comment.line,
column=comment.column,
end_line=comment.end_line or comment.line,
end_column=comment.end_column or (comment.column + len(comment.value)),
),
text=comment.value,
)
for comment in self._comments
]

return Tree.Document(
self._source_text, self._sp(meta), imports, structs, tasks, workflow, comments
)


# have lark pass the 'meta' with line/column numbers to each transformer method
Expand All @@ -511,7 +543,7 @@ def document(self, items, meta):

def parse_expr(txt: str, version: Optional[str] = None) -> Expr.Base:
try:
return _ExprTransformer().transform(parse(_grammar.get(version)[0], txt, "expr"))
return _ExprTransformer().transform(parse(_grammar.get(version)[0], txt, "expr")[0])
except lark.exceptions.UnexpectedInput as exn:
pos = SourcePosition(
uri="(buffer)",
Expand All @@ -529,8 +561,9 @@ def parse_expr(txt: str, version: Optional[str] = None) -> Expr.Base:
def parse_tasks(txt: str, version: Optional[str] = None) -> List[Tree.Task]:
    """
    Parse task definitions from WDL source text.

    :param txt: WDL source code containing task definition(s)
    :param version: WDL language version (selects the grammar; None = default)
    :raises: the underlying error when the transformer fails inside lark's VisitError wrapper
    """
    try:
        (grammar, keywords) = _grammar.get(version)
        raw_ast, comments = parse(grammar, txt, "tasks")
        return _DocTransformer(source_text=txt, keywords=keywords, comments=comments).transform(
            raw_ast
        )
    except lark.exceptions.VisitError as exn:
        # unwrap & re-raise the original exception raised within the transformer
        raise exn.__context__
Expand All @@ -541,7 +574,7 @@ def parse_document(
) -> Tree.Document:
npos = SourcePosition(uri=uri, abspath=abspath, line=0, column=0, end_line=0, end_column=0)
if not txt.strip():
return Tree.Document(txt, npos, [], {}, [], None,)
return Tree.Document(txt, npos, [], {}, [], None, [])
if version is None:
# for now assume the version is 1.0 if the first line is "version <number>"
# otherwise draft-2
Expand All @@ -557,9 +590,10 @@ def parse_document(
except KeyError:
raise Error.SyntaxError(npos, "unknown WDL version " + version) from None
try:
raw_ast, comments = parse(grammar, txt, "document")
return _DocTransformer(
source_text=txt, uri=uri, abspath=abspath, keywords=keywords
).transform(parse(grammar, txt, "document"))
source_text=txt, uri=uri, abspath=abspath, keywords=keywords, comments=comments
).transform(raw_ast)
except lark.exceptions.UnexpectedInput as exn:
pos = SourcePosition(
uri=(uri if uri else "(buffer)"),
Expand Down
15 changes: 6 additions & 9 deletions examples/paste_wdl_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,6 @@ def main():
# run SetParents to facilitate getting from a called task to its containing document
WDL.Walker.SetParents()(doc)

# original document lines for surgery
doc_lines = doc.source_text.split("\n")

# for each call
tasks_processed = set()
for call in calls(doc.workflow):
Expand All @@ -57,30 +54,30 @@ def main():
new_task_name = "__".join(call.callee_id)
assert isinstance(call.callee, WDL.Task), "can't import sub-workflows"
# rewrite the call with the new task name
doc_lines[call.pos.line - 1] = rewrite_line(
doc_lines[call.pos.line - 1],
doc.source_lines[call.pos.line - 1] = rewrite_line(
doc.source_lines[call.pos.line - 1],
"call",
f"{new_task_name} as {call.name}",
old_name="[0-9A-Za-z_\\.]+(\\s+as\\s+[0-9A-Za-z_]+)?",
)
if new_task_name not in tasks_processed:
task_lines = task_source_lines(call.callee)
task_lines[0] = rewrite_line(task_lines[0], "task", new_task_name)
doc_lines += ["\n"] + task_lines + ["\n"]
doc.source_lines += ["\n"] + task_lines + ["\n"]
tasks_processed.add(new_task_name)

# blank out the imports
for imp in doc.imports:
for ln in range(imp.pos.line - 1, imp.pos.end_line):
doc_lines[ln] = ""
doc.source_lines[ln] = ""

# print output
if args.o:
with open(args.o, "w") as outfile:
for line in doc_lines:
for line in doc.source_lines:
print(line, file=outfile)
else:
for line in doc_lines:
for line in doc.source_lines:
print(line)


Expand Down
8 changes: 8 additions & 0 deletions stubs/lark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

from typing import Any

class Token:
    """
    Minimal type stub for ``lark.Token``: the lexed text plus its source position fields
    (presumably one-based line/column, per lark's convention — the parser feeds these
    directly into one-based ``SourcePosition`` values).
    """

    value: str
    line: int
    end_line: int
    column: int
    end_column: int
    ...

class Transformer:
def transform(self,tree) -> Any:
...
Expand Down
62 changes: 34 additions & 28 deletions tests/test_1doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,43 +502,49 @@ def test_unify(self):

class TestDoc(unittest.TestCase):
def test_count_foo(self):
doc = r"""
workflow count_lines_matching {
call grep
call count_lines {
input:
in = grep.out
}
}
task grep {
File in
String pattern
doc = r"""#foo
workflow count_lines_matching {
call grep
call count_lines {
input:
in = grep.out # bar
}
}
task grep {
File in
String pattern
command {
grep ${pattern} ${in} > ans
}
command {
grep ${pattern} ${in} > ans
}
output {
File out = "ans"
}
}
task count_lines {
File in
output {
File out = "ans"
}
}
#baz
task count_lines {
File in
command {
wc -l ${in}
}
command {
wc -l ${in}
}
output {
Int out = read_int(stdout())
}
}
"""
output {
Int out = read_int(stdout())
}
}
#bas
"""
doc = WDL.parse_document(doc)
self.assertIsInstance(doc.workflow, WDL.Tree.Workflow)
self.assertEqual(len(doc.workflow.body), 2)
self.assertEqual(len(doc.tasks), 2)
doc.typecheck()
self.assertEqual(doc.source_comments[0].text, "#foo")
self.assertEqual(doc.source_comments[5].text, " # bar")
self.assertEqual(doc.source_comments[20].text, " #baz ")
self.assertEqual(doc.source_comments[32].text, " #bas ")

def test_bam_chrom_counter(self):
doc = r"""
Expand Down

0 comments on commit 2ab7c4b

Please sign in to comment.