Skip to content

Commit

Permalink
Reduce task digest sensitivity to comments & whitespace
Browse files Browse the repository at this point in the history
  • Loading branch information
mlin committed Jul 24, 2020
1 parent 36f442e commit d300543
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 14 deletions.
48 changes: 37 additions & 11 deletions WDL/runtime/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""
import hashlib
import json
import itertools
import os
import logging
from pathlib import Path
Expand Down Expand Up @@ -223,9 +224,11 @@ def _describe_task(doc, task: Tree.Task) -> str:
output_lines.append(f"# {struct_name} :: {structs[struct_name]}")

# excerpt task{} from document
output_lines += _excerpt(doc, task.pos)

# TODO (?): delete non-semantic whitespace, perhaps excise the meta & parameter_meta sections
# Possible future improvements:
# excise the meta & parameter_meta sections
# normalize order of declarations
# normalize whitespace within lines (not leading/trailing)
output_lines += _excerpt(doc, task.pos, [task.command.pos])

return "\n".join(output_lines).strip()

Expand All @@ -250,15 +253,38 @@ def _describe_struct_types(task: Tree.Task) -> Dict[str, str]:
return structs


def _excerpt(doc: Tree.Document, pos: Error.SourcePosition) -> List[str]:
def _excerpt(
doc: Tree.Document, pos: Error.SourcePosition, literals: List[Error.SourcePosition]
) -> List[str]:
"""
Excerpt the document's source lines indicated by pos : WDL.SourcePosition
TODO (?): delete comments from the source lines
Excerpt the document's source lines indicated by pos : WDL.SourcePosition. Delete comments,
blank lines, and leading/trailing whitespace from each line -- except those indicated by
literals.
"""

def clean(line: int, column: int = 1, end_column: Optional[int] = None) -> List[str]:
literal = next(
(True for lit in literals if line >= lit.line and line <= lit.end_line), False
)
comment = doc.source_comments[line - 1]
if comment and not literal:
assert comment.pos.line == line
if end_column is None:
end_column = comment.pos.column
else:
end_column = min(end_column, comment.pos.column)
txt = doc.source_lines[line-1][(column - 1) : end_column]
if literal:
return [txt]
txt = txt.strip()
return [txt] if txt else []

if pos.end_line == pos.line:
return [doc.source_lines[pos.line - 1][(pos.column - 1) : pos.end_column]]
return (
[doc.source_lines[pos.line - 1][(pos.column - 1) :]]
+ doc.source_lines[pos.line : (pos.end_line - 1)]
+ [doc.source_lines[pos.end_line - 1][: pos.end_column]]
return clean(pos.line, pos.column, pos.end_column)
return list(
itertools.chain(
clean(pos.line, pos.column),
*(clean(line_nr) for line_nr in range(pos.line+1, pos.end_line)),
clean(pos.end_line, 0, pos.end_column),
)
)
32 changes: 29 additions & 3 deletions tests/test_8cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,22 @@ class TestTaskRunner(unittest.TestCase):
test_wdl: str = R"""
version 1.0
task hello_blank {
input {
String who
# comment1
input {
String who # comment2
Array[String]? what
Map[String,Map[String,String]]? where
}
}
command <<<
# comment3
echo "Hello, ~{who}!"
>>>
output {
Int count = 12
}
}
"""
Expand Down Expand Up @@ -123,6 +129,26 @@ def test_input_digest_sorts_keys(self):
unordered_digest = CallCache(cfg=self.cfg, logger=self.logger).get_digest_for_inputs(unordered_inputs)
self.assertEqual(ordered_digest, unordered_digest)

def test_normalization(self):
    # Verify the normalized task description produced by _describe_task for the first task
    # in self.doc (presumably parsed from the test_wdl fixture during setup -- confirm).
    desc = WDL.runtime.cache._describe_task(self.doc, self.doc.tasks[0])
    # The expected text has no "comment1"/"comment2" lines and no blank lines, while the
    # "# comment3" line inside the command section is still present. The expectation is
    # compared after .strip(), so the leading/trailing newlines of the literal are ignored.
    self.assertEqual(desc, R"""
version 1.0
task hello_blank {
input {
String who
Array[String]? what
Map[String,Map[String,String]]? where
}
command <<<
# comment3
echo "Hello, ~{who}!"
>>>
output {
Int count = 12
}
}
""".strip())

def test_task_input_cache_matches_output(self):
# run task, check output matches what was stored in run_dir
cache = CallCache(cfg=self.cfg, logger=self.logger)
Expand Down

0 comments on commit d300543

Please sign in to comment.