Skip to content

Commit

Permalink
refactoring load() to permit overriding file-reading logic (#205)
Browse files Browse the repository at this point in the history
* replaces the `import_uri` callback with a more general override for reading the WDL souce code 
* `SourcePosition.filename` is replaced with `SourcePosition.uri` and `SourcePosition.abspath`. The former field is the uri as given to `load()` or in the WDL import statement. The latter field is the absolute filename after resolution of relative uri.
* simplify and expose relative import resolution logic
  • Loading branch information
mlin committed Jul 16, 2019
1 parent 9dacf83 commit 61f58eb
Show file tree
Hide file tree
Showing 7 changed files with 295 additions and 180 deletions.
28 changes: 15 additions & 13 deletions WDL/CLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def check(uri=None, path=None, check_quant=True, shellcheck=True, **kwargs):
Lint._shellcheck_available = False

for uri1 in uri or []:
doc = load(uri1, path or [], check_quant=check_quant, import_uri=import_uri)
doc = load(uri1, path or [], check_quant=check_quant, read_source=read_source)

Lint.lint(doc)

Expand Down Expand Up @@ -218,9 +218,7 @@ def print_error(exn):
else:
if isinstance(getattr(exn, "pos", None), SourcePosition):
print(
"({} Ln {} Col {}) {}".format(
exn.pos.filename, exn.pos.line, exn.pos.column, str(exn)
),
"({} Ln {} Col {}) {}".format(exn.pos.uri, exn.pos.line, exn.pos.column, str(exn)),
file=sys.stderr,
)
else:
Expand Down Expand Up @@ -249,10 +247,14 @@ def print_error(exn):
quant_warning = True


def import_uri(uri):
dn = tempfile.mkdtemp(prefix="miniwdl_import_uri_")
subprocess.check_call(["wget", "-nv", uri], cwd=dn)
return glob.glob(dn + "/*")[0]
async def read_source(uri, path, importer_uri):
if uri.startswith("http:") or uri.startswith("https:"):
dn = tempfile.mkdtemp(prefix="miniwdl_import_uri_")
subprocess.check_call(["wget", "-nv", uri], cwd=dn)
fn = glob.glob(dn + "/*")[0]
with open(fn, "r") as infile:
return ReadSourceResult(infile.read(), os.path.abspath(fn))
return await read_source_default(uri, path, importer_uri)


def fill_run_subparser(subparsers):
Expand Down Expand Up @@ -301,7 +303,7 @@ def runner(
uri, inputs, input_file, json_only, empty, check_quant, rundir=None, path=None, **kwargs
):
# load WDL document
doc = load(uri, path or [], check_quant=check_quant, import_uri=import_uri)
doc = load(uri, path or [], check_quant=check_quant, read_source=read_source)

# validate the provided inputs and prepare Cromwell-style JSON
target, input_env, input_json = runner_input(doc, inputs, input_file, empty)
Expand All @@ -323,7 +325,7 @@ def runner(
except Error.EvalError as exn:
print(
"({} Ln {} Col {}) {}, {}".format(
exn.pos.filename, exn.pos.line, exn.pos.column, exn.__class__.__name__, str(exn)
exn.pos.uri, exn.pos.line, exn.pos.column, exn.__class__.__name__, str(exn)
),
file=sys.stderr,
)
Expand All @@ -335,7 +337,7 @@ def runner(
pos = getattr(exn.__cause__, "pos")
print(
"({} Ln {} Col {}) {}, {}".format(
pos.filename,
pos.uri,
pos.line,
pos.column,
exn.__cause__.__class__.__name__,
Expand Down Expand Up @@ -364,7 +366,7 @@ def runner_input_completer(prefix, parsed_args, **kwargs):
argcomplete.warn("file not found: " + uri)
return []
try:
doc = load(uri, parsed_args.path, parsed_args.check_quant, import_uri=import_uri)
doc = load(uri, parsed_args.path, parsed_args.check_quant, read_source=read_source)
except Exception as exn:
argcomplete.warn(
"unable to load {}; try 'miniwdl check' on it ({})".format(uri, str(exn))
Expand Down Expand Up @@ -699,7 +701,7 @@ def cromwell(
path = path or []

# load WDL document
doc = load(uri, path, check_quant=check_quant, import_uri=import_uri)
doc = load(uri, path, check_quant=check_quant, read_source=read_source)

# validate the provided inputs and prepare Cromwell-style JSON
target, _, input_json = runner_input(doc, inputs, input_file, empty)
Expand Down
27 changes: 20 additions & 7 deletions WDL/Error.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,24 @@
from . import Type


SourcePosition = NamedTuple(
"SourcePosition",
[("filename", str), ("line", int), ("column", int), ("end_line", int), ("end_column", int)],
)
"""Source file, line, and column, attached to each AST node"""
class SourcePosition(
NamedTuple(
"SourcePosition",
[
("uri", str),
("abspath", str),
("line", int),
("column", int),
("end_line", int),
("end_column", int),
],
)
):
"""
Source position attached to AST nodes and exceptions; NamedTuple of ``uri`` the filename/URI
passed to :func:`WDL.load` or a WDL import statement, which may be relative; ``abspath`` the
absolute filename/URI; and int positions ``line`` ``end_line`` ``column`` ``end_column``
"""


class SyntaxError(Exception):
Expand Down Expand Up @@ -53,13 +66,13 @@ def __init__(self, pos: SourcePosition) -> None:
def __lt__(self, rhs: TVSourceNode) -> bool:
if isinstance(rhs, SourceNode):
return (
self.pos.filename,
self.pos.abspath,
self.pos.line,
self.pos.column,
self.pos.end_line,
self.pos.end_column,
) < (
rhs.pos.filename,
rhs.pos.abspath,
rhs.pos.line,
rhs.pos.column,
rhs.pos.end_line,
Expand Down
9 changes: 6 additions & 3 deletions WDL/Lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
assert isinstance(pos, WDL.SourcePosition)
assert isinstance(lint_class, str) and isinstance(message, str)
print(json.dumps({
"filename" : pos.filename,
"uri" : pos.uri,
"abspath" : pos.abspath,
"line" : pos.line,
"end_line" : pos.end_line,
"column" : pos.column,
Expand Down Expand Up @@ -816,7 +817,8 @@ def task(self, obj: Tree.Task) -> Any:
obj,
"SC{} {}".format(item["code"], item["message"]),
Error.SourcePosition(
filename=obj.command.pos.filename,
uri=obj.command.pos.uri,
abspath=obj.command.pos.abspath,
line=line,
column=column,
end_line=line,
Expand Down Expand Up @@ -861,7 +863,8 @@ def task(self, obj: Tree.Task) -> Any:
obj,
"command indented with both spaces & tabs",
Error.SourcePosition(
filename=obj.command.pos.filename,
uri=obj.command.pos.uri,
abspath=obj.command.pos.abspath,
line=obj.command.pos.line + ofs,
column=1,
end_line=obj.command.pos.line + ofs,
Expand Down
111 changes: 80 additions & 31 deletions WDL/Tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import os
import errno
import itertools
import asyncio
from typing import (
Any,
List,
Expand All @@ -24,6 +25,7 @@
Generator,
Set,
NamedTuple,
Awaitable,
)
from abc import ABC, abstractmethod
from .Error import SourcePosition, SourceNode
Expand Down Expand Up @@ -1255,54 +1257,79 @@ def typecheck(self, check_quant: bool = True) -> None:
self.workflow.typecheck(self, check_quant=check_quant)


def load(
uri: str,
path: Optional[List[str]] = None,
check_quant: bool = True,
import_uri: Optional[Callable[[str], str]] = None,
import_max_depth: int = 10,
source_text: Optional[str] = None,
) -> Document:
path = path or []
if source_text is None:
if uri.startswith("file://"):
uri = uri[7:]
elif uri.find("://") > 0 and import_uri:
uri = import_uri(uri)
# search cwd and path for an extant file
fn = next(
async def resolve_file_import(uri: str, path: List[str], importer: Optional[Document]) -> str:
if uri.startswith("http://") or uri.startswith("https://"):
# for now we do nothing with web URIs
return uri
if uri.startswith("file:///"):
uri = uri[7:]
if os.path.isabs(uri):
# given an already-absolute filename, just normalize it
ans = os.path.abspath(uri)
else:
# resolving a relative import: before searching the user-provided path directories, try the
# directory of the importing document (if any), or the current working directory
# (otherwise)
path = path + [os.path.dirname(importer.pos.abspath) if importer else os.getcwd()]
ans = next(
(
fn
for fn in ([uri] + [os.path.join(dn, uri) for dn in reversed(path)])
if os.path.exists(fn)
for fn in (os.path.abspath(os.path.join(dn, uri)) for dn in reversed(path))
if os.path.isfile(fn)
),
None,
)
if not fn:
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), uri)
# read the document source text
with open(fn, "r") as infile:
source_text = infile.read()
path = path + [os.path.dirname(fn)]
if ans and os.path.isfile(ans):
return ans
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), uri)


ReadSourceResult = NamedTuple("ReadSourceResult", [("source_text", str), ("abspath", str)])


async def read_source_default(
uri: str, path: List[str], importer: Optional[Document]
) -> ReadSourceResult:
abspath = await resolve_file_import(uri, path, importer)
# TODO: actual async read
with open(abspath, "r") as infile:
return ReadSourceResult(source_text=infile.read(), abspath=abspath)


async def load_async(
uri: str,
path: Optional[List[str]] = None,
check_quant: bool = True,
read_source: Optional[
Callable[[str, List[str], Optional[Document]], Awaitable[ReadSourceResult]]
] = None,
import_max_depth: int = 10,
importer: Optional[Document] = None,
) -> Document:
path = list(path) if path is not None else []
read_source = read_source or read_source_default
read_rslt = await read_source(uri, path, importer)
# parse the document
doc = _parser.parse_document(source_text, uri=uri)
assert isinstance(doc, Document)
doc = _parser.parse_document(read_rslt.source_text, uri=uri, abspath=read_rslt.abspath)
assert doc.pos.uri == uri and doc.pos.abspath.endswith(os.path.basename(doc.pos.uri))
# recursively descend into document's imports, and store the imported
# documents into doc.imports
# TODO: are we supposed to do something smart for relative imports
# within a document loaded by URI?
# TODO: concurrent imports
for i in range(len(doc.imports)):
imp = doc.imports[i]
if import_max_depth <= 1:
raise Error.ImportError(
imp.pos, imp.uri, "exceeded import_max_depth; circular imports?"
)
try:
subdoc = load(
subdoc = await load_async(
imp.uri,
path,
path=path,
check_quant=check_quant,
import_uri=import_uri,
read_source=read_source,
importer=doc,
import_max_depth=(import_max_depth - 1),
)
except Exception as exn:
Expand All @@ -1313,16 +1340,38 @@ def load(
try:
doc.typecheck(check_quant=check_quant)
except Error.ValidationError as exn:
exn.source_text = source_text
exn.source_text = read_rslt.source_text
raise exn
except Error.MultipleValidationErrors as multi:
for exn in multi.exceptions:
if not exn.source_text:
exn.source_text = source_text
exn.source_text = read_rslt.source_text
raise multi
return doc


def load(
uri: str,
path: Optional[List[str]] = None,
check_quant: bool = True,
read_source: Optional[
Callable[[str, List[str], Optional[Document]], Awaitable[ReadSourceResult]]
] = None,
import_max_depth: int = 10,
importer: Optional[Document] = None,
) -> Document:
return asyncio.get_event_loop().run_until_complete(
load_async(
uri,
path=path,
importer=importer,
check_quant=check_quant,
read_source=read_source,
import_max_depth=import_max_depth,
)
)


#
# Typechecking helper functions
#
Expand Down
Loading

0 comments on commit 61f58eb

Please sign in to comment.