Skip to content

Commit

Permalink
feat: ✨ improved jmespath transformation
Browse files Browse the repository at this point in the history
* fixed the transformation at root level
* added unique and unique_count custom functions
  • Loading branch information
newgene committed Aug 25, 2023
1 parent 5d4ff4d commit 7682a53
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 17 deletions.
36 changes: 36 additions & 0 deletions biothings/utils/jmespath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
We can define jmespath custom functions here
See: https://jmespath.org/tutorial.html#custom-functions
from biothings.utils.jmespath import options as jmp_options
jmespath.search("unique(`foo`)", {}, options=jmp_options)
or
jmespath.compile("unique(`foo`)").search({}, options=jmp_options)
"""
import jmespath
from jmespath import functions


class CustomFunctions(functions.Functions):
    """Custom JMESPath functions: ``unique`` and ``unique_count``.

    This is a subclass of functions.Functions. The base class has logic that
    introspects its ``_func_*`` methods carrying a ``@functions.signature``
    decorator and registers them in its function table, so ``_func_unique``
    is exposed to JMESPath expressions as ``unique``.
    """

    @staticmethod
    def _distinct_values(arr):
        """Return the distinct items of ``arr`` as a list, in first-seen order.

        Not registered as a JMESPath function (no ``_func_`` prefix and no
        signature decorator); it is a shared helper for the functions below.
        """
        try:
            # Fast path: hashable items (strings, numbers, booleans, null).
            return list(dict.fromkeys(arr))
        except TypeError:
            # JSON arrays may contain objects or nested arrays, which are
            # unhashable dicts/lists in Python. Fall back to an
            # equality-based scan instead of failing the whole query.
            distinct = []
            for item in arr:
                if item not in distinct:
                    distinct.append(item)
            return distinct

    @functions.signature({"types": ["array"]})
    def _func_unique(self, arr):
        """Return a list of the unique values in an array.

        The result is sorted when the values can be ordered against each
        other. Values that cannot be ordered (e.g. JSON objects, or numbers
        mixed with strings) are returned in first-seen order instead of
        raising a TypeError.
        """
        distinct = self._distinct_values(arr)
        try:
            return sorted(distinct)
        except TypeError:
            return distinct

    @functions.signature({"types": ["array"]})
    def _func_unique_count(self, arr):
        """Return the number of unique values in an array.

        Unhashable items (JSON objects or nested arrays) are compared by
        equality rather than raising a TypeError.
        """
        return len(self._distinct_values(arr))


# Pass this ``options`` object (imported elsewhere as ``jmp_options``) via the
# ``options=`` keyword of a jmespath search call to make the custom functions
# defined on CustomFunctions available to the query.
options = jmespath.Options(custom_functions=CustomFunctions())
11 changes: 9 additions & 2 deletions biothings/web/options/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,20 @@ def translate(self, value):
value = re.sub(pattern, repl, value)

if self.keyword == "jmespath" and value:
# processing jmespath parameter to be a tuple of (target_field_path, jmes_query)
# processing jmespath parameter to be a tuple of (parent_path, target_field, jmes_query)
try:
target_field_path, jmes_query = value.split("|", maxsplit=1)
jmes_query = jmespath.compile(jmes_query)
except ValueError as err: # JMES exeptions are subclasses of ValueError
raise OptionError(keyword=self.keyword, reason="Invalid value for jmespath parameter", details=str(err))
value = target_field_path, jmes_query
# now split target_field_path into parent_path and target_field
target_field_path = target_field_path or "." # set to root field if not provided
try:
parent_path, target_field = target_field_path.rsplit(".", maxsplit=1)
except ValueError:
# if no . in the path, it means the target field is the root field
parent_path, target_field = "", target_field_path
value = parent_path, target_field, jmes_query

return value

Expand Down
17 changes: 8 additions & 9 deletions biothings/web/query/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from collections import UserDict, defaultdict

from biothings.utils.common import dotdict, traverse
from biothings.utils.jmespath import options as jmp_options


class FormatterDict(UserDict):
Expand Down Expand Up @@ -411,25 +412,23 @@ def trasform_jmespath(self, path: str, doc, options) -> None:
"""
# options.jmespath is already validated and processed as a tuple
# see biothings.web.options.manager.Coverter.translate
target_field_path, jmes_query = options.jmespath
target_field_path = target_field_path or "." # set to root field if not provided
try:
parent_path, target_field = target_field_path.rsplit(".", maxsplit=1)
except ValueError:
# if no . in the path, it means the target field is the root field
parent_path, target_field = "", target_field_path
parent_path, target_field, jmes_query = options.jmespath

if path == parent_path:
# we handle jmespath transformation at its parent field level,
# so that we can set a transformed value
target_field_value = doc.get(target_field) if target_field else doc
if target_field_value:
transformed_field_value = jmes_query.search(target_field_value)
# pass jmp_options to include our own custom jmespath functions
transformed_field_value = jmes_query.search(target_field_value, options=jmp_options)
if target_field:
doc[target_field] = transformed_field_value
else:
# if the target field is the root field, we need to replace the whole doc
doc = transformed_field_value
# note that we cannot use `doc = transformed_field_value` here, because
# it will break the reference to the original doc object
doc.clear()
doc.update(transformed_field_value)

def transform_aggs(self, res):
"""
Expand Down
27 changes: 27 additions & 0 deletions tests/utils/test_jmespath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import jmespath
import pytest
from jmespath.exceptions import UnknownFunctionError

from biothings.utils.jmespath import options as jmp_options


def test_customfunction_unique():
    """unique() needs jmp_options and yields the sorted distinct values."""
    expression = "foo|unique(@)"
    doc = {"foo": ["a", "b", "c", "e", "e", "c", "d", "a"]}

    # Plain jmespath must not know unique(): if this stops raising
    # UnknownFunctionError, we are accidentally overriding a built-in function.
    with pytest.raises(UnknownFunctionError):
        jmespath.search(expression, doc)

    result = jmespath.search(expression, doc, options=jmp_options)
    assert result == ["a", "b", "c", "d", "e"]


def test_customfunction_unique_count():
    """unique_count() needs jmp_options and counts the distinct values."""
    expression = "unique_count(@)"
    doc = ["a", "b", "c", "e", "e", "c", "d", "a"]

    # Plain jmespath must not know unique_count(): if this stops raising
    # UnknownFunctionError, we are accidentally overriding a built-in function.
    with pytest.raises(UnknownFunctionError):
        jmespath.search(expression, doc)

    compiled = jmespath.compile(expression)
    assert compiled.search(doc, options=jmp_options) == 5
24 changes: 18 additions & 6 deletions tests/web/options/test_translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,31 @@ def test_jmespath():
cvt = Converter(keyword="jmespath")

# a simple example
target_field_path, jmes_query = cvt.translate("tags|[?name==`Metadata`]")
assert target_field_path == "tags"
parent_path, target_field, jmes_query = cvt.translate("tags|[?name==`Metadata`]")
assert parent_path == ""
assert target_field == "tags"
assert isinstance(jmes_query, jmespath.parser.ParsedResult)
assert jmes_query.expression == "[?name==`Metadata`]"

# a more complex example
target_field_path, jmes_query = cvt.translate("aaa.bbb|[?(sub_a==`val_a`||sub_a==`val_aa`)&&sub_b==`val_b`]")
assert target_field_path == "aaa.bbb"
parent_path, target_field, jmes_query = cvt.translate(
"aaa.bbb|[?(sub_a==`val_a`||sub_a==`val_aa`)&&sub_b==`val_b`]"
)
assert parent_path == "aaa"
assert target_field == "bbb"
assert isinstance(jmes_query, jmespath.parser.ParsedResult)

# target_field_path can be empty if it operates on the root object
target_field_path, jmes_query = cvt.translate("|b")
assert target_field_path == ""
parent_path, target_field, jmes_query = cvt.translate("|b")
assert parent_path == ""
assert target_field == ""
assert isinstance(jmes_query, jmespath.parser.ParsedResult)
assert jmes_query.expression == "b"

# target_field_path can also be . if it operates on the root object
parent_path, target_field, jmes_query = cvt.translate(".|b")
assert parent_path == ""
assert target_field == ""
assert isinstance(jmes_query, jmespath.parser.ParsedResult)
assert jmes_query.expression == "b"

Expand Down

0 comments on commit 7682a53

Please sign in to comment.