Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 50 additions & 37 deletions jsonpath_rfc9535/segments.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,8 @@ class JSONPathRecursiveDescentSegment(JSONPathSegment):

def resolve(self, nodes: Iterable[JSONPathNode]) -> Iterable[JSONPathNode]:
"""Select descendants of each node in _nodes_."""
# The nondeterministic visitor never generates a pre order traversal, so we
# still use the deterministic visitor 20% of the time, to cover all
# permutations.
#
# XXX: This feels like a bit of a hack.
visitor = (
self._nondeterministic_visit
if self.env.nondeterministic and random.random() < 0.8 # noqa: S311, PLR2004
else self._visit
self._nondeterministic_visit if self.env.nondeterministic else self._visit
)

for node in nodes:
Expand Down Expand Up @@ -114,51 +107,51 @@ def _visit(self, node: JSONPathNode, depth: int = 1) -> Iterable[JSONPathNode]:
def _nondeterministic_visit(
    self,
    root: JSONPathNode,
    depth: int = 1,
) -> Iterable[JSONPathNode]:
    """Yield _root_ and its descendants in a randomly chosen order.

    Child nodes come from `_nondeterministic_children`. For every dequeued
    node a coin flip decides whether its children are yielded immediately
    or appended to the queue for later. When children are yielded
    immediately, their own children are merged into the queue at random
    positions, keeping the relative order of the nodes already queued and
    the relative order of the newly merged nodes.

    Args:
        root: The node to start the traversal from. It is always yielded
            first.
        depth: The depth recorded for the direct children of _root_.

    Raises:
        JSONPathRecursionError: If a dequeued node's recorded depth is
            greater than or equal to `self.env.max_recursion_depth`.
    """
    # (node, depth) tuples
    queue: Deque[Tuple[JSONPathNode, int]] = deque()

    # Visit the root node
    yield root

    # Queue root's children
    queue.extend([(child, depth) for child in _nondeterministic_children(root)])

    while queue:
        # A distinct name so the `depth` parameter is not shadowed.
        node, node_depth = queue.popleft()
        yield node

        if node_depth >= self.env.max_recursion_depth:
            raise JSONPathRecursionError(
                "recursion limit exceeded", token=self.token
            )

        # Randomly choose to visit child nodes now or queue them for later.
        visit_children = random.choice([True, False])  # noqa: S311

        for child in _nondeterministic_children(node):
            if visit_children:
                yield child

                # Queue grandchildren by randomly interleaving them into the
                # queue while maintaining queue and grandchild order.
                grandchildren = [
                    (grandchild, node_depth + 2)
                    for grandchild in _nondeterministic_children(child)
                ]

                # Each source iterator appears once per item it holds, so a
                # full-length random sample of the iterators picks a random
                # merge order; `next` then drains each source front to back.
                queue = deque(
                    [
                        next(n)
                        for n in random.sample(
                            [iter(queue)] * len(queue)
                            + [iter(grandchildren)] * len(grandchildren),
                            len(queue) + len(grandchildren),
                        )
                    ]
                )
            else:
                queue.append((child, node_depth + 1))

Expand All @@ -174,3 +167,23 @@ def __eq__(self, __value: object) -> bool:

def __hash__(self) -> int:
return hash(("..", self.selectors, self.token))


def _nondeterministic_children(node: JSONPathNode) -> Iterable[JSONPathNode]:
    """Yield a child node for each member or element of _node_'s value.

    Dict members are yielded in shuffled order; list elements are yielded
    in index order. Any other kind of value produces no children.
    """
    container = node.value

    if isinstance(container, dict):
        members = list(container.items())
        random.shuffle(members)
        keyed_values = iter(members)
    elif isinstance(container, list):
        keyed_values = enumerate(container)
    else:
        # Neither a dict nor a list: nothing to descend into.
        return

    for key, child_value in keyed_values:
        yield JSONPathNode(
            value=child_value,
            location=node.location + (key,),
            root=node.root,
        )
28 changes: 18 additions & 10 deletions jsonpath_rfc9535/utils/nondeterministic_descent.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,30 +121,38 @@ def breadth_first_visit(node: AuxNode) -> Iterable[AuxNode]:


def nondeterministic_visit(root: AuxNode) -> Iterable[AuxNode]:
    """Generate nodes rooted at _root_ from a nondeterministic traversal.

    _root_ is always yielded first. For every dequeued node a coin flip
    decides whether its children are yielded immediately or appended to the
    queue for later. When children are yielded immediately, their own
    children are merged into the queue at random positions, keeping the
    relative order of the nodes already queued and the relative order of
    the newly merged nodes.
    """
    yield root
    queue: Deque[AuxNode] = deque(root.children)

    while queue:
        _node = queue.popleft()
        yield _node

        # Randomly choose to visit child nodes now or queue them for later.
        visit_children = random.choice([True, False])

        for child in _node.children:
            if visit_children:
                yield child

                # Queue grandchildren by randomly interleaving them into the
                # queue while maintaining queue and grandchild order.
                grandchildren = child.children

                # Each source iterator appears once per item it holds, so a
                # full-length random sample of the iterators picks a random
                # merge order; `next` then drains each source front to back.
                queue = deque(
                    [
                        next(n)
                        for n in random.sample(
                            [iter(queue)] * len(queue)
                            + [iter(grandchildren)] * len(grandchildren),
                            len(queue) + len(grandchildren),
                        )
                    ]
                )
            else:
                queue.append(child)


def all_perms(root: AuxNode) -> List[Tuple[AuxNode, ...]]:
    """Return the distinct traversal orders found for the auxiliary tree _root_.

    Collects the results of 1000 nondeterministic traversals plus the
    pre-order traversal, de-duplicates them, and sorts them by their string
    representation.
    """
    distinct_orders = set()
    for _ in range(1000):
        distinct_orders.add(tuple(nondeterministic_visit(root)))
    distinct_orders.add(tuple(pre_order_visit(root)))
    return sorted(distinct_orders, key=str)
83 changes: 83 additions & 0 deletions tests/test_cts_nondeterminism.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Test against the JSONPath Compliance Test Suite with nondeterminism enabled.

The CTS is a submodule located in /tests/cts. After a git clone, run
`git submodule update --init` from the root of the repository.
"""

import json
import operator
from dataclasses import dataclass
from typing import Any
from typing import List
from typing import Optional
from typing import Tuple

import pytest

from jsonpath_rfc9535 import JSONPathEnvironment
from jsonpath_rfc9535 import JSONValue


@dataclass
class Case:
    """One entry from the "tests" array of the compliance test suite JSON file."""

    name: str
    selector: str
    document: JSONValue = None
    # Compared for equality with the query output when `results` is None.
    result: Any = None
    # When not None, the query output must be one of these lists.
    results: Optional[List[Any]] = None
    # Cases with a truthy value here are excluded by `valid_cases()`.
    invalid_selector: Optional[bool] = None


def cases() -> List[Case]:
    """Load every test case from the compliance test suite JSON file."""
    with open("tests/cts/cts.json", encoding="utf8") as cts_file:
        raw_tests = json.load(cts_file)["tests"]
    return [Case(**fields) for fields in raw_tests]


def valid_cases() -> List[Case]:
    """Return the test cases that are not flagged as having an invalid selector."""
    every_case = cases()
    return [
        candidate for candidate in every_case if not candidate.invalid_selector
    ]


def nondeterministic_cases() -> List[Case]:
    """Return the valid test cases whose `results` attribute is a list."""
    candidates = valid_cases()
    return [
        candidate for candidate in candidates if isinstance(candidate.results, list)
    ]


class MockEnv(JSONPathEnvironment):
    """A JSONPath environment with the `nondeterministic` flag switched on."""

    nondeterministic = True


@pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name"))
def test_nondeterminism_valid_cases(case: Case) -> None:
    """Check each valid case against its expected result(s) with nondeterminism on."""
    assert case.document is not None
    values = MockEnv().find(case.selector, case.document).values()

    if case.results is None:
        # Only one acceptable output.
        assert values == case.result
        return

    # Several acceptable outputs: ours must be one of them.
    assert values in case.results


@pytest.mark.parametrize(
    "case", nondeterministic_cases(), ids=operator.attrgetter("name")
)
def test_nondeterminism(case: Case) -> None:
    """Test that we agree with CTS when it comes to nondeterministic results."""
    assert case.document is not None
    assert case.results is not None

    def _result_repr(rv: List[object]) -> Tuple[str, ...]:
        """Return a hashable representation of a result list."""
        return tuple(map(str, rv))

    env = MockEnv()

    # Repeat enough times to have a high probability that we've covered all
    # valid permutations.
    observed = set()
    for _ in range(1000):
        values = env.find(case.selector, case.document).values()
        observed.add(_result_repr(values))

    assert len(observed) == len(case.results)
    assert observed == {_result_repr(result) for result in case.results}
Loading