Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Python JSONPath RFC 9535 Change Log

## Version 0.1.3 (unreleased)

**Fixes**

- Fixed decoding of escape sequences in quoted name selectors and string literals. We now raise a `JSONPathSyntaxError` for invalid code points.
- Fixed parsing of number literals with an exponent. We now allow 'e' to be upper case.
- Fixed handling of trailing commas in bracketed segments. We now raise a `JSONPathSyntaxError` in such cases.
- Fixed handling of invalid number literals. We now raise a syntax error for invalid leading zeros and extra negative signs.

## Version 0.1.2

**Fixes**
Expand Down
2 changes: 1 addition & 1 deletion jsonpath_rfc9535/__about__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.2"
__version__ = "0.1.3"
4 changes: 2 additions & 2 deletions jsonpath_rfc9535/lex.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
RE_PROPERTY = re.compile(r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*")
RE_INDEX = re.compile(r"-?[0-9]+")
RE_INT = re.compile(r"-?[0-9]+")
RE_EXPONENT = re.compile(r"e[+-]?[0-9]+")
RE_NEGATIVE_EXPONENT = re.compile(r"e-[0-9]+")
RE_EXPONENT = re.compile(r"[eE][+-]?[0-9]+")
RE_NEGATIVE_EXPONENT = re.compile(r"[eE]-[0-9]+")
RE_FUNCTION_NAME = re.compile(r"[a-z][a-z_0-9]*")
RE_AND = re.compile(r"&&")
RE_OR = re.compile(r"\|\|")
Expand Down
151 changes: 142 additions & 9 deletions jsonpath_rfc9535/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from __future__ import annotations

import json
from typing import TYPE_CHECKING
from typing import Callable
from typing import Dict
Expand Down Expand Up @@ -312,6 +311,7 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto
if stream.peek.type_ != TokenType.RBRACKET:
stream.expect_peek(TokenType.COMMA)
stream.next_token()
stream.expect_peek_not(TokenType.RBRACKET, "unexpected trailing comma")

stream.next_token()

Expand Down Expand Up @@ -362,11 +362,29 @@ def parse_string_literal(self, stream: TokenStream) -> Expression:
)

def parse_integer_literal(self, stream: TokenStream) -> Expression:
    """Parse the current token of _stream_ as an integer literal.

    The token text may carry an exponent (for example `1e2`), so it is
    converted with `float` first and then truncated with `int`.

    Raises:
        JSONPathSyntaxError: If the digits before any exponent have a leading
            zero, or if the token text can not be converted to an integer.
    """
    token = stream.current
    value = token.value

    # Look only at the digits before any exponent, ignoring one leading minus
    # sign, so that "01" and "-01" are rejected while "0", "-0" and "0e1" are
    # accepted.
    unsigned = value[1:] if value.startswith("-") else value
    int_part = unsigned.lower().partition("e")[0]
    if len(int_part) > 1 and int_part.startswith("0"):
        raise JSONPathSyntaxError("invalid integer literal", token=token)

    # Convert to float first to handle scientific notation.
    try:
        return IntegerLiteral(token, value=int(float(value)))
    except (ValueError, OverflowError) as err:
        # OverflowError: a huge exponent makes float() return infinity, which
        # int() refuses to convert.
        raise JSONPathSyntaxError(
            "invalid integer literal", token=token
        ) from err

def parse_float_literal(self, stream: TokenStream) -> Expression:
    """Parse the current token of _stream_ as a float literal.

    Raises:
        JSONPathSyntaxError: If the digits before the fraction or exponent
            have a leading zero, or if the token text can not be converted
            to a float.
    """
    token = stream.current
    value = token.value

    # Isolate the integer part: drop one leading minus sign, then cut at the
    # first exponent marker and the first decimal point. This rejects "00.5"
    # and "-00.5" while still accepting "0.5", "-0.5" and "0e-2".
    unsigned = value[1:] if value.startswith("-") else value
    int_part = unsigned.lower().partition("e")[0].partition(".")[0]
    if len(int_part) > 1 and int_part.startswith("0"):
        raise JSONPathSyntaxError("invalid float literal", token=token)

    try:
        return FloatLiteral(token, value=float(value))
    except ValueError as err:
        raise JSONPathSyntaxError(
            "invalid float literal", token=token
        ) from err

def parse_prefix_expression(self, stream: TokenStream) -> Expression:
tok = stream.next_token()
Expand Down Expand Up @@ -514,12 +532,127 @@ def _decode_string_literal(self, token: Token) -> str:
value = token.value.replace('"', '\\"').replace("\\'", "'")
else:
value = token.value
try:
rv = json.loads(f'"{value}"')
assert isinstance(rv, str)
return rv
except json.JSONDecodeError as err:
raise JSONPathSyntaxError(str(err).split(":")[1], token=token) from None

return self._unescape_string(value, token)

def _unescape_string(self, value: str, token: Token) -> str:
    """Return _value_ with its backslash escape sequences decoded.

    Characters that are not part of an escape sequence are copied through
    after being checked with `_string_from_codepoint`.

    Args:
        value: The text of a string literal, without surrounding quotes.
        token: The token _value_ came from, used for error reporting.

    Raises:
        JSONPathSyntaxError: If _value_ ends with a lone backslash, or if a
            helper rejects an escape sequence or character.
    """
    unescaped: List[str] = []
    length = len(value)
    index = 0

    while index < length:
        ch = value[index]
        if ch == "\\":
            index += 1  # step onto the character that names the escape
            if index >= length:
                # A trailing backslash has nothing to escape. Report it as a
                # syntax error instead of letting an IndexError escape.
                raise JSONPathSyntaxError(
                    f"incomplete escape sequence at index {token.index + index - 1}",
                    token=token,
                )
            _ch, index = self._decode_escape_sequence(value, index, token)
            unescaped.append(_ch)
        else:
            # Called only for validation; the return value is not needed.
            self._string_from_codepoint(ord(ch), token)
            unescaped.append(ch)
        index += 1
    return "".join(unescaped)

def _decode_escape_sequence( # noqa: PLR0911
self, value: str, index: int, token: Token
) -> Tuple[str, int]:
ch = value[index]
if ch == '"':
return '"', index
if ch == "\\":
return "\\", index
if ch == "/":
return "/", index
if ch == "b":
return "\x08", index
if ch == "f":
return "\x0c", index
if ch == "n":
return "\n", index
if ch == "r":
return "\r", index
if ch == "t":
return "\t", index
if ch == "u":
codepoint, index = self._decode_hex_char(value, index, token)
return self._string_from_codepoint(codepoint, token), index

raise JSONPathSyntaxError(
f"unknown escape sequence at index {token.index + index - 1}",
token=token,
)

def _decode_hex_char(self, value: str, index: int, token: Token) -> Tuple[int, int]:
    """Decode a `uXXXX` escape, or a `uXXXX\\uXXXX` surrogate pair, in _value_.

    On entry _index_ addresses the `u` that follows the backslash.

    Returns:
        A `(codepoint, index)` tuple where `index` addresses the last hex
        digit that was consumed.

    Raises:
        JSONPathSyntaxError: If there are too few characters left for the
            escape, if the escape is a low surrogate on its own, or if a
            high surrogate is not followed by a `\\u` low surrogate escape.
    """
    length = len(value)

    # Four hex digits must follow the 'u', so index + 4 has to be in range.
    if index + 4 >= length:
        raise JSONPathSyntaxError(
            f"incomplete escape sequence at index {token.index + index - 1}",
            token=token,
        )

    index += 1  # move past 'u'
    codepoint = self._parse_hex_digits(value[index : index + 4], token)

    # A low surrogate is only accepted as the second half of a pair.
    if self._is_low_surrogate(codepoint):
        raise JSONPathSyntaxError(
            f"unexpected low surrogate at index {token.index + index - 1}",
            token=token,
        )

    if self._is_high_surrogate(codepoint):
        # expect a surrogate pair
        # The second escape occupies index + 4 ('\'), index + 5 ('u') and
        # index + 6 .. index + 9 (hex digits), so index + 9 must be in range.
        if not (
            index + 9 < length
            and value[index + 4] == "\\"
            and value[index + 5] == "u"
        ):
            raise JSONPathSyntaxError(
                f"incomplete escape sequence at index {token.index + index - 2}",
                token=token,
            )

        low_surrogate = self._parse_hex_digits(value[index + 6 : index + 10], token)

        if not self._is_low_surrogate(low_surrogate):
            raise JSONPathSyntaxError(
                f"unexpected codepoint at index {token.index + index + 4}",
                token=token,
            )

        # Combine the low ten bits of each surrogate and offset by 0x10000.
        codepoint = 0x10000 + (
            ((codepoint & 0x03FF) << 10) | (low_surrogate & 0x03FF)
        )

        # index + 9 is the last hex digit of the second escape.
        return (codepoint, index + 9)

    # index + 3 is the last hex digit of the single escape.
    return (codepoint, index + 3)

def _parse_hex_digits(self, digits: str, token: Token) -> int:
codepoint = 0
for digit in digits.encode():
codepoint <<= 4
if digit >= 48 and digit <= 57:
codepoint |= digit - 48
elif digit >= 65 and digit <= 70:
codepoint |= digit - 65 + 10
elif digit >= 97 and digit <= 102:
codepoint |= digit - 97 + 10
else:
raise JSONPathSyntaxError(
"invalid \\uXXXX escape sequence",
token=token,
)
return codepoint

def _string_from_codepoint(self, codepoint: int, token: Token) -> str:
if codepoint <= 0x1F:
raise JSONPathSyntaxError("invalid character", token=token)
return chr(codepoint)

def _is_high_surrogate(self, codepoint: int) -> bool:
return codepoint >= 0xD800 and codepoint <= 0xDBFF

def _is_low_surrogate(self, codepoint: int) -> bool:
return codepoint >= 0xDC00 and codepoint <= 0xDFFF

def _raise_for_non_comparable_function(
self, expr: Expression, token: Token
Expand Down
5 changes: 5 additions & 0 deletions jsonpath_rfc9535/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,8 @@ def expect_peek(self, *typ: TokenType) -> None:
f"expected {_typ}, found {self.peek.type_.name!r}",
token=self.peek,
)

def expect_peek_not(self, typ: TokenType, message: str) -> None:
    """Raise a `JSONPathSyntaxError` if the next token's type is _typ_.

    Args:
        typ: The token type that must not come next.
        message: The error message to raise with.
    """
    if self.peek.type_ == typ:
        raise JSONPathSyntaxError(message, token=self.peek)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ convention = "google"
"scripts/__init__.py" = ["D104"]
"tests/*" = ["D100", "D101", "D104", "D103"]
"jsonpath_rfc9535/lex.py" = ["E741"]
"jsonpath_rfc9535/parse.py" = ["PLR2004"]
"jsonpath_rfc9535/utils/nondeterministic_descent.py" = [
"D103",
"D101",
Expand Down
2 changes: 2 additions & 0 deletions tests/test_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import json
import operator
from dataclasses import dataclass
from dataclasses import field
from typing import Any
from typing import Dict
from typing import List
Expand All @@ -26,6 +27,7 @@ class Case:
result: Any = None
results: Optional[List[Any]] = None
invalid_selector: Optional[bool] = None
tags: List[str] = field(default_factory=list)


SKIP: Dict[str, str] = {}
Expand Down
2 changes: 2 additions & 0 deletions tests/test_cts_nondeterminism.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import json
import operator
from dataclasses import dataclass
from dataclasses import field
from typing import Any
from typing import List
from typing import Optional
Expand All @@ -26,6 +27,7 @@ class Case:
result: Any = None
results: Optional[List[Any]] = None
invalid_selector: Optional[bool] = None
tags: List[str] = field(default_factory=list)


def cases() -> List[Case]:
Expand Down