From a9956e97235894d24eceb0e064c54a6e9fc5c3a0 Mon Sep 17 00:00:00 2001
From: Roman Gershman
Date: Sat, 11 May 2024 22:30:55 +0300
Subject: [PATCH] feat: support subrange expressions in jsonpathv2 (#3036)

Support for expressions like `$.arr[1:-2]`.
The change applies to our own implementation of jsonpath; jsoncons already supports this format.

A subrange is stored as a closed IndexExpr. An upper bound of 0 (`[x:0]`) and an upper bound
that lies before the beginning of the array both select nothing, while a lower bound that lies
before the beginning of the array is clamped to the first element.

Signed-off-by: Roman Gershman
---
 src/core/json/json_test.cc       |  4 ++
 src/core/json/jsonpath_grammar.y |  2 +
 src/core/json/jsonpath_lexer.lex |  1 +
 src/core/json/jsonpath_test.cc   | 63 +++++++++++++++++++++++++++++++
 src/core/json/path.cc            | 14 ++++--
 src/core/json/path.h             | 15 +++++-
 6 files changed, 96 insertions(+), 3 deletions(-)

diff --git a/src/core/json/json_test.cc b/src/core/json/json_test.cc
index 4eb91288068..acfbacf02e4 100644
--- a/src/core/json/json_test.cc
+++ b/src/core/json/json_test.cc
@@ -135,6 +135,10 @@ TEST_F(JsonTest, Path) {
   jsonpath::json_query(j2, "$.field[-1]", [&](const std::string& path, const json& val) {
     EXPECT_EQ(5, val.as<int>());
   });
+
+  jsonpath::json_query(j2, "$.field[-6:1]", [&](const std::string& path, const json& val) {
+    EXPECT_EQ(1, val.as<int>());
+  });
 }
 
 TEST_F(JsonTest, Delete) {
diff --git a/src/core/json/jsonpath_grammar.y b/src/core/json/jsonpath_grammar.y
index 10f08e5e20b..49ed36d1343 100644
--- a/src/core/json/jsonpath_grammar.y
+++ b/src/core/json/jsonpath_grammar.y
@@ -47,6 +47,7 @@ using namespace std;
 %token
   LBRACKET "["
   RBRACKET "]"
+  COLON ":"
   LPARENT "("
   RPARENT ")"
   ROOT "$"
@@ -86,6 +87,7 @@ identifier: UNQ_STR
 
 bracket_index: WILDCARD { $$ = PathSegment{SegmentType::INDEX, IndexExpr::All()}; }
   | INT { $$ = PathSegment(SegmentType::INDEX, IndexExpr($1, $1)); }
+  | INT COLON INT { $$ = PathSegment(SegmentType::INDEX, IndexExpr::HalfOpen($1, $3)); }
 
 function_expr: UNQ_STR { driver->AddFunction($1); } LPARENT ROOT relative_location RPARENT
 %%
diff --git a/src/core/json/jsonpath_lexer.lex b/src/core/json/jsonpath_lexer.lex
index ca727c00cb7..8e6ec826c39 100644
--- a/src/core/json/jsonpath_lexer.lex
+++ b/src/core/json/jsonpath_lexer.lex
@@ -45,6 +45,7 @@
 "$"         return Parser::make_ROOT(loc());
 ".."        return Parser::make_DESCENT(loc());
 "."         return Parser::make_DOT(loc());
+":"         return Parser::make_COLON(loc());
 "["         return Parser::make_LBRACKET(loc());
 "]"         return Parser::make_RBRACKET(loc());
 "*"         return Parser::make_WILDCARD(loc());
diff --git a/src/core/json/jsonpath_test.cc b/src/core/json/jsonpath_test.cc
--- a/src/core/json/jsonpath_test.cc
+++ b/src/core/json/jsonpath_test.cc
@@ -479,4 +479,67 @@ TYPED_TEST(JsonPathTest, Mutate) {
   }
 }
 
+TYPED_TEST(JsonPathTest, SubRange) {
+  TypeParam json = ValidJson<TypeParam>(R"({"arr": [1, 2, 3, 4, 5]})");
+  ASSERT_EQ(0, this->Parse("$.arr[1:2]"));
+  Path path = this->driver_.TakePath();
+  ASSERT_EQ(2, path.size());
+  EXPECT_THAT(path[1], SegType(SegmentType::INDEX));
+
+  vector<int> arr;
+  auto cb = [&arr](optional<string_view> key, const TypeParam& val) {
+    ASSERT_FALSE(key);
+    arr.push_back(to_int(val));
+  };
+
+  EvaluatePath(path, json, cb);
+  ASSERT_THAT(arr, ElementsAre(2));
+  arr.clear();
+
+  ASSERT_EQ(0, this->Parse("$.arr[0:2]"));
+  path = this->driver_.TakePath();
+  EvaluatePath(path, json, cb);
+  ASSERT_THAT(arr, ElementsAre(1, 2));
+  arr.clear();
+
+  ASSERT_EQ(0, this->Parse("$.arr[2:-1]"));
+  path = this->driver_.TakePath();
+  EvaluatePath(path, json, cb);
+  ASSERT_THAT(arr, ElementsAre(3, 4));
+  arr.clear();
+
+  ASSERT_EQ(0, this->Parse("$.arr[-2:-1]"));
+  path = this->driver_.TakePath();
+  EvaluatePath(path, json, cb);
+  ASSERT_THAT(arr, ElementsAre(4));
+  arr.clear();
+
+  ASSERT_EQ(0, this->Parse("$.arr[-2:-2]"));
+  path = this->driver_.TakePath();
+  EvaluatePath(path, json, cb);
+  ASSERT_THAT(arr, ElementsAre());
+  arr.clear();
+
+  // A zero upper bound selects nothing; it must not wrap around to the last element.
+  ASSERT_EQ(0, this->Parse("$.arr[1:0]"));
+  path = this->driver_.TakePath();
+  EvaluatePath(path, json, cb);
+  ASSERT_THAT(arr, ElementsAre());
+  arr.clear();
+
+  // An upper bound that lies before the beginning of the array selects nothing.
+  ASSERT_EQ(0, this->Parse("$.arr[0:-7]"));
+  path = this->driver_.TakePath();
+  EvaluatePath(path, json, cb);
+  ASSERT_THAT(arr, ElementsAre());
+  arr.clear();
+
+  // A lower bound that lies before the beginning of the array is clamped to index 0.
+  ASSERT_EQ(0, this->Parse("$.arr[-7:2]"));
+  path = this->driver_.TakePath();
+  EvaluatePath(path, json, cb);
+  ASSERT_THAT(arr, ElementsAre(1, 2));
+  arr.clear();
+}
+
 }  // namespace dfly::json
diff --git a/src/core/json/path.cc b/src/core/json/path.cc
--- a/src/core/json/path.cc
+++ b/src/core/json/path.cc
@@ -55,14 +55,24 @@ IndexExpr IndexExpr::Normalize(size_t array_len) const {
     return IndexExpr(1, 0);  // empty range.
 
   IndexExpr res = *this;
+
+  // Distance of a negative index from the end of the array. Computed in unsigned
+  // arithmetic, so that negating INT_MIN is well defined.
+  auto dist_from_end = [](int negative) { return 0u - static_cast<unsigned>(negative); };
+
   if (res.second >= int(array_len)) {
     res.second = array_len - 1;
   } else if (res.second < 0) {
-    res.second = res.second % array_len;
+    unsigned dist = dist_from_end(res.second);
+    if (dist > array_len)
+      return IndexExpr(1, 0);  // the range ends before the first element.
+    res.second = array_len - dist;
     DCHECK_GE(res.second, 0);
   }
   if (res.first < 0) {
-    res.first = res.first % array_len;
+    // A lower bound that precedes the first element is clamped to the array start.
+    unsigned dist = dist_from_end(res.first);
+    res.first = dist > array_len ? 0 : array_len - dist;
     DCHECK_GE(res.first, 0);
   }
   return res;
diff --git a/src/core/json/path.h b/src/core/json/path.h
--- a/src/core/json/path.h
+++ b/src/core/json/path.h
@@ -55,9 +55,10 @@ class AggFunction {
   int valid_ = -1;
 };
 
-// Bracket index representation
+// Bracket index representation, IndexExpr is a closed range, i.e. both ends are inclusive.
 // Single index is: <I, I>, wildcard: <0, INT_MAX>,
 // [begin:end): <begin, end - 1>
+// IndexExpr is 0-based; negative indices count back from the end of the array it is applied to.
 struct IndexExpr : public std::pair<int, int> {
   bool Empty() const {
     return first > second;
@@ -68,7 +69,19 @@ struct IndexExpr : public std::pair<int, int> {
   }
 
   using pair::pair;
+
+  // Returns this range clipped to an array of `array_len` elements, with negative indices
+  // resolved to offsets from the array start. The result is Empty() if nothing is selected.
   IndexExpr Normalize(size_t array_len) const;
+
+  // Returns IndexExpr representing the [left_closed, right_open) range.
+  static IndexExpr HalfOpen(int left_closed, int right_open) {
+    // The exclusive bound 0 has no inclusive counterpart: -1 already denotes the last element.
+    // Nothing precedes index 0, therefore such a range is always empty.
+    if (right_open == 0)
+      return IndexExpr(1, 0);
+    return IndexExpr(left_closed, right_open - 1);
+  }
 };
 
 class PathSegment {