Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions tools/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,32 @@ package(default_visibility = ["//visibility:public"])

licenses(["notice"])

# Library target for the CEL field path extractor (cel_field_extractor.{h,cc}).
# The deps mirror the headers included by the sources: the navigable AST
# helper, absl flat_hash_set, absl strings (StrJoin) and the CEL syntax proto.
cc_library(
name = "cel_field_extractor",
srcs = ["cel_field_extractor.cc"],
hdrs = ["cel_field_extractor.h"],
deps = [
":navigable_ast",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/strings",
"@com_google_cel_spec//proto/cel/expr:syntax_cc_proto",
],
)

# Unit tests for :cel_field_extractor. The tests parse CEL source text with
# //parser and compare the extracted paths using the matchers provided through
# //internal:testing.
cc_test(
name = "cel_field_extractor_test",
srcs = ["cel_field_extractor_test.cc"],
deps = [
":cel_field_extractor",
"//internal:testing",
"//parser",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/log:absl_check",
"@com_google_absl//absl/status:statusor",
"@com_google_cel_spec//proto/cel/expr:syntax_cc_proto",
],
)

cc_library(
name = "flatbuffers_backed_impl",
srcs = [
Expand Down
86 changes: 86 additions & 0 deletions tools/cel_field_extractor.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tools/cel_field_extractor.h"

#include <algorithm>
#include <string>
#include <vector>

#include "cel/expr/syntax.pb.h"
#include "absl/container/flat_hash_set.h"
#include "absl/strings/str_join.h"
#include "tools/navigable_ast.h"

namespace cel {

namespace {

// Returns true if the identifier `node` refers to a variable introduced by an
// enclosing comprehension (iter_var, iter_var2 or accu_var) rather than to a
// top-level identifier.
//
// `node` must be an identifier node: its `ident_expr()` is read
// unconditionally.
//
// A comprehension only hides a name in the sub-expressions where its variables
// are actually in scope, so each comprehension ancestor is checked against the
// branch through which `node` is reached:
//   - iter_range, accu_init: evaluated before the loop, no comprehension
//     variable is visible (e.g. the first `e` in `e.items.exists(e, e.ok)` is
//     a top-level identifier).
//   - result: only accu_var is visible.
//   - loop_condition, loop_step: iter_var, iter_var2 and accu_var are visible.
//
// NOTE(review): branch detection compares `AstNode::expr()` against the
// addresses of the comprehension's sub-messages, which assumes the navigable
// AST points into the original Expr tree -- confirm. If that does not hold, no
// branch matches and every comprehension ancestor hides all three variable
// names, which is the previous behavior.
bool IsComprehensionDefinedField(const cel::AstNode& node) {
  // Loop-invariant: bind once by reference instead of copying per ancestor.
  const std::string& ident_name = node.expr()->ident_expr().name();

  // `child` trails `ancestor` by one step so that, at a comprehension, it
  // identifies which of the comprehension's sub-expressions contains `node`.
  const cel::AstNode* child = &node;
  for (const cel::AstNode* ancestor = node.parent(); ancestor != nullptr;
       child = ancestor, ancestor = ancestor->parent()) {
    if (ancestor->node_kind() != cel::NodeKind::kComprehension) {
      continue;
    }

    const auto& comprehension = ancestor->expr()->comprehension_expr();
    const auto* branch = child->expr();

    // Neither the range nor the accumulator initializer can see any variable
    // declared by this comprehension; keep looking at outer comprehensions.
    if (branch == &comprehension.iter_range() ||
        branch == &comprehension.accu_init()) {
      continue;
    }

    if (ident_name == comprehension.accu_var()) {
      return true;
    }

    // The result expression sees the accumulator only.
    if (branch == &comprehension.result()) {
      continue;
    }

    if (ident_name == comprehension.iter_var() ||
        ident_name == comprehension.iter_var2()) {
      return true;
    }
  }

  return false;
}

} // namespace

// Collects every dotted path formed by a chain of select nodes that ends in an
// identifier which is not a comprehension-defined variable.
absl::flat_hash_set<std::string> ExtractFieldPaths(
    const cel::expr::Expr& expr) {
  NavigableAst navigable = NavigableAst::Build(expr);

  absl::flat_hash_set<std::string> extracted_paths;
  // Field names of the select chain currently being walked. They are gathered
  // outermost-first (leaf field first) and reversed once the root identifier
  // is reached.
  std::vector<std::string> pending_segments;

  // A well-formed select node has exactly one operand, so in a preorder walk
  // that operand is the very next node visited. Each step therefore either
  // extends the pending chain (another select), completes it (an identifier),
  // or abandons it (anything else).
  for (const cel::AstNode& visited : navigable.Root().DescendantsPreorder()) {
    const auto kind = visited.node_kind();

    if (kind == cel::NodeKind::kSelect) {
      pending_segments.push_back(visited.expr()->select_expr().field());
      continue;
    }

    const bool completes_path = kind == cel::NodeKind::kIdent &&
                                !IsComprehensionDefinedField(visited);
    if (completes_path) {
      pending_segments.push_back(visited.expr()->ident_expr().name());
      std::reverse(pending_segments.begin(), pending_segments.end());
      extracted_paths.insert(absl::StrJoin(pending_segments, "."));
    }

    // Whether the chain was completed or interrupted, start afresh.
    pending_segments.clear();
  }

  return extracted_paths;
}

} // namespace cel
70 changes: 70 additions & 0 deletions tools/cel_field_extractor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef THIRD_PARTY_CEL_CPP_TOOLS_CEL_FIELD_EXTRACTOR_H
#define THIRD_PARTY_CEL_CPP_TOOLS_CEL_FIELD_EXTRACTOR_H

#include <string>

#include "cel/expr/syntax.pb.h"
#include "absl/container/flat_hash_set.h"

namespace cel {

// ExtractFieldPaths attempts to extract the set of unique field selection
// paths rooted at top level identifiers (e.g. "request.user.id").
//
// One possible use case for this function is to determine which fields of a
// serialized message are referenced by a CEL query, enabling partial
// deserialization for performance optimization.
//
// Implementation notes:
// The extraction logic focuses on identifying chains of `Select` operations
// that terminate with a primary identifier node (`IdentExpr`). For example,
// in the expression `message.field.subfield == 10`, the path
// "message.field.subfield" would be extracted.
//
// Identifiers defined locally within CEL comprehension expressions (e.g.,
// comprehension variable aliases defined by `iter_var`, `iter_var2`,
// `accu_var` in the AST) are NOT included. Example:
// `list.exists(elem, elem.field == 'value')` would return {"list"} only.
//
// Container indexing with the _[_] operator is not considered, but map
// indexing with the select operator is considered. For example:
// `message.map_field.key || message.map_field['foo']` results in
// {'message.map_field.key', 'message.map_field'}
//
// This implementation does not consider type check metadata, so there is no
// understanding of whether the primary identifiers and field accesses
// necessarily map to proto messages or proto field accesses. The extractor
// also does not have any understanding of the type of the leaf of the
// select path.
//
// Example:
// Given the CEL expression:
//   `(request.user.id == 'test' &&
//     request.user.attributes.exists(attr, attr.key == 'role')) ||
//    size(request.items) > 0`
//
// The extracted field paths would be:
// - "request.user.id"
// - "request.user.attributes" (because `attr` is a comprehension variable)
// - "request.items"

absl::flat_hash_set<std::string> ExtractFieldPaths(
const cel::expr::Expr& expr);

} // namespace cel

#endif // THIRD_PARTY_CEL_CPP_TOOLS_CEL_FIELD_EXTRACTOR_H
148 changes: 148 additions & 0 deletions tools/cel_field_extractor_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tools/cel_field_extractor.h"

#include <string>

#include "cel/expr/syntax.pb.h"
#include "absl/container/flat_hash_set.h"
#include "absl/log/absl_check.h"
#include "absl/status/statusor.h"
#include "internal/testing.h"
#include "parser/parser.h"

namespace cel {

namespace {

using ::cel::expr::ParsedExpr;
using ::google::api::expr::parser::Parse;
using ::testing::IsEmpty;
using ::testing::UnorderedElementsAre;

// Test helper: parses `cel_query` and returns the field paths extracted from
// the parsed expression. The parse status is verified with ABSL_CHECK_OK
// before the value is used.
absl::flat_hash_set<std::string> GetExtractedFields(
    const std::string& cel_query) {
  absl::StatusOr<ParsedExpr> parse_result = Parse(cel_query);
  ABSL_CHECK_OK(parse_result);
  const ParsedExpr& parsed = parse_result.value();
  return ExtractFieldPaths(parsed.expr());
}

// A bare identifier is reported as a single-segment path.
TEST(TestExtractFieldPaths, CelExprWithOneField) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("field_name");
  EXPECT_THAT(paths, UnorderedElementsAre("field_name"));
}

// A string literal contains no identifiers, so nothing is extracted.
TEST(TestExtractFieldPaths, CelExprWithNoWithLiteral) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("'field_name'");
  EXPECT_THAT(paths, IsEmpty());
}

// An identifier used as the operand of a unary operator is still reported.
TEST(TestExtractFieldPaths, CelExprWithFunctionCallOnSingleField) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("!boolean_field");
  EXPECT_THAT(paths, UnorderedElementsAre("boolean_field"));
}

// An identifier passed as a function argument is reported; the function name
// itself is not.
TEST(TestExtractFieldPaths, CelExprWithSizeFuncCallOnSingleField) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("size(repeated_field)");
  EXPECT_THAT(paths, UnorderedElementsAre("repeated_field"));
}

// A chain of selects is reported as one dotted path, root identifier first.
TEST(TestExtractFieldPaths, CelExprWithNestedField) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("message_field.nested_field.nested_field2");
  EXPECT_THAT(paths,
              UnorderedElementsAre("message_field.nested_field.nested_field2"));
}

// An index operation cuts the path: only the portion before `[0]` is
// reported.
TEST(TestExtractFieldPaths, CelExprWithNestedFieldAndIndexAccess) {
  const absl::flat_hash_set<std::string> paths = GetExtractedFields(
      "repeated_message_field.nested_field[0].nested_field2");
  EXPECT_THAT(paths,
              UnorderedElementsAre("repeated_message_field.nested_field"));
}

// Every distinct path is reported when several sub-expressions are combined
// with logical and comparison operators.
TEST(TestExtractFieldPaths, CelExprWithMultipleFunctionCalls) {
  const absl::flat_hash_set<std::string> paths = GetExtractedFields(
      "(size(repeated_field) > 0 && !boolean_field == true) || "
      "request.valid == true && request.count == 0");
  EXPECT_THAT(paths, UnorderedElementsAre("boolean_field", "repeated_field",
                                          "request.valid", "request.count"));
}

// Variables of an outer comprehension stay hidden inside a nested one, while
// top-level identifiers used in the nested body are still reported.
TEST(TestExtractFieldPaths, CelExprWithNestedComprehension) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("repeated_field_1.exists(e, e.key == 'one') && "
                         "req.repeated_field_2.exists(x, "
                         "x.y.z == 'val' &&"
                         "x.array.exists(y, y == 'val' && req.bool_field == "
                         "true && x.bool_field == false))");
  EXPECT_THAT(paths, UnorderedElementsAre("req.repeated_field_2",
                                          "req.bool_field",
                                          "repeated_field_1"));
}

// `e` and `y` are each used outside the comprehension that declares them, so
// `y.field_1` and `e.field_2` are reported as ordinary paths.
TEST(TestExtractFieldPaths, CelExprWithMultipleComprehension) {
  const absl::flat_hash_set<std::string> paths = GetExtractedFields(
      "repeated_field_1.exists(e, e.key == 'one' && y.field_1 == 'val') && "
      "repeated_field_2.exists(y, y.key == 'one' && e.field_2 == 'val')");
  EXPECT_THAT(paths, UnorderedElementsAre("repeated_field_1",
                                          "repeated_field_2", "e.field_2",
                                          "y.field_1"));
}

// An identifier appearing as a list literal element is reported; the
// comprehension variable iterating over the list is not.
TEST(TestExtractFieldPaths, CelExprWithListLiteral) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("['a', b, 3].exists(x, x == 1)");
  EXPECT_THAT(paths, UnorderedElementsAre("b"));
}

// Mixes plain identifiers, a function argument path and an indexed repeated
// field; the indexed field is reported without the part after the index.
TEST(TestExtractFieldPaths, CelExprWithFunctionCallsAndRepeatedFields) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("data == 'data_1' && field_1 == 'val_1' &&"
                         "(matches(req.field_2, 'val_1') == true) &&"
                         "repeated_field[0].priority >= 200");
  EXPECT_THAT(paths, UnorderedElementsAre("data", "field_1", "req.field_2",
                                          "repeated_field"));
}

// `data.nodes` is referenced both directly and through an index; it appears
// once in the resulting set.
TEST(TestExtractFieldPaths, CelExprWithFunctionOnRepeatedField) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("(contains_data == false && "
                         "data.field_1=='value_1') || "
                         "size(data.nodes) > 0 && "
                         "data.nodes[0].field_2=='value_2'");
  EXPECT_THAT(paths, UnorderedElementsAre("contains_data", "data.field_1",
                                          "data.nodes"));
}

// A path used as the receiver of a member function call is reported; paths
// rooted at the comprehension variable `f` are not.
TEST(TestExtractFieldPaths, CelExprContainingEndsWithFunction) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("data.repeated_field.exists(f, "
                         "f.field_1.field_2.endsWith('val_1')) || "
                         "data.field_3.endsWith('val_3')");
  EXPECT_THAT(paths,
              UnorderedElementsAre("data.repeated_field", "data.field_3"));
}

// Dots inside a regex string constant are literal text and must not be
// mistaken for field selections.
TEST(TestExtractFieldPaths,
     CelExprWithMatchFunctionInsideComprehensionAndRegexConstants) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("req.field_1.field_2=='val_1' && "
                         "data!=null && req.repeated_field.exists(f, "
                         "f.matches('a100.*|.*h100_80gb.*|.*h200.*'))");
  EXPECT_THAT(paths, UnorderedElementsAre("req.field_1.field_2",
                                          "req.repeated_field", "data"));
}

// Several uses of the comprehension variable in one predicate add nothing;
// only the iterated range and the path outside the comprehension are reported.
TEST(TestExtractFieldPaths, CelExprWithMultipleChecksInComprehension) {
  const absl::flat_hash_set<std::string> paths =
      GetExtractedFields("req.field.repeated_field.exists(f, f.key == 'data_1'"
                         " && f.str_value == 'val_1') && "
                         "req.metadata.type == 3");
  EXPECT_THAT(paths, UnorderedElementsAre("req.field.repeated_field",
                                          "req.metadata.type"));
}

} // namespace

} // namespace cel