From 7be6a935e088e558eb5807594e2c0f73d42d8c45 Mon Sep 17 00:00:00 2001 From: Stefan VanBuren Date: Mon, 28 Jul 2025 13:09:04 -0400 Subject: [PATCH] Drop custom matcher & config We implemented this so that we could avoid a hard dependency on google/re2 due to the lack of wheels being published for Python 3.13. However, [3.13 wheels are now published][1], and our upstream cel-python dep, as of v0.4, requires google/re2 as a dependency. It seems like we ought to remove our matches workaround, as well as the configuration for supplying a user-defined matching func, as we generally want all users to have the same behavior for matching. (We can keep the config value if we really think it's necessary.) [1]: https://github.com/google/re2/issues/516 --- protovalidate/config.py | 6 --- protovalidate/internal/extra_func.py | 25 +---------- protovalidate/internal/matches.py | 66 ---------------------------- protovalidate/validator.py | 2 +- test/test_config.py | 1 - test/test_format.py | 5 +-- test/test_matches.py | 43 ------------------ test/test_validate.py | 38 ---------------- 8 files changed, 4 insertions(+), 182 deletions(-) delete mode 100644 protovalidate/internal/matches.py delete mode 100644 test/test_matches.py diff --git a/protovalidate/config.py b/protovalidate/config.py index d376a49c..1e21683b 100644 --- a/protovalidate/config.py +++ b/protovalidate/config.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from collections.abc import Callable from dataclasses import dataclass -from typing import Optional @dataclass @@ -23,10 +21,6 @@ class Config: Attributes: fail_fast (bool): If true, validation will stop after the first violation. Defaults to False. - regex_matches_func: An optional regex matcher to use. If specified, this will be used to match - on regex expressions instead of this library's `matches` logic. 
""" fail_fast: bool = False - - regex_matches_func: Optional[Callable[[str, str], bool]] = None diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index 5f233105..d8b3a928 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -15,15 +15,12 @@ import math import re import typing -from collections.abc import Callable from urllib import parse as urlparse import celpy from celpy import celtypes -from protovalidate.config import Config from protovalidate.internal import string_format -from protovalidate.internal.matches import matches as protovalidate_matches from protovalidate.internal.rules import MessageType, field_to_cel # See https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address @@ -1556,31 +1553,11 @@ def __peek(self, char: str) -> bool: return self._index < len(self._string) and self._string[self._index] == char -def get_matches_func(matcher: typing.Optional[Callable[[str, str], bool]]): - if matcher is None: - matcher = protovalidate_matches - - def cel_matches(text: celtypes.Value, pattern: celtypes.Value) -> celpy.Result: - if not isinstance(text, celtypes.StringType): - msg = "invalid argument for text, expected string" - raise celpy.CELEvalError(msg) - if not isinstance(pattern, celtypes.StringType): - msg = "invalid argument for pattern, expected string" - raise celpy.CELEvalError(msg) - - b = matcher(text, pattern) - return celtypes.BoolType(b) - - return cel_matches - - -def make_extra_funcs(config: Config) -> dict[str, celpy.CELFunction]: +def make_extra_funcs() -> dict[str, celpy.CELFunction]: string_fmt = string_format.StringFormat() return { # Missing standard functions "format": string_fmt.format, - # Overridden standard functions - "matches": get_matches_func(config.regex_matches_func), # protovalidate specific functions "getField": cel_get_field, "isNan": cel_is_nan, diff --git a/protovalidate/internal/matches.py b/protovalidate/internal/matches.py 
deleted file mode 100644 index 27e29f9e..00000000 --- a/protovalidate/internal/matches.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright 2023-2025 Buf Technologies, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re - -import celpy - -# Patterns that are supported in Python's re package and not in re2. -# RE2: https://github.com/google/re2/wiki/syntax -invalid_patterns = [ - r"\\[1-9]", # backreference - r"\\k<\w+>", # backreference - r"\(\?\=", # lookahead - r"\(\?\!", # negative lookahead - r"\(\?\<\=", # lookbehind - r"\(\?\<\!", # negative lookbehind - r"\\c[A-Z]", # control character - r"\\u[0-9a-fA-F]{4}", # UTF-16 code-unit - r"\\0(?!\d)", # NUL - r"\[\\b.*\]", # Backspace eg: [\b] - r"\\Z", # End of text (only lowercase z is supported in re2) -] - - -def matches(text: str, pattern: str) -> bool: - """Return True if the given pattern matches text. False otherwise. - - CEL uses RE2 syntax which diverges from Python re in various ways. Ideally, we - would use the google-re2 package, which is an extra dep in celpy, but at press - time it does not provide a pre-built binary for the latest version of Python (3.13) - which means those using this version will run into many issues. - - Instead of foisting this issue on users, we instead mimic re2 syntax by failing - to compile the regex for patterns not compatible with re2. - - Users can choose to override this behavior by providing their own custom matches - function via the Config. 
- - Raises: - celpy.CELEvalError: If pattern contains invalid re2 syntax or if an re.error is raised during matching. - """ - # Simulate re2 by failing on any patterns not compatible with re2 syntax - for invalid_pattern in invalid_patterns: - r = re.search(invalid_pattern, pattern) - if r is not None: - msg = f"error evaluating pattern {pattern}, invalid RE2 syntax" - raise celpy.CELEvalError(msg) - - try: - m = re.search(pattern, text) - except re.error as ex: - msg = "match error" - raise celpy.CELEvalError(msg, ex.__class__, ex.args) from ex - - return m is not None diff --git a/protovalidate/validator.py b/protovalidate/validator.py index 5c1850ba..11a22c6d 100644 --- a/protovalidate/validator.py +++ b/protovalidate/validator.py @@ -40,7 +40,7 @@ class Validator: def __init__(self, config=None): self._cfg = config if config is not None else Config() - funcs = extra_func.make_extra_funcs(self._cfg) + funcs = extra_func.make_extra_funcs() self._factory = _rules.RuleFactory(funcs) def validate( diff --git a/test/test_config.py b/test/test_config.py index 16d7eca3..71f33af7 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -21,4 +21,3 @@ class TestConfig(unittest.TestCase): def test_defaults(self): cfg = Config() self.assertFalse(cfg.fail_fast) - self.assertIsNone(cfg.regex_matches_func) diff --git a/test/test_format.py b/test/test_format.py index f356a3a0..99db67da 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -23,7 +23,6 @@ from gen.cel.expr import eval_pb2 from gen.cel.expr.conformance.test import simple_pb2 -from protovalidate.config import Config from protovalidate.internal import extra_func from protovalidate.internal.cel_field_presence import InterpretedRunner @@ -109,7 +108,7 @@ def test_format_successes(self): if test.name in skipped_tests: continue ast = self._env.compile(test.expr) - prog = self._env.program(ast, functions=extra_func.make_extra_funcs(Config())) + prog = self._env.program(ast, 
functions=extra_func.make_extra_funcs()) bindings = build_variables(test.bindings) # Ideally we should use pytest parametrize instead of subtests, but @@ -133,7 +132,7 @@ def test_format_errors(self): if test.name in skipped_error_tests: continue ast = self._env.compile(test.expr) - prog = self._env.program(ast, functions=extra_func.make_extra_funcs(Config())) + prog = self._env.program(ast, functions=extra_func.make_extra_funcs()) bindings = build_variables(test.bindings) # Ideally we should use pytest parametrize instead of subtests, but diff --git a/test/test_matches.py b/test/test_matches.py deleted file mode 100644 index 7d730852..00000000 --- a/test/test_matches.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2023-2025 Buf Technologies, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import celpy - -from protovalidate.internal.matches import matches - -invalid_patterns = [ - r"\1", - r"\k", - r"Jack(?=Sprat)", - "Jack(?!Sprat)", - "(?<=Sprat)Jack", - "(? 
bool: - pattern = pattern.replace("z", "Z") - try: - m = re.search(pattern, text) - except re.error as ex: - msg = "match error" - raise celpy.CELEvalError(msg, ex.__class__, ex.args) from ex - return m is not None - - cfg = Config(regex_matches_func=matcher) - validator = protovalidate.Validator(config=cfg) - - # Test validate - try: - validator.validate(msg) - except Exception: - self.fail("unexpected validation failure") - - # Test collect_violations - violations = validator.collect_violations(msg) - self.assertEqual(len(violations), 0) - def _run_valid_tests(self, msg: message.Message): """A helper function for testing successful validation on a given message