diff --git a/python/_re2.cc b/python/_re2.cc
index 22f092b2..9e978f70 100644
--- a/python/_re2.cc
+++ b/python/_re2.cc
@@ -208,21 +208,27 @@ class Filter {
     return index;
   }
 
-  bool Compile() {
-    std::vector<std::string> atoms;
-    filter_.Compile(&atoms);
+  // Compiles the filter and returns a copy of its atoms. Calls
+  // py::pybind11_fail() if the RE2::Set built from the atoms fails to compile.
+  std::vector<std::string> Compile() {
+    // Discard atoms left over from a previous call before refilling.
+    atoms_.clear();
+    filter_.Compile(&atoms_);
     RE2::Options options;
     options.set_literal(true);
     options.set_case_sensitive(false);
     set_ = std::make_unique<RE2::Set>(options, RE2::UNANCHORED);
-    for (int i = 0; i < static_cast<int>(atoms.size()); ++i) {
-      if (set_->Add(atoms[i], /*error=*/NULL) != i) {
+    for (int i = 0; i < static_cast<int>(atoms_.size()); ++i) {
+      if (set_->Add(atoms_[i], /*error=*/NULL) != i) {
         // Should never happen: the atom is a literal!
         py::pybind11_fail("set_->Add() failed");
       }
     }
     // Compiling can fail.
-    return set_->Compile();
+    if (!set_->Compile()) {
+      py::pybind11_fail("set_->Compile() failed");
+    }
+    return atoms_;
   }
 
   std::vector<int> Match(py::buffer buffer, bool potential) const {
@@ -251,6 +257,7 @@ class Filter {
  private:
   re2::FilteredRE2 filter_;
   std::unique_ptr<RE2::Set> set_;
+  std::vector<std::string> atoms_;
 };
 
 PYBIND11_MODULE(_re2, module) {
diff --git a/python/re2.py b/python/re2.py
index 12c7b10f..3e2e572e 100644
--- a/python/re2.py
+++ b/python/re2.py
@@ -563,8 +563,8 @@ def Add(self, pattern, options=None):
     return index
 
   def Compile(self):
-    if not self._filter.Compile():
-      raise error('failed to compile Filter')
+    """Compiles the filter and returns the atoms reported by the extension."""
+    return self._filter.Compile()
 
   def Match(self, text, potential=False):
     if isinstance(text, str):
diff --git a/python/re2_test.py b/python/re2_test.py
index 146b55b4..66cdbe8a 100644
--- a/python/re2_test.py
+++ b/python/re2_test.py
@@ -62,6 +62,30 @@ def test_compile_with_options(self):
     with self.assertRaisesRegex(re2.error, 'pattern too large'):
       re2.compile('.{1000}', options=options)
 
+  def test_compile_returns_atoms(self):
+    f = re2.Filter()
+    f.Add('hello.*world')
+    f.Add('foo.*bar')
+    atoms = f.Compile()
+    self.assertIsInstance(atoms, list)
+    # Atoms are lowercase
+    self.assertIn('hello', atoms)
+    self.assertIn('world', atoms)
+    self.assertIn('foo', atoms)
+    self.assertIn('bar', atoms)
+
+  def test_compile_no_literals(self):
+    f = re2.Filter()
+    f.Add('.*')
+    f.Add('[a-z]')
+    atoms = f.Compile()
+    self.assertEqual(atoms, [])
+
+  def test_compile_empty_filter(self):
+    f = re2.Filter()
+    atoms = f.Compile()
+    self.assertEqual(atoms, [])
+
   def test_programsize_reverseprogramsize(self):
     regexp = re2.compile('a+b')
     self.assertEqual(7, regexp.programsize)