From 5600f2b1b16abfbbb91be93ac6641154c7b9d322 Mon Sep 17 00:00:00 2001 From: Sainan Date: Tue, 2 Jul 2024 06:02:17 +0200 Subject: [PATCH] Regex: Fix result containing optional capturing group that did not match --- CLI/cli_test.cpp | 2 ++ soup/Regex.cpp | 29 +++++------------------------ soup/RegexMatcher.hpp | 30 ++++++++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/CLI/cli_test.cpp b/CLI/cli_test.cpp index e10c31c6..b8801c7a 100644 --- a/CLI/cli_test.cpp +++ b/CLI/cli_test.cpp @@ -870,6 +870,8 @@ spanning over multiple lines */ assert(Regex("a(.*)z").match("az").toString() == R"(0="az", 1="")"); assert(Regex("a(.*)z").match("abz").toString() == R"(0="abz", 1="b")"); + + assert(Regex("(A)(B)?").match("A").toString() == R"(0="A", 1="A")"); }); test("MessageStream", [] diff --git a/soup/Regex.cpp b/soup/Regex.cpp index 010c2957..955b00c5 100644 --- a/soup/Regex.cpp +++ b/soup/Regex.cpp @@ -84,31 +84,9 @@ NAMESPACE_SOUP while (m.c != nullptr) { #if REGEX_DEBUG_MATCH - std::cout << m.c->toString() << ": "; + std::cout << m.c->toString() << " (g " << m.c->group->index << "): "; #endif - // Insert missing capturing groups - for (auto g = m.c->group; g; g = g->parent) - { - if (g->lookahead_or_lookbehind) - { - break; - } - if (g->isNonCapturing()) - { - continue; - } - //std::cout << "group " << g->index << "; "; - while (g->index >= m.result.groups.size()) - { - m.result.groups.emplace_back(std::nullopt); - } - if (!m.result.groups.at(g->index).has_value()) - { - m.result.groups.at(g->index) = RegexMatchedGroup{ g->name, m.it, m.it }; - } - } - if (m.c->rollback_transition) { #if REGEX_DEBUG_MATCH @@ -117,6 +95,8 @@ NAMESPACE_SOUP m.saveRollback(m.c->rollback_transition); } + m.insertMissingCapturingGroups(m.c->group); + if (reset_capture) { reset_capture = false; @@ -180,7 +160,7 @@ NAMESPACE_SOUP #if REGEX_DEBUG_MATCH std::cout << "; rolling back\n"; #endif - m.restoreRollback(); + const RegexGroup* g = m.restoreRollback(); SOUP_ASSERT(!m.shouldSaveCheckpoint()); reset_capture = m.shouldResetCapture(); if (m.c == RegexConstraint::ROLLBACK_TO_SUCCESS) @@ -190,6 +170,7 @@ NAMESPACE_SOUP #endif break; } + m.insertMissingCapturingGroups(g); continue; } diff --git a/soup/RegexMatcher.hpp b/soup/RegexMatcher.hpp index 746cac77..e25ad5d2 100644 --- a/soup/RegexMatcher.hpp +++ b/soup/RegexMatcher.hpp @@ -14,6 +14,7 @@ NAMESPACE_SOUP { struct RollbackPoint { + const RegexGroup* g; const RegexConstraint* c; const char* it; RegexMatchResult result{}; @@ -42,15 +43,17 @@ NAMESPACE_SOUP void saveRollback(const RegexConstraint* rollback_transition) { - rollback_points.emplace_back(RollbackPoint{ rollback_transition, it, result }); + rollback_points.emplace_back(RollbackPoint{ c->group, rollback_transition, it, result }); } - void restoreRollback() + [[nodiscard]] const RegexGroup* restoreRollback() { + const RegexGroup* g = rollback_points.back().g; c = rollback_points.back().c; it = rollback_points.back().it; result = std::move(rollback_points.back().result); rollback_points.pop_back(); + return g; } bool shouldSaveCheckpoint() noexcept @@ -84,5 +87,28 @@ NAMESPACE_SOUP it = checkpoints.back(); checkpoints.pop_back(); } + + void insertMissingCapturingGroups(const RegexGroup* g) + { + for (; g; g = g->parent) + { + if (g->lookahead_or_lookbehind) + { + break; + } + if (g->isNonCapturing()) + { + continue; + } + while (g->index >= this->result.groups.size()) + { + this->result.groups.emplace_back(std::nullopt); + } + if (!this->result.groups.at(g->index).has_value()) + { + this->result.groups.at(g->index) = RegexMatchedGroup{ g->name, this->it, this->it }; + } + } + } }; }