Skip to content

Commit

Permalink
Regex: Fix result containing optional capturing group that did not match
Browse files Browse the repository at this point in the history
  • Loading branch information
Sainan committed Jul 2, 2024
1 parent 83ca134 commit 5600f2b
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 26 deletions.
2 changes: 2 additions & 0 deletions CLI/cli_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,8 @@ spanning over multiple lines */

assert(Regex("a(.*)z").match("az").toString() == R"(0="az", 1="")");
assert(Regex("a(.*)z").match("abz").toString() == R"(0="abz", 1="b")");

assert(Regex("(A)(B)?").match("A").toString() == R"(0="A", 1="A")");
});

test("MessageStream", []
Expand Down
29 changes: 5 additions & 24 deletions soup/Regex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,31 +84,9 @@ NAMESPACE_SOUP
while (m.c != nullptr)
{
#if REGEX_DEBUG_MATCH
std::cout << m.c->toString() << ": ";
std::cout << m.c->toString() << " (g " << m.c->group->index << "): ";
#endif

// Insert missing capturing groups
for (auto g = m.c->group; g; g = g->parent)
{
if (g->lookahead_or_lookbehind)
{
break;
}
if (g->isNonCapturing())
{
continue;
}
//std::cout << "group " << g->index << "; ";
while (g->index >= m.result.groups.size())
{
m.result.groups.emplace_back(std::nullopt);
}
if (!m.result.groups.at(g->index).has_value())
{
m.result.groups.at(g->index) = RegexMatchedGroup{ g->name, m.it, m.it };
}
}

if (m.c->rollback_transition)
{
#if REGEX_DEBUG_MATCH
Expand All @@ -117,6 +95,8 @@ NAMESPACE_SOUP
m.saveRollback(m.c->rollback_transition);
}

m.insertMissingCapturingGroups(m.c->group);

if (reset_capture)
{
reset_capture = false;
Expand Down Expand Up @@ -180,7 +160,7 @@ NAMESPACE_SOUP
#if REGEX_DEBUG_MATCH
std::cout << "; rolling back\n";
#endif
m.restoreRollback();
const RegexGroup* g = m.restoreRollback();
SOUP_ASSERT(!m.shouldSaveCheckpoint());
reset_capture = m.shouldResetCapture();
if (m.c == RegexConstraint::ROLLBACK_TO_SUCCESS)
Expand All @@ -190,6 +170,7 @@ NAMESPACE_SOUP
#endif
break;
}
m.insertMissingCapturingGroups(g);
continue;
}

Expand Down
30 changes: 28 additions & 2 deletions soup/RegexMatcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ NAMESPACE_SOUP
{
struct RollbackPoint
{
const RegexGroup* g;
const RegexConstraint* c;
const char* it;
RegexMatchResult result{};
Expand Down Expand Up @@ -42,15 +43,17 @@ NAMESPACE_SOUP

void saveRollback(const RegexConstraint* rollback_transition)
{
rollback_points.emplace_back(RollbackPoint{ rollback_transition, it, result });
rollback_points.emplace_back(RollbackPoint{ c->group, rollback_transition, it, result });
}

void restoreRollback()
[[nodiscard]] const RegexGroup* restoreRollback()
{
const RegexGroup* g = rollback_points.back().g;
c = rollback_points.back().c;
it = rollback_points.back().it;
result = std::move(rollback_points.back().result);
rollback_points.pop_back();
return g;
}

bool shouldSaveCheckpoint() noexcept
Expand Down Expand Up @@ -84,5 +87,28 @@ NAMESPACE_SOUP
it = checkpoints.back();
checkpoints.pop_back();
}

// Walks from `g` up through its parent chain and makes sure every capturing
// group encountered has an entry in `result.groups`.
//
// - The walk stops at the first group flagged as lookahead/lookbehind.
// - Non-capturing groups are skipped, but their parents are still visited.
// - `result.groups` is padded with empty (nullopt) slots until the group's
//   index is addressable.
// - A slot that is still empty is filled with a zero-length match positioned
//   at the current iterator `it`; slots that already hold a value are left
//   untouched.
void insertMissingCapturingGroups(const RegexGroup* g)
{
	auto& groups = this->result.groups;
	while (g != nullptr && !g->lookahead_or_lookbehind)
	{
		if (!g->isNonCapturing())
		{
			const auto idx = g->index;

			// Grow the result list so that `idx` is a valid position.
			while (groups.size() <= idx)
			{
				groups.emplace_back(std::nullopt);
			}

			// Only populate groups that have not been recorded yet.
			auto& slot = groups.at(idx);
			if (!slot.has_value())
			{
				slot = RegexMatchedGroup{ g->name, this->it, this->it };
			}
		}
		g = g->parent;
	}
}
};
}

0 comments on commit 5600f2b

Please sign in to comment.