Skip to content

Commit

Permalink
Better handling of character classes within the PCRE parser
Browse files (browse the repository at this point in the history)
  • Loading branch information
aelliott committed Mar 28, 2012
1 parent 0d3df24 commit d2b2674
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 3 deletions.
18 changes: 18 additions & 0 deletions GraphicsItems/bracketexpressiongraphicsitem.cpp
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -77,6 +77,24 @@ void BracketExpressionGraphicsItem::readTokens()
case T_NOT_WORD_BOUNDARY: case T_NOT_WORD_BOUNDARY:
items.push_back(QString("Not Word Boundary")); items.push_back(QString("Not Word Boundary"));
break; break;
case T_BELL:
items.push_back(QString("Bell Character"));
break;
case T_ESCAPE:
items.push_back(QString("Escape Character"));
break;
case T_FORM_FEED:
items.push_back(QString("Form Feed"));
break;
case T_LINE_FEED:
items.push_back(QString("Line Feed"));
break;
case T_HORIZONTAL_TAB:
items.push_back(QString("Tab Character"));
break;
case T_OCTAL_CHAR:
items.push_back(QString("Octal Character"));
break;
default: default:
qDebug() << "Unhandled token in bracket expression: " << token->value(); qDebug() << "Unhandled token in bracket expression: " << token->value();
} }
Expand Down
2 changes: 1 addition & 1 deletion RegexModules/icuparser.cpp
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -198,7 +198,7 @@ void IcuParser::handleToken(RegexpToken token)
{ {
QRegExp rx; QRegExp rx;
QRegExp characterClass; QRegExp characterClass;
QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.$]|\\\\[^a-zA-Z0-9]"); QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]); QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
QRegExp flagClose; QRegExp flagClose;
Token *openingToken; Token *openingToken;
Expand Down
50 changes: 49 additions & 1 deletion RegexModules/pcreparser.cpp
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -194,7 +194,7 @@ void PcreParser::handleToken(RegexpToken token)
{ {
QRegExp rx; QRegExp rx;
QRegExp characterClass; QRegExp characterClass;
QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.$]|\\\\[^a-zA-Z0-9]"); QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]); QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
QRegExp flagClose; QRegExp flagClose;
Token *openingToken; Token *openingToken;
Expand Down Expand Up @@ -363,6 +363,54 @@ void PcreParser::handleToken(RegexpToken token)
_pos += characterClass.matchedLength(); _pos += characterClass.matchedLength();
} }


characterClass.setPattern(_syntax[T_BELL]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_BELL, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}

characterClass.setPattern(_syntax[T_ESCAPE]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_ESCAPE, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}

characterClass.setPattern(_syntax[T_FORM_FEED]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_FORM_FEED, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}

characterClass.setPattern(_syntax[T_LINE_FEED]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_LINE_FEED, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}

characterClass.setPattern(_syntax[T_HORIZONTAL_TAB]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_HORIZONTAL_TAB, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}

characterClass.setPattern(_syntax[T_OCTAL_CHAR]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_OCTAL_CHAR, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}

if(!matched) if(!matched)
{ {
_tokens.push_back(new Token(T_ERROR, _expression.mid(_pos, 1))); _tokens.push_back(new Token(T_ERROR, _expression.mid(_pos, 1)));
Expand Down
2 changes: 1 addition & 1 deletion RegexModules/qtparser.cpp
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -91,7 +91,7 @@ void QtParser::handleToken(RegexpToken token)
{ {
QRegExp groupingClose(_syntax[T_GROUPING_CLOSE]); QRegExp groupingClose(_syntax[T_GROUPING_CLOSE]);
QRegExp assertionClose(_syntax[T_ASSERTION_CLOSE]); QRegExp assertionClose(_syntax[T_ASSERTION_CLOSE]);
QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.^$]|\\\\[^a-zA-Z0-9]"); QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]); QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
QRegExp bracketExpressionClose(_syntax[T_BRACKET_EXPRESSION_CLOSE]); QRegExp bracketExpressionClose(_syntax[T_BRACKET_EXPRESSION_CLOSE]);
QRegExp characterClass(""); QRegExp characterClass("");
Expand Down

0 comments on commit d2b2674

Please sign in to comment.