Permalink
Browse files

Better handling of character classes within the PCRE parser

  • Loading branch information...
aelliott committed Mar 28, 2012
1 parent 0d3df24 commit d2b2674f1e064c92766c5386f466a14e465ce2f0
@@ -77,6 +77,24 @@ void BracketExpressionGraphicsItem::readTokens()
case T_NOT_WORD_BOUNDARY:
items.push_back(QString("Not Word Boundary"));
break;
case T_BELL:
items.push_back(QString("Bell Character"));
break;
case T_ESCAPE:
items.push_back(QString("Escape Character"));
break;
case T_FORM_FEED:
items.push_back(QString("Form Feed"));
break;
case T_LINE_FEED:
items.push_back(QString("Line Feed"));
break;
case T_HORIZONTAL_TAB:
items.push_back(QString("Tab Character"));
break;
case T_OCTAL_CHAR:
items.push_back(QString("Octal Character"));
break;
default:
qDebug() << "Unhandled token in bracket expression: " << token->value();
}
@@ -198,7 +198,7 @@ void IcuParser::handleToken(RegexpToken token)
{
QRegExp rx;
QRegExp characterClass;
QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.$]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
QRegExp flagClose;
Token *openingToken;
@@ -194,7 +194,7 @@ void PcreParser::handleToken(RegexpToken token)
{
QRegExp rx;
QRegExp characterClass;
QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.$]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
QRegExp flagClose;
Token *openingToken;
@@ -363,6 +363,54 @@ void PcreParser::handleToken(RegexpToken token)
_pos += characterClass.matchedLength();
}
characterClass.setPattern(_syntax[T_BELL]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_BELL, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}
characterClass.setPattern(_syntax[T_ESCAPE]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_ESCAPE, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}
characterClass.setPattern(_syntax[T_FORM_FEED]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_FORM_FEED, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}
characterClass.setPattern(_syntax[T_LINE_FEED]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_LINE_FEED, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}
characterClass.setPattern(_syntax[T_HORIZONTAL_TAB]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_HORIZONTAL_TAB, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}
characterClass.setPattern(_syntax[T_OCTAL_CHAR]);
if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
{
matched = true;
_tokens.push_back(new Token(T_OCTAL_CHAR, _expression.mid(_pos, characterClass.matchedLength())));
_pos += characterClass.matchedLength();
}
if(!matched)
{
_tokens.push_back(new Token(T_ERROR, _expression.mid(_pos, 1)));
@@ -91,7 +91,7 @@ void QtParser::handleToken(RegexpToken token)
{
QRegExp groupingClose(_syntax[T_GROUPING_CLOSE]);
QRegExp assertionClose(_syntax[T_ASSERTION_CLOSE]);
QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.^$]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
QRegExp bracketExpressionClose(_syntax[T_BRACKET_EXPRESSION_CLOSE]);
QRegExp characterClass("");

0 comments on commit d2b2674

Please sign in to comment.