Permalink
Browse files

Better handling of character classes within the PCRE parser

  • Loading branch information...
1 parent 0d3df24 commit d2b2674f1e064c92766c5386f466a14e465ce2f0 @aelliott committed Mar 28, 2012
@@ -77,6 +77,24 @@ void BracketExpressionGraphicsItem::readTokens()
case T_NOT_WORD_BOUNDARY:
items.push_back(QString("Not Word Boundary"));
break;
+ case T_BELL:
+ items.push_back(QString("Bell Character"));
+ break;
+ case T_ESCAPE:
+ items.push_back(QString("Escape Character"));
+ break;
+ case T_FORM_FEED:
+ items.push_back(QString("Form Feed"));
+ break;
+ case T_LINE_FEED:
+ items.push_back(QString("Line Feed"));
+ break;
+ case T_HORIZONTAL_TAB:
+ items.push_back(QString("Tab Character"));
+ break;
+ case T_OCTAL_CHAR:
+ items.push_back(QString("Octal Character"));
+ break;
default:
qDebug() << "Unhandled token in bracket expression: " << token->value();
}
@@ -198,7 +198,7 @@ void IcuParser::handleToken(RegexpToken token)
{
QRegExp rx;
QRegExp characterClass;
- QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.$]|\\\\[^a-zA-Z0-9]");
+ QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
QRegExp flagClose;
Token *openingToken;
@@ -194,7 +194,7 @@ void PcreParser::handleToken(RegexpToken token)
{
QRegExp rx;
QRegExp characterClass;
- QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.$]|\\\\[^a-zA-Z0-9]");
+ QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
QRegExp flagClose;
Token *openingToken;
@@ -363,6 +363,54 @@ void PcreParser::handleToken(RegexpToken token)
_pos += characterClass.matchedLength();
}
+ characterClass.setPattern(_syntax[T_BELL]);
+ if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
+ {
+ matched = true;
+ _tokens.push_back(new Token(T_BELL, _expression.mid(_pos, characterClass.matchedLength())));
+ _pos += characterClass.matchedLength();
+ }
+
+ characterClass.setPattern(_syntax[T_ESCAPE]);
+ if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
+ {
+ matched = true;
+ _tokens.push_back(new Token(T_ESCAPE, _expression.mid(_pos, characterClass.matchedLength())));
+ _pos += characterClass.matchedLength();
+ }
+
+ characterClass.setPattern(_syntax[T_FORM_FEED]);
+ if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
+ {
+ matched = true;
+ _tokens.push_back(new Token(T_FORM_FEED, _expression.mid(_pos, characterClass.matchedLength())));
+ _pos += characterClass.matchedLength();
+ }
+
+ characterClass.setPattern(_syntax[T_LINE_FEED]);
+ if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
+ {
+ matched = true;
+ _tokens.push_back(new Token(T_LINE_FEED, _expression.mid(_pos, characterClass.matchedLength())));
+ _pos += characterClass.matchedLength();
+ }
+
+ characterClass.setPattern(_syntax[T_HORIZONTAL_TAB]);
+ if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
+ {
+ matched = true;
+ _tokens.push_back(new Token(T_HORIZONTAL_TAB, _expression.mid(_pos, characterClass.matchedLength())));
+ _pos += characterClass.matchedLength();
+ }
+
+ characterClass.setPattern(_syntax[T_OCTAL_CHAR]);
+ if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
+ {
+ matched = true;
+ _tokens.push_back(new Token(T_OCTAL_CHAR, _expression.mid(_pos, characterClass.matchedLength())));
+ _pos += characterClass.matchedLength();
+ }
+
if(!matched)
{
_tokens.push_back(new Token(T_ERROR, _expression.mid(_pos, 1)));
@@ -91,7 +91,7 @@ void QtParser::handleToken(RegexpToken token)
{
QRegExp groupingClose(_syntax[T_GROUPING_CLOSE]);
QRegExp assertionClose(_syntax[T_ASSERTION_CLOSE]);
- QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.^$]|\\\\[^a-zA-Z0-9]");
+ QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
QRegExp bracketExpressionClose(_syntax[T_BRACKET_EXPRESSION_CLOSE]);
QRegExp characterClass("");

0 comments on commit d2b2674

Please sign in to comment.