Skip to content
This repository
Browse code

Better handling of character classes within the PCRE parser

  • Loading branch information...
commit d2b2674f1e064c92766c5386f466a14e465ce2f0 1 parent 0d3df24
Alex Elliott authored
18 GraphicsItems/bracketexpressiongraphicsitem.cpp
@@ -77,6 +77,24 @@ void BracketExpressionGraphicsItem::readTokens()
77 77 case T_NOT_WORD_BOUNDARY:
78 78 items.push_back(QString("Not Word Boundary"));
79 79 break;
  80 + case T_BELL:
  81 + items.push_back(QString("Bell Character"));
  82 + break;
  83 + case T_ESCAPE:
  84 + items.push_back(QString("Escape Character"));
  85 + break;
  86 + case T_FORM_FEED:
  87 + items.push_back(QString("Form Feed"));
  88 + break;
  89 + case T_LINE_FEED:
  90 + items.push_back(QString("Line Feed"));
  91 + break;
  92 + case T_HORIZONTAL_TAB:
  93 + items.push_back(QString("Tab Character"));
  94 + break;
  95 + case T_OCTAL_CHAR:
  96 + items.push_back(QString("Octal Character"));
  97 + break;
80 98 default:
81 99 qDebug() << "Unhandled token in bracket expression: " << token->value();
82 100 }
2  RegexModules/icuparser.cpp
@@ -198,7 +198,7 @@ void IcuParser::handleToken(RegexpToken token)
198 198 {
199 199 QRegExp rx;
200 200 QRegExp characterClass;
201   - QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.$]|\\\\[^a-zA-Z0-9]");
  201 + QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
202 202 QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
203 203 QRegExp flagClose;
204 204 Token *openingToken;
50 RegexModules/pcreparser.cpp
@@ -194,7 +194,7 @@ void PcreParser::handleToken(RegexpToken token)
194 194 {
195 195 QRegExp rx;
196 196 QRegExp characterClass;
197   - QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.$]|\\\\[^a-zA-Z0-9]");
  197 + QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
198 198 QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
199 199 QRegExp flagClose;
200 200 Token *openingToken;
@@ -363,6 +363,54 @@ void PcreParser::handleToken(RegexpToken token)
363 363 _pos += characterClass.matchedLength();
364 364 }
365 365
  366 + characterClass.setPattern(_syntax[T_BELL]);
  367 + if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
  368 + {
  369 + matched = true;
  370 + _tokens.push_back(new Token(T_BELL, _expression.mid(_pos, characterClass.matchedLength())));
  371 + _pos += characterClass.matchedLength();
  372 + }
  373 +
  374 + characterClass.setPattern(_syntax[T_ESCAPE]);
  375 + if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
  376 + {
  377 + matched = true;
  378 + _tokens.push_back(new Token(T_ESCAPE, _expression.mid(_pos, characterClass.matchedLength())));
  379 + _pos += characterClass.matchedLength();
  380 + }
  381 +
  382 + characterClass.setPattern(_syntax[T_FORM_FEED]);
  383 + if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
  384 + {
  385 + matched = true;
  386 + _tokens.push_back(new Token(T_FORM_FEED, _expression.mid(_pos, characterClass.matchedLength())));
  387 + _pos += characterClass.matchedLength();
  388 + }
  389 +
  390 + characterClass.setPattern(_syntax[T_LINE_FEED]);
  391 + if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
  392 + {
  393 + matched = true;
  394 + _tokens.push_back(new Token(T_LINE_FEED, _expression.mid(_pos, characterClass.matchedLength())));
  395 + _pos += characterClass.matchedLength();
  396 + }
  397 +
  398 + characterClass.setPattern(_syntax[T_HORIZONTAL_TAB]);
  399 + if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
  400 + {
  401 + matched = true;
  402 + _tokens.push_back(new Token(T_HORIZONTAL_TAB, _expression.mid(_pos, characterClass.matchedLength())));
  403 + _pos += characterClass.matchedLength();
  404 + }
  405 +
  406 + characterClass.setPattern(_syntax[T_OCTAL_CHAR]);
  407 + if(matched == false && characterClass.indexIn(_expression, _pos) == _pos)
  408 + {
  409 + matched = true;
  410 + _tokens.push_back(new Token(T_OCTAL_CHAR, _expression.mid(_pos, characterClass.matchedLength())));
  411 + _pos += characterClass.matchedLength();
  412 + }
  413 +
366 414 if(!matched)
367 415 {
368 416 _tokens.push_back(new Token(T_ERROR, _expression.mid(_pos, 1)));
2  RegexModules/qtparser.cpp
@@ -91,7 +91,7 @@ void QtParser::handleToken(RegexpToken token)
91 91 {
92 92 QRegExp groupingClose(_syntax[T_GROUPING_CLOSE]);
93 93 QRegExp assertionClose(_syntax[T_ASSERTION_CLOSE]);
94   - QRegExp bracketExpressionLiteral("[^\\\\[\\]()|?*+{}\\.^$]|\\\\[^a-zA-Z0-9]");
  94 + QRegExp bracketExpressionLiteral("[^\\\\[\\]\.]|\\\\[^a-zA-Z0-9]");
95 95 QRegExp bracketExpressionRange(_syntax[T_BRACKET_EXPRESSION_RANGE]);
96 96 QRegExp bracketExpressionClose(_syntax[T_BRACKET_EXPRESSION_CLOSE]);
97 97 QRegExp characterClass("");

0 comments on commit d2b2674

Please sign in to comment.
Something went wrong with that request. Please try again.