Skip to content

Commit

Permalink
Better handling of rules in lex scanners
Browse files Browse the repository at this point in the history
- handling of square brackets: an opening square bracket inside a pair of square brackets has no special meaning unless it is followed by a colon (resulting in `[:...:]`).
- handling of sharp brackets so that `<tst><<<WORD` is correctly handled
- better handling of escaped characters
- handling of `(?...)`

(Found during tests on some lex files available on Fossies).
  • Loading branch information
albert-github committed Mar 10, 2021
1 parent 9136e8c commit 36332ec
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 40 deletions.
64 changes: 45 additions & 19 deletions src/lexcode.l
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ struct lexcodeYY_state
QCString CCodeBuffer;
int startCCodeLine = -1;
int roundCount = 0;
int squareCount = 0;
bool insideCode = FALSE;
QCString delimiter;
QCString docBlockName;
Expand Down Expand Up @@ -114,12 +113,13 @@ LiteralStart "%{"{nl}
LiteralEnd "%}"{nl}
RulesStart "%%"{nl}
RulesEnd "%%"{nl}
RulesSharp "<"[^>]*">"
RulesSharp "<"[^>\n]*">"
RulesCurly "{"[^{}\n]*"}"
StartSquare "["
StartDouble "\""
StartRound "("
EscapeRulesCharOpen "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\{"|"\\ "
StartRoundQuest "(?"
EscapeRulesCharOpen "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\ "|"\\\\"
EscapeRulesCharClose "\\]"|"\\>"|"\\}"|"\\)"
EscapeRulesChar {EscapeRulesCharOpen}|{EscapeRulesCharClose}

Expand All @@ -134,14 +134,15 @@ CODE [cC][oO][dD][eE]
RAWBEGIN (u|U|L|u8)?R\"[^ \t\(\)\\]{0,16}"("
RAWEND ")"[^ \t\(\)\\]{0,16}\"
CHARLIT (("'"\\[0-7]{1,3}"'")|("'"\\."'")|("'"[^'\\\n]{1,4}"'"))
CHARCE "[:"[^:]*":]"

/* no comment start / end signs inside square brackets */
NCOMM [^/\*]
// C start comment
// C start comment
CCS "/\*"
// C end comment
CCE "*\/"
// Cpp comment
// Cpp comment
CPPC "/\/"
// doxygen start comment
DCOMM ("/\*!"|"/\**"|"/\/!"|"/\/\/")
Expand All @@ -160,6 +161,7 @@ NONLopt [^\n]*
%x RulesSquare
%x RulesRoundSquare
%x RulesRound
%x RulesRoundQuest
%x UserSection

%x TopSection
Expand Down Expand Up @@ -326,35 +328,53 @@ NONLopt [^\n]*
yyextra->rulesPatternBuffer += yytext;
}
<RulesPattern>{StartSquare} {
yyextra->squareCount++;
yyextra->rulesPatternBuffer += yytext;
yyextra->lastContext = YY_START;
BEGIN(RulesSquare);
}
<RulesSquare,RulesRoundSquare>"\\[" |
<RulesSquare,RulesRoundSquare>"\\]" {
<RulesSquare,RulesRoundSquare>{CHARCE} {
yyextra->rulesPatternBuffer += yytext;
}
<RulesSquare,RulesRoundSquare>"[" {
yyextra->squareCount++;
<RulesSquare,RulesRoundSquare>"\\[" |
<RulesSquare,RulesRoundSquare>"\\]" {
yyextra->rulesPatternBuffer += yytext;
}
<RulesSquare>"]" {
yyextra->squareCount--;
yyextra->rulesPatternBuffer += yytext;
if (!yyextra->squareCount) BEGIN(RulesPattern) ;
BEGIN(RulesPattern) ;
}
<RulesRoundSquare>"]" {
yyextra->squareCount--;
yyextra->rulesPatternBuffer += yytext;
if (!yyextra->squareCount) BEGIN(RulesRound) ;
BEGIN(RulesRound) ;
}
<RulesSquare,RulesRoundSquare>"\\\\" {
yyextra->rulesPatternBuffer += yytext;
}
<RulesSquare,RulesRoundSquare>. {
yyextra->rulesPatternBuffer += yytext;
}
<RulesPattern>{StartRoundQuest} {
yyextra->rulesPatternBuffer += yytext;
yyextra->lastContext = YY_START;
BEGIN(RulesRoundQuest);
}
<RulesRoundQuest>{nl} {
yyextra->rulesPatternBuffer += yytext;
if (!yyextra->rulesPatternBuffer.isEmpty())
{
startFontClass(yyscanner,"stringliteral");
codifyLines(yyscanner,yyextra->rulesPatternBuffer.data());
yyextra->rulesPatternBuffer.resize(0);
endFontClass(yyscanner);
}
}
<RulesRoundQuest>[^)] {
yyextra->rulesPatternBuffer += yytext;
}
<RulesRoundQuest>")" {
yyextra->rulesPatternBuffer += yytext;
BEGIN(yyextra->lastContext);
}
<RulesPattern>{StartRound} {
yyextra->roundCount++;
yyextra->rulesPatternBuffer += yytext;
Expand All @@ -365,16 +385,14 @@ NONLopt [^\n]*
yyextra->rulesPatternBuffer += yytext;
}
<RulesRound>{StartSquare} {
yyextra->squareCount++;
yyextra->rulesPatternBuffer += yytext;
BEGIN(RulesRoundSquare);
}
<RulesRound>{StartDouble} {
yyextra->rulesPatternBuffer += yytext;
BEGIN(RulesRoundDouble);
}
<RulesRound>"\\(" |
<RulesRound>"\\)" {
<RulesRound>{EscapeRulesChar} {
yyextra->rulesPatternBuffer += yytext;
}
<RulesRound>"(" {
Expand All @@ -386,6 +404,13 @@ NONLopt [^\n]*
yyextra->rulesPatternBuffer += yytext;
if (!yyextra->roundCount) BEGIN( yyextra->lastContext ) ;
}
<RulesRound>{nl} {
yyextra->rulesPatternBuffer += yytext;
yyextra->yyLineNr++;
}
<RulesRound>{ws} {
yyextra->rulesPatternBuffer += yytext;
}
<RulesRound>. {
yyextra->rulesPatternBuffer += yytext;
}
Expand Down Expand Up @@ -890,9 +915,10 @@ NONLopt [^\n]*
yyextra->CCodeBuffer += yytext;
yyextra->yyLineNr++;
}
/*
/*
<*>. { fprintf(stderr,"Lex code scanner Def rule for %s: #%s#\n",stateToString(YY_START),yytext);}
*/
<*>{nl} { fprintf(stderr,"Lex code scanner Def rule for newline %s: #%s#\n",stateToString(YY_START),yytext); yyextra->yyLineNr++;}
*/
<*><<EOF>> {
handleCCode(yyscanner);
yyterminate();
Expand Down
60 changes: 39 additions & 21 deletions src/lexscanner.l
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ struct lexscannerYY_state
QCString prefix = "yy";
QCString CCodeBuffer;
int roundCount = 0;
int squareCount = 0;

QCString yyFileName;
ClangTUParser *clangParser = 0;
Expand Down Expand Up @@ -111,12 +110,13 @@ LiteralEnd "%}"{nl}
OptPrefix "%option"{ws}+"prefix"{ws}*"="{ws}*
RulesStart "%%"{nl}
RulesEnd "%%"{nl}
RulesSharp "<"[^>]*">"
RulesSharp "<"[^>\n]*">"
RulesCurly "{"[^{}\n]*"}"
StartSquare "["
StartDouble "\""
StartRound "("
EscapeRulesCharOpen "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\{"|"\\ "
StartRoundQuest "(?"
EscapeRulesCharOpen "\\["|"\\<"|"\\{"|"\\("|"\\\""|"\\ "|"\\\\"
EscapeRulesCharClose "\\]"|"\\>"|"\\}"|"\\)"
EscapeRulesChar {EscapeRulesCharOpen}|{EscapeRulesCharClose}

Expand All @@ -131,14 +131,14 @@ CODE [cC][oO][dD][eE]
RAWBEGIN (u|U|L|u8)?R\"[^ \t\(\)\\]{0,16}"("
RAWEND ")"[^ \t\(\)\\]{0,16}\"
CHARLIT (("'"\\[0-7]{1,3}"'")|("'"\\."'")|("'"[^'\\\n]{1,4}"'"))

CHARCE "[:"[^:]*":]"
/* no comment start / end signs inside square brackets */
NCOMM [^/\*]
// C start comment
// C start comment
CCS "/\*"
// C end comment
CCE "*\/"
// Cpp comment
// Cpp comment
CPPC "/\/"
// doxygen start comment
DCOMM ("/\*!"|"/\**"|"/\/!"|"/\/\/")
Expand All @@ -158,6 +158,7 @@ NONLopt [^\n]*
%x RulesSquare
%x RulesRoundSquare
%x RulesRound
%x RulesRoundQuest
%x UserSection

%x TopSection
Expand Down Expand Up @@ -309,35 +310,46 @@ NONLopt [^\n]*
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
}
<RulesPattern>{StartSquare} {
yyextra->squareCount++;
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
yyextra->lastContext = YY_START;
BEGIN(RulesSquare);
}
<RulesSquare,RulesRoundSquare>"\\[" |
<RulesSquare,RulesRoundSquare>"\\]" {
<RulesSquare,RulesRoundSquare>{CHARCE} {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
}
<RulesSquare,RulesRoundSquare>"[" {
yyextra->squareCount++;
<RulesSquare,RulesRoundSquare>"\\[" |
<RulesSquare,RulesRoundSquare>"\\]" {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
}
<RulesSquare>"]" {
yyextra->squareCount--;
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
if (!yyextra->squareCount) BEGIN(RulesPattern);
BEGIN(RulesPattern);
}
<RulesRoundSquare>"]" {
yyextra->squareCount--;
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
if (!yyextra->squareCount) BEGIN(RulesRound) ;
BEGIN(RulesRound) ;
}
<RulesSquare,RulesRoundSquare>"\\\\" {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
}
<RulesSquare,RulesRoundSquare>. {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
}
<RulesPattern>{StartRoundQuest} {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
yyextra->lastContext = YY_START;
BEGIN(RulesRoundQuest);
}
<RulesRoundQuest>{nl} {
yyextra->CCodeBuffer += "\n";
}
<RulesRoundQuest>[^)] {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
}
<RulesRoundQuest>")" {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
BEGIN(yyextra->lastContext);
}
<RulesPattern>{StartRound} {
yyextra->roundCount++;
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
Expand All @@ -348,16 +360,14 @@ NONLopt [^\n]*
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
}
<RulesRound>{StartSquare} {
yyextra->squareCount++;
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
BEGIN(RulesRoundSquare);
}
<RulesRound>{StartDouble} {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
BEGIN(RulesRoundDouble);
}
<RulesRound>"\\(" |
<RulesRound>"\\)" {
<RulesRound>{EscapeRulesChar} {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
}
<RulesRound>"(" {
Expand All @@ -369,6 +379,12 @@ NONLopt [^\n]*
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
if (!yyextra->roundCount) BEGIN( yyextra->lastContext ) ;
}
<RulesRound>{nl} {
yyextra->CCodeBuffer += "\n";
}
<RulesRound>{ws} {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
}
<RulesRound>. {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
}
Expand All @@ -391,7 +407,7 @@ NONLopt [^\n]*
<RulesPattern>"\\\\" {
yyextra->CCodeBuffer += repeatChar(' ', yyleng);
}
<RulesPattern>{CCS} {
<RulesPattern>{CCS} {
yyextra->CCodeBuffer += yytext;
yyextra->lastContext = YY_START;
BEGIN(COMMENT);
Expand Down Expand Up @@ -835,9 +851,11 @@ NONLopt [^\n]*
yyextra->CCodeBuffer += yytext;
}

/*

/*
<*>. { fprintf(stderr,"Lex scanner Def rule for %s: #%s#\n",stateToString(YY_START),yytext);}
*/
<*>{nl} { fprintf(stderr,"Lex scanner Def rule for newline %s: #%s#\n",stateToString(YY_START),yytext);}
*/
<*><<EOF>> {
handleCCode(yyscanner);
yyterminate();
Expand Down

0 comments on commit 36332ec

Please sign in to comment.