From 06595257b531e855680cf876f6693bcc1d5b319d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Fri, 21 Dec 2018 21:36:08 +0100 Subject: [PATCH 1/9] Add "l" prefix to functions in lcpp.c/h This is to avoid clash with cpreprocessor.c/h used by the new cxx parser. Merging lcpp.c/h with cpreprocessor.c/h would be difficult (at least for now) because of the differences in c.c so keep them separate for now. --- ctags/parsers/geany_c.c | 126 +++++++++++++++++----------------- ctags/parsers/geany_lcpp.c | 48 ++++++------- ctags/parsers/geany_lcpp.h | 32 ++++----- ctags/parsers/geany_verilog.c | 2 +- 4 files changed, 104 insertions(+), 104 deletions(-) diff --git a/ctags/parsers/geany_c.c b/ctags/parsers/geany_c.c index e5f1c1a301..cf543be836 100644 --- a/ctags/parsers/geany_c.c +++ b/ctags/parsers/geany_c.c @@ -942,7 +942,7 @@ static void reinitStatement (statementInfo *const st, const bool partial) initToken (st->context); initToken (st->blockName); vStringClear (st->parentClasses); - cppClearSignature (); + lcppClearSignature (); /* Init member info. */ if (! 
partial) @@ -1210,7 +1210,7 @@ static void addOtherFields (tagEntryInfo* const tag, const tagType type, if ((true == st->gotArgs) && ((TAG_FUNCTION == type) || (TAG_METHOD == type) || (TAG_PROTOTYPE == type))) { - tag->extensionFields.signature = cppGetSignature (); + tag->extensionFields.signature = lcppGetSignature (); } break; } @@ -1589,7 +1589,7 @@ static int skipToOneOf (const char *const chars) { int c; do - c = cppGetc (); + c = lcppGetc (); while (c != EOF && c != '\0' && strchr (chars, c) == NULL); return c; @@ -1603,7 +1603,7 @@ static int skipToNonWhite (void) do { - c = cppGetc (); + c = lcppGetc (); } while (isspace (c)); @@ -1617,12 +1617,12 @@ static void skipToFormattedBraceMatch (void) { int c, next; - c = cppGetc (); - next = cppGetc (); + c = lcppGetc (); + next = lcppGetc (); while (c != EOF && (c != '\n' || next != '}')) { c = next; - next = cppGetc (); + next = lcppGetc (); } } @@ -1636,20 +1636,20 @@ static void skipToFormattedBraceMatch (void) static void skipToMatch (const char *const pair) { const bool braceMatching = (bool) (strcmp ("{}", pair) == 0); - const bool braceFormatting = (bool) (cppIsBraceFormat () && braceMatching); - const unsigned int initialLevel = cppGetDirectiveNestLevel (); + const bool braceFormatting = (bool) (lcppIsBraceFormat () && braceMatching); + const unsigned int initialLevel = lcppGetDirectiveNestLevel (); const int begin = pair [0], end = pair [1]; const unsigned long inputLineNumber = getInputLineNumber (); int matchLevel = 1; int c = '\0'; if (isInputLanguage(Lang_d) && pair[0] == '<') return; /* ignore e.g. 
Foo!(x < 2) */ - while (matchLevel > 0 && (c = cppGetc ()) != EOF) + while (matchLevel > 0 && (c = lcppGetc ()) != EOF) { if (c == begin) { ++matchLevel; - if (braceFormatting && cppGetDirectiveNestLevel () != initialLevel) + if (braceFormatting && lcppGetDirectiveNestLevel () != initialLevel) { skipToFormattedBraceMatch (); break; @@ -1658,7 +1658,7 @@ static void skipToMatch (const char *const pair) else if (c == end) { --matchLevel; - if (braceFormatting && cppGetDirectiveNestLevel () != initialLevel) + if (braceFormatting && lcppGetDirectiveNestLevel () != initialLevel) { skipToFormattedBraceMatch (); break; @@ -1671,7 +1671,7 @@ static void skipToMatch (const char *const pair) else if (isInputLanguage (Lang_cpp) && begin == '<' && (c == ';' || c == '{')) { - cppUngetc (c); + lcppUngetc (c); break; } } @@ -1693,7 +1693,7 @@ static void skipParens (void) if (c == '(') skipToMatch ("()"); else - cppUngetc (c); + lcppUngetc (c); } static void skipBraces (void) @@ -1703,7 +1703,7 @@ static void skipBraces (void) if (c == '{') skipToMatch ("{}"); else - cppUngetc (c); + lcppUngetc (c); } static keywordId analyzeKeyword (const char *const name) @@ -1769,9 +1769,9 @@ static void readIdentifier (tokenInfo *const token, const int firstChar) do { vStringPut (name, c); - c = cppGetc (); - } while (cppIsident (c) || (isInputLanguage (Lang_vala) && '.' == c)); - cppUngetc (c); /* unget non-identifier character */ + c = lcppGetc (); + } while (lcppIsident (c) || (isInputLanguage (Lang_vala) && '.' == c)); + lcppUngetc (c); /* unget non-identifier character */ /* Vala supports '?' at end of a type (with or without whitespace before) for nullable types */ if (isInputLanguage (Lang_vala)) @@ -1780,7 +1780,7 @@ static void readIdentifier (tokenInfo *const token, const int firstChar) if ('?' 
== c) vStringPut (name, c); else - cppUngetc (c); + lcppUngetc (c); } analyzeIdentifier (token); @@ -1793,12 +1793,12 @@ static void readPackageName (tokenInfo *const token, const int firstChar) initToken (token); - while (cppIsident (c) || c == '.') + while (lcppIsident (c) || c == '.') { vStringPut (name, c); - c = cppGetc (); + c = lcppGetc (); } - cppUngetc (c); /* unget non-package character */ + lcppUngetc (c); /* unget non-package character */ } static void readPackageOrNamespace (statementInfo *const st, const declType declaration) @@ -1862,7 +1862,7 @@ static void readOperator (statementInfo *const st) { /* Verify whether this is a valid function call (i.e. "()") operator. */ - if (cppGetc () == ')') + if (lcppGetc () == ')') { vStringPut (name, ' '); /* always separate operator from keyword */ c = skipToNonWhite (); @@ -1872,10 +1872,10 @@ static void readOperator (statementInfo *const st) else { skipToMatch ("()"); - c = cppGetc (); + c = lcppGetc (); } } - else if (cppIsident1 (c)) + else if (lcppIsident1 (c)) { /* Handle "new" and "delete" operators, and conversion functions * (per 13.3.1.1.2 [2] of the C++ spec). @@ -1894,7 +1894,7 @@ static void readOperator (statementInfo *const st) } vStringPut (name, c); } - c = cppGetc (); + c = lcppGetc (); } while (! 
isOneOf (c, "(;") && c != EOF); } else if (isOneOf (c, acceptable)) @@ -1903,11 +1903,11 @@ static void readOperator (statementInfo *const st) do { vStringPut (name, c); - c = cppGetc (); + c = lcppGetc (); } while (isOneOf (c, acceptable)); } - cppUngetc (c); + lcppUngetc (c); token->type = TOKEN_NAME; token->keyword = KEYWORD_NONE; @@ -1934,7 +1934,7 @@ static void setAccess (statementInfo *const st, const accessType laccess) if (c == ':') reinitStatementWithToken (st, prevToken (st, 1), false); else - cppUngetc (c); + lcppUngetc (c); st->member.accessDefault = laccess; } @@ -1945,14 +1945,14 @@ static void setAccess (statementInfo *const st, const accessType laccess) static void discardTypeList (tokenInfo *const token) { int c = skipToNonWhite (); - while (cppIsident1 (c)) + while (lcppIsident1 (c)) { readIdentifier (token, c); c = skipToNonWhite (); if (c == '.' || c == ',') c = skipToNonWhite (); } - cppUngetc (c); + lcppUngetc (c); } static void addParentClass (statementInfo *const st, tokenInfo *const token) @@ -1974,7 +1974,7 @@ static void readParents (statementInfo *const st, const int qualifier) do { c = skipToNonWhite (); - if (cppIsident1 (c)) + if (lcppIsident1 (c)) { readIdentifier (token, c); if (isType (token, TOKEN_NAME)) @@ -1995,7 +1995,7 @@ static void readParents (statementInfo *const st, const int qualifier) initToken (parent); } } while (c != '{' && c != EOF); - cppUngetc (c); + lcppUngetc (c); deleteToken (parent); deleteToken (token); } @@ -2131,7 +2131,7 @@ static void skipMemIntializerList (tokenInfo *const token) do { c = skipToNonWhite (); - while (cppIsident1 (c) || c == ':') + while (lcppIsident1 (c) || c == ':') { if (c != ':') readIdentifier (token, c); @@ -2148,7 +2148,7 @@ static void skipMemIntializerList (tokenInfo *const token) c = skipToNonWhite (); } } while (c == ','); - cppUngetc (c); + lcppUngetc (c); } static void skipMacro (statementInfo *const st) @@ -2215,9 +2215,9 @@ static bool skipPostArgumentStuff ( case ')': 
break; case ':': skipMemIntializerList (token);break; /* ctor-initializer */ case '[': skipToMatch ("[]"); break; - case '=': cppUngetc (c); end = true; break; - case '{': cppUngetc (c); end = true; break; - case '}': cppUngetc (c); end = true; break; + case '=': lcppUngetc (c); end = true; break; + case '{': lcppUngetc (c); end = true; break; + case '}': lcppUngetc (c); end = true; break; case '(': { @@ -2231,7 +2231,7 @@ static bool skipPostArgumentStuff ( { if (parameters == 0 || elementCount < 2) { - cppUngetc (c); + lcppUngetc (c); end = true; } else if (--parameters == 0) @@ -2241,7 +2241,7 @@ static bool skipPostArgumentStuff ( default: { - if (cppIsident1 (c)) + if (lcppIsident1 (c)) { readIdentifier (token, c); if (isInputLanguage(Lang_d) && isDPostArgumentToken(token)) @@ -2320,7 +2320,7 @@ static void skipJavaThrows (statementInfo *const st) tokenInfo *const token = activeToken (st); int c = skipToNonWhite (); - if (cppIsident1 (c)) + if (lcppIsident1 (c)) { readIdentifier (token, c); if (token->keyword == KEYWORD_THROWS) @@ -2328,7 +2328,7 @@ static void skipJavaThrows (statementInfo *const st) do { c = skipToNonWhite (); - if (cppIsident1 (c)) + if (lcppIsident1 (c)) { readIdentifier (token, c); c = skipToNonWhite (); @@ -2336,7 +2336,7 @@ static void skipJavaThrows (statementInfo *const st) } while (c == '.' 
|| c == ','); } } - cppUngetc (c); + lcppUngetc (c); setToken (st, TOKEN_NONE); } @@ -2345,7 +2345,7 @@ static void skipValaPostParens (statementInfo *const st) tokenInfo *const token = activeToken (st); int c = skipToNonWhite (); - while (cppIsident1 (c)) + while (lcppIsident1 (c)) { readIdentifier (token, c); if (token->keyword == KEYWORD_ATTRIBUTE) @@ -2359,7 +2359,7 @@ static void skipValaPostParens (statementInfo *const st) do { c = skipToNonWhite (); - if (cppIsident1 (c)) + if (lcppIsident1 (c)) { readIdentifier (token, c); c = skipToNonWhite (); @@ -2369,7 +2369,7 @@ static void skipValaPostParens (statementInfo *const st) else break; } - cppUngetc (c); + lcppUngetc (c); setToken (st, TOKEN_NONE); } @@ -2378,7 +2378,7 @@ static void analyzePostParens (statementInfo *const st, parenInfo *const info) const unsigned long inputLineNumber = getInputLineNumber (); int c = skipToNonWhite (); - cppUngetc (c); + lcppUngetc (c); if (isOneOf (c, "{;,=")) ; else if (isInputLanguage (Lang_java)) @@ -2405,7 +2405,7 @@ static int parseParens (statementInfo *const st, parenInfo *const info) bool firstChar = true; int nextChar = '\0'; - cppStartCollectingSignature (); + lcppStartCollectingSignature (); info->parameterCount = 1; do @@ -2482,7 +2482,7 @@ static int parseParens (statementInfo *const st, parenInfo *const info) if (firstChar) { info->isNameCandidate = false; - cppUngetc (c); + lcppUngetc (c); skipMacro (st); depth = 0; } @@ -2498,8 +2498,8 @@ static int parseParens (statementInfo *const st, parenInfo *const info) } else { - cppUngetc (c); - cppUngetc ('('); + lcppUngetc (c); + lcppUngetc ('('); info->nestedArgs = true; } } @@ -2510,7 +2510,7 @@ static int parseParens (statementInfo *const st, parenInfo *const info) default: { - if (cppIsident1 (c)) + if (lcppIsident1 (c)) { if (++identifierCount > 1) info->isKnrParamList = false; @@ -2548,7 +2548,7 @@ static int parseParens (statementInfo *const st, parenInfo *const info) --depth; } - cppStopCollectingSignature 
(); + lcppStopCollectingSignature (); if (! info->isNameCandidate) initToken (token); @@ -2579,7 +2579,7 @@ static void analyzeParens (statementInfo *const st) initParenInfo (&info); parseParens (st, &info); c = skipToNonWhite (); - cppUngetc (c); + lcppUngetc (c); if (info.invalidContents) { reinitStatement (st, false); @@ -2650,7 +2650,7 @@ static bool inheritingDeclaration (declType decl) static void processColon (statementInfo *const st) { - int c = cppGetc (); + int c = lcppGetc (); const bool doubleColon = (bool) (c == ':'); if (doubleColon) @@ -2660,7 +2660,7 @@ static void processColon (statementInfo *const st) } else { - cppUngetc (c); + lcppUngetc (c); if ((isInputLanguage (Lang_cpp) || isInputLanguage (Lang_csharp) || isInputLanguage (Lang_d) || isInputLanguage (Lang_vala)) && inheritingDeclaration (st->declaration)) @@ -2720,7 +2720,7 @@ static int skipInitializer (statementInfo *const st) case '}': if (insideEnumBody (st)) done = true; - else if (! cppIsBraceFormat ()) + else if (! 
lcppIsBraceFormat ()) { verbose ("%s: unexpected closing brace at line %lu\n", getInputFileName (), getInputLineNumber ()); @@ -2745,7 +2745,7 @@ static void processInitializer (statementInfo *const st) setToken (st, TOKEN_COMMA); else if (c == '}' && inEnumBody) { - cppUngetc (c); + lcppUngetc (c); setToken (st, TOKEN_COMMA); } if (st->scope == SCOPE_EXTERN) @@ -2765,7 +2765,7 @@ static void parseGeneralToken (statementInfo *const st, const int c) { const tokenInfo *const prev = prevToken (st, 1); - if (cppIsident1(c)) + if (lcppIsident1(c)) { parseIdentifier (st, c); if (isType (st->context, TOKEN_NAME) && @@ -2904,11 +2904,11 @@ static void checkStatementEnd (statementInfo *const st) reinitStatementWithToken (st, activeToken (st), comma); DebugStatement ( if (debug (DEBUG_PARSE)) printf (""); ) - cppEndStatement (); + lcppEndStatement (); } else { - cppBeginStatement (); + lcppBeginStatement (); advanceToken (st); } } @@ -3130,7 +3130,7 @@ static rescanReason findCTags (const unsigned int passCount) Assert (passCount < 3); - cppInit ((bool) (passCount > 1), isInputLanguage (Lang_csharp), isInputLanguage(Lang_cpp), + lcppInit ((bool) (passCount > 1), isInputLanguage (Lang_csharp), isInputLanguage(Lang_cpp), CK_DEFINE); exception = (exception_t) setjmp (Exception); @@ -3149,7 +3149,7 @@ static rescanReason findCTags (const unsigned int passCount) getInputFileName ()); } } - cppTerminate (); + lcppTerminate (); return rescan; } diff --git a/ctags/parsers/geany_lcpp.c b/ctags/parsers/geany_lcpp.c index f771a61f78..45f4fce0ef 100644 --- a/ctags/parsers/geany_lcpp.c +++ b/ctags/parsers/geany_lcpp.c @@ -106,17 +106,17 @@ static cppState Cpp = { * FUNCTION DEFINITIONS */ -extern bool cppIsBraceFormat (void) +extern bool lcppIsBraceFormat (void) { return BraceFormat; } -extern unsigned int cppGetDirectiveNestLevel (void) +extern unsigned int lcppGetDirectiveNestLevel (void) { return Cpp.directive.nestLevel; } -extern void cppInit (const bool state, const bool 
hasAtLiteralStrings, +extern void lcppInit (const bool state, const bool hasAtLiteralStrings, const bool hasCxxRawLiteralStrings, int defineMacroKindIndex) { @@ -141,7 +141,7 @@ extern void cppInit (const bool state, const bool hasAtLiteralStrings, Cpp.directive.name = vStringNewOrClear (Cpp.directive.name); } -extern void cppTerminate (void) +extern void lcppTerminate (void) { if (Cpp.directive.name != NULL) { @@ -150,12 +150,12 @@ extern void cppTerminate (void) } } -extern void cppBeginStatement (void) +extern void lcppBeginStatement (void) { Cpp.resolveRequired = true; } -extern void cppEndStatement (void) +extern void lcppEndStatement (void) { Cpp.resolveRequired = false; } @@ -170,7 +170,7 @@ extern void cppEndStatement (void) /* This puts a character back into the input queue for the input File. * Up to two characters may be ungotten. */ -extern void cppUngetc (const int c) +extern void lcppUngetc (const int c) { Assert (Cpp.ungetch2 == '\0'); Cpp.ungetch2 = Cpp.ungetch; @@ -228,7 +228,7 @@ static void readIdentifier (int c, vString *const name) { vStringPut (name, c); c = getcAndCollect (); - } while (c != EOF && cppIsident (c)); + } while (c != EOF && lcppIsident (c)); ungetcAndCollect (c); } @@ -342,7 +342,7 @@ static int makeDefineTag (const char *const name, bool parameterized, bool undef e.isFileScope = isFileScope; e.truncateLineAfterTag = true; if (parameterized) - e.extensionFields.signature = cppGetSignature (); + e.extensionFields.signature = lcppGetSignature (); makeTagEntry (&e); if (parameterized) eFree((char *) e.extensionFields.signature); @@ -354,7 +354,7 @@ static int directiveDefine (const int c, bool undef) { int r = CORK_NIL; - if (cppIsident1 (c)) + if (lcppIsident1 (c)) { bool parameterized; int nc; @@ -364,7 +364,7 @@ static int directiveDefine (const int c, bool undef) parameterized = (nc == '('); if (parameterized) { - cppStartCollectingSignature (); + lcppStartCollectingSignature (); while (nc != EOF) { int lastC = nc; @@ -372,7 
+372,7 @@ static int directiveDefine (const int c, bool undef) if (nc == '\n' && lastC != '\\') break; } - cppStopCollectingSignature (); + lcppStopCollectingSignature (); } ungetcAndCollect (nc); if (! isIgnore ()) @@ -396,7 +396,7 @@ static void directiveUndef (const int c) static void directivePragma (int c) { - if (cppIsident1 (c)) + if (lcppIsident1 (c)) { readIdentifier (c, Cpp.directive.name); if (stringMatch (vStringValue (Cpp.directive.name), "weak")) @@ -406,7 +406,7 @@ static void directivePragma (int c) { c = getcAndCollect (); } while (c == SPACE); - if (cppIsident1 (c)) + if (lcppIsident1 (c)) { readIdentifier (c, Cpp.directive.name); makeDefineTag (vStringValue (Cpp.directive.name), NULL, false); @@ -507,7 +507,7 @@ static Comment isComment (void) /* Skips over a C style comment. According to ANSI specification a comment * is treated as white space, so we perform this substitution. */ -int cppSkipOverCComment (void) +int lcppSkipOverCComment (void) { int c = getcAndCollect (); @@ -669,7 +669,7 @@ static int skipToEndOfChar (void) * quoted strings. In short, strip anything which places a burden upon * the tokenizer. */ -extern int cppGetc (void) +extern int lcppGetc (void) { bool directive = false; bool ignore = false; @@ -735,7 +735,7 @@ extern int cppGetc (void) const Comment comment = isComment (); if (comment == COMMENT_C) - c = cppSkipOverCComment (); + c = lcppSkipOverCComment (); else if (comment == COMMENT_CPLUS) { c = skipOverCplusComment (); @@ -859,9 +859,9 @@ extern int cppGetc (void) int prev2 = getNthPrevCFromInputFile (2, '\0'); int prev3 = getNthPrevCFromInputFile (3, '\0'); - if (! cppIsident (prev) || - (! cppIsident (prev2) && (prev == 'L' || prev == 'u' || prev == 'U')) || - (! cppIsident (prev3) && (prev2 == 'u' && prev == '8'))) + if (! lcppIsident (prev) || + (! lcppIsident (prev2) && (prev == 'L' || prev == 'u' || prev == 'U')) || + (! 
lcppIsident (prev3) && (prev2 == 'u' && prev == '8'))) { int next = getcAndCollect (); if (next != DOUBLE_QUOTE) @@ -977,7 +977,7 @@ static void stripCodeBuffer(char *buf) return; } -extern char *cppGetSignature(void) +extern char *lcppGetSignature(void) { char *start, *end; int level; @@ -1000,19 +1000,19 @@ extern char *cppGetSignature(void) return start; } -extern void cppStartCollectingSignature (void) +extern void lcppStartCollectingSignature (void) { signature = vStringNewOrClear (signature); vStringPut (signature, '('); collectingSignature = true; } -extern void cppStopCollectingSignature (void) +extern void lcppStopCollectingSignature (void) { collectingSignature = false; } -extern void cppClearSignature (void) +extern void lcppClearSignature (void) { signature = vStringNewOrClear (signature); collectingSignature = false; diff --git a/ctags/parsers/geany_lcpp.h b/ctags/parsers/geany_lcpp.h index 73683dc3a4..a60210d541 100644 --- a/ctags/parsers/geany_lcpp.h +++ b/ctags/parsers/geany_lcpp.h @@ -21,15 +21,15 @@ /* Is the character valid as a character of a C identifier? * VMS allows '$' in identifiers. */ -#define cppIsident(c) (isalnum(c) || (c) == '_' || (c) == '$') +#define lcppIsident(c) (isalnum(c) || (c) == '_' || (c) == '$') /* Is the character valid as the first character of a C identifier? * C++ allows '~' in destructors. * VMS allows '$' in identifiers. * Vala allows '@' in identifiers. 
*/ -#define cppIsident1(c) ( ((c >= 0) && (c < 0x80) && isalpha(c)) \ - || (c) == '_' || (c) == '~' || (c) == '$' || (c) == '@') +#define lcppIsident1(c) ( ((c >= 0) && (c < 0x80) && isalpha(c)) \ + || (c) == '_' || (c) == '~' || (c) == '$' || (c) == '@') /* NOTE about isident1 profitability Doing the same as isascii before passing value to isalpha @@ -58,24 +58,24 @@ /* * FUNCTION PROTOTYPES */ -extern bool cppIsBraceFormat (void); -extern unsigned int cppGetDirectiveNestLevel (void); +extern bool lcppIsBraceFormat (void); +extern unsigned int lcppGetDirectiveNestLevel (void); -extern void cppInit (const bool state, +extern void lcppInit (const bool state, const bool hasAtLiteralStrings, const bool hasCxxRawLiteralStrings, int defineMacroKindIndex); -extern void cppTerminate (void); -extern void cppBeginStatement (void); -extern void cppEndStatement (void); -extern void cppUngetc (const int c); -extern int cppGetc (void); -extern int cppSkipOverCComment (void); +extern void lcppTerminate (void); +extern void lcppBeginStatement (void); +extern void lcppEndStatement (void); +extern void lcppUngetc (const int c); +extern int lcppGetc (void); +extern int lcppSkipOverCComment (void); -extern char *cppGetSignature (void); -extern void cppStartCollectingSignature (void); -extern void cppStopCollectingSignature (void); -extern void cppClearSignature (void); +extern char *lcppGetSignature (void); +extern void lcppStartCollectingSignature (void); +extern void lcppStopCollectingSignature (void); +extern void lcppClearSignature (void); extern bool cppIsIgnoreToken (const char *const name, bool *const pIgnoreParens, diff --git a/ctags/parsers/geany_verilog.c b/ctags/parsers/geany_verilog.c index ca01b99c7b..6bb71b5c10 100644 --- a/ctags/parsers/geany_verilog.c +++ b/ctags/parsers/geany_verilog.c @@ -140,7 +140,7 @@ static int vGetc (void) } else if (c2 == '*') /* strip block comment */ { - c = cppSkipOverCComment(); + c = lcppSkipOverCComment(); } else { From 
4bcdbcb73f23f72fd65963d9bb8de3010590ad7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Tue, 9 Nov 2021 15:48:31 +0100 Subject: [PATCH 2/9] Rename C/C++ parsers to "Old" As a result, when we copy the new cxx parser, we don't have clashes of these symbols from the two different parsers. --- ctags/parsers/geany_c.c | 4 ++-- src/tagmanager/tm_parsers.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ctags/parsers/geany_c.c b/ctags/parsers/geany_c.c index cf543be836..06c6b00a84 100644 --- a/ctags/parsers/geany_c.c +++ b/ctags/parsers/geany_c.c @@ -3234,7 +3234,7 @@ static void initializeValaParser (const langType language) addKeyword ("requires", language, KEYWORD_ATTRIBUTE); /* ignore */ } -extern parserDefinition* CParser (void) +extern parserDefinition* CParserOld (void) { static const char *const extensions [] = { "c", "pc", "sc", NULL }; parserDefinition* def = parserNew ("C"); @@ -3246,7 +3246,7 @@ extern parserDefinition* CParser (void) return def; } -extern parserDefinition* CppParser (void) +extern parserDefinition* CppParserOld (void) { static const char *const extensions [] = { "c++", "cc", "cp", "cpp", "cxx", "h", "h++", "hh", "hp", "hpp", "hxx", diff --git a/src/tagmanager/tm_parsers.h b/src/tagmanager/tm_parsers.h index 3c552d8a00..1bbe203f33 100644 --- a/src/tagmanager/tm_parsers.h +++ b/src/tagmanager/tm_parsers.h @@ -14,8 +14,8 @@ /* Keep in sync with tm_parser.h */ #define EXTERNAL_PARSER_LIST \ - CParser, \ - CppParser, \ + CParserOld, \ + CppParserOld, \ JavaParser, \ MakefileParser, \ PascalParser, \ From aa3ab44bc31fa712fd0dde09e940b0f28540d851 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Tue, 9 Nov 2021 16:06:09 +0100 Subject: [PATCH 3/9] Add the new cxx parser This patch only makes the parser compile, it doesn't enable it yet. 
--- ctags/Makefile.am | 32 + ctags/parsers/cpreprocessor.c | 2297 ++++++++++++++++++++ ctags/parsers/cpreprocessor.h | 137 ++ ctags/parsers/cxx/cxx.c | 163 ++ ctags/parsers/cxx/cxx_debug.c | 182 ++ ctags/parsers/cxx/cxx_debug.h | 63 + ctags/parsers/cxx/cxx_debug_type.c | 54 + ctags/parsers/cxx/cxx_keyword.c | 639 ++++++ ctags/parsers/cxx/cxx_keyword.h | 178 ++ ctags/parsers/cxx/cxx_parser.c | 2023 +++++++++++++++++ ctags/parsers/cxx/cxx_parser.h | 27 + ctags/parsers/cxx/cxx_parser_block.c | 804 +++++++ ctags/parsers/cxx/cxx_parser_function.c | 2281 +++++++++++++++++++ ctags/parsers/cxx/cxx_parser_internal.h | 388 ++++ ctags/parsers/cxx/cxx_parser_lambda.c | 332 +++ ctags/parsers/cxx/cxx_parser_namespace.c | 345 +++ ctags/parsers/cxx/cxx_parser_template.c | 858 ++++++++ ctags/parsers/cxx/cxx_parser_tokenizer.c | 1689 ++++++++++++++ ctags/parsers/cxx/cxx_parser_typedef.c | 491 +++++ ctags/parsers/cxx/cxx_parser_using.c | 170 ++ ctags/parsers/cxx/cxx_parser_variable.c | 917 ++++++++ ctags/parsers/cxx/cxx_qtmoc.c | 342 +++ ctags/parsers/cxx/cxx_scope.c | 276 +++ ctags/parsers/cxx/cxx_scope.h | 86 + ctags/parsers/cxx/cxx_subparser.c | 125 ++ ctags/parsers/cxx/cxx_subparser.h | 45 + ctags/parsers/cxx/cxx_subparser_internal.h | 27 + ctags/parsers/cxx/cxx_tag.c | 700 ++++++ ctags/parsers/cxx/cxx_tag.h | 202 ++ ctags/parsers/cxx/cxx_token.c | 185 ++ ctags/parsers/cxx/cxx_token.h | 124 ++ ctags/parsers/cxx/cxx_token_chain.c | 1220 +++++++++++ ctags/parsers/cxx/cxx_token_chain.h | 288 +++ 33 files changed, 17690 insertions(+) create mode 100644 ctags/parsers/cpreprocessor.c create mode 100644 ctags/parsers/cpreprocessor.h create mode 100644 ctags/parsers/cxx/cxx.c create mode 100644 ctags/parsers/cxx/cxx_debug.c create mode 100644 ctags/parsers/cxx/cxx_debug.h create mode 100644 ctags/parsers/cxx/cxx_debug_type.c create mode 100644 ctags/parsers/cxx/cxx_keyword.c create mode 100644 ctags/parsers/cxx/cxx_keyword.h create mode 100644 ctags/parsers/cxx/cxx_parser.c create mode 
100644 ctags/parsers/cxx/cxx_parser.h create mode 100644 ctags/parsers/cxx/cxx_parser_block.c create mode 100644 ctags/parsers/cxx/cxx_parser_function.c create mode 100644 ctags/parsers/cxx/cxx_parser_internal.h create mode 100644 ctags/parsers/cxx/cxx_parser_lambda.c create mode 100644 ctags/parsers/cxx/cxx_parser_namespace.c create mode 100644 ctags/parsers/cxx/cxx_parser_template.c create mode 100644 ctags/parsers/cxx/cxx_parser_tokenizer.c create mode 100644 ctags/parsers/cxx/cxx_parser_typedef.c create mode 100644 ctags/parsers/cxx/cxx_parser_using.c create mode 100644 ctags/parsers/cxx/cxx_parser_variable.c create mode 100644 ctags/parsers/cxx/cxx_qtmoc.c create mode 100644 ctags/parsers/cxx/cxx_scope.c create mode 100644 ctags/parsers/cxx/cxx_scope.h create mode 100644 ctags/parsers/cxx/cxx_subparser.c create mode 100644 ctags/parsers/cxx/cxx_subparser.h create mode 100644 ctags/parsers/cxx/cxx_subparser_internal.h create mode 100644 ctags/parsers/cxx/cxx_tag.c create mode 100644 ctags/parsers/cxx/cxx_tag.h create mode 100644 ctags/parsers/cxx/cxx_token.c create mode 100644 ctags/parsers/cxx/cxx_token.h create mode 100644 ctags/parsers/cxx/cxx_token_chain.c create mode 100644 ctags/parsers/cxx/cxx_token_chain.h diff --git a/ctags/Makefile.am b/ctags/Makefile.am index c0cd8eaeb8..9d9bf67c5f 100644 --- a/ctags/Makefile.am +++ b/ctags/Makefile.am @@ -11,6 +11,38 @@ AM_CFLAGS = \ noinst_LTLIBRARIES = libctags.la parsers = \ + parsers/cxx/cxx.c \ + parsers/cxx/cxx_debug.c \ + parsers/cxx/cxx_debug.h \ + parsers/cxx/cxx_debug_type.c \ + parsers/cxx/cxx_keyword.c \ + parsers/cxx/cxx_keyword.h \ + parsers/cxx/cxx_parser_block.c \ + parsers/cxx/cxx_parser.c \ + parsers/cxx/cxx_parser_function.c \ + parsers/cxx/cxx_parser.h \ + parsers/cxx/cxx_parser_internal.h \ + parsers/cxx/cxx_parser_lambda.c \ + parsers/cxx/cxx_parser_namespace.c \ + parsers/cxx/cxx_parser_template.c \ + parsers/cxx/cxx_parser_tokenizer.c \ + parsers/cxx/cxx_parser_typedef.c \ + 
parsers/cxx/cxx_parser_using.c \ + parsers/cxx/cxx_parser_variable.c \ + parsers/cxx/cxx_qtmoc.c \ + parsers/cxx/cxx_scope.c \ + parsers/cxx/cxx_scope.h \ + parsers/cxx/cxx_subparser.c \ + parsers/cxx/cxx_subparser.h \ + parsers/cxx/cxx_subparser_internal.h \ + parsers/cxx/cxx_tag.c \ + parsers/cxx/cxx_tag.h \ + parsers/cxx/cxx_token.c \ + parsers/cxx/cxx_token_chain.c \ + parsers/cxx/cxx_token_chain.h \ + parsers/cxx/cxx_token.h \ + parsers/cpreprocessor.c \ + parsers/cpreprocessor.h \ parsers/geany_abaqus.c \ parsers/geany_abc.c \ parsers/geany_asciidoc.c \ diff --git a/ctags/parsers/cpreprocessor.c b/ctags/parsers/cpreprocessor.c new file mode 100644 index 0000000000..7b88e5821c --- /dev/null +++ b/ctags/parsers/cpreprocessor.c @@ -0,0 +1,2297 @@ +/* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains the high level input read functions (preprocessor +* directives are handled within this level). +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "debug.h" +#include "entry.h" +#include "htable.h" +#include "cpreprocessor.h" +#include "kind.h" +#include "options.h" +#include "read.h" +#include "vstring.h" +#include "param.h" +#include "parse.h" +#include "xtag.h" + +#include "cxx/cxx_debug.h" + +/* +* MACROS +*/ +#define stringMatch(s1,s2) (strcmp (s1,s2) == 0) +#define isspacetab(c) ((c) == SPACE || (c) == TAB) + +/* +* DATA DECLARATIONS +*/ +typedef enum { COMMENT_NONE, COMMENT_C, COMMENT_CPLUS, COMMENT_D } Comment; + +enum eCppLimits { + MaxCppNestingLevel = 20, + MaxDirectiveName = 10 +}; + +/* Defines the one nesting level of a preprocessor conditional. 
+ */ +typedef struct sConditionalInfo { + bool ignoreAllBranches; /* ignoring parent conditional branch */ + bool singleBranch; /* choose only one branch */ + bool branchChosen; /* branch already selected */ + bool ignoring; /* current ignore state */ + int enterExternalParserBlockNestLevel; /* the parser state when entering this conditional: used only by cxx */ +} conditionalInfo; + +enum eState { + DRCTV_NONE, /* no known directive - ignore to end of line */ + DRCTV_DEFINE, /* "#define" encountered */ + DRCTV_HASH, /* initial '#' read; determine directive */ + DRCTV_IF, /* "#if" or "#ifdef" encountered */ + DRCTV_PRAGMA, /* #pragma encountered */ + DRCTV_UNDEF, /* "#undef" encountered */ + DRCTV_INCLUDE, /* "#include" encountered */ +}; + +/* Defines the current state of the pre-processor. + */ +typedef struct sCppState { + langType lang; + langType clientLang; + + int * ungetBuffer; /* memory buffer for unget characters */ + int ungetBufferSize; /* the current unget buffer size */ + int * ungetPointer; /* the current unget char: points in the middle of the buffer */ + int ungetDataSize; /* the number of valid unget characters in the buffer */ + + /* the contents of the last SYMBOL_CHAR or SYMBOL_STRING */ + vString * charOrStringContents; + + bool resolveRequired; /* must resolve if/else/elif/endif branch */ + bool hasAtLiteralStrings; /* supports @"c:\" strings */ + bool hasCxxRawLiteralStrings; /* supports R"xxx(...)xxx" strings */ + bool hasSingleQuoteLiteralNumbers; /* supports vera number literals: + 'h..., 'o..., 'd..., and 'b... 
*/ + + bool useClientLangDefineMacroKindIndex; + int defineMacroKindIndex; + int macroUndefRoleIndex; + + bool useClientLangMacroParamKindIndex; + int macroParamKindIndex; + + bool useClientLangHeaderKindIndex; + int headerKindIndex; + int headerSystemRoleIndex; + int headerLocalRoleIndex; + + int macrodefFieldIndex; + + struct sDirective { + enum eState state; /* current directive being processed */ + bool accept; /* is a directive syntactically permitted? */ + vString * name; /* macro name */ + unsigned int nestLevel; /* level 0 is not used */ + conditionalInfo ifdef [MaxCppNestingLevel]; + } directive; + + cppMacroInfo * macroInUse; + hashTable * fileMacroTable; + +} cppState; + + +typedef enum { + CPREPRO_MACRO_KIND_UNDEF_ROLE, +} cPreProMacroRole; + +static roleDefinition CPREPROMacroRoles [] = { + RoleTemplateUndef, +}; + + +typedef enum { + CPREPRO_HEADER_KIND_SYSTEM_ROLE, + CPREPRO_HEADER_KIND_LOCAL_ROLE, +} cPreProHeaderRole; + +static roleDefinition CPREPROHeaderRoles [] = { + RoleTemplateSystem, + RoleTemplateLocal, +}; + + +typedef enum { + CPREPRO_MACRO, CPREPRO_HEADER, CPREPRO_PARAM, +} cPreProkind; + +static kindDefinition CPreProKinds [] = { + { true, 'd', "macro", "macro definitions", + .referenceOnly = false, ATTACH_ROLES(CPREPROMacroRoles)}, + { true, 'h', "header", "included header files", + .referenceOnly = true, ATTACH_ROLES(CPREPROHeaderRoles)}, + { false, 'D', "parameter", "macro parameters", }, +}; + +typedef enum { + F_MACRODEF, + COUNT_FIELD +} cPreProField; + +static fieldDefinition CPreProFields[COUNT_FIELD] = { + { .name = "macrodef", + .description = "macro definition", + .enabled = false }, +}; + +/* +* DATA DEFINITIONS +*/ + +static bool doesExaminCodeWithInIf0Branch; +static bool doesExpandMacros; + +/* +* CXX parser state. This is stored at the beginning of a conditional. +* If at the exit of the conditional the state is changed then we assume +* that no further branches should be followed. 
+*/ +static int externalParserBlockNestLevel; + + +/* Use brace formatting to detect end of block. + */ +static bool BraceFormat = false; + +void cppPushExternalParserBlock(void) +{ + externalParserBlockNestLevel++; +} + +void cppPopExternalParserBlock(void) +{ + externalParserBlockNestLevel--; +} + + +static cppState Cpp = { + .lang = LANG_IGNORE, + .clientLang = LANG_IGNORE, + .ungetBuffer = NULL, + .ungetBufferSize = 0, + .ungetPointer = NULL, + .ungetDataSize = 0, + .charOrStringContents = NULL, + .resolveRequired = false, + .hasAtLiteralStrings = false, + .hasCxxRawLiteralStrings = false, + .hasSingleQuoteLiteralNumbers = false, + .useClientLangDefineMacroKindIndex = false, + .defineMacroKindIndex = CPREPRO_MACRO, + .macroUndefRoleIndex = CPREPRO_MACRO_KIND_UNDEF_ROLE, + .useClientLangMacroParamKindIndex = false, + .macroParamKindIndex = CPREPRO_PARAM, + .useClientLangHeaderKindIndex = false, + .headerKindIndex = CPREPRO_HEADER, + .headerSystemRoleIndex = CPREPRO_HEADER_KIND_SYSTEM_ROLE, + .headerLocalRoleIndex = CPREPRO_HEADER_KIND_LOCAL_ROLE, + .macrodefFieldIndex = FIELD_UNKNOWN, + .directive = { + .state = DRCTV_NONE, + .accept = false, + .name = NULL, + .nestLevel = 0, + .ifdef = { + { + .ignoreAllBranches = false, + .singleBranch = false, + .branchChosen = false, + .ignoring = false, + } + } + } /* directive */ +}; + +/* +* FUNCTION DECLARATIONS +*/ + +static hashTable *makeMacroTable (void); +static cppMacroInfo * saveMacro(hashTable *table, const char * macro); + +/* +* FUNCTION DEFINITIONS +*/ + +extern bool cppIsBraceFormat (void) +{ + return BraceFormat; +} + +extern unsigned int cppGetDirectiveNestLevel (void) +{ + return Cpp.directive.nestLevel; +} + +static void cppInitCommon(langType clientLang, + const bool state, const bool hasAtLiteralStrings, + const bool hasCxxRawLiteralStrings, + const bool hasSingleQuoteLiteralNumbers, + int defineMacroKindIndex, + int macroUndefRoleIndex, + int macroParamKindIndex, + int headerKindIndex, + int 
headerSystemRoleIndex, int headerLocalRoleIndex,
+			   int macrodefFieldIndex)
+{
+	BraceFormat = state;
+
+	CXX_DEBUG_PRINT("cppInit: brace format is %d",BraceFormat);
+
+	externalParserBlockNestLevel = 0;
+
+	if (Cpp.lang == LANG_IGNORE)
+	{
+		langType t;
+
+		t = getNamedLanguage ("CPreProcessor", 0);
+		initializeParser (t);
+	}
+
+	Cpp.clientLang = clientLang;
+
+	/* Start every run with an empty unget queue.  The size and data
+	 * counters are reset together with the pointers: cppUngetString()
+	 * adds to ungetDataSize, and cppTerminate() frees the buffer without
+	 * clearing that count, so a value left over from an earlier run
+	 * would make the queue look longer than the freshly allocated
+	 * buffer really is. */
+	Cpp.ungetBuffer = NULL;
+	Cpp.ungetBufferSize = 0;
+	Cpp.ungetPointer = NULL;
+	Cpp.ungetDataSize = 0;
+
+	CXX_DEBUG_ASSERT(!Cpp.charOrStringContents,"This string should be null when CPP is not initialized");
+	Cpp.charOrStringContents = vStringNew();
+
+	Cpp.resolveRequired = false;
+	Cpp.hasAtLiteralStrings = hasAtLiteralStrings;
+	Cpp.hasCxxRawLiteralStrings = hasCxxRawLiteralStrings;
+	Cpp.hasSingleQuoteLiteralNumbers = hasSingleQuoteLiteralNumbers;
+
+	/* For each of the three kinds below: an index other than
+	 * KIND_GHOST_INDEX means the client parser supplies its own kind
+	 * (and roles/field); otherwise the CPreProcessor defaults are used. */
+	if (defineMacroKindIndex != KIND_GHOST_INDEX)
+	{
+		Cpp.defineMacroKindIndex = defineMacroKindIndex;
+		Cpp.useClientLangDefineMacroKindIndex = true;
+
+		Cpp.macroUndefRoleIndex = macroUndefRoleIndex;
+		Cpp.macrodefFieldIndex = macrodefFieldIndex;
+	}
+	else
+	{
+		Cpp.defineMacroKindIndex = CPREPRO_MACRO;
+		Cpp.useClientLangDefineMacroKindIndex = false;
+
+		Cpp.macroUndefRoleIndex = CPREPRO_MACRO_KIND_UNDEF_ROLE;
+		Cpp.macrodefFieldIndex = CPreProFields [F_MACRODEF].ftype;
+	}
+
+	if (macroParamKindIndex != KIND_GHOST_INDEX)
+	{
+		Cpp.macroParamKindIndex = macroParamKindIndex;
+		Cpp.useClientLangMacroParamKindIndex = true;
+	}
+	else
+	{
+		Cpp.macroParamKindIndex = CPREPRO_PARAM;
+		Cpp.useClientLangMacroParamKindIndex = false;
+	}
+
+	if (headerKindIndex != KIND_GHOST_INDEX)
+	{
+		Cpp.headerKindIndex = headerKindIndex;
+		Cpp.useClientLangHeaderKindIndex = true;
+
+		Cpp.headerSystemRoleIndex = headerSystemRoleIndex;
+		Cpp.headerLocalRoleIndex = headerLocalRoleIndex;
+	}
+	else
+	{
+		Cpp.headerKindIndex = CPREPRO_HEADER;
+		Cpp.useClientLangHeaderKindIndex = false;
+
+		Cpp.headerSystemRoleIndex = CPREPRO_HEADER_KIND_SYSTEM_ROLE;
+		Cpp.headerLocalRoleIndex = CPREPRO_HEADER_KIND_LOCAL_ROLE;
+	}
+
+
Cpp.directive.state = DRCTV_NONE; + Cpp.directive.accept = true; + Cpp.directive.nestLevel = 0; + + Cpp.directive.ifdef [0].ignoreAllBranches = false; + Cpp.directive.ifdef [0].singleBranch = false; + Cpp.directive.ifdef [0].branchChosen = false; + Cpp.directive.ifdef [0].ignoring = false; + + Cpp.directive.name = vStringNewOrClear (Cpp.directive.name); + + Cpp.macroInUse = NULL; + Cpp.fileMacroTable = + (doesExpandMacros + && isFieldEnabled (FIELD_SIGNATURE) + && isFieldEnabled (Cpp.macrodefFieldIndex) + && (getLanguageCorkUsage ((clientLang == LANG_IGNORE) + ? Cpp.lang + : clientLang) & CORK_SYMTAB)) + ? makeMacroTable () + : NULL; +} + +extern void cppInit (const bool state, const bool hasAtLiteralStrings, + const bool hasCxxRawLiteralStrings, + const bool hasSingleQuoteLiteralNumbers, + int defineMacroKindIndex, + int macroUndefRoleIndex, + int macroParamKindIndex, + int headerKindIndex, + int headerSystemRoleIndex, int headerLocalRoleIndex, + int macrodefFieldIndex) +{ + langType client = getInputLanguage (); + + cppInitCommon (client, state, hasAtLiteralStrings, + hasCxxRawLiteralStrings, hasSingleQuoteLiteralNumbers, + defineMacroKindIndex, macroUndefRoleIndex, macroParamKindIndex, + headerKindIndex, headerSystemRoleIndex, headerLocalRoleIndex, + macrodefFieldIndex); +} + +static void cppClearMacroInUse (cppMacroInfo **pM) +{ + for (cppMacroInfo *p = *pM; p; p = p->next) + { + CXX_DEBUG_PRINT("Macro <%p> clear useCount: %d -> 0", p, p->useCount); + p->useCount = 0; + } + *pM = NULL; +} + +extern void cppTerminate (void) +{ + if (Cpp.directive.name != NULL) + { + vStringDelete (Cpp.directive.name); + Cpp.directive.name = NULL; + } + + if(Cpp.ungetBuffer) + { + eFree(Cpp.ungetBuffer); + Cpp.ungetBuffer = NULL; + } + + if(Cpp.charOrStringContents) + { + vStringDelete(Cpp.charOrStringContents); + Cpp.charOrStringContents = NULL; + } + + Cpp.clientLang = LANG_IGNORE; + + cppClearMacroInUse (&Cpp.macroInUse); + + if (Cpp.fileMacroTable) + { + hashTableDelete 
(Cpp.fileMacroTable); + Cpp.fileMacroTable = NULL; + } +} + +extern void cppBeginStatement (void) +{ + Cpp.resolveRequired = true; +} + +extern void cppEndStatement (void) +{ + Cpp.resolveRequired = false; +} + +/* +* Scanning functions +* +* This section handles preprocessor directives. It strips out all +* directives and may emit a tag for #define directives. +*/ + +/* This puts a character back into the input queue for the input File. */ +extern void cppUngetc (const int c) +{ + if(!Cpp.ungetPointer) + { + // no unget data + if(!Cpp.ungetBuffer) + { + Cpp.ungetBuffer = (int *)eMalloc(8 * sizeof(int)); + Cpp.ungetBufferSize = 8; + } + Assert(Cpp.ungetBufferSize > 0); + Cpp.ungetPointer = Cpp.ungetBuffer + Cpp.ungetBufferSize - 1; + *(Cpp.ungetPointer) = c; + Cpp.ungetDataSize = 1; + return; + } + + // Already have some unget data in the buffer. Must prepend. + Assert(Cpp.ungetBuffer); + Assert(Cpp.ungetBufferSize > 0); + Assert(Cpp.ungetDataSize > 0); + Assert(Cpp.ungetPointer >= Cpp.ungetBuffer); + + if(Cpp.ungetPointer == Cpp.ungetBuffer) + { + Cpp.ungetBufferSize += 8; + int * tmp = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int)); + memcpy(tmp+8,Cpp.ungetPointer,Cpp.ungetDataSize * sizeof(int)); + eFree(Cpp.ungetBuffer); + Cpp.ungetBuffer = tmp; + Cpp.ungetPointer = tmp + 7; + } else { + Cpp.ungetPointer--; + } + + *(Cpp.ungetPointer) = c; + Cpp.ungetDataSize++; +} + +int cppUngetBufferSize() +{ + return Cpp.ungetBufferSize; +} + +/* This puts an entire string back into the input queue for the input File. 
*/ +void cppUngetString(const char * string,int len) +{ + if(!string) + return; + if(len < 1) + return; + + if(!Cpp.ungetPointer) + { + // no unget data + if(!Cpp.ungetBuffer) + { + Cpp.ungetBufferSize = 8 + len; + Cpp.ungetBuffer = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int)); + } else if(Cpp.ungetBufferSize < len) + { + Cpp.ungetBufferSize = 8 + len; + Cpp.ungetBuffer = (int *)eRealloc(Cpp.ungetBuffer,Cpp.ungetBufferSize * sizeof(int)); + } + Cpp.ungetPointer = Cpp.ungetBuffer + Cpp.ungetBufferSize - len; + } else { + // Already have some unget data in the buffer. Must prepend. + Assert(Cpp.ungetBuffer); + Assert(Cpp.ungetBufferSize > 0); + Assert(Cpp.ungetDataSize > 0); + Assert(Cpp.ungetPointer >= Cpp.ungetBuffer); + + if(Cpp.ungetBufferSize < (Cpp.ungetDataSize + len)) + { + Cpp.ungetBufferSize = 8 + len + Cpp.ungetDataSize; + int * tmp = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int)); + memcpy(tmp + 8 + len,Cpp.ungetPointer,Cpp.ungetDataSize * sizeof(int)); + eFree(Cpp.ungetBuffer); + Cpp.ungetBuffer = tmp; + Cpp.ungetPointer = tmp + 8; + } else { + Cpp.ungetPointer -= len; + Assert(Cpp.ungetPointer >= Cpp.ungetBuffer); + } + } + + int * p = Cpp.ungetPointer; + const char * s = string; + const char * e = string + len; + + while(s < e) + *p++ = *s++; + + Cpp.ungetDataSize += len; +} + +extern void cppUngetStringBuiltByMacro(const char * string,int len, cppMacroInfo *macro) +{ + if (macro->useCount == 0) + { + cppMacroInfo *m = Cpp.macroInUse; + Cpp.macroInUse = macro; + macro->next = m; + } + macro->useCount++; + + CXX_DEBUG_PRINT("Macro <%p> increment useCount: %d->%d", macro, + (macro->useCount - 1), macro->useCount); + + cppUngetString (string, len); +} + +static int cppGetcFromUngetBufferOrFile(void) +{ + if(Cpp.ungetPointer) + { + Assert(Cpp.ungetBuffer); + Assert(Cpp.ungetBufferSize > 0); + Assert(Cpp.ungetDataSize > 0); + + int c = *(Cpp.ungetPointer); + Cpp.ungetDataSize--; + if(Cpp.ungetDataSize > 0) + Cpp.ungetPointer++; + else + 
Cpp.ungetPointer = NULL; + return c; + } + + if (Cpp.macroInUse) + cppClearMacroInUse (&Cpp.macroInUse); + return getcFromInputFile(); +} + + +/* Reads a directive, whose first character is given by "c", into "name". + */ +static bool readDirective (int c, char *const name, unsigned int maxLength) +{ + unsigned int i; + + for (i = 0 ; i < maxLength - 1 ; ++i) + { + if (i > 0) + { + c = cppGetcFromUngetBufferOrFile (); + if (c == EOF || ! isalpha (c)) + { + cppUngetc (c); + break; + } + } + name [i] = c; + } + name [i] = '\0'; /* null terminate */ + + return (bool) isspacetab (c); +} + +/* Reads an identifier, whose first character is given by "c", into "tag", + * together with the file location and corresponding line number. + */ +static void readIdentifier (int c, vString *const name) +{ + vStringClear (name); + do + { + vStringPut (name, c); + c = cppGetcFromUngetBufferOrFile (); + } while (c != EOF && cppIsident (c)); + cppUngetc (c); +} + +static void readFilename (int c, vString *const name) +{ + int c_end = (c == '<')? '>': '"'; + + vStringClear (name); + + while (c = cppGetcFromUngetBufferOrFile (), (c != EOF && c != c_end && c != '\n')) + vStringPut (name, c); +} + +static conditionalInfo *currentConditional (void) +{ + return &Cpp.directive.ifdef [Cpp.directive.nestLevel]; +} + +static bool isIgnore (void) +{ + return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring; +} + +static bool setIgnore (const bool ignore) +{ + return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore; +} + +static bool isIgnoreBranch (void) +{ + conditionalInfo *const ifdef = currentConditional (); + + /* Force a single branch if an incomplete statement is discovered + * en route. This may have allowed earlier branches containing complete + * statements to be followed, but we must follow no further branches. 
+ */ + + /* + * CXX: Force a single branch if the external parser (cxx) block nest level at the beginning + * of this conditional is not equal to the current block nest level (at exit of the first branch). + * + * Follow both branches example: (same state at enter and exit) + * + * #if something + * xxxxx; + * #else + * yyyy; + * #endif + * + * Follow single branch example: (different block level at enter and exit) + * + * if { + * #if something + * } else x; + * #else + * } + * #endif + */ + + if ( + (Cpp.resolveRequired || (ifdef->enterExternalParserBlockNestLevel != externalParserBlockNestLevel)) && + (!BraceFormat) + ) + { + CXX_DEBUG_PRINT("Choosing single branch"); + ifdef->singleBranch = true; + } + + /* We will ignore this branch in the following cases: + * + * 1. We are ignoring all branches (conditional was within an ignored + * branch of the parent conditional) + * 2. A branch has already been chosen and either of: + * a. A statement was incomplete upon entering the conditional + * b. A statement is incomplete upon encountering a branch + */ + return (bool) (ifdef->ignoreAllBranches || + (ifdef->branchChosen && ifdef->singleBranch)); +} + +static void chooseBranch (void) +{ + if (! BraceFormat) + { + conditionalInfo *const ifdef = currentConditional (); + + ifdef->branchChosen = (bool) (ifdef->singleBranch || + Cpp.resolveRequired); + } +} + +/* Pushes one nesting level for an #if directive, indicating whether or not + * the branch should be ignored and whether a branch has already been chosen. + */ +static bool pushConditional (const bool firstBranchChosen) +{ + const bool ignoreAllBranches = isIgnore (); /* current ignore */ + bool ignoreBranch = false; + + if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1) + { + conditionalInfo *ifdef; + + ++Cpp.directive.nestLevel; + ifdef = currentConditional (); + + /* We take a snapshot of whether there is an incomplete statement in + * progress upon encountering the preprocessor conditional. 
If so,
+		 * then we will flag that only a single branch of the conditional
+		 * should be followed.
+		 */
+		ifdef->ignoreAllBranches = ignoreAllBranches;
+		ifdef->singleBranch = Cpp.resolveRequired;
+		ifdef->branchChosen = firstBranchChosen;
+		ifdef->ignoring = (bool) (ignoreAllBranches || (
+				! firstBranchChosen && ! BraceFormat &&
+				(ifdef->singleBranch || !doesExaminCodeWithInIf0Branch)));
+		ifdef->enterExternalParserBlockNestLevel = externalParserBlockNestLevel;
+		ignoreBranch = ifdef->ignoring;
+	}
+	return ignoreBranch;
+}
+
+/* Pops one nesting level for an #endif directive.
+ * Level 0 is never popped.  Returns the ignore state of the level that is
+ * current after the pop.
+ */
+static bool popConditional (void)
+{
+	if (Cpp.directive.nestLevel > 0)
+		--Cpp.directive.nestLevel;
+
+	return isIgnore ();
+}
+
+/* Tells whether tags of the given CPreProcessor kind are made with this
+ * parser's own kind table (true) or with a kind index supplied by the
+ * client parser (false).
+ *
+ * Each kind is governed by its own flag, which cppInitCommon() sets when the
+ * client passes an index other than KIND_GHOST_INDEX for that kind:
+ *   CPREPRO_HEADER -> useClientLangHeaderKindIndex
+ *   CPREPRO_MACRO  -> useClientLangDefineMacroKindIndex
+ *   CPREPRO_PARAM  -> useClientLangMacroParamKindIndex
+ * The answer must come from the flag that matches the kind, because callers
+ * use it to pick the language in which the matching Cpp.*KindIndex value is
+ * interpreted.
+ */
+static bool doesCPreProRunAsStandaloneParser (int kind)
+{
+	if (kind == CPREPRO_HEADER)
+		return !Cpp.useClientLangHeaderKindIndex;
+	else if (kind == CPREPRO_MACRO)
+		return !Cpp.useClientLangDefineMacroKindIndex;
+	else if (kind == CPREPRO_PARAM)
+		return !Cpp.useClientLangMacroParamKindIndex;
+	else
+	{
+		AssertNotReached();
+		return true;
+	}
+}
+
+static int makeDefineTag (const char *const name, const char* const signature, bool undef)
+{
+	bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_MACRO);
+	langType lang = standing_alone ? Cpp.lang: Cpp.clientLang;
+	const bool isFileScope = (bool) (! isInputHeaderFile ());
+
+	if (!isLanguageEnabled (lang))
+		return CORK_NIL;
+
+	Assert (Cpp.defineMacroKindIndex != KIND_GHOST_INDEX);
+
+	if (isFileScope && !isXtagEnabled(XTAG_FILE_SCOPE))
+		return CORK_NIL;
+
+	if (undef && (Cpp.macroUndefRoleIndex == ROLE_DEFINITION_INDEX))
+		return CORK_NIL;
+
+	if (! 
isLanguageKindEnabled (lang, + Cpp.defineMacroKindIndex)) + return CORK_NIL; + + if ( + /* condition for definition tag */ + (!undef) + || /* condition for reference tag */ + (undef && isXtagEnabled(XTAG_REFERENCE_TAGS) && + isLanguageRoleEnabled(lang, Cpp.defineMacroKindIndex, + Cpp.macroUndefRoleIndex))) + { + tagEntryInfo e; + int r; + + if (standing_alone) + pushLanguage (Cpp.lang); + + if (undef) + initRefTagEntry (&e, name, Cpp.defineMacroKindIndex, + Cpp.macroUndefRoleIndex); + else + initTagEntry (&e, name, Cpp.defineMacroKindIndex); + e.isFileScope = isFileScope; + if (isFileScope) + markTagExtraBit (&e, XTAG_FILE_SCOPE); + e.truncateLineAfterTag = true; + e.extensionFields.signature = signature; + + r = makeTagEntry (&e); + + if (standing_alone) + popLanguage (); + + return r; + } + return CORK_NIL; +} + +static void makeIncludeTag (const char *const name, bool systemHeader) +{ + bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_HEADER); + langType lang = standing_alone ? Cpp.lang: Cpp.clientLang; + tagEntryInfo e; + int role_index; + + if (!isLanguageEnabled (lang)) + return; + + Assert (Cpp.headerKindIndex != KIND_GHOST_INDEX); + + role_index = systemHeader? 
Cpp.headerSystemRoleIndex: Cpp.headerLocalRoleIndex;
+	if (role_index == ROLE_DEFINITION_INDEX)
+		return;
+
+	if (!isXtagEnabled (XTAG_REFERENCE_TAGS))
+		return;
+
+	if (!isLanguageKindEnabled(lang, Cpp.headerKindIndex))
+		return;
+
+	if (isLanguageRoleEnabled(lang, Cpp.headerKindIndex, role_index))
+	{
+		if (doesCPreProRunAsStandaloneParser (CPREPRO_HEADER))
+			pushLanguage (Cpp.lang);
+
+		initRefTagEntry (&e, name, Cpp.headerKindIndex, role_index);
+		e.isFileScope = false;
+		e.truncateLineAfterTag = true;
+		makeTagEntry (&e);
+
+		if (doesCPreProRunAsStandaloneParser (CPREPRO_HEADER))
+			popLanguage ();
+	}
+}
+
+/* Emits a tag for one macro parameter.
+ *
+ * The tag is made with Cpp.macroParamKindIndex, so the language pushed around
+ * makeSimpleTag() is chosen by asking about CPREPRO_PARAM: the kind index and
+ * the language it is interpreted in then always come from the same side
+ * (this parser or the client parser).
+ *
+ * If an entry for the new tag can be fetched from the cork queue, its
+ * position in the parameter list ("nth") and, when requested, the
+ * placeholder bit are recorded on it.
+ */
+static void makeParamTag (vString *name, short nth, bool placeholder)
+{
+	bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_PARAM);
+	langType lang = standing_alone ? Cpp.lang: Cpp.clientLang;
+
+	Assert (Cpp.macroParamKindIndex != KIND_GHOST_INDEX);
+
+	int r;
+	pushLanguage (lang);
+	r = makeSimpleTag (name, Cpp.macroParamKindIndex);
+	popLanguage ();
+
+	tagEntryInfo *e = getEntryInCorkQueue (r);
+	if (e)
+	{
+		e->extensionFields.nth = nth;
+		if (placeholder)
+			e->placeholder = 1;
+	}
+}
+
+/* Appends "(p1,p2,...)" to buffer, built from the names of the cork queue
+ * entries in [from, to) that are not extra tags.  An empty range yields "()".
+ */
+static void regenreateSignatureFromParameters (vString * buffer, int from, int to)
+{
+	vStringPut(buffer, '(');
+	for (int pindex = from; pindex < to; pindex++)
+	{
+		tagEntryInfo *e = getEntryInCorkQueue (pindex);
+		if (e && !isTagExtra (e))
+		{
+			vStringCatS (buffer, e->name);
+			vStringPut (buffer, ',');
+		}
+	}
+	/* drop the separator left behind by the last parameter */
+	if (vStringLast (buffer) == ',')
+		vStringChop (buffer);
+	vStringPut (buffer, ')');
+}
+
+/* Sets the scope of every cork queue entry in [from, to) to parentIndex.
+ */
+static void patchScopeFieldOfParameters(int from, int to, int parentIndex)
+{
+	for (int pindex = from; pindex < to; pindex++)
+	{
+		tagEntryInfo *e = getEntryInCorkQueue (pindex);
+		if (e)
+			e->extensionFields.scopeIndex = parentIndex;
+	}
+}
+
+static int directiveDefine (const int c, bool undef)
+{
+	// FIXME: We could possibly handle the macros here!
+ // However we'd need a separate hash table for macros of the current file + // to avoid breaking the "global" ones. + + int r = CORK_NIL; + + if (cppIsident1 (c)) + { + readIdentifier (c, Cpp.directive.name); + if (! isIgnore ()) + { + unsigned long lineNumber = getInputLineNumber (); + MIOPos filePosition = getInputFilePosition (); + int p = cppGetcFromUngetBufferOrFile (); + short nth = 0; + + if (p == '(') + { + vString *param = vStringNew (); + int param_start = (int)countEntryInCorkQueue(); + do { + p = cppGetcFromUngetBufferOrFile (); + if (isalnum(p) || p == '_' || p == '$' + /* Handle variadic macros like (a,...) */ + || p == '.') + { + vStringPut (param, p); + continue; + } + + if (vStringLength (param) > 0) + { + makeParamTag (param, nth++, vStringChar(param, 0) == '.'); + vStringClear (param); + } + if (p == '\\') + cppGetcFromUngetBufferOrFile (); /* Throw away the next char */ + } while (p != ')' && p != EOF); + vStringDelete (param); + + int param_end = (int)countEntryInCorkQueue(); + if (p == ')') + { + vString *signature = vStringNew (); + regenreateSignatureFromParameters (signature, param_start, param_end); + r = makeDefineTag (vStringValue (Cpp.directive.name), vStringValue (signature), undef); + vStringDelete (signature); + } + else + r = makeDefineTag (vStringValue (Cpp.directive.name), NULL, undef); + + tagEntryInfo *e = getEntryInCorkQueue (r); + if (e) + { + e->lineNumber = lineNumber; + e->filePosition = filePosition; + patchScopeFieldOfParameters (param_start, param_end, r); + } + } + else + { + cppUngetc (p); + r = makeDefineTag (vStringValue (Cpp.directive.name), NULL, undef); + } + } + } + Cpp.directive.state = DRCTV_NONE; + + if (r != CORK_NIL && Cpp.fileMacroTable) + registerEntry (r); + return r; +} + +static void directiveUndef (const int c) +{ + if (isXtagEnabled (XTAG_REFERENCE_TAGS)) + { + directiveDefine (c, true); + } + else + { + Cpp.directive.state = DRCTV_NONE; + } +} + +static void directivePragma (int c) +{ + if 
(cppIsident1 (c)) + { + readIdentifier (c, Cpp.directive.name); + if (stringMatch (vStringValue (Cpp.directive.name), "weak")) + { + /* generate macro tag for weak name */ + do + { + c = cppGetcFromUngetBufferOrFile (); + } while (c == SPACE); + if (cppIsident1 (c)) + { + readIdentifier (c, Cpp.directive.name); + makeDefineTag (vStringValue (Cpp.directive.name), NULL, false); + } + } + } + Cpp.directive.state = DRCTV_NONE; +} + +static bool directiveIf (const int c) +{ + DebugStatement ( const bool ignore0 = isIgnore (); ) + const bool ignore = pushConditional ((bool) (c != '0')); + + Cpp.directive.state = DRCTV_NONE; + DebugStatement ( debugCppNest (true, Cpp.directive.nestLevel); + if (ignore != ignore0) debugCppIgnore (ignore); ) + + return ignore; +} + + +static void directiveInclude (const int c) +{ + if (c == '<' || c == '"') + { + readFilename (c, Cpp.directive.name); + if ((! isIgnore ()) && vStringLength (Cpp.directive.name)) + makeIncludeTag (vStringValue (Cpp.directive.name), + c == '<'); + } + Cpp.directive.state = DRCTV_NONE; +} + +static bool directiveHash (const int c) +{ + bool ignore = false; + char directive [MaxDirectiveName]; + DebugStatement ( const bool ignore0 = isIgnore (); ) + + readDirective (c, directive, MaxDirectiveName); + if (stringMatch (directive, "define")) + Cpp.directive.state = DRCTV_DEFINE; + else if (stringMatch (directive, "include")) + Cpp.directive.state = DRCTV_INCLUDE; + else if (stringMatch (directive, "undef")) + Cpp.directive.state = DRCTV_UNDEF; + else if (strncmp (directive, "if", (size_t) 2) == 0) + Cpp.directive.state = DRCTV_IF; + else if (stringMatch (directive, "elif") || + stringMatch (directive, "else")) + { + ignore = setIgnore (isIgnoreBranch ()); + CXX_DEBUG_PRINT("Found #elif or #else: ignore is %d",ignore); + if (! 
ignore && stringMatch (directive, "else")) + chooseBranch (); + Cpp.directive.state = DRCTV_NONE; + DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); ) + } + else if (stringMatch (directive, "endif")) + { + DebugStatement ( debugCppNest (false, Cpp.directive.nestLevel); ) + ignore = popConditional (); + Cpp.directive.state = DRCTV_NONE; + DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); ) + } + else if (stringMatch (directive, "pragma")) + Cpp.directive.state = DRCTV_PRAGMA; + else + Cpp.directive.state = DRCTV_NONE; + + return ignore; +} + +/* Handles a pre-processor directive whose first character is given by "c". + */ +static bool handleDirective (const int c, int *macroCorkIndex) +{ + bool ignore = isIgnore (); + + switch (Cpp.directive.state) + { + case DRCTV_NONE: ignore = isIgnore (); break; + case DRCTV_DEFINE: + *macroCorkIndex = directiveDefine (c, false); + break; + case DRCTV_HASH: ignore = directiveHash (c); break; + case DRCTV_IF: ignore = directiveIf (c); break; + case DRCTV_PRAGMA: directivePragma (c); break; + case DRCTV_UNDEF: directiveUndef (c); break; + case DRCTV_INCLUDE: directiveInclude (c); break; + } + return ignore; +} + +/* Called upon reading of a slash ('/') characters, determines whether a + * comment is encountered, and its type. + */ +static Comment isComment (void) +{ + Comment comment; + const int next = cppGetcFromUngetBufferOrFile (); + + if (next == '*') + comment = COMMENT_C; + else if (next == '/') + comment = COMMENT_CPLUS; + else if (next == '+') + comment = COMMENT_D; + else + { + cppUngetc (next); + comment = COMMENT_NONE; + } + return comment; +} + +/* Skips over a C style comment. According to ANSI specification a comment + * is treated as white space, so we perform this substitution. 
+ */ +static int cppSkipOverCComment (void) +{ + int c = cppGetcFromUngetBufferOrFile (); + + while (c != EOF) + { + if (c != '*') + c = cppGetcFromUngetBufferOrFile (); + else + { + const int next = cppGetcFromUngetBufferOrFile (); + + if (next != '/') + c = next; + else + { + c = SPACE; /* replace comment with space */ + break; + } + } + } + return c; +} + +/* Skips over a C++ style comment. + */ +static int skipOverCplusComment (void) +{ + int c; + + while ((c = cppGetcFromUngetBufferOrFile ()) != EOF) + { + if (c == BACKSLASH) + cppGetcFromUngetBufferOrFile (); /* throw away next character, too */ + else if (c == NEWLINE) + break; + } + return c; +} + +/* Skips over a D style comment. + * Really we should match nested /+ comments. At least they're less common. + */ +static int skipOverDComment (void) +{ + int c = cppGetcFromUngetBufferOrFile (); + + while (c != EOF) + { + if (c != '+') + c = cppGetcFromUngetBufferOrFile (); + else + { + const int next = cppGetcFromUngetBufferOrFile (); + + if (next != '/') + c = next; + else + { + c = SPACE; /* replace comment with space */ + break; + } + } + } + return c; +} + +const vString * cppGetLastCharOrStringContents (void) +{ + CXX_DEBUG_ASSERT(Cpp.charOrStringContents,"Shouldn't be called when CPP is not initialized"); + return Cpp.charOrStringContents; +} + +/* Skips to the end of a string, returning a special character to + * symbolically represent a generic string. + */ +static int skipToEndOfString (bool ignoreBackslash) +{ + int c; + + vStringClear(Cpp.charOrStringContents); + + while ((c = cppGetcFromUngetBufferOrFile ()) != EOF) + { + if (c == BACKSLASH && ! 
ignoreBackslash) + { + vStringPutWithLimit (Cpp.charOrStringContents, c, 1024); + c = cppGetcFromUngetBufferOrFile (); /* throw away next character, too */ + if (c != EOF) + vStringPutWithLimit (Cpp.charOrStringContents, c, 1024); + } + else if (c == DOUBLE_QUOTE) + break; + else + vStringPutWithLimit (Cpp.charOrStringContents, c, 1024); + } + return STRING_SYMBOL; /* symbolic representation of string */ +} + +static int isCxxRawLiteralDelimiterChar (int c) +{ + return (c != ' ' && c != '\f' && c != '\n' && c != '\r' && c != '\t' && c != '\v' && + c != '(' && c != ')' && c != '\\'); +} + +static int skipToEndOfCxxRawLiteralString (void) +{ + int c = cppGetcFromUngetBufferOrFile (); + + if (c != '(' && ! isCxxRawLiteralDelimiterChar (c)) + { + cppUngetc (c); + c = skipToEndOfString (false); + } + else + { + char delim[16]; + unsigned int delimLen = 0; + bool collectDelim = true; + + do + { + if (collectDelim) + { + if (isCxxRawLiteralDelimiterChar (c) && + delimLen < (sizeof delim / sizeof *delim)) + delim[delimLen++] = c; + else + collectDelim = false; + } + else if (c == ')') + { + unsigned int i = 0; + + while ((c = cppGetcFromUngetBufferOrFile ()) != EOF && i < delimLen && delim[i] == c) + i++; + if (i == delimLen && c == DOUBLE_QUOTE) + break; + else + cppUngetc (c); + } + } + while ((c = cppGetcFromUngetBufferOrFile ()) != EOF); + c = STRING_SYMBOL; + } + return c; +} + +/* Skips to the end of the three (possibly four) 'c' sequence, returning a + * special character to symbolically represent a generic character. + * Also detects Vera numbers that include a base specifier (ie. 'b1010). 
+ */ +static int skipToEndOfChar () +{ + int c; + int count = 0, veraBase = '\0'; + + vStringClear(Cpp.charOrStringContents); + + while ((c = cppGetcFromUngetBufferOrFile ()) != EOF) + { + ++count; + if (c == BACKSLASH) + { + vStringPutWithLimit (Cpp.charOrStringContents, c, 10); + c = cppGetcFromUngetBufferOrFile (); /* throw away next character, too */ + if (c != EOF) + vStringPutWithLimit (Cpp.charOrStringContents, c, 10); + } + else if (c == SINGLE_QUOTE) + break; + else if (c == NEWLINE) + { + cppUngetc (c); + break; + } + else if (Cpp.hasSingleQuoteLiteralNumbers) + { + if (count == 1 && strchr ("DHOB", toupper (c)) != NULL) + { + veraBase = c; + vStringPutWithLimit (Cpp.charOrStringContents, c, 10); + } + else if (veraBase != '\0' && ! isalnum (c)) + { + cppUngetc (c); + break; + } + else + vStringPutWithLimit (Cpp.charOrStringContents, c, 10); + } + else + vStringPutWithLimit (Cpp.charOrStringContents, c, 10); + } + return CHAR_SYMBOL; /* symbolic representation of character */ +} + +static void attachFields (int macroCorkIndex, unsigned long endLine, const char *macrodef) +{ + tagEntryInfo *tag = getEntryInCorkQueue (macroCorkIndex); + if (!tag) + return; + + tag->extensionFields.endLine = endLine; + if (macrodef) + attachParserFieldToCorkEntry (macroCorkIndex, Cpp.macrodefFieldIndex, macrodef); +} + + +/* This function returns the next character, stripping out comments, + * C pre-processor directives, and the contents of single and double + * quoted strings. In short, strip anything which places a burden upon + * the tokenizer. + */ +extern int cppGetc (void) +{ + bool directive = false; + bool ignore = false; + int c; + int macroCorkIndex = CORK_NIL; + vString *macrodef = NULL; + + + do { +start_loop: + c = cppGetcFromUngetBufferOrFile (); +process: + switch (c) + { + case EOF: + ignore = false; + directive = false; + if (macroCorkIndex != CORK_NIL) + { + attachFields (macroCorkIndex, + getInputLineNumber(), + macrodef? 
vStringValue (macrodef): NULL); + macroCorkIndex = CORK_NIL; + } + break; + + case TAB: + case SPACE: + if (macrodef && vStringLength (macrodef) > 0 + && vStringLast (macrodef) != ' ') + vStringPut (macrodef, ' '); + break; /* ignore most white space */ + + case NEWLINE: + if (directive && ! ignore) + { + directive = false; + if (macroCorkIndex != CORK_NIL) + { + attachFields (macroCorkIndex, + getInputLineNumber(), + macrodef? vStringValue (macrodef): NULL); + macroCorkIndex = CORK_NIL; + } + } + Cpp.directive.accept = true; + break; + + case DOUBLE_QUOTE: + if (Cpp.directive.state == DRCTV_INCLUDE) + goto enter; + else + { + Cpp.directive.accept = false; + c = skipToEndOfString (false); + } + + if (macrodef) + { + /* We record the contents of string literal. + * + */ + vStringPut (macrodef, '"'); + vStringCat (macrodef, Cpp.charOrStringContents); + vStringPut (macrodef, '"'); + } + + break; + + case '#': + if (Cpp.directive.accept) + { + directive = true; + Cpp.directive.state = DRCTV_HASH; + Cpp.directive.accept = false; + } + if (macrodef) + vStringPut (macrodef, '#'); + break; + + case SINGLE_QUOTE: + Cpp.directive.accept = false; + c = skipToEndOfChar (); + + /* We assume none may want to know the content of the + * literal; just put ''. 
*/ + if (macrodef) + vStringCatS (macrodef, "''"); + + break; + + case '/': + { + const Comment comment = isComment (); + + if (comment == COMMENT_C) + c = cppSkipOverCComment (); + else if (comment == COMMENT_CPLUS) + { + c = skipOverCplusComment (); + if (c == NEWLINE) + cppUngetc (c); + } + else if (comment == COMMENT_D) + c = skipOverDComment (); + else + { + Cpp.directive.accept = false; + if (macrodef) + vStringPut (macrodef, '/'); + } + break; + } + + case BACKSLASH: + { + int next = cppGetcFromUngetBufferOrFile (); + + if (next == NEWLINE) + goto start_loop; + else + { + cppUngetc (next); + if (macrodef) + vStringPut (macrodef, '\\'); + } + break; + } + + case '?': + { + int next = cppGetcFromUngetBufferOrFile (); + if (next != '?') + { + cppUngetc (next); + if (macrodef) + vStringPut (macrodef, '?'); + } + else + { + next = cppGetcFromUngetBufferOrFile (); + switch (next) + { + case '(': c = '['; break; + case ')': c = ']'; break; + case '<': c = '{'; break; + case '>': c = '}'; break; + case '/': c = BACKSLASH; goto process; + case '!': c = '|'; break; + case SINGLE_QUOTE: c = '^'; break; + case '-': c = '~'; break; + case '=': c = '#'; goto process; + default: + cppUngetc ('?'); + cppUngetc (next); + break; + } + if (macrodef) + vStringPut (macrodef, c); + } + } break; + + /* digraphs: + * input: <: :> <% %> %: %:%: + * output: [ ] { } # ## + */ + case '<': + { + /* + Quoted from http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3237.html: + ------ + if the next three characters are <:: and the + subsequent character is neither : nor >, the < is + treated as a preprocessor token by itself (and not as + the first character of the alternative token */ + int next[3]; + next[0] = cppGetcFromUngetBufferOrFile (); + switch (next[0]) + { + case ':': + next[1] = cppGetcFromUngetBufferOrFile (); + if (next[1] == ':') + { + next[2] = cppGetcFromUngetBufferOrFile (); + if (! 
(next[2] == ':' || next[2] == '>')) + { + cppUngetc (next[2]); + cppUngetc (next[1]); + cppUngetc (next[0]); + c = '<'; + } + else + { + cppUngetc (next[2]); + cppUngetc (next[1]); + c = '['; + } + } + else + { + cppUngetc (next[1]); + c = '['; + } + break; + case '%': c = '{'; break; + default: cppUngetc (next[0]); + } + + if (macrodef) + vStringPut (macrodef, c); + + goto enter; + } + case ':': + { + int next = cppGetcFromUngetBufferOrFile (); + if (next == '>') + c = ']'; + else + cppUngetc (next); + + if (macrodef) + vStringPut (macrodef, c); + + goto enter; + } + case '%': + { + int next = cppGetcFromUngetBufferOrFile (); + switch (next) + { + case '>': c = '}'; break; + case ':': c = '#'; goto process; + default: cppUngetc (next); + } + + if (macrodef) + vStringPut (macrodef, c); + + goto enter; + } + + default: + if (c == '@' && Cpp.hasAtLiteralStrings) + { + int next = cppGetcFromUngetBufferOrFile (); + if (next == DOUBLE_QUOTE) + { + Cpp.directive.accept = false; + c = skipToEndOfString (true); + if (macrodef) + vStringCatS (macrodef, "@\"\""); + break; + } + else + { + cppUngetc (next); + if (macrodef) + vStringPut (macrodef, '@'); + } + } + else if (c == 'R' && Cpp.hasCxxRawLiteralStrings) + { + /* OMG!11 HACK!!11 Get the previous character. + * + * We need to know whether the previous character was an identifier or not, + * because "R" has to be on its own, not part of an identifier. This allows + * for constructs like: + * + * #define FOUR "4" + * const char *p = FOUR"5"; + * + * which is not a raw literal, but a preprocessor concatenation. + * + * FIXME: handle + * + * const char *p = R\ + * "xxx(raw)xxx"; + * + * which is perfectly valid (yet probably very unlikely). */ + int prev = getNthPrevCFromInputFile (1, '\0'); + int prev2 = getNthPrevCFromInputFile (2, '\0'); + int prev3 = getNthPrevCFromInputFile (3, '\0'); + + if (! cppIsident (prev) || + (! cppIsident (prev2) && (prev == 'L' || prev == 'u' || prev == 'U')) || + (! 
/*
 * Parser callback of the "CPreProcessor" language: CPreProParser() installs
 * it as def->parser.
 *
 * It initialises the preprocessor state for Cpp.lang through cppInitCommon(),
 * hands cppGetc to findRegexTagsMainloop() as the character reader, and then
 * releases the preprocessor state with cppTerminate().
 *
 * NOTE(review): the meaning of the positional cppInitCommon() arguments is
 * not visible from here; the call passes KIND_GHOST_INDEX / 0 / FIELD_UNKNOWN
 * placeholders - confirm against the cppInitCommon() definition.
 */
static void findCppTags (void)
{
	cppInitCommon (Cpp.lang, 0, false, false, false,
				   KIND_GHOST_INDEX, 0, KIND_GHOST_INDEX,
				   KIND_GHOST_INDEX, 0, 0,
				   FIELD_UNKNOWN);

	findRegexTagsMainloop (cppGetc);

	cppTerminate ();
}
vStringCatS (macrodef, entry->extensionFields.signature); + vStringPut (macrodef, '='); + + const char *val = getParserFieldValueForType (entry, Cpp.macrodefFieldIndex); + if (val) + vStringCatS (macrodef, val); + + *info = saveMacro (Cpp.fileMacroTable, vStringValue (macrodef)); + vStringDelete (macrodef); + + return false; + } + return true; +} + +extern cppMacroInfo * cppFindMacroFromSymtab (const char *const name) +{ + cppMacroInfo *info = NULL; + foreachEntriesInScope (CORK_NIL, name, buildMacroInfoFromTagEntry, &info); + + return info; +} + +/* Determines whether or not "name" should be ignored, per the ignore list. + */ +extern cppMacroInfo * cppFindMacro (const char *const name) +{ + cppMacroInfo *info; + + if (cmdlineMacroTable) + { + info = (cppMacroInfo *)hashTableGetItem (cmdlineMacroTable,(char *)name); + if (info) + return info; + } + + if (Cpp.fileMacroTable) + { + info = (cppMacroInfo *)hashTableGetItem (Cpp.fileMacroTable,(char *)name); + if (info) + return info; + + info = cppFindMacroFromSymtab(name); + if (info) + return info; + } + return NULL; +} + +extern vString * cppBuildMacroReplacement( + const cppMacroInfo * macro, + const char ** parameters, /* may be NULL */ + int parameterCount + ) +{ + if(!macro) + return NULL; + + if(!macro->replacements) + return NULL; + + vString * ret = vStringNew(); + + cppMacroReplacementPartInfo * r = macro->replacements; + + while(r) + { + if(r->parameterIndex < 0) + { + if(r->constant) + vStringCat(ret,r->constant); + } else { + if(parameters && (r->parameterIndex < parameterCount)) + { + if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY) + vStringPut(ret,'"'); + + vStringCatS(ret,parameters[r->parameterIndex]); + if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_VARARGS) + { + int idx = r->parameterIndex + 1; + while(idx < parameterCount) + { + vStringPut(ret,','); + vStringCatS(ret,parameters[idx]); + idx++; + } + } + + if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY) + vStringPut(ret,'"'); + } + } + + r = 
r->next; + } + + return ret; +} + + +static void saveIgnoreToken(const char * ignoreToken) +{ + if(!ignoreToken) + return; + + Assert (cmdlineMacroTable); + + const char * c = ignoreToken; + char cc = *c; + + const char * tokenBegin = c; + const char * tokenEnd = NULL; + const char * replacement = NULL; + bool ignoreFollowingParenthesis = false; + + while(cc) + { + if(cc == '=') + { + if(!tokenEnd) + tokenEnd = c; + c++; + if(*c) + replacement = c; + break; + } + + if(cc == '+') + { + if(!tokenEnd) + tokenEnd = c; + ignoreFollowingParenthesis = true; + } + + c++; + cc = *c; + } + + if(!tokenEnd) + tokenEnd = c; + + if(tokenEnd <= tokenBegin) + return; + + cppMacroInfo * info = (cppMacroInfo *)eMalloc(sizeof(cppMacroInfo)); + + info->hasParameterList = ignoreFollowingParenthesis; + if(replacement) + { + cppMacroReplacementPartInfo * rep = \ + (cppMacroReplacementPartInfo *)eMalloc(sizeof(cppMacroReplacementPartInfo)); + rep->parameterIndex = -1; + rep->flags = 0; + rep->constant = vStringNewInit(replacement); + rep->next = NULL; + info->replacements = rep; + } else { + info->replacements = NULL; + } + info->useCount = 0; + info->next = NULL; + + hashTablePutItem(cmdlineMacroTable,eStrndup(tokenBegin,tokenEnd - tokenBegin),info); + + verbose (" ignore token: %s\n", ignoreToken); +} + +static cppMacroInfo * saveMacro(hashTable *table, const char * macro) +{ + CXX_DEBUG_ENTER_TEXT("Save macro %s",macro); + + if(!macro) + return NULL; + + Assert (table); + + const char * c = macro; + + // skip initial spaces + while(*c && isspacetab(*c)) + c++; + + if(!*c) + { + CXX_DEBUG_LEAVE_TEXT("Bad empty macro definition"); + return NULL; + } + + if(!(isalpha(*c) || (*c == '_' || (*c == '$') ))) + { + CXX_DEBUG_LEAVE_TEXT("Macro does not start with an alphanumeric character"); + return NULL; // must be a sequence of letters and digits + } + + const char * identifierBegin = c; + + while(*c && (isalnum(*c) || (*c == '_') || (*c == '$') )) + c++; + + const char * identifierEnd = c; + 
+ CXX_DEBUG_PRINT("Macro identifier '%.*s'",identifierEnd - identifierBegin,identifierBegin); + +#define MAX_PARAMS 16 + + const char * paramBegin[MAX_PARAMS]; + const char * paramEnd[MAX_PARAMS]; + + int iParamCount = 0; + + while(*c && isspacetab(*c)) + c++; + + cppMacroInfo * info = (cppMacroInfo *)eMalloc(sizeof(cppMacroInfo)); + info->useCount = 0; + info->next = NULL; + + if(*c == '(') + { + // parameter list + CXX_DEBUG_PRINT("Macro has a parameter list"); + + info->hasParameterList = true; + + c++; + while(*c) + { + while(*c && isspacetab(*c)) + c++; + + if(*c && (*c != ',') && (*c != ')')) + { + paramBegin[iParamCount] = c; + c++; + while(*c && (*c != ',') && (*c != ')') && (!isspacetab(*c))) + c++; + paramEnd[iParamCount] = c; + + CXX_DEBUG_PRINT( + "Macro parameter %d '%.*s'", + iParamCount, + paramEnd[iParamCount] - paramBegin[iParamCount], + paramBegin[iParamCount] + ); + + iParamCount++; + if(iParamCount >= MAX_PARAMS) + break; + } + + while(*c && isspacetab(*c)) + c++; + + if(*c == ')') + break; + + if(*c == ',') + c++; + } + + while(*c && (*c != ')')) + c++; + + if(*c == ')') + c++; + + CXX_DEBUG_PRINT("Got %d parameters",iParamCount); + + } else { + info->hasParameterList = false; + } + + while(*c && isspacetab(*c)) + c++; + + info->replacements = NULL; + + + if(*c == '=') + { + CXX_DEBUG_PRINT("Macro has a replacement part"); + + // have replacement part + c++; + + cppMacroReplacementPartInfo * lastReplacement = NULL; + int nextParameterReplacementFlags = 0; + +#define ADD_REPLACEMENT_NEW_PART(part) \ + do { \ + if(lastReplacement) \ + lastReplacement->next = part; \ + else \ + info->replacements = part; \ + lastReplacement = part; \ + } while(0) + +#define ADD_CONSTANT_REPLACEMENT_NEW_PART(start,len) \ + do { \ + cppMacroReplacementPartInfo * rep = \ + (cppMacroReplacementPartInfo *)eMalloc(sizeof(cppMacroReplacementPartInfo)); \ + rep->parameterIndex = -1; \ + rep->flags = 0; \ + rep->constant = vStringNew(); \ + 
vStringNCatS(rep->constant,start,len); \ + rep->next = NULL; \ + CXX_DEBUG_PRINT("Constant replacement part: '%s'",vStringValue(rep->constant)); \ + ADD_REPLACEMENT_NEW_PART(rep); \ + } while(0) + +#define ADD_CONSTANT_REPLACEMENT(start,len) \ + do { \ + if(lastReplacement && (lastReplacement->parameterIndex == -1)) \ + { \ + vStringNCatS(lastReplacement->constant,start,len); \ + CXX_DEBUG_PRINT( \ + "Constant replacement part changed: '%s'", \ + vStringValue(lastReplacement->constant) \ + ); \ + } else { \ + ADD_CONSTANT_REPLACEMENT_NEW_PART(start,len); \ + } \ + } while(0) + + // parse replacements + const char * begin = c; + + while(*c) + { + if(isalpha(*c) || (*c == '_')) + { + if(c > begin) + ADD_CONSTANT_REPLACEMENT(begin,c - begin); + + const char * tokenBegin = c; + + while(*c && (isalnum(*c) || (*c == '_'))) + c++; + + // check if it is a parameter + int tokenLen = c - tokenBegin; + + CXX_DEBUG_PRINT("Check token '%.*s'",tokenLen,tokenBegin); + + bool bIsVarArg = (tokenLen == 11) && (strncmp(tokenBegin,"__VA_ARGS__",11) == 0); + + int i = 0; + for(;iparameterIndex = i; + rep->flags = nextParameterReplacementFlags | + (bIsVarArg ? 
CPP_MACRO_REPLACEMENT_FLAG_VARARGS : 0); + rep->constant = NULL; + rep->next = NULL; + + nextParameterReplacementFlags = 0; + + CXX_DEBUG_PRINT("Parameter replacement part: %d (vararg %d)",i,bIsVarArg); + + ADD_REPLACEMENT_NEW_PART(rep); + break; + } + } + + if(i >= iParamCount) + { + // no parameter found + ADD_CONSTANT_REPLACEMENT(tokenBegin,tokenLen); + } + + begin = c; + continue; + } + + if((*c == '"') || (*c == '\'')) + { + // skip string/char constant + char term = *c; + c++; + while(*c) + { + if(*c == '\\') + { + c++; + if(*c) + c++; + } else if(*c == term) + { + c++; + break; + } + c++; + } + continue; + } + + if(*c == '#') + { + // check for token paste/stringification + if(c > begin) + ADD_CONSTANT_REPLACEMENT(begin,c - begin); + + c++; + if(*c == '#') + { + // token paste + CXX_DEBUG_PRINT("Found token paste operator"); + while(*c == '#') + c++; + + // we just skip this part and the following spaces + while(*c && isspacetab(*c)) + c++; + + if(lastReplacement && (lastReplacement->parameterIndex == -1)) + { + // trim spaces from the last replacement constant! 
+ vStringStripTrailing(lastReplacement->constant); + CXX_DEBUG_PRINT( + "Last replacement truncated to '%s'", + vStringValue(lastReplacement->constant) + ); + } + } else { + // stringification + CXX_DEBUG_PRINT("Found stringification operator"); + nextParameterReplacementFlags |= CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY; + } + + begin = c; + continue; + } + + c++; + } + + if(c > begin) + ADD_CONSTANT_REPLACEMENT(begin,c - begin); + } + + hashTablePutItem(table,eStrndup(identifierBegin,identifierEnd - identifierBegin),info); + CXX_DEBUG_LEAVE(); + + return info; +} + +static void freeMacroInfo(cppMacroInfo * info) +{ + if(!info) + return; + cppMacroReplacementPartInfo * pPart = info->replacements; + while(pPart) + { + if(pPart->constant) + vStringDelete(pPart->constant); + cppMacroReplacementPartInfo * pPartToDelete = pPart; + pPart = pPart->next; + eFree(pPartToDelete); + } + eFree(info); +} + +static hashTable *makeMacroTable (void) +{ + return hashTableNew( + 1024, + hashCstrhash, + hashCstreq, + eFree, + (void (*)(void *))freeMacroInfo + ); +} + +static void initializeCpp (const langType language) +{ + Cpp.lang = language; +} + +static void finalizeCpp (const langType language, bool initialized) +{ + if (cmdlineMacroTable) + { + hashTableDelete (cmdlineMacroTable); + cmdlineMacroTable = NULL; + } +} + +static void CpreProExpandMacrosInInput (const langType language CTAGS_ATTR_UNUSED, const char *name, const char *arg) +{ + doesExpandMacros = paramParserBool (arg, doesExpandMacros, + name, "parameter"); +} + +static void CpreProInstallIgnoreToken (const langType language CTAGS_ATTR_UNUSED, const char *optname CTAGS_ATTR_UNUSED, const char *arg) +{ + if (arg == NULL || arg[0] == '\0') + { + if (cmdlineMacroTable) + { + hashTableDelete(cmdlineMacroTable); + cmdlineMacroTable = NULL; + } + verbose (" clearing list\n"); + } else { + if (!cmdlineMacroTable) + cmdlineMacroTable = makeMacroTable (); + saveIgnoreToken(arg); + } +} + +static void CpreProInstallMacroToken 
(const langType language CTAGS_ATTR_UNUSED, const char *optname CTAGS_ATTR_UNUSED, const char *arg) +{ + if (arg == NULL || arg[0] == '\0') + { + if (cmdlineMacroTable) + { + hashTableDelete(cmdlineMacroTable); + cmdlineMacroTable = NULL; + } + verbose (" clearing list\n"); + } else { + if (!cmdlineMacroTable) + cmdlineMacroTable = makeMacroTable (); + saveMacro(cmdlineMacroTable, arg); + } +} + +static void CpreProSetIf0 (const langType language CTAGS_ATTR_UNUSED, const char *name, const char *arg) +{ + doesExaminCodeWithInIf0Branch = paramParserBool (arg, doesExaminCodeWithInIf0Branch, + name, "parameter"); +} + +static parameterHandlerTable CpreProParameterHandlerTable [] = { + { .name = "if0", + .desc = "examine code within \"#if 0\" branch (true or [false])", + .handleParameter = CpreProSetIf0, + }, + { .name = "ignore", + .desc = "a token to be specially handled", + .handleParameter = CpreProInstallIgnoreToken, + }, + { .name = "define", + .desc = "define replacement for an identifier (name(params,...)=definition)", + .handleParameter = CpreProInstallMacroToken, + }, + { .name = "_expand", + .desc = "expand macros if their definitions are in the current C/C++/CUDA input file (true or [false])", + .handleParameter = CpreProExpandMacrosInInput, + } +}; + +extern parserDefinition* CPreProParser (void) +{ + parserDefinition* const def = parserNew ("CPreProcessor"); + def->kindTable = CPreProKinds; + def->kindCount = ARRAY_SIZE (CPreProKinds); + def->initialize = initializeCpp; + def->parser = findCppTags; + def->finalize = finalizeCpp; + + def->fieldTable = CPreProFields; + def->fieldCount = ARRAY_SIZE (CPreProFields); + + def->parameterHandlerTable = CpreProParameterHandlerTable; + def->parameterHandlerCount = ARRAY_SIZE(CpreProParameterHandlerTable); + + def->useCork = CORK_QUEUE | CORK_SYMTAB; + return def; +} diff --git a/ctags/parsers/cpreprocessor.h b/ctags/parsers/cpreprocessor.h new file mode 100644 index 0000000000..fe031bd21a --- /dev/null +++ 
/*
 * cppIs... macros are for the value returned from cppGetc().  Don't
 * use "char" value. Don't pass a value stored to C-string
 * (char*... or char[]) or vString.
 *
 * cppGetc() can return the value out of range of unsigned char.
 * cppGetc calls skipToEndOfString() and skipToEndOfChar() internally.
 * They return STRING_SYMBOL (== 338) and CHAR_SYMBOL (== 322) in a
 * case. (cppGetc() can return EOF (== -1). However, it is not an issue
 * here.)
 *
 * is...() macros/functions defined in ctype.h can handle the value of
 * an unsigned char or EOF; we cannot pass STRING_SYMBOL or CHAR_SYMBOL
 * returned from cppGetc().
 *
 * Depending on the platform, isalpha(338) returns different value.
 * As far as Fedora22, it returns 0. On Windows 2010, it returns 1.
 *
 * So, we need cppIs... macros.
 * cppIs... macros considers STRING_SYMBOL and CHAR_SYMBOL
 *
 * Every macro below evaluates its argument more than once: do not pass
 * an expression with side effects (e.g. cppIsident (cppGetc ())).
 */

/* isascii is not portable enough.
 * The parameter is parenthesised so that an argument such as "x & 0xff"
 * is compared as a whole instead of being re-associated with >= and <. */
#define cppIsascii(c) (((c) >= 0) && ((c) < 0x80))

/*  Is the character valid as a character of a C identifier?
 *  VMS allows '$' in identifiers.
 */
#define cppIsalnum(c)  (cppIsascii(c) && isalnum(c))
#define cppIsident(c)  (cppIsalnum(c) \
						|| (c) == '_' || (c) == '$')

/*  Is the character valid as the first character of a C identifier?
 *  C++ allows '~' in destructors.
 *  VMS allows '$' in identifiers.
 */
#define cppIsalpha(c)  (cppIsascii(c) && isalpha(c))
#define cppIsident1(c)  (cppIsalpha(c) \
						|| (c) == '_' || (c) == '~' || (c) == '$')

#define cppIsspace(c) (cppIsascii(c) && isspace(c))
#define cppIsdigit(c) (cppIsascii(c) && isdigit(c))
/*
 * One element of a macro's replacement list. A part is either a constant
 * piece of text or a reference to one of the macro's parameters.
 */
typedef struct sCppMacroReplacementPartInfo {
	int parameterIndex; /* -1 if this part is a constant, otherwise the
	                     * index into the parameter array */
	int flags;          /* bitmask of CPP_MACRO_REPLACEMENT_FLAG_* */
	vString * constant; /* not NULL only if parameterIndex == -1 */
	struct sCppMacroReplacementPartInfo * next; /* following part, NULL at
	                                             * the end of the list */
} cppMacroReplacementPartInfo;

/*
 * Description of one macro as stored in the macro hash tables.
 */
typedef struct sCppMacroInfo {
	bool hasParameterList; /* true if the macro has a trailing () */
	cppMacroReplacementPartInfo * replacements; /* head of the replacement
	                                             * list; NULL when the macro
	                                             * has no replacement */
	int useCount;          /* set to 0 on creation; NOTE(review): updated by
	                        * users of the macro outside this header - confirm */
	struct sCppMacroInfo * next; /* set to NULL on creation; NOTE(review):
	                              * linkage is managed by callers - confirm */
} cppMacroInfo;
+* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_parser.h" +#include "cxx_scope.h" +#include "cxx_tag.h" + +#include "dependency.h" +#include "selectors.h" + +// +// ---------------------------------------------------------------------------- +// Assumptions. +// ---------------------------------------------------------------------------- +// +// - Parsing C/C++ is hard. Parsing C/C++ correctly without includes and +// without a complete preprocessor is close to impossible in the general +// case. Also ctags is not a compiler. This means that our parser must be +// a "guessing" parser. It's hopeless to try to decode the syntax of the +// language down to the last bit. +// +// - The input may contain syntax errors. This is because we don't have a full +// preprocessor and also because ctags is often used "online" in editors, +// while the user is typing. ctags should be tolerant and try to do its best +// even with syntax errors but: +// - Syntax errors that break the scope hierarchy should be detected and tag +// emission should probably be stopped. Correct tags in a broken hierarchy +// are useless (well, unless the hierarchy itself is ignored by the ctags +// user). +// - CTags should try to avoid emitting tags which involve syntax errors +// +// - There will always be pathologic cases. Don't cry, live with it. +// +// ---------------------------------------------------------------------------- +// TODO LIST +// ---------------------------------------------------------------------------- +// +// - In case of simple syntax error try to recover: +// Skip to the next ; without entering or exiting scopes. +// If this can be done then recovery is feasible. +// - Extension of each block/scope. +// - Unnamed blocks/scopes? 
+// - Handle syntax errors: +// - If a special switch is used then stop on detecting a syntax error +// (this is useful for code editors that frequently update tags for +// single files) +// - If the switch is not used then do NOT emit tags for a file on a syntax +// error [but do not stop execution of the whole program and continue on +// other files] +// For this purpose: +// - Do not emit tags until the end of the file, if scopes do not match we +// either screwed up something or the programmer did +// Maybe the cork api can be used for this? +// +// Handle variable declarations inside things like while() foreach() FOR() etc.. +// +// - Friend classes. +// - Template parameters as field +// - Template specialisations (another field?) +// - Forward declarations might become tags + + +parserDefinition * CParser (void) +{ + static const char * const extensions [] = + { + "c", + NULL + }; + + static selectLanguage selectors[] = { selectByObjectiveCKeywords, NULL }; + + parserDefinition* def = parserNew("C"); + + def->kindTable = cxxTagGetCKindDefinitions(); + def->kindCount = cxxTagGetCKindDefinitionCount(); + def->fieldTable = cxxTagGetCFieldDefinitionifiers(); + def->fieldCount = cxxTagGetCFieldDefinitionifierCount(); + def->extensions = extensions; + def->parser2 = cxxCParserMain; + def->initialize = cxxCParserInitialize; + def->finalize = cxxParserCleanup; + def->selectLanguage = selectors; + def->useCork = CORK_QUEUE|CORK_SYMTAB; // We use corking to block output until the end of file + + return def; +} + +parserDefinition * CppParser (void) +{ + static const char * const extensions [] = + { + "c++", "cc", "cp", "cpp", "cxx", + "h", "h++", "hh", "hp", "hpp", "hxx", "inl", +#ifndef CASE_INSENSITIVE_FILENAMES + "C", "H", "CPP", "CXX", +#endif + NULL + }; + static parserDependency dependencies [] = { + { DEPTYPE_KIND_OWNER, "C" }, + }; + + static selectLanguage selectors[] = { selectByObjectiveCKeywords, NULL }; + + parserDefinition* def = parserNew("C++"); + + 
def->dependencies = dependencies; + def->dependencyCount = ARRAY_SIZE (dependencies); + def->kindTable = cxxTagGetCPPKindDefinitions(); + def->kindCount = cxxTagGetCPPKindDefinitionCount(); + def->fieldTable = cxxTagGetCPPFieldDefinitionifiers(); + def->fieldCount = cxxTagGetCPPFieldDefinitionifierCount(); + def->extensions = extensions; + def->parser2 = cxxCppParserMain; + def->initialize = cxxCppParserInitialize; + def->finalize = cxxParserCleanup; + def->selectLanguage = selectors; + def->useCork = CORK_QUEUE|CORK_SYMTAB; // We use corking to block output until the end of file + + return def; +} + +parserDefinition * CUDAParser (void) +{ + static const char * const extensions [] = + { + "cu", "cuh", + NULL + }; + static parserDependency dependencies [] = { + { DEPTYPE_KIND_OWNER, "C" }, + }; + + parserDefinition* def = parserNew("CUDA"); + + def->dependencies = dependencies; + def->dependencyCount = ARRAY_SIZE (dependencies); + def->kindTable = cxxTagGetCUDAKindDefinitions(); + def->kindCount = cxxTagGetCUDAKindDefinitionCount(); + def->fieldTable = cxxTagGetCUDAFieldDefinitionifiers(); + def->fieldCount = cxxTagGetCUDAFieldDefinitionifierCount(); + def->extensions = extensions; + def->parser2 = cxxCUDAParserMain; + def->initialize = cxxCUDAParserInitialize; + def->finalize = cxxParserCleanup; + def->selectLanguage = NULL; + def->useCork = CORK_QUEUE|CORK_SYMTAB; // We use corking to block output until the end of file + + return def; +} diff --git a/ctags/parsers/cxx/cxx_debug.c b/ctags/parsers/cxx/cxx_debug.c new file mode 100644 index 0000000000..c6ec096554 --- /dev/null +++ b/ctags/parsers/cxx/cxx_debug.c @@ -0,0 +1,182 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "cxx_debug.h" + +#ifdef CXX_DO_DEBUGGING + +#include "trashbox.h" +#include "cxx_parser_internal.h" +#include "cxx_scope.h" + +static void cxxDebugDumpToken0 (CXXToken *pToken, + struct circularRefChecker *pTokenChecker, + struct circularRefChecker *pChainChecker, + bool top_level); + +static void cxxDebugDumpChain0 (CXXTokenChain *pChain, + struct circularRefChecker *pTokenChecker, + struct circularRefChecker *pChainChecker, + bool top_level) +{ + int backref; + + if (top_level) + { + debugIndent (); + fprintf (stderr, "\n", backref); + return; + } + + backref = circularRefCheckerGetCurrent (pChainChecker); + + fprintf (stderr, "[%d %p&C#%d]\n", pChain->iCount, pChain, backref); + + debugInc(); + debugIndent (); + cxxDebugDumpToken0 (pChain->pHead, pTokenChecker, pChainChecker, false); + debugDec(); + + debugIndent (); + fprintf (stderr, ">\n"); +} + +static void cxxDebugDumpToken0 (CXXToken *pToken, + struct circularRefChecker *pTokenChecker, + struct circularRefChecker *pChainChecker, + bool top_level) +{ + int backref; + + if (top_level) + { + debugIndent (); + fprintf (stderr, "\n", backref); + return; + } + + backref = circularRefCheckerGetCurrent (pTokenChecker); + + fprintf (stderr, "\"%s\": [%s %p &T#%d]\n", + vStringValue (pToken->pszWord), + cxxDebugTypeDecode (pToken->eType), pToken, backref); + + debugIndent (); + fprintf (stderr, " chain: "); + debugInc(); + cxxDebugDumpChain0 (pToken->pChain, pTokenChecker, pTokenChecker, false); + debugDec(); + + debugIndent (); + fprintf (stderr, " next: "); + debugInc(); + cxxDebugDumpToken0 (pToken->pNext, pTokenChecker, pTokenChecker, false); + debugDec(); + + debugIndent (); + fprintf (stderr, " prev: "); + debugInc(); + cxxDebugDumpToken0 (pToken->pPrev, pTokenChecker, pTokenChecker, false); + debugDec(); + + debugIndent (); + fprintf (stderr, ">\n"); +} + +typedef void (* cxxDebugDumpCommonFunc)(void *, + struct 
circularRefChecker *, + struct circularRefChecker *, + bool); +void cxxDebugDumpCommon (void *data, + void (* func)(void *, + struct circularRefChecker *, + struct circularRefChecker *, + bool)) +{ + static struct circularRefChecker *pTokenChecker; + static struct circularRefChecker *pChainChecker; + + if (!pTokenChecker) + { + pTokenChecker = circularRefCheckerNew(); + DEFAULT_TRASH_BOX(pTokenChecker, (TrashBoxDestroyItemProc)circularRefCheckerDestroy); + } + + if (!pChainChecker) + { + pChainChecker = circularRefCheckerNew(); + DEFAULT_TRASH_BOX(pChainChecker, (TrashBoxDestroyItemProc)circularRefCheckerDestroy); + } + + func(data, pTokenChecker, pChainChecker, true); + + circularRefCheckClear (pTokenChecker); + circularRefCheckClear (pChainChecker); +} + +void cxxDebugDumpToken (CXXToken *pToken) +{ + cxxDebugDumpCommon (pToken, (cxxDebugDumpCommonFunc)cxxDebugDumpToken0); +} + +void cxxDebugDumpChain (CXXTokenChain *pChain) +{ + cxxDebugDumpCommon (pChain, (cxxDebugDumpCommonFunc)cxxDebugDumpChain0); +} + +const char* cxxDebugScopeDecode(enum CXXScopeType scope) +{ + const char * table[] = { + [CXXScopeTypeFunction] = "function", + [CXXScopeTypeNamespace] = "namespace", + [CXXScopeTypeClass] = "class", + [CXXScopeTypeEnum] = "enum", + [CXXScopeTypeUnion] = "union", + [CXXScopeTypeStruct] = "struct", + [CXXScopeTypeVariable] = "variable", + [CXXScopeTypePrototype] = "prototype", + [CXXScopeTypeTypedef] = "typedef", + }; + if (CXXScopeTypeLAST > scope) + return table[scope]; + else + return NULL; +} + +#endif diff --git a/ctags/parsers/cxx/cxx_debug.h b/ctags/parsers/cxx/cxx_debug.h new file mode 100644 index 0000000000..142b52ec43 --- /dev/null +++ b/ctags/parsers/cxx/cxx_debug.h @@ -0,0 +1,63 @@ +#ifndef ctags_cxx_debug_h_ +#define ctags_cxx_debug_h_ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" +#include "debug.h" +#include "trace.h" +#include "cxx_token.h" +#include "cxx_scope.h" + +#if defined(DO_TRACING) + #define CXX_DO_DEBUGGING +#endif + +#ifdef CXX_DO_DEBUGGING + +const char* cxxDebugTypeDecode(enum CXXTokenType); +void cxxDebugDumpToken (CXXToken *pToken); +void cxxDebugDumpChain (CXXTokenChain *pChain); +const char* cxxDebugScopeDecode(enum CXXScopeType); + +#define CXX_DEBUG_ENTER() TRACE_ENTER() +#define CXX_DEBUG_LEAVE() TRACE_LEAVE() + +#define CXX_DEBUG_ENTER_TEXT(_szFormat,...) \ + TRACE_ENTER_TEXT(_szFormat,## __VA_ARGS__) + +#define CXX_DEBUG_LEAVE_TEXT(_szFormat,...) \ + TRACE_LEAVE_TEXT(_szFormat,## __VA_ARGS__) + +#define CXX_DEBUG_PRINT(_szFormat,...) \ + TRACE_PRINT(_szFormat,## __VA_ARGS__) + +#define CXX_DEBUG_ASSERT(_condition,_szFormat,...) \ + TRACE_ASSERT(_condition,_szFormat,## __VA_ARGS__) + +#define CXX_DEBUG_TOKEN(T) cxxDebugDumpToken(T) +#define CXX_DEBUG_CHAIN(C) cxxDebugDumpChain(C) +#else //!CXX_DO_DEBUGGING + +#define CXX_DEBUG_ENTER() do { } while(0) +#define CXX_DEBUG_LEAVE() do { } while(0) + +#define CXX_DEBUG_ENTER_TEXT(_szFormat,...) do { } while(0) +#define CXX_DEBUG_LEAVE_TEXT(_szFormat,...) do { } while(0) + +#define CXX_DEBUG_PRINT(_szFormat,...) do { } while(0) + +#define CXX_DEBUG_ASSERT(_condition,_szFormat,...) 
do { } while(0) + +#define CXX_DEBUG_TOKEN(T) do { } while(0) +#define CXX_DEBUG_CHAIN(T) do { } while(0) +#endif //!CXX_DO_DEBUGGING + + +#endif //!ctags_cxx_debug_h_ diff --git a/ctags/parsers/cxx/cxx_debug_type.c b/ctags/parsers/cxx/cxx_debug_type.c new file mode 100644 index 0000000000..bfca7a2f5a --- /dev/null +++ b/ctags/parsers/cxx/cxx_debug_type.c @@ -0,0 +1,54 @@ +/* Automatically generated by misc/gencxxtypedumper.sh */ + +#include "cxx_token.h" +#include "cxx_debug.h" + +#ifdef CXX_DO_DEBUGGING +static bool append(vString *buf, const char *str, bool appended) +{ + if (appended) vStringPut(buf, ' '); + vStringCatS (buf, str); + return true; +} + +const char * cxxDebugTypeDecode (enum CXXTokenType eType) +{ + bool a = false; + static vString *buf; + buf = vStringNewOrClearWithAutoRelease (buf); + + if (eType & CXXTokenTypeEOF) a = append (buf, "EOF", a); + if (eType & CXXTokenTypeIdentifier) a = append (buf, "Identifier", a); + if (eType & CXXTokenTypeKeyword) a = append (buf, "Keyword", a); + if (eType & CXXTokenTypeNumber) a = append (buf, "Number", a); + if (eType & CXXTokenTypeSingleColon) a = append (buf, "SingleColon", a); + if (eType & CXXTokenTypeMultipleColons) a = append (buf, "MultipleColons", a); + if (eType & CXXTokenTypeSemicolon) a = append (buf, "Semicolon", a); + if (eType & CXXTokenTypeComma) a = append (buf, "Comma", a); + if (eType & CXXTokenTypeAssignment) a = append (buf, "Assignment", a); + if (eType & CXXTokenTypeOperator) a = append (buf, "Operator", a); + if (eType & CXXTokenTypeUnknown) a = append (buf, "Unknown", a); + if (eType & CXXTokenTypeDotOperator) a = append (buf, "DotOperator", a); + if (eType & CXXTokenTypePointerOperator) a = append (buf, "PointerOperator", a); + if (eType & CXXTokenTypeStringConstant) a = append (buf, "StringConstant", a); + if (eType & CXXTokenTypeStar) a = append (buf, "Star", a); + if (eType & CXXTokenTypeAnd) a = append (buf, "And", a); + if (eType & CXXTokenTypeMultipleAnds) a = append (buf, 
"MultipleAnds", a); + if (eType & CXXTokenTypeCharacterConstant) a = append (buf, "CharacterConstant", a); + if (eType & CXXTokenTypeMultipleDots) a = append (buf, "MultipleDots", a); + if (eType & CXXTokenTypeOpeningBracket) a = append (buf, "OpeningBracket", a); + if (eType & CXXTokenTypeOpeningParenthesis) a = append (buf, "OpeningParenthesis", a); + if (eType & CXXTokenTypeOpeningSquareParenthesis) a = append (buf, "OpeningSquareParenthesis", a); + if (eType & CXXTokenTypeSmallerThanSign) a = append (buf, "SmallerThanSign", a); + if (eType & CXXTokenTypeClosingBracket) a = append (buf, "ClosingBracket", a); + if (eType & CXXTokenTypeClosingParenthesis) a = append (buf, "ClosingParenthesis", a); + if (eType & CXXTokenTypeClosingSquareParenthesis) a = append (buf, "ClosingSquareParenthesis", a); + if (eType & CXXTokenTypeGreaterThanSign) a = append (buf, "GreaterThanSign", a); + if (eType & CXXTokenTypeBracketChain) a = append (buf, "BracketChain", a); + if (eType & CXXTokenTypeParenthesisChain) a = append (buf, "ParenthesisChain", a); + if (eType & CXXTokenTypeSquareParenthesisChain) a = append (buf, "SquareParenthesisChain", a); + if (eType & CXXTokenTypeAngleBracketChain) a = append (buf, "AngleBracketChain", a); + if (vStringLength(buf) == 0) vStringCatS(buf, "REALLY-UNKNOWN"); + return vStringValue (buf); +} +#endif diff --git a/ctags/parsers/cxx/cxx_keyword.c b/ctags/parsers/cxx/cxx_keyword.c new file mode 100644 index 0000000000..0eede461bb --- /dev/null +++ b/ctags/parsers/cxx/cxx_keyword.c @@ -0,0 +1,639 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "cxx_keyword.h" +#include "cxx_parser_internal.h" +#include "cxx_debug.h" + +#include "keyword.h" + +enum CXXKeywordFlag +{ + // Keywords that in most cases are parts of the name of a type. + // Examples: int, void, const, float, stuff like that + CXXKeywordFlagMayBePartOfTypeName = 1, + // struct, class, union, enum, typename + CXXKeywordIsTypeRefMarker = (1 << 1), + // Stuff that often appears together with a type name + // (for example a function return type or a variable type) + // but is not part of the type itself. + // Examples: virtual, inline, friend, static + CXXKeywordExcludeFromTypeNames = (1 << 2), + // true, false, nullptr + CXXKeywordIsConstant = (1 << 3), + // certain keywords are disabled "on-the-fly" to better + // handle C / C++ guessing errors (public, protected, private, namespace etc..) + CXXKeywordIsDisabled = (1 << 4), + // Similar to MayBePartOfTypeName but includes more keywords that are NOT part + // of the type itself. Keywords that do NOT have this flag simply cannot appear + // in a variable declaration. + // Examples: __global__, __host__, restrict, register... 
+ CXXKeywordMayAppearInVariableDeclaration = (1 << 5) +}; + +typedef struct _CXXKeywordDescriptor +{ + const char * szName; + unsigned int uLanguages; + unsigned int uFlags; +} CXXKeywordDescriptor; + + +// This array is indexed by the CXXKeywordType enum +static CXXKeywordDescriptor g_aCXXKeywordTable[] = { + { + "__attribute__", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration + }, + { + "__constant__", + CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "__declspec", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "__device__", + CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "__fastcall", + CXXLanguageCPP + }, + { + "__forceinline", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordExcludeFromTypeNames + }, + { + "__forceinline__", + CXXLanguageCUDA, + CXXKeywordExcludeFromTypeNames + }, + { + "__global__", + CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "__host__", + CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "__inline", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "__inline__", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "__managed__", + CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "__noinline__", + CXXLanguageCUDA, + CXXKeywordExcludeFromTypeNames + }, + { + "__restrict", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "__restrict__", + CXXLanguageC | 
CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "__shared__", + CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "__stdcall", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "__thiscall", + CXXLanguageCPP, + 0 + }, + { + "alignas", + CXXLanguageCPP, + CXXKeywordMayAppearInVariableDeclaration + }, + { + "alignof", + CXXLanguageCPP, + 0 + }, + //{ 1, "and", 0 }, + //{ 1, "and_eq", 0 }, + { + "asm", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "auto", + CXXLanguageCPP, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + //{ 1, "bitand", 0 }, + //{ 1, "bitor", 0 }, + { + "bool", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "break", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "case", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "catch", + CXXLanguageCPP, + 0 + }, + { + "char", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "char16_t", + CXXLanguageCPP, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "char32_t", + CXXLanguageCPP, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "class", + CXXLanguageCPP, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName | + CXXKeywordIsTypeRefMarker + }, + //{ 0, "compl", 0 }, + { + "concept", + CXXLanguageCPP, + 0 + }, + { + "const", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "constexpr", + CXXLanguageCPP, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "const_cast", 
+ CXXLanguageCPP, + 0 + }, + { + "continue", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "decltype", + CXXLanguageCPP, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "default", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "delete", + CXXLanguageCPP, + 0 + }, + { + "do", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "double", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "dynamic_cast", + CXXLanguageCPP, + 0 + }, + { + "else", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "enum", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName | + CXXKeywordIsTypeRefMarker + }, + { + "explicit", + CXXLanguageCPP, + 0 + }, + { + "export", + CXXLanguageCPP, + 0 + }, + { + "extern", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "false", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordIsConstant + }, + // this is a keyword only in special contexts (we have a switch to enable/disable it) + { + "final", + CXXLanguageCPP, + 0 + }, + { + "float", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "for", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "friend", + CXXLanguageCPP, + CXXKeywordExcludeFromTypeNames + }, + { + "goto", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "if", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "inline", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "int", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { 
+ "long", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "mutable", + CXXLanguageCPP, + CXXKeywordMayAppearInVariableDeclaration + }, + { + "namespace", + CXXLanguageCPP, + 0 + }, + { + "new", + CXXLanguageCPP, + 0 + }, + { + "noexcept", + CXXLanguageCPP, + 0 + }, + //{ 0, "not", 0 }, + //{ 0, "not_eq", 0 }, + { + "nullptr", + CXXLanguageCPP, + CXXKeywordIsConstant + }, + { + "operator", + CXXLanguageCPP, + 0 + }, + //{ 0, "or", 0 }, + //{ 0, "or_eq", 0 }, + // override is a keyword only after function declarators, + // it's easier handling it as identifier + //{ 0, "override", 0 }, + { + "private", + CXXLanguageCPP, + 0 + }, + { + "protected", + CXXLanguageCPP, + 0 + }, + { + "public", + CXXLanguageCPP, + 0 + }, + { + "register", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration + }, + { + "reinterpret_cast", + CXXLanguageCPP, + 0 + }, + { + "requires", + CXXLanguageCPP, + 0 + }, + { + "restrict", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "return", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "short", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "signed", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "sizeof", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "static", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames + }, + { + "static_assert", + CXXLanguageCPP, + 0 + }, + { + "static_cast", + CXXLanguageCPP, + 0 + }, + { + "struct", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | 
CXXKeywordFlagMayBePartOfTypeName | + CXXKeywordIsTypeRefMarker + }, + { + "switch", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "template", + CXXLanguageCPP, + 0 + }, + { + "this", + CXXLanguageCPP, + 0 + }, + { + "thread_local", + CXXLanguageCPP, + CXXKeywordMayAppearInVariableDeclaration + }, + { + "throw", + CXXLanguageCPP, + 0 + }, + { + "true", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordIsConstant + }, + { + "try", + CXXLanguageCPP, + 0 + }, + { + "typedef", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + { + "typeid", + CXXLanguageCPP, + 0 + }, + { + "typename", + CXXLanguageCPP, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName | + CXXKeywordIsTypeRefMarker + }, + { + "union", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName | + CXXKeywordIsTypeRefMarker + }, + { + "unsigned", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "using", + CXXLanguageCPP, + 0 + }, + { + "virtual", + CXXLanguageCPP, + CXXKeywordExcludeFromTypeNames + }, + { + "void", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "volatile", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + CXXKeywordMayAppearInVariableDeclaration + }, + { + "wchar_t", + CXXLanguageCPP, + CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName + }, + { + "while", + CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA, + 0 + }, + //{ 0, "xor", 0 }, + //{ 0, 1, "xor_eq", 0 } +}; + +const char * cxxKeywordName(CXXKeyword eKeywordId) +{ + return g_aCXXKeywordTable[eKeywordId].szName; +} + +bool cxxKeywordMayBePartOfTypeName(CXXKeyword eKeywordId) +{ + return g_aCXXKeywordTable[eKeywordId].uFlags & + CXXKeywordFlagMayBePartOfTypeName; +} + +bool 
cxxKeywordMayAppearInVariableDeclaration(CXXKeyword eKeywordId) +{ + return g_aCXXKeywordTable[eKeywordId].uFlags & + CXXKeywordMayAppearInVariableDeclaration; +} + +bool cxxKeywordIsTypeRefMarker(CXXKeyword eKeywordId) +{ + return g_aCXXKeywordTable[eKeywordId].uFlags & + CXXKeywordIsTypeRefMarker; +} + +bool cxxKeywordIsConstant(CXXKeyword eKeywordId) +{ + return g_aCXXKeywordTable[eKeywordId].uFlags & + CXXKeywordIsConstant; +} + +bool cxxKeywordIsCPPSpecific(CXXKeyword eKeywordId) +{ + return g_aCXXKeywordTable[eKeywordId].uLanguages == CXXLanguageCPP; +} + +bool cxxKeywordExcludeFromTypeNames(CXXKeyword eKeywordId) +{ + return g_aCXXKeywordTable[eKeywordId].uFlags & + CXXKeywordExcludeFromTypeNames; +} + +bool cxxKeywordIsDisabled(CXXKeyword eKeywordId) +{ + return g_aCXXKeywordTable[eKeywordId].uFlags & + CXXKeywordIsDisabled; +} + +bool cxxKeywordEnablePublicProtectedPrivate(bool bEnableIt) +{ + bool bEnabledNow = + !(g_aCXXKeywordTable[CXXKeywordPUBLIC].uFlags & CXXKeywordIsDisabled); + + if(bEnabledNow == bEnableIt) + return bEnabledNow; + + if(bEnableIt) + { + CXX_DEBUG_PRINT("Enabling public/protected/private keywords"); + + g_aCXXKeywordTable[CXXKeywordPUBLIC].uFlags &= ~CXXKeywordIsDisabled; + g_aCXXKeywordTable[CXXKeywordPROTECTED].uFlags &= ~CXXKeywordIsDisabled; + g_aCXXKeywordTable[CXXKeywordPRIVATE].uFlags &= ~CXXKeywordIsDisabled; + } else { + CXX_DEBUG_PRINT("Disabling public/protected/private keywords"); + + g_aCXXKeywordTable[CXXKeywordPUBLIC].uFlags |= CXXKeywordIsDisabled; + g_aCXXKeywordTable[CXXKeywordPROTECTED].uFlags |= CXXKeywordIsDisabled; + g_aCXXKeywordTable[CXXKeywordPRIVATE].uFlags |= CXXKeywordIsDisabled; + } + + return bEnabledNow; +} + +void cxxKeywordEnableFinal(bool bEnableIt) +{ + if(bEnableIt) + g_aCXXKeywordTable[CXXKeywordFINAL].uFlags &= ~CXXKeywordIsDisabled; + else + g_aCXXKeywordTable[CXXKeywordFINAL].uFlags |= CXXKeywordIsDisabled; +} + + +void cxxBuildKeywordHash(const langType eLangType,unsigned int uLanguage) +{ + 
const size_t count = sizeof(g_aCXXKeywordTable) / sizeof(CXXKeywordDescriptor); + + size_t i; + + for(i = 0;i < count;i++) + { + const CXXKeywordDescriptor * p = g_aCXXKeywordTable + i; + if(p->uLanguages & uLanguage) + addKeyword(p->szName,eLangType,i); + } +} diff --git a/ctags/parsers/cxx/cxx_keyword.h b/ctags/parsers/cxx/cxx_keyword.h new file mode 100644 index 0000000000..14a108a1e1 --- /dev/null +++ b/ctags/parsers/cxx/cxx_keyword.h @@ -0,0 +1,178 @@ +#ifndef ctags_cxx_keyword_h_ +#define ctags_cxx_keyword_h_ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" +#include "parse.h" + +// WARNING: There is a table in cxx_keyword.c that must match order in this enum +typedef enum _CXXKeyword +{ + CXXKeyword__ATTRIBUTE__, // GCC + CXXKeyword__CONSTANT__, // CUDA + CXXKeyword__DECLSPEC, // Microsoft C/C++ + CXXKeyword__DEVICE__, // CUDA + CXXKeyword__FASTCALL, // Microsoft C/C++ + CXXKeyword__FORCEINLINE, // Microsoft C/C++ + CXXKeyword__FORCEINLINE__, // CUDA + CXXKeyword__GLOBAL__, // CUDA + CXXKeyword__HOST__, // CUDA + CXXKeyword__INLINE, // Microsoft C/C++ + CXXKeyword__INLINE__, // GCC + CXXKeyword__MANAGED__, // CUDA + CXXKeyword__NOINLINE__, // CUDA + CXXKeyword__RESTRICT, // Microsoft C/C++ + CXXKeyword__RESTRICT__, // CUDA + CXXKeyword__SHARED__, // CUDA + CXXKeyword__STDCALL, // Microsoft C/C++ + CXXKeyword__THISCALL, // Microsoft C/C++ + CXXKeywordALIGNAS, // (since C++11) + CXXKeywordALIGNOF, // (since C++11) + //CXXKeywordAND, + //CXXKeywordAND_EQ, + CXXKeywordASM, + CXXKeywordAUTO, + //CXXKeywordBITAND, + //CXXKeywordBITOR, + CXXKeywordBOOL, + CXXKeywordBREAK, + CXXKeywordCASE, + CXXKeywordCATCH, + CXXKeywordCHAR, + CXXKeywordCHAR16_T, // (since C++11) + CXXKeywordCHAR32_T, // (since C++11) 
+ CXXKeywordCLASS, + //CXXKeywordCOMPL, + CXXKeywordCONCEPT, // Concepts TS + CXXKeywordCONST, + CXXKeywordCONSTEXPR, // (since C++11) + CXXKeywordCONST_CAST, + CXXKeywordCONTINUE, + CXXKeywordDECLTYPE, // (since C++11) + CXXKeywordDEFAULT, + CXXKeywordDELETE, + CXXKeywordDO, + CXXKeywordDOUBLE, + CXXKeywordDYNAMIC_CAST, + CXXKeywordELSE, + CXXKeywordENUM, + CXXKeywordEXPLICIT, + CXXKeywordEXPORT, + CXXKeywordEXTERN, + CXXKeywordFALSE, + CXXKeywordFINAL, // not really a keyword, has meanings in some specific contexts + CXXKeywordFLOAT, + CXXKeywordFOR, + CXXKeywordFRIEND, + CXXKeywordGOTO, + CXXKeywordIF, + CXXKeywordINLINE, + CXXKeywordINT, + CXXKeywordLONG, + CXXKeywordMUTABLE, + CXXKeywordNAMESPACE, + CXXKeywordNEW, + CXXKeywordNOEXCEPT, // (since C++11) + //CXXKeywordNOT, + //CXXKeywordNOT_EQ, + CXXKeywordNULLPTR, // (since C++11) + CXXKeywordOPERATOR, + //CXXKeywordOR, + //CXXKeywordOR_EQ, + //CXXKeywordOVERRIDE, // not really a keyword, has meanings in some specific contexts + CXXKeywordPRIVATE, + CXXKeywordPROTECTED, + CXXKeywordPUBLIC, + CXXKeywordREGISTER, + CXXKeywordREINTERPRET_CAST, + CXXKeywordREQUIRES, // (Concepts TS) + CXXKeywordRESTRICT, // C99 extension + CXXKeywordRETURN, + CXXKeywordSHORT, + CXXKeywordSIGNED, + CXXKeywordSIZEOF, + CXXKeywordSTATIC, + CXXKeywordSTATIC_ASSERT, // (since C++11) + CXXKeywordSTATIC_CAST, + CXXKeywordSTRUCT, + CXXKeywordSWITCH, + CXXKeywordTEMPLATE, + CXXKeywordTHIS, + CXXKeywordTHREAD_LOCAL, // (since C++11) + CXXKeywordTHROW, + CXXKeywordTRUE, + CXXKeywordTRY, + CXXKeywordTYPEDEF, + CXXKeywordTYPEID, + CXXKeywordTYPENAME, + CXXKeywordUNION, + CXXKeywordUNSIGNED, + CXXKeywordUSING, + CXXKeywordVIRTUAL, + CXXKeywordVOID, + CXXKeywordVOLATILE, + CXXKeywordWCHAR_T, + CXXKeywordWHILE, + //CXXKeywordXOR, + //CXXKeywordXOR_EQ, + // WARNING: There is a table in cxx_keyword.c that must match order in this enumeration +} CXXKeyword; + +bool cxxKeywordIsConstant(CXXKeyword eKeywordId); +bool 
cxxKeywordMayBePartOfTypeName(CXXKeyword eKeywordId); +bool cxxKeywordIsTypeRefMarker(CXXKeyword eKeywordId); +bool cxxKeywordExcludeFromTypeNames(CXXKeyword eKeywordId); +bool cxxKeywordMayAppearInVariableDeclaration(CXXKeyword eKeywordId); +bool cxxKeywordIsCPPSpecific(CXXKeyword eKeywordId); + + +const char * cxxKeywordName(CXXKeyword eKeywordId); + +// uLanguage is really CXXLanguage, but we keep it as unsigned int to avoid +// problems with header inclusions. It works anyway. +void cxxBuildKeywordHash(const langType eLangType,unsigned int uLanguage); + +// Keyword enabled/disabled state management. +// +// public, protected, private, class, namespace... keywords are C++ only. +// However when parsing .h files we don't know if they belong to a C program or C++ +// one and thus for safety we parse them as C++. If our guess is wrong then the parser +// may become confused and in some cases even bail out. +// +// For this reason we enable/disable the processing of certain keyword sets +// in certain contexts. + + +// +// "public,protected,private" keywords +// +// In header files we disable processing of such keywords until we either figure +// out that the file really contains C++ or we start parsing a struct/union. +// +// This flag is meaningful only when parsing a .h file as C++ since in C +// public/protected/private are never keywords and we assume that .cpp files +// have C++ content (so public/protected/private are always keywords). +// +// This function returns the previous state of the public/protected/private keywords +// enabled flag so it can be easily restored. +bool cxxKeywordEnablePublicProtectedPrivate(bool bEnableIt); + +// +// "final" keyword +// +// This is actually special at C++ level: it's a keyword only within a specific part +// of a class declaration. In other contexts it's not a keyword. +void cxxKeywordEnableFinal(bool bEnableIt); + +// Is the specific keyword currently disabled? 
+bool cxxKeywordIsDisabled(CXXKeyword eKeywordId); + + +#endif //!ctags_cxx_keyword_h_ \ No newline at end of file diff --git a/ctags/parsers/cxx/cxx_parser.c b/ctags/parsers/cxx/cxx_parser.c new file mode 100644 index 0000000000..ec29454398 --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser.c @@ -0,0 +1,2023 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_parser.h" +#include "cxx_parser_internal.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_scope.h" +#include "cxx_tag.h" +#include "cxx_subparser_internal.h" + +#include "parse.h" +#include "vstring.h" +#include "../cpreprocessor.h" +#include "debug.h" +#include "keyword.h" +#include "read.h" +#include "ptrarray.h" +#include "trashbox.h" + +#include + +// +// The global parser state +// +CXXParserState g_cxx; + +// +// This is set to false once the parser is run at least one time. +// Used by cleanup routines. +// +bool g_bFirstRun = true; + +// +// Reset parser state: +// - Clear the token chain +// - Reset "seen" keywords +// +void cxxParserNewStatement(void) +{ + cxxTokenChainClear(g_cxx.pTokenChain); + if(g_cxx.pTemplateTokenChain) + { + cxxTokenChainDestroy(g_cxx.pTemplateTokenChain); + g_cxx.pTemplateTokenChain = NULL; + g_cxx.oTemplateParameters.uCount = 0; + } else { + // we don't care about stale specializations as they + // are destroyed wen the base template prefix is extracted + } + g_cxx.uKeywordState = 0; + + // FIXME: this cpp handling of end/statement is kind of broken: + // it works only because the moon is in the correct phase. + cppEndStatement(); +} + +// +// Parse a subchain of input delimited by matching pairs selected from +// [],(),{} and <>. 
+// +// On entry g_cxx.pToken is expected to point to the initial token of the pair, +// that is one of ([{<. The function will parse input until the matching +// terminator token is found. Inner parsing is done by +// cxxParserParseAndCondenseSubchainsUpToOneOf() so this is actually a recursive +// subchain nesting algorithm. +// +// Returns true if it has successfully extracted and "condensed" a subchain +// replacing the current token with a subchain subtree. Returns false if +// extraction fails for some reason. +// +// This function never leaves the token chain in an incoherent state. +// The current token is always replaced with a subchain tree. If the subchain +// is broken, its contents are discarded, regardless of the return value. +// +bool cxxParserParseAndCondenseCurrentSubchain( + unsigned int uInitialSubchainMarkerTypes, + bool bAcceptEOF, + bool bCanReduceInnerElements + ) +{ + CXX_DEBUG_ENTER(); + + CXXTokenChain * pCurrentChain = g_cxx.pTokenChain; + + g_cxx.pTokenChain = cxxTokenChainCreate(); + + CXXToken * pInitial = cxxTokenChainTakeLast(pCurrentChain); + cxxTokenChainAppend(g_cxx.pTokenChain,pInitial); + + CXXToken * pChainToken = cxxTokenCreate(); + + pChainToken->iLineNumber = pInitial->iLineNumber; + pChainToken->oFilePosition = pInitial->oFilePosition; + // see the declaration of CXXTokenType enum. + // Shifting by 8 gives the corresponding chain marker + pChainToken->eType = (enum CXXTokenType)(g_cxx.pToken->eType << 8); + pChainToken->pChain = g_cxx.pTokenChain; + cxxTokenChainAppend(pCurrentChain,pChainToken); + + // see the declaration of CXXTokenType enum. 
+ // Shifting by 4 gives the corresponding closing token type + enum CXXTokenType eTermType = (enum CXXTokenType)(g_cxx.pToken->eType << 4); + + unsigned int uTokenTypes = eTermType; + if(bAcceptEOF) + uTokenTypes |= CXXTokenTypeEOF; + + bool bRet = cxxParserParseAndCondenseSubchainsUpToOneOf( + uTokenTypes, + uInitialSubchainMarkerTypes, + bCanReduceInnerElements + ); + + if( + // Parsing the subchain failed: input is broken + (!bRet) || + // Mismatched terminator (i.e EOF was accepted and encountered) + (!cxxTokenTypeIs(cxxTokenChainLast(g_cxx.pTokenChain),eTermType)) + ) + { + // Input is probably broken: discard it in any case, so no one + // is tempted to parse it later. + + CXX_DEBUG_PRINT( + "Parsing the subchain failed or EOF found. Discarding broken subtree" + ); + + while(g_cxx.pTokenChain->iCount > 1) + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + // Fake the terminator + CXXToken * pFakeLast = cxxTokenCreate(); + pFakeLast->iLineNumber = pChainToken->iLineNumber; + pFakeLast->oFilePosition = pChainToken->oFilePosition; + switch(eTermType) + { + case CXXTokenTypeClosingBracket: + vStringPut(pFakeLast->pszWord,'}'); + break; + case CXXTokenTypeClosingParenthesis: + vStringPut(pFakeLast->pszWord,')'); + break; + case CXXTokenTypeClosingSquareParenthesis: + vStringPut(pFakeLast->pszWord,']'); + break; + case CXXTokenTypeGreaterThanSign: + vStringPut(pFakeLast->pszWord,'>'); + break; + default: + CXX_DEBUG_ASSERT(false,"Unhandled terminator type"); + break; + } + pFakeLast->eType = eTermType; + pFakeLast->pChain = NULL; + + cxxTokenChainAppend(g_cxx.pTokenChain,pFakeLast); + } + + g_cxx.pTokenChain = pCurrentChain; + g_cxx.pToken = pCurrentChain->pTail; + + CXX_DEBUG_LEAVE(); + return bRet; +} + +// +// This function parses input until one of the specified tokens appears. +// The current token is NOT checked against the specified tokens. 
+//
+// The algorithm will also build subchains of matching
+// pairs ([...],(...),<...>,{...}): within the subchain analysis
+// of uTokenTypes is completely disabled. Subchains do nest.
+//
+// Returns true if it stops before EOF or it stops at EOF and CXXTokenTypeEOF
+// is present in uTokenTypes. Returns false in all the other stop conditions
+// and when an unmatched subchain character pair is found (syntax error).
+//
+// Parameters:
+//   uTokenTypes                 - mask of token types that terminate the scan.
+//   uInitialSubchainMarkerTypes - mask of the opening token types that start a
+//                                 subchain; the matching closing types are
+//                                 obtained by shifting this mask left by 4.
+//   bCanReduceInnerElements     - when true cxxTokenReduceBackward() is applied
+//                                 to a terminator found at the top of the scan
+//                                 loop; the flag is also forwarded to the
+//                                 subchain parser.
+//
+// g_cxx.iNestingLevels is incremented around every nested subchain and is
+// restored before returning, including on the "nested too deep" bail-out.
+//
+bool cxxParserParseAndCondenseSubchainsUpToOneOf(
+		unsigned int uTokenTypes,
+		unsigned int uInitialSubchainMarkerTypes,
+		bool bCanReduceInnerElements
+	)
+{
+	CXX_DEBUG_ENTER_TEXT("Token types = 0x%x(%s), reduce = %d", uTokenTypes, cxxDebugTypeDecode(uTokenTypes),
+						 bCanReduceInnerElements);
+
+	if(!cxxParserParseNextToken())
+	{
+		CXX_DEBUG_LEAVE_TEXT("Found EOF");
+		return (uTokenTypes & CXXTokenTypeEOF); // was already at EOF
+	}
+
+	// see the declaration of CXXTokenType enum.
+	// Shifting by 4 gives the corresponding closing token type
+	unsigned int uFinalSubchainMarkerTypes = uInitialSubchainMarkerTypes << 4;
+
+	for(;;)
+	{
+		//CXX_DEBUG_PRINT(
+		//		"Current token is '%s' 0x%x",
+		//		vStringValue(g_cxx.pToken->pszWord),
+		//		g_cxx.pToken->eType
+		//	);
+
+		// 1) Terminator: stop right here.
+		if(cxxTokenTypeIsOneOf(g_cxx.pToken,uTokenTypes))
+		{
+			if (bCanReduceInnerElements)
+				cxxTokenReduceBackward (g_cxx.pToken);
+			CXX_DEBUG_LEAVE_TEXT(
+					"Got terminator token '%s' 0x%x",
+					vStringValue(g_cxx.pToken->pszWord),
+					g_cxx.pToken->eType
+				);
+			return true;
+		}
+
+		// 2) Opening marker: condense the whole matching pair into a subchain.
+		if(cxxTokenTypeIsOneOf(g_cxx.pToken,uInitialSubchainMarkerTypes))
+		{
+			// subchain
+			CXX_DEBUG_PRINT(
+					"Got subchain start token '%s' 0x%x",
+					vStringValue(g_cxx.pToken->pszWord),
+					g_cxx.pToken->eType
+				);
+			CXXToken * pParenthesis;
+
+			if(
+				cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningBracket) &&
+				cxxParserCurrentLanguageIsCPP() &&
+				(pParenthesis = cxxParserOpeningBracketIsLambda())
+			)
+			{
+				// An opening bracket that belongs to a C++ lambda gets
+				// its own handler instead of the generic subchain parser.
+				if(!cxxParserHandleLambda(pParenthesis))
+				{
+					CXX_DEBUG_LEAVE_TEXT("Failed to handle lambda");
+					return false;
+				}
+			} else {
+				g_cxx.iNestingLevels++;
+
+				if(g_cxx.iNestingLevels > CXX_PARSER_MAXIMUM_NESTING_LEVELS)
+				{
+					// Undo the increment above before bailing out: each
+					// enclosing frame only decrements once for itself, so
+					// without this the counter would stay one level too
+					// high after the recursion has unwound.
+					g_cxx.iNestingLevels--;
+					CXX_DEBUG_LEAVE_TEXT("Nesting level grown too much: something nasty is going on");
+					return false;
+				}
+
+				bool bRet = cxxParserParseAndCondenseCurrentSubchain(
+						uInitialSubchainMarkerTypes,
+						(uTokenTypes & CXXTokenTypeEOF),
+						bCanReduceInnerElements
+					);
+
+				g_cxx.iNestingLevels--;
+
+				if(!bRet)
+				{
+					CXX_DEBUG_LEAVE_TEXT(
+							"Failed to parse subchain of type 0x%x",
+							g_cxx.pToken->eType
+						);
+					return false;
+				}
+			}
+
+			// The current token is re-checked after the subchain has
+			// been handled: it may itself be one of the requested types.
+			if(cxxTokenTypeIsOneOf(g_cxx.pToken,uTokenTypes))
+			{
+				// was looking for a subchain
+				CXX_DEBUG_LEAVE_TEXT(
+						"Got terminator subchain token 0x%x",
+						g_cxx.pToken->eType
+					);
+				return true;
+			}
+
+			if(!cxxParserParseNextToken())
+			{
+				CXX_DEBUG_LEAVE_TEXT("Found EOF(2)");
+				return (uTokenTypes & CXXTokenTypeEOF); // hit EOF right after a subchain
+			}
+
+			continue; // jump up to avoid checking for mismatched pairs below
+		}
+
+		// 3) Closing marker without its opening counterpart.
+		// Check for mismatched brackets/parentheses
+		// Note that if we were looking for one of [({ then we would have matched
+		// it at the top of the for
+		if(cxxTokenTypeIsOneOf(g_cxx.pToken,uFinalSubchainMarkerTypes))
+		{
+			CXX_DEBUG_LEAVE_TEXT(
+					"Got mismatched subchain terminator 0x%x",
+					g_cxx.pToken->eType
+				);
+			return false; // unmatched: syntax error
+		}
+
+		// 4) Anything else: keep scanning.
+		if(!cxxParserParseNextToken())
+		{
+			CXX_DEBUG_LEAVE_TEXT("Found EOF(3)");
+			return (uTokenTypes & CXXTokenTypeEOF); // hit EOF while scanning
+		}
+	}
+
+	// not reached
+	CXX_DEBUG_LEAVE_TEXT("Internal error");
+	return false;
+}
+
+//
+// This function parses input until one of the specified tokens appears.
+// The current token is NOT checked against the specified tokens.
+//
+// The algorithm will also build subchains of matching pairs ([...],(...),{...}).
+// Within the subchain analysis of uTokenTypes is completely disabled.
+// Subchains do nest.
+//
+// Please note that this function will skip entire scopes (matching {} pairs)
+// unless you pass CXXTokenTypeOpeningBracket to stop at their beginning.
+// This is usually what you want, unless you're really expecting a scope to begin
+// in the current statement.
+//
+// The return value is the one of cxxParserParseAndCondenseSubchainsUpToOneOf().
+//
+bool cxxParserParseUpToOneOf(unsigned int uTokenTypes,
+							 bool bCanReduceInnerElements)
+{
+	return cxxParserParseAndCondenseSubchainsUpToOneOf(
+			uTokenTypes,
+			CXXTokenTypeOpeningBracket |
+				CXXTokenTypeOpeningParenthesis |
+				CXXTokenTypeOpeningSquareParenthesis,
+			bCanReduceInnerElements
+		);
+}
+
+//
+// Attempts to skip to either a semicolon or an EOF, ignoring anything in between.
+// May also be used to recover from certain forms of syntax errors.
+// This function also works if the current token is a semicolon or an EOF:
+// in that case it returns true without consuming any input.
+//
+bool cxxParserSkipToSemicolonOrEOF(void)
+{
+	if(cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeSemicolon | CXXTokenTypeEOF))
+		return true;
+
+	return cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF,
+								   false);
+}
+
+// This has to be called when pointing to a double-colon token
+// or an identifier.
+//
+// It tries to parse a qualified name in the form of ...::A::B::C::D ...
+// and stops at the first token that is not part of such name.
+//
+// Returns false if it doesn't find an identifier after a double-colon
+// or if it finds an EOF. Returns true otherwise.
+//
+// Upon exit the token preceding the current is the last identifier
+// of the qualified name.
+bool cxxParserParseToEndOfQualifedName(void)
+{
+	CXX_DEBUG_ENTER();
+
+	CXX_DEBUG_ASSERT(
+			cxxTokenTypeIsOneOf(
+					g_cxx.pToken,
+					CXXTokenTypeMultipleColons | CXXTokenTypeIdentifier
+				),
+			"This function should be called when pointing to a double-colon or an identifier"
+		);
+
+	// A leading identifier is simply stepped over: the loop below then
+	// consumes the remaining "::identifier" pairs.
+	if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeIdentifier))
+	{
+		if(!cxxParserParseNextToken())
+		{
+			// syntax error, but we tolerate this
+			CXX_DEBUG_LEAVE_TEXT("EOF in cxxParserParseNextToken");
+			return false; // EOF
+		}
+	}
+
+	while(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeMultipleColons))
+	{
+		// step over the "::"
+		if(!cxxParserParseNextToken())
+		{
+			// syntax error, but we tolerate this
+			CXX_DEBUG_LEAVE_TEXT("EOF in cxxParserParseNextToken");
+			return false; // EOF
+		}
+
+		// a "::" must be followed by an identifier
+		if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeIdentifier))
+		{
+			CXX_DEBUG_LEAVE_TEXT("Found no identifier after multiple colons");
+			return false;
+		}
+
+		// step over the identifier
+		if(!cxxParserParseNextToken())
+		{
+			// syntax error, but we tolerate this
+			CXX_DEBUG_LEAVE_TEXT("EOF in cxxParserParseNextToken");
+			return false; // EOF
+		}
+	}
+
+	CXX_DEBUG_ASSERT(g_cxx.pToken->pPrev,"There should be a previous token here");
+	CXX_DEBUG_ASSERT(
+			cxxTokenTypeIs(g_cxx.pToken->pPrev,CXXTokenTypeIdentifier),
+			"The qualified name should end with an identifier"
+		);
+
+	CXX_DEBUG_LEAVE();
+	return true;
+}
+
+//
+// Store lEndLine as the "end" field of the tag found at iCorkQueueIndex
+// in the cork queue.
+//
+// If the queue has no entry at that index nothing is written: the
+// assertions below only fire in debugging builds, so the lookup result
+// is checked explicitly instead of being dereferenced blindly.
+//
+void cxxParserSetEndLineForTagInCorkQueue(int iCorkQueueIndex,unsigned long lEndLine)
+{
+	CXX_DEBUG_ASSERT(iCorkQueueIndex > CORK_NIL,"The cork queue index is not valid");
+
+	tagEntryInfo * tag = getEntryInCorkQueue (iCorkQueueIndex);
+
+	CXX_DEBUG_ASSERT(tag,"No tag entry in the cork queue");
+
+	if(!tag)
+		return; // nothing to update
+
+	tag->extensionFields.endLine = lEndLine;
+}
+
+//
+// Attach the current position of input file as "end" field of
+// the specified tag in the cork queue
+//
+void cxxParserMarkEndLineForTagInCorkQueue(int iCorkQueueIndex)
+{
+	cxxParserSetEndLineForTagInCorkQueue(iCorkQueueIndex,getInputLineNumber());
+}
+
+
+// Make sure that the token chain contains only the
specified keyword and eventually +// the "const" or "volatile" type modifiers. +static void cxxParserCleanupEnumStructClassOrUnionPrefixChain(CXXKeyword eKeyword,CXXToken * pLastToken) +{ + CXXToken * pToken = cxxTokenChainFirst(g_cxx.pTokenChain); + while(pToken && (pToken != pLastToken)) + { + if( + cxxTokenTypeIs(pToken,CXXTokenTypeKeyword) && + ( + (pToken->eKeyword == eKeyword) || + (pToken->eKeyword == CXXKeywordCONST) || + (pToken->eKeyword == CXXKeywordVOLATILE) + ) + ) + { + // keep + pToken = pToken->pNext; + } else { + CXXToken * pPrev = pToken->pPrev; + if(pPrev) + { + cxxTokenChainTake(g_cxx.pTokenChain,pToken); + cxxTokenDestroy(pToken); + pToken = pPrev->pNext; + } else { + cxxTokenChainDestroyFirst(g_cxx.pTokenChain); + pToken = cxxTokenChainFirst(g_cxx.pTokenChain); + } + } + } +} + +// +// This is called after a full enum/struct/class/union declaration +// that ends with a closing bracket. +// +static bool cxxParserParseEnumStructClassOrUnionFullDeclarationTrailer( + unsigned int uKeywordState, + CXXKeyword eTagKeyword, + const char * szTypeName + ) +{ + CXX_DEBUG_ENTER(); + + cxxTokenChainClear(g_cxx.pTokenChain); + + CXX_DEBUG_PRINT( + "Parse enum/struct/class/union trailer, typename is '%s'", + szTypeName + ); + + MIOPos oFilePosition = getInputFilePosition(); + int iFileLine = getInputLineNumber(); + + if(!cxxParserParseUpToOneOf( + CXXTokenTypeEOF | CXXTokenTypeSemicolon | + CXXTokenTypeOpeningBracket | CXXTokenTypeAssignment, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse up to EOF/semicolon"); + return false; + } + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF)) + { + // It's a syntax error, but we can be tolerant here. 
+ CXX_DEBUG_LEAVE_TEXT("Got EOF after enum/class/struct/union block"); + return true; + } + + if(g_cxx.pTokenChain->iCount < 2) + { + CXX_DEBUG_LEAVE_TEXT("Nothing interesting after enum/class/struct block"); + return true; + } + + // fake the initial two tokens + CXXToken * pIdentifier = cxxTokenCreate(); + pIdentifier->oFilePosition = oFilePosition; + pIdentifier->iLineNumber = iFileLine; + pIdentifier->eType = CXXTokenTypeIdentifier; + pIdentifier->bFollowedBySpace = true; + vStringCatS(pIdentifier->pszWord,szTypeName); + cxxTokenChainPrepend(g_cxx.pTokenChain,pIdentifier); + + cxxTokenChainPrepend( + g_cxx.pTokenChain, + cxxTokenCreateKeyword(iFileLine,oFilePosition,eTagKeyword) + ); + + if(uKeywordState & CXXParserKeywordStateSeenConst) + { + cxxTokenChainPrepend( + g_cxx.pTokenChain, + cxxTokenCreateKeyword(iFileLine,oFilePosition,CXXKeywordCONST) + ); + } + + if(uKeywordState & CXXParserKeywordStateSeenVolatile) + { + cxxTokenChainPrepend( + g_cxx.pTokenChain, + cxxTokenCreateKeyword(iFileLine,oFilePosition,CXXKeywordVOLATILE) + ); + } + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningBracket)) + { + CXX_DEBUG_PRINT("Found opening bracket: possibly a function declaration?"); + if(!cxxParserParseBlockHandleOpeningBracket()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to handle the opening bracket"); + return false; + } + CXX_DEBUG_LEAVE_TEXT("Opening bracket handled"); + return true; + } + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeAssignment)) + { + if(!cxxParserParseUpToOneOf( + CXXTokenTypeEOF | CXXTokenTypeSemicolon, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse up to EOF/semicolon"); + return false; + } + } + + if(uKeywordState & CXXParserKeywordStateSeenTypedef) + cxxParserExtractTypedef(g_cxx.pTokenChain,true,false); + else + cxxParserExtractVariableDeclarations(g_cxx.pTokenChain,0); + + CXX_DEBUG_LEAVE(); + return true; +} + +bool cxxParserParseEnum(void) +{ + CXX_DEBUG_ENTER(); + + unsigned int uInitialKeywordState = g_cxx.uKeywordState; + 
int iInitialTokenCount = g_cxx.pTokenChain->iCount; + CXXToken * pLastToken = cxxTokenChainLast(g_cxx.pTokenChain); + + /* + Spec is: + enum-key attr(optional) identifier(optional) enum-base(optional) + { enumerator-list(optional) } (1) + enum-key attr(optional) identifier enum-base(optional) ; + (2) (since C++11) + + enum-key - one of enum, enum class(since C++11), or enum struct(since C++11) + attr(C++11) - optional sequence of any number of attributes + identifier - the name of the enumeration that's being declared. + If present, and if this declaration is a re-declaration, it may be preceded by + nested-name-specifier(since C++11): sequence of names and scope-resolution + operators ::, ending with scope-resolution operator. The name can be omitted + only in unscoped enumeration declarations + + enum-base(C++11) - colon (:), followed by a type-specifier-seq that names an + integral type (if it is cv-qualified, qualifications are ignored) + enumerator-list - comma-separated list of enumerator definitions, each of which is + either simply an identifier, which becomes the name of the enumerator, or an + identifier with an initializer: identifier = constexpr. In either case, the + identifier can be directly followed by an optional attribute specifier + sequence. (since C++17) + */ + + // Skip attr and class-head-name + if(!cxxParserParseUpToOneOf( + CXXTokenTypeEOF | CXXTokenTypeSemicolon | CXXTokenTypeKeyword | + CXXTokenTypeSingleColon | CXXTokenTypeParenthesisChain | + CXXTokenTypeOpeningBracket, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Could not parse enum name"); + return false; + } + + bool bIsScopedEnum = false; // c++11 scoped enum (enum class | enum struct) + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeKeyword)) + { + // enum class | enum struct ? 
+ if( + (g_cxx.pToken->eKeyword == CXXKeywordSTRUCT) || + (g_cxx.pToken->eKeyword == CXXKeywordCLASS) + ) + { + bIsScopedEnum = true; + } + + if(!cxxParserParseUpToOneOf( + CXXTokenTypeEOF | CXXTokenTypeSemicolon | CXXTokenTypeSingleColon | + CXXTokenTypeParenthesisChain | CXXTokenTypeOpeningBracket, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Could not parse enum name"); + return false; + } + } + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF)) + { + // tolerate EOF, treat as forward declaration + cxxParserNewStatement(); + CXX_DEBUG_LEAVE_TEXT("EOF before enum block: treating as forward declaration"); + return true; + } + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeParenthesisChain)) + { + if(uInitialKeywordState & CXXParserKeywordStateSeenTypedef) + { + CXX_DEBUG_LEAVE_TEXT("Found parenthesis after typedef: parsing as generic typedef"); + return cxxParserParseGenericTypedef(); + } + // probably a function declaration/prototype + // something like enum x func().... + // do not clear statement + CXX_DEBUG_LEAVE_TEXT("Probably a function declaration!"); + return true; + } + + // If we have found a semicolon then we might be in the special case of KnR function + // declaration. This requires at least 5 tokens and has some additional constraints. + // See cxxParserMaybeParseKnRStyleFunctionDefinition() for more informations. 
+ if( + // found semicolon + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSemicolon) && + // many tokens before the enum keyword + (iInitialTokenCount > 3) && + // C language + cxxParserCurrentLanguageIsC() && + // global scope + cxxScopeIsGlobal() && + // no typedef + (!(uInitialKeywordState & CXXParserKeywordStateSeenTypedef)) + ) + { + CXX_DEBUG_PRINT("Maybe KnR function definition"); + + switch(cxxParserMaybeParseKnRStyleFunctionDefinition()) + { + case 1: + // parser moved forward and started a new statement + CXX_DEBUG_LEAVE_TEXT("K&R parser did the job"); + return true; + break; + case 0: + // something else, go ahead + break; + default: + CXX_DEBUG_LEAVE_TEXT("Failed to check for K&R style function definition"); + return false; + break; + } + } + + if(iInitialTokenCount > 1) + cxxParserCleanupEnumStructClassOrUnionPrefixChain(CXXKeywordENUM,pLastToken); + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSemicolon)) + { + CXX_DEBUG_PRINT("Found semicolon, maybe typedef or variable declaration"); + + // scoped enums can't be used to declare variables. + if((!bIsScopedEnum) && (g_cxx.pTokenChain->iCount > 3)) + { + // [typedef] enum X Y; <-- typedef has been removed! + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef) + cxxParserExtractTypedef(g_cxx.pTokenChain,true,false); + else + cxxParserExtractVariableDeclarations(g_cxx.pTokenChain,0); + } + + cxxParserNewStatement(); + CXX_DEBUG_LEAVE(); + return true; + } + + // colon or opening bracket + CXX_DEBUG_ASSERT( + cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeSingleColon | CXXTokenTypeOpeningBracket), + "We should be pointing to a : or a {" + ); + + // check if we can extract a class name identifier now + CXXToken * pEnumName = cxxTokenChainLastTokenOfType( + g_cxx.pTokenChain, + CXXTokenTypeIdentifier + ); + + CXXToken * pTypeBegin; // no need to NULLify, only pTypeEnd matters. 
+ CXXToken * pTypeEnd = NULL; + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSingleColon)) + { + // skip type + CXX_DEBUG_PRINT("Single colon, trying to skip type"); + + pTypeBegin = g_cxx.pToken; + + if(!cxxParserParseUpToOneOf( + CXXTokenTypeEOF | CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket, + false)) + { + CXX_DEBUG_LEAVE_TEXT("Could not parse enum type"); + return false; + } + + if(cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeEOF | CXXTokenTypeSemicolon)) + { + // tolerate EOF, treat as forward declaration + cxxParserNewStatement(); + CXX_DEBUG_LEAVE_TEXT("EOF or semicolon before enum block: can't decode this"); + return true; + } + + // certainly opening bracket now. + if(g_cxx.pToken->pPrev != pTypeBegin) + { + // there were tokens between the semicolon and the type begin + pTypeBegin = pTypeBegin->pNext; + pTypeEnd = g_cxx.pToken->pPrev; + } + } + + + int iPushedScopes = 0; + bool bAnonymous = false; + + if(pEnumName) + { + // good. + // It may be qualified though. + if(cxxParserCurrentLanguageIsCPP()) + { + CXXToken * pNamespaceBegin = pEnumName; + CXXToken * pPrev = pEnumName->pPrev; + while(pPrev) + { + if(!cxxTokenTypeIs(pPrev,CXXTokenTypeMultipleColons)) + break; + pPrev = pPrev->pPrev; + if(!pPrev) + break; + if(!cxxTokenTypeIs(pPrev,CXXTokenTypeIdentifier)) + break; + pNamespaceBegin = pPrev; + pPrev = pPrev->pPrev; + } + + while(pNamespaceBegin != pEnumName) + { + CXXToken * pNext = pNamespaceBegin->pNext; + cxxTokenChainTake(g_cxx.pTokenChain,pNamespaceBegin); + if(cxxParserCurrentLanguageIsCPP()) + { + // FIXME: We don't really know if it's a class! 
+ cxxScopePush(pNamespaceBegin,CXXScopeTypeClass,CXXScopeAccessUnknown); + } else { + // it's a syntax error, but be tolerant + } + iPushedScopes++; + pNamespaceBegin = pNext->pNext; + } + } + + CXX_DEBUG_PRINT("Enum name is %s",vStringValue(pEnumName->pszWord)); + cxxTokenChainTake(g_cxx.pTokenChain,pEnumName); + } else { + pEnumName = cxxTokenCreateAnonymousIdentifier(CXXTagKindENUM); + bAnonymous = true; + CXX_DEBUG_PRINT( + "Enum name is %s (anonymous)", + vStringValue(pEnumName->pszWord) + ); + } + + + tagEntryInfo * tag = cxxTagBegin(CXXTagKindENUM,pEnumName); + + int iCorkQueueIndex = CORK_NIL; + int iCorkQueueIndexFQ = CORK_NIL; + + if(tag) + { + // FIXME: this is debatable + tag->isFileScope = !isInputHeaderFile(); + + if (bAnonymous) + markTagExtraBit (tag, XTAG_ANONYMOUS); + + CXXToken * pTypeName = NULL; + vString * pszProperties = NULL; + + if(pTypeEnd) + { + CXX_DEBUG_ASSERT(pTypeBegin,"Type begin should be also set here"); + pTypeName = cxxTagCheckAndSetTypeField(pTypeBegin,pTypeEnd); + } + + if(bIsScopedEnum) + pszProperties = cxxTagSetProperties(CXXTagPropertyScopedEnum); + + iCorkQueueIndex = cxxTagCommit(&iCorkQueueIndexFQ); + + if (pszProperties) + vStringDelete (pszProperties); + + if(pTypeName) + cxxTokenDestroy(pTypeName); + } + + cxxScopePush(pEnumName,CXXScopeTypeEnum,CXXScopeAccessPublic); + iPushedScopes++; + + vString * pScopeName = cxxScopeGetFullNameAsString(); + + // Special kind of block + for(;;) + { + cxxTokenChainClear(g_cxx.pTokenChain); + + if(!cxxParserParseUpToOneOf( + CXXTokenTypeComma | CXXTokenTypeClosingBracket | CXXTokenTypeEOF, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse enum contents"); + if(pScopeName) + vStringDelete(pScopeName); + return false; + } + + CXXToken * pFirst = cxxTokenChainFirst(g_cxx.pTokenChain); + + // enumerator. 
+		if(
+				(g_cxx.pTokenChain->iCount > 1) &&
+				cxxTokenTypeIs(pFirst,CXXTokenTypeIdentifier)
+			)
+		{
+			tag = cxxTagBegin(CXXTagKindENUMERATOR,pFirst);
+			if(tag)
+			{
+				tag->isFileScope = !isInputHeaderFile();
+				cxxTagCommit(NULL);
+			}
+		}
+
+		if(cxxTokenTypeIsOneOf(
+				g_cxx.pToken,
+				CXXTokenTypeEOF | CXXTokenTypeClosingBracket
+			))
+			break;
+	}
+
+	if(iCorkQueueIndex > CORK_NIL)
+	{
+		cxxParserMarkEndLineForTagInCorkQueue(iCorkQueueIndex);
+		if(iCorkQueueIndexFQ > CORK_NIL)
+			cxxParserMarkEndLineForTagInCorkQueue(iCorkQueueIndexFQ);
+	}
+
+	while(iPushedScopes > 0)
+	{
+		cxxScopePop();
+		iPushedScopes--;
+	}
+
+	bool bRet = cxxParserParseEnumStructClassOrUnionFullDeclarationTrailer(
+			uInitialKeywordState,
+			CXXKeywordENUM,
+			vStringValue(pScopeName)
+		);
+
+	if(pScopeName)
+		vStringDelete(pScopeName);
+
+	cxxParserNewStatement();
+	CXX_DEBUG_LEAVE();
+	return bRet;
+}
+
+//
+// Parses a statement that starts with the class, struct or union keyword.
+//
+//   eKeyword   - the keyword that introduced the statement. It is kept when
+//                the prefix token chain is cleaned up and it is passed to the
+//                full declaration trailer parser.
+//   uTagKind   - the kind of the tag to emit; also used to build the name of
+//                an anonymous class/struct/union.
+//   uScopeType - the type of the scope pushed while the body is parsed.
+//
+// Returns false when parsing fails in a way that cannot be recovered from.
+// Returns true in all the other cases, including the ones where the statement
+// turns out to be something else (function declaration, variable declaration,
+// forward declaration, operator...).
+//
+// The "final" keyword handling is enabled while the class head is scanned
+// and is disabled again on every exit path.
+//
+static bool cxxParserParseClassStructOrUnionInternal(
+		CXXKeyword eKeyword,
+		unsigned int uTagKind,
+		unsigned int uScopeType
+	)
+{
+	CXX_DEBUG_ENTER();
+
+	unsigned int uInitialKeywordState = g_cxx.uKeywordState;
+	int iInitialTokenCount = g_cxx.pTokenChain->iCount;
+	CXXToken * pLastToken = cxxTokenChainLast(g_cxx.pTokenChain);
+	bool bAnonymous = false;
+
+	/*
+		Spec is:
+			class-key attr class-head-name base-clause { member-specification }
+
+			class-key - one of class or struct. The keywords are identical
+				except for the default member access and the default base class access.
+			attr(C++11) - optional sequence of any number of attributes,
+				may include alignas specifier
+			class-head-name - the name of the class that's being defined.
+				Optionally qualified, optionally followed by keyword final.
+				The name may be omitted, in which case the class is unnamed (note
+				that unnamed class cannot be final)
+			base-clause - optional list of one or more parent classes and the
+				model of inheritance used for each (see derived class)
+			member-specification - list of access specifiers, member object and
+				member function declarations and definitions (see below)
+	*/
+
+	// Skip attr and class-head-name
+
+	// enable "final" keyword handling
+	cxxKeywordEnableFinal(true);
+
+	unsigned int uTerminatorTypes = CXXTokenTypeEOF | CXXTokenTypeSingleColon |
+			CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket |
+			CXXTokenTypeSmallerThanSign | (cxxParserCurrentLanguageIsCPP()? CXXTokenTypeKeyword: 0) |
+			CXXTokenTypeParenthesisChain;
+
+	if(uTagKind != CXXTagCPPKindCLASS)
+		uTerminatorTypes |= CXXTokenTypeAssignment;
+
+	bool bRet;
+
+	for(;;)
+	{
+		bRet = cxxParserParseUpToOneOf(uTerminatorTypes, false);
+
+		if(!bRet)
+		{
+			cxxKeywordEnableFinal(false);
+			CXX_DEBUG_LEAVE_TEXT("Could not parse class/struct/union name");
+			return false;
+		}
+
+		if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeKeyword))
+		{
+			/* The statement declares or defines an operator,
+			 * not a class, struct nor union. */
+			if(g_cxx.pToken->eKeyword == CXXKeywordOPERATOR)
+			{
+				// Leave like every other exit does: "final" must not
+				// stay enabled once this function has returned.
+				cxxKeywordEnableFinal(false);
+				CXX_DEBUG_LEAVE_TEXT("Found operator keyword: not a class/struct/union");
+				return true;
+			}
+			continue;
+		}
+
+		if(
+				cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeParenthesisChain) &&
+				(
+					(
+						// struct alignas(n) ...
+						cxxTokenIsKeyword(g_cxx.pToken->pPrev,CXXKeywordALIGNAS)
+					) || (
+						// things like __builtin_align__(16)
+						!cxxParserTokenChainLooksLikeFunctionParameterList(g_cxx.pToken->pChain,NULL)
+					)
+				)
+			)
+			continue;
+
+		if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSmallerThanSign))
+			break;
+
+		// Probably a template specialisation
+		if(!cxxParserCurrentLanguageIsCPP())
+		{
+			cxxKeywordEnableFinal(false);
+			CXX_DEBUG_LEAVE_TEXT("Template specialization in C language?");
+			return false;
+		}
+
+		// template struct X
+		// {
+		// }
+
+		// Only the most recent specialization chain is kept.
+		if(g_cxx.pTemplateSpecializationTokenChain)
+			cxxTokenChainDestroy(g_cxx.pTemplateSpecializationTokenChain);
+
+		g_cxx.pTemplateSpecializationTokenChain = cxxParserParseTemplateAngleBracketsToSeparateChain(false);
+		if(!g_cxx.pTemplateSpecializationTokenChain)
+		{
+			cxxKeywordEnableFinal(false);
+			CXX_DEBUG_LEAVE_TEXT("Could not parse class/struct/union name");
+			return false;
+		}
+	}
+
+	// Once we reached the terminator, "final" is not a keyword anymore.
+	cxxKeywordEnableFinal(false);
+
+	if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeParenthesisChain))
+	{
+		if(uInitialKeywordState & CXXParserKeywordStateSeenTypedef)
+		{
+			CXX_DEBUG_LEAVE_TEXT("Found parenthesis after typedef: parsing as generic typedef");
+			return cxxParserParseGenericTypedef();
+		}
+
+		// probably a function declaration/prototype
+		// something like struct x * func()....
+		// do not clear statement
+		CXX_DEBUG_LEAVE_TEXT("Probably a function declaration!");
+		return true;
+	}
+
+	// If we have found a semicolon then we might be in the special case of KnR function
+	// declaration. This requires at least 5 tokens and has some additional constraints.
+	// See cxxParserMaybeParseKnRStyleFunctionDefinition() for more information.
+	// FIXME: This block is duplicated in enum
+	if(
+			// found semicolon
+			cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSemicolon) &&
+			// many tokens before the class/struct/union keyword
+			(iInitialTokenCount > 3) &&
+			// C language
+			cxxParserCurrentLanguageIsC() &&
+			// global scope
+			cxxScopeIsGlobal() &&
+			// no typedef
+			(!(uInitialKeywordState & CXXParserKeywordStateSeenTypedef))
+		)
+	{
+		CXX_DEBUG_PRINT("Maybe KnR function definition?");
+
+		switch(cxxParserMaybeParseKnRStyleFunctionDefinition())
+		{
+			case 1:
+				// parser moved forward and started a new statement
+				CXX_DEBUG_LEAVE_TEXT("K&R function definition parser did the job");
+				return true;
+			break;
+			case 0:
+				// something else, go ahead
+			break;
+			default:
+				CXX_DEBUG_LEAVE_TEXT("Failed to check for K&R style function definition");
+				return false;
+			break;
+		}
+	}
+
+	// Drop whatever preceded the keyword, except cv-qualifiers.
+	if(iInitialTokenCount > 1)
+		cxxParserCleanupEnumStructClassOrUnionPrefixChain(eKeyword,pLastToken);
+
+	if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSemicolon))
+	{
+		if(g_cxx.pTokenChain->iCount > 3)
+		{
+			// [typedef] struct X Y; <-- typedef has been removed!
+			if(uInitialKeywordState & CXXParserKeywordStateSeenTypedef)
+				cxxParserExtractTypedef(g_cxx.pTokenChain,true,false);
+			else if(!(g_cxx.uKeywordState & CXXParserKeywordStateSeenFriend))
+				cxxParserExtractVariableDeclarations(g_cxx.pTokenChain,0);
+		}
+
+		cxxParserNewStatement();
+		CXX_DEBUG_LEAVE();
+		return true;
+	}
+
+	if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeAssignment))
+	{
+		// struct X Y = ...;
+		bool bCanExtractVariables = g_cxx.pTokenChain->iCount > 3;
+
+		// Skip the initialization (which almost certainly contains a block)
+		if(!cxxParserParseUpToOneOf(CXXTokenTypeEOF | CXXTokenTypeSemicolon, false))
+		{
+			CXX_DEBUG_LEAVE_TEXT("Failed to parse up to EOF/semicolon");
+			return false;
+		}
+
+		if(bCanExtractVariables)
+			cxxParserExtractVariableDeclarations(g_cxx.pTokenChain,0);
+
+		cxxParserNewStatement();
+		CXX_DEBUG_LEAVE();
+		return true;
+	}
+
+	if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF))
+	{
+		// tolerate EOF, just ignore this
+		cxxParserNewStatement();
+		CXX_DEBUG_LEAVE_TEXT("EOF: ignoring");
+		return true;
+	}
+
+	// single colon or opening bracket: all the other terminators
+	// have been handled above
+
+	// check if we can extract a class name identifier
+	CXXToken * pClassName = cxxTokenChainLastTokenOfType(
+			g_cxx.pTokenChain,
+			CXXTokenTypeIdentifier
+		);
+
+	// If no identifier has been found we can try some fallbacks.
+	if(!pClassName)
+	{
+		// If we're in C++ mode, but C++ language hasn't been confirmed yet,
+		// and there is a C++ specific keyword just before the terminator we found
+		// then we'll try to use it as class/struct/union name.
+		if(
+				cxxParserCurrentLanguageIsCPP() &&
+				(!g_cxx.bConfirmedCPPLanguage) &&
+				(eKeyword != CXXKeywordCLASS) &&
+				(g_cxx.pTokenChain->iCount >= 3) &&
+				cxxTokenTypeIs(g_cxx.pToken->pPrev,CXXTokenTypeKeyword) &&
+				cxxKeywordIsCPPSpecific(g_cxx.pToken->pPrev->eKeyword)
+			)
+		{
+			pClassName = g_cxx.pToken->pPrev;
+			pClassName->eType = CXXTokenTypeIdentifier;
+			CXX_DEBUG_PRINT(
+					"Found no class/struct/union name identifier but there is '%s' which might look good",
+					vStringValue(pClassName->pszWord)
+				);
+		}
+	}
+
+	// Number of qualifier scopes (A:: B:: ...) pushed for the name below.
+	int iPushedScopes = 0;
+
+	if(pClassName)
+	{
+		// good.
+		// It may be qualified though.
+		CXXToken * pNamespaceBegin = pClassName;
+		CXXToken * pPrev = pClassName->pPrev;
+		// Walk backwards over "identifier ::" pairs to find where
+		// the qualified name begins.
+		while(pPrev)
+		{
+			if(!cxxTokenTypeIs(pPrev,CXXTokenTypeMultipleColons))
+				break;
+			pPrev = pPrev->pPrev;
+			if(!pPrev)
+				break;
+			if(!cxxTokenTypeIs(pPrev,CXXTokenTypeIdentifier))
+				break;
+			pNamespaceBegin = pPrev;
+			pPrev = pPrev->pPrev;
+		}
+
+		// Turn each qualifier into a scope (C++) or discard it (C).
+		while(pNamespaceBegin != pClassName)
+		{
+			CXXToken * pNext = pNamespaceBegin->pNext;
+			cxxTokenChainTake(g_cxx.pTokenChain,pNamespaceBegin);
+			if(cxxParserCurrentLanguageIsCPP())
+			{
+				// FIXME: We don't really know if it's a class!
+				cxxScopePush(pNamespaceBegin,CXXScopeTypeClass,CXXScopeAccessUnknown);
+				iPushedScopes++;
+			} else {
+				// it's a syntax error, but be tolerant
+				cxxTokenDestroy(pNamespaceBegin);
+			}
+			// pNext is the "::" token: the next qualifier follows it
+			pNamespaceBegin = pNext->pNext;
+		}
+
+		CXX_DEBUG_PRINT(
+				"Class/struct/union name is %s",
+				vStringValue(pClassName->pszWord)
+			);
+		cxxTokenChainTake(g_cxx.pTokenChain,pClassName);
+	} else {
+		pClassName = cxxTokenCreateAnonymousIdentifier(uTagKind);
+		bAnonymous = true;
+		CXX_DEBUG_PRINT(
+				"Class/struct/union name is %s (anonymous)",
+				vStringValue(pClassName->pszWord)
+			);
+	}
+
+	if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSingleColon))
+	{
+		// check for base classes
+		cxxTokenChainClear(g_cxx.pTokenChain);
+
+		if(!cxxParserParseUpToOneOf(
+				CXXTokenTypeEOF | CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket,
+				false
+			))
+		{
+			cxxTokenDestroy(pClassName);
+			// Unwind the qualifier scopes pushed above so that the
+			// scope stack is left as it was found.
+			while(iPushedScopes > 0)
+			{
+				cxxScopePop();
+				iPushedScopes--;
+			}
+			CXX_DEBUG_LEAVE_TEXT("Failed to parse base class part");
+			return false;
+		}
+
+		if(cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeSemicolon | CXXTokenTypeEOF))
+		{
+			cxxTokenDestroy(pClassName);
+			// Parsing goes on after this return: the qualifier scopes
+			// pushed above must not leak into the following statements.
+			while(iPushedScopes > 0)
+			{
+				cxxScopePop();
+				iPushedScopes--;
+			}
+			cxxParserNewStatement();
+			CXX_DEBUG_LEAVE_TEXT("Syntax error: ignoring");
+			return true;
+		}
+
+		cxxTokenChainDestroyLast(g_cxx.pTokenChain); // remove the {
+	} else {
+		cxxTokenChainClear(g_cxx.pTokenChain);
+	}
+
+	// OK. This seems to be a valid class/struct/union declaration.
+
+	if(
+			(uTagKind == CXXTagCPPKindCLASS) &&
+			(!g_cxx.bConfirmedCPPLanguage)
+		)
+	{
+		CXX_DEBUG_PRINT("Succeeded in parsing a C++ class: this really seems to be C++");
+		g_cxx.bConfirmedCPPLanguage = true;
+	}
+
+	tagEntryInfo * tag = cxxTagBegin(uTagKind,pClassName);
+
+	int iCorkQueueIndex = CORK_NIL;
+	int iCorkQueueIndexFQ = CORK_NIL;
+
+	bool bGotTemplate = g_cxx.pTemplateTokenChain &&
+			(g_cxx.pTemplateTokenChain->iCount > 0) &&
+			cxxParserCurrentLanguageIsCPP();
+
+	if(tag)
+	{
+		if (bAnonymous)
+			markTagExtraBit (tag, XTAG_ANONYMOUS);
+
+		// At this point the token chain holds the base-clause, if any.
+		if(g_cxx.pTokenChain->iCount > 0)
+		{
+			// Strip inheritance type information
+			// FIXME: This could be optional!
+
+			CXXToken * t = cxxTokenChainFirst(g_cxx.pTokenChain);
+			while(t)
+			{
+				if(
+					cxxTokenTypeIs(t,CXXTokenTypeKeyword) &&
+					(
+						(t->eKeyword == CXXKeywordPUBLIC) ||
+						(t->eKeyword == CXXKeywordPROTECTED) ||
+						(t->eKeyword == CXXKeywordPRIVATE) ||
+						(t->eKeyword == CXXKeywordVIRTUAL)
+					)
+				)
+				{
+					CXXToken * pNext = t->pNext;
+					cxxTokenChainTake(g_cxx.pTokenChain,t);
+					cxxTokenDestroy(t);
+					t = pNext;
+				} else {
+					t = t->pNext;
+				}
+			}
+
+			if(g_cxx.pTokenChain->iCount > 0)
+			{
+				cxxTokenChainCondense(
+						g_cxx.pTokenChain,
+						CXXTokenChainCondenseNoTrailingSpaces
+					);
+				// The field points into the condensed head token: the
+				// tag is committed below, before the chain changes again.
+				tag->extensionFields.inheritance = vStringValue(
+						g_cxx.pTokenChain->pHead->pszWord
+					);
+			}
+		}
+
+		if(bGotTemplate)
+			cxxTagHandleTemplateFields();
+
+		tag->isFileScope = !isInputHeaderFile();
+
+		iCorkQueueIndex = cxxTagCommit(&iCorkQueueIndexFQ);
+
+	}
+
+	// This scope is counted in iPushedScopes only after the block
+	// has been parsed (see below).
+	cxxScopePush(
+			pClassName,
+			uScopeType,
+			(uTagKind == CXXTagCPPKindCLASS) ?
+				CXXScopeAccessPrivate : CXXScopeAccessPublic
+		);
+
+	if(
+			bGotTemplate &&
+			cxxTagKindEnabled(CXXTagCPPKindTEMPLATEPARAM)
+		)
+		cxxParserEmitTemplateParameterTags();
+
+	// Fetched before the scopes are popped: the trailer parser needs it.
+	vString * pScopeName = cxxScopeGetFullNameAsString();
+
+	if(!cxxParserParseBlock(true))
+	{
+		CXX_DEBUG_LEAVE_TEXT("Failed to parse scope");
+		if(pScopeName)
+			vStringDelete(pScopeName);
+		return false;
+	}
+
+	if(iCorkQueueIndex > CORK_NIL)
+	{
+		cxxParserMarkEndLineForTagInCorkQueue(iCorkQueueIndex);
+		if(iCorkQueueIndexFQ > CORK_NIL)
+			cxxParserMarkEndLineForTagInCorkQueue(iCorkQueueIndexFQ);
+	}
+
+	iPushedScopes++; // the class/struct/union scope itself
+	while(iPushedScopes > 0)
+	{
+		cxxScopePop();
+		iPushedScopes--;
+	}
+
+	bRet = cxxParserParseEnumStructClassOrUnionFullDeclarationTrailer(
+			uInitialKeywordState,
+			eKeyword,
+			vStringValue(pScopeName)
+		);
+
+	if(pScopeName)
+		vStringDelete(pScopeName);
+
+	cxxParserNewStatement();
+	CXX_DEBUG_LEAVE();
+	return bRet;
+}
+
+bool cxxParserParseClassStructOrUnion(
+		CXXKeyword eKeyword,
+		unsigned int uTagKind,
+		unsigned int uScopeType
+	)
+{
+	// Trick for "smart" handling of public/protected/private keywords in .h files parsed as C++.
+	// See the declaration of cxxKeywordEnablePublicProtectedPrivate for more info.
+
+	// Enable public/protected/private keywords and save the previous state
+	bool bEnablePublicProtectedPrivateKeywords = cxxKeywordEnablePublicProtectedPrivate(true);
+
+	bool bRet = cxxParserParseClassStructOrUnionInternal(eKeyword,uTagKind,uScopeType);
+
+	// If parsing succeeded, we're in C++ mode and the keyword is "class" then
+	// we're fairly certain that the source code is *really* C++.
+ if(g_cxx.bConfirmedCPPLanguage) + bEnablePublicProtectedPrivateKeywords = true; // leave it on for good: we're (almost) sure it's C++ + + cxxKeywordEnablePublicProtectedPrivate(bEnablePublicProtectedPrivateKeywords); + + return bRet; +} + + +// +// This is called at block level, upon encountering a semicolon, an unbalanced +// closing bracket or EOF.The current token is something like: +// static const char * variable; +// int i = .... +// const QString & function(whatever) const; +// QString szText("ascii"); +// QString(...) +// +// Notable facts: +// - several special statements never end up here: this includes class, +// struct, union, enum, namespace, typedef, case, try, catch and other +// similar stuff. +// - the terminator is always at the end. It's either a semicolon, a closing +// bracket or an EOF +// - the parentheses and brackets are always condensed in subchains +// (unless unbalanced). +// +// int __attribute__() function(); +// | | +// ("whatever") (int var1,type var2) +// +// const char * strings[] = {} +// | | +// [10] { "string","string",.... } +// +// This function tries to extract variable declarations and function prototypes. +// +// Yes, it's complex: it's because C/C++ is complex. +// +void cxxParserAnalyzeOtherStatement(void) +{ + CXX_DEBUG_ENTER(); + +#ifdef CXX_DO_DEBUGGING + vString * pChain = cxxTokenChainJoin(g_cxx.pTokenChain,NULL,0); + CXX_DEBUG_PRINT("Analyzing statement '%s'",vStringValue(pChain)); + vStringDelete(pChain); +#endif + + CXX_DEBUG_ASSERT( + g_cxx.pTokenChain->iCount > 0, + "There should be at least the terminator here!" + ); + + if(g_cxx.pTokenChain->iCount < 2) + { + CXX_DEBUG_LEAVE_TEXT("Empty statement"); + return; + } + + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenReturn) + { + CXX_DEBUG_LEAVE_TEXT("Statement after a return is not interesting"); + return; + } + + // Everything we can make sense of starts with an identifier or keyword. 
+ // This is usually a type name (eventually decorated by some attributes + // and modifiers) with the notable exception of constructor/destructor + // declarations (which are still identifiers tho). + + CXXToken * t = cxxTokenChainFirst(g_cxx.pTokenChain); + + if(!cxxTokenTypeIsOneOf(t,CXXTokenTypeIdentifier | CXXTokenTypeKeyword)) + { + CXX_DEBUG_LEAVE_TEXT("Statement does not start with an identifier or keyword"); + return; + } + + enum CXXScopeType eScopeType = cxxScopeGetType(); + + CXXFunctionSignatureInfo oInfo; + + // kinda looks like a function or variable instantiation... maybe + if(eScopeType == CXXScopeTypeFunction) + { + // prefer variable declarations. + // if none found then try function prototype + if(cxxParserExtractVariableDeclarations(g_cxx.pTokenChain,0)) + { + CXX_DEBUG_LEAVE_TEXT("Found variable declarations"); + return; + } + + // FIXME: This *COULD* work but we should first rule out the possibility + // of simple function calls like func(a). The function signature search + // should be far stricter here. + + //if(cxxParserLookForFunctionSignature(g_cxx.pTokenChain,&oInfo,NULL)) + // cxxParserEmitFunctionTags(&oInfo,CXXTagKindPROTOTYPE,0); + + CXX_DEBUG_LEAVE(); + return; + } + + // prefer function. + CXXTypedVariableSet oParamInfo; + const bool bPrototypeParams = cxxTagKindEnabled(CXXTagKindPROTOTYPE) && cxxTagKindEnabled(CXXTagKindPARAMETER); +check_function_signature: + + if(cxxParserLookForFunctionSignature(g_cxx.pTokenChain,&oInfo,bPrototypeParams?&oParamInfo:NULL)) + { + CXX_DEBUG_PRINT("Found function prototype"); + + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenFriend) + { + // class X { + // friend void aFunction(); + // }; + // 'aFunction' is NOT X::aFunction() and in complex cases we can't figure + // out its proper scope. Better avoid emitting this one. 
+ CXX_DEBUG_PRINT("But it has been preceded by the 'friend' keyword: this is not a real prototype"); + } else { + int iCorkQueueIndex, iCorkQueueIndexFQ; + int iScopesPushed = cxxParserEmitFunctionTags(&oInfo,CXXTagKindPROTOTYPE,CXXEmitFunctionTagsPushScopes,&iCorkQueueIndex,&iCorkQueueIndexFQ); + if (iCorkQueueIndex != CORK_NIL) + { + CXXToken * t = cxxTokenChainLast(g_cxx.pTokenChain); + cxxParserSetEndLineForTagInCorkQueue (iCorkQueueIndex, t->iLineNumber); + if (iCorkQueueIndexFQ != CORK_NIL) + cxxParserSetEndLineForTagInCorkQueue (iCorkQueueIndexFQ, t->iLineNumber); + } + + if(bPrototypeParams) + cxxParserEmitFunctionParameterTags(&oParamInfo); + + while(iScopesPushed > 0) + { + cxxScopePop(); + iScopesPushed--; + } + } + + if(oInfo.pTrailingComma) + { + // got a trailing comma after the function signature. + // This might be a special case of multiple prototypes in a single declaration. + // + // RetType functionA(...), functionB(...), functionC(...); + // + // Let's try to extract also the other declarations. + // + // We cannot rely on oInfo.pIdentifierStart after cxxParserEmitFunctionTags() + // since it has been removed. Manually skip the initial type name. + + CXXToken * pBegin = cxxTokenChainFirstTokenNotOfType( + g_cxx.pTokenChain, + CXXTokenTypeIdentifier | CXXTokenTypeKeyword + ); + + CXX_DEBUG_ASSERT(pBegin,"We should have found a begin token here!"); + cxxTokenChainDestroyRange(g_cxx.pTokenChain,pBegin,oInfo.pTrailingComma); + goto check_function_signature; + } + + CXX_DEBUG_LEAVE(); + return; + } + + if( + g_cxx.uKeywordState & + ( + // Note that since C++-17 inline can be used as a modifier for variables + // so don't be tempted to put it here. + CXXParserKeywordStateSeenExplicit | + CXXParserKeywordStateSeenOperator | CXXParserKeywordStateSeenVirtual + ) + ) + { + // must be function! 
+ CXX_DEBUG_LEAVE_TEXT( + "WARNING: Was expecting to find a function prototype " \ + "but did not find one" + ); + return; + } + + cxxParserExtractVariableDeclarations(g_cxx.pTokenChain,0); + CXX_DEBUG_LEAVE_TEXT("Nothing else"); +} + + +// This is called when we encounter a "public", "protected" or "private" keyword +// that is NOT in the class declaration header line. +bool cxxParserParseAccessSpecifier(void) +{ + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeKeyword) && + ( + (g_cxx.pToken->eKeyword == CXXKeywordPUBLIC) || + (g_cxx.pToken->eKeyword == CXXKeywordPROTECTED) || + (g_cxx.pToken->eKeyword == CXXKeywordPRIVATE) + ), + "This must be called just after parsing public/protected/private" + ); + + unsigned int uExtraType = 0; + + enum CXXScopeType eScopeType = cxxScopeGetType(); + + static ptrArray *pSubparsers; + if (!pSubparsers) + { + pSubparsers = ptrArrayNew(NULL); + DEFAULT_TRASH_BOX (pSubparsers, ptrArrayDelete); + } + + if( + (eScopeType != CXXScopeTypeClass) && + (eScopeType != CXXScopeTypeUnion) && + (eScopeType != CXXScopeTypeStruct) + ) + { + CXX_DEBUG_LEAVE_TEXT( + "Access specified in wrong context (%d)", + eScopeType + ); + + if(!g_cxx.bConfirmedCPPLanguage) + { + CXX_DEBUG_LEAVE_TEXT("C++ is not confirmed and the scope is not right: likely not access specifier"); + g_cxx.pToken->eType = CXXTokenTypeIdentifier; + return true; + } + + // this is a syntax error: we're in the wrong scope. + CXX_DEBUG_LEAVE_TEXT("C++ language is confirmed: bailing out to avoid reporting broken structure"); + return false; + } + + if(!g_cxx.bConfirmedCPPLanguage) + { + if(g_cxx.pToken->pPrev) + { + // ugly, there is something before the public/private/protected keyword. + // This is likely a type or something else. 
+ CXX_DEBUG_LEAVE_TEXT( + "C++ is not confirmed and there is something before: likely not access specifier" + ); + g_cxx.pToken->eType = CXXTokenTypeIdentifier; + return true; + } + } + + if (cxxSubparserNotifyParseAccessSpecifier (pSubparsers)) + uExtraType = CXXTokenTypeIdentifier; + + CXXToken * pInitialToken = g_cxx.pToken; + + // skip to the next :, without leaving scope. + findColon: + if(!cxxParserParseUpToOneOf( + uExtraType | + CXXTokenTypeSingleColon | CXXTokenTypeSemicolon | + CXXTokenTypeClosingBracket | CXXTokenTypeEOF, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse up to the next ;"); + ptrArrayClear (pSubparsers); + return false; + } + + if (uExtraType && cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeIdentifier)) + { + cxxSubparserNotifyfoundExtraIdentifierAsAccessSpecifier (pSubparsers, + g_cxx.pToken); + goto findColon; + } + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSingleColon)) + { + if(!pInitialToken->pPrev) + { + CXX_DEBUG_PRINT("The access specifier was the first token and I have found a colon: this is C++"); + g_cxx.bConfirmedCPPLanguage = true; + } + } + + switch(pInitialToken->eKeyword) + { + case CXXKeywordPUBLIC: + cxxScopeSetAccess(CXXScopeAccessPublic); + break; + case CXXKeywordPRIVATE: + cxxScopeSetAccess(CXXScopeAccessPrivate); + break; + case CXXKeywordPROTECTED: + cxxScopeSetAccess(CXXScopeAccessProtected); + break; + default: + CXX_DEBUG_ASSERT(false,"Bad keyword in cxxParserParseAccessSpecifier!"); + break; + } + + cxxTokenChainClear(g_cxx.pTokenChain); + ptrArrayClear (pSubparsers); + CXX_DEBUG_LEAVE(); + return true; +} + +bool cxxParserParseIfForWhileSwitchCatchParenthesis(void) +{ + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeKeyword), + "This function should be called only after encountering one of the keywords" + ); + + CXXKeyword eKeyword = g_cxx.pToken->eKeyword; + + if(!cxxParserParseUpToOneOf( + CXXTokenTypeParenthesisChain | CXXTokenTypeSemicolon | + 
CXXTokenTypeOpeningBracket | CXXTokenTypeEOF, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse if/for/while/switch/catch up to parenthesis"); + return false; + } + + if(cxxTokenTypeIsOneOf( + g_cxx.pToken, + CXXTokenTypeEOF | CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket + )) + { + CXX_DEBUG_LEAVE_TEXT( + "Found EOF/semicolon/opening bracket while parsing if/for/while/switch/catch" + ); + return true; + } + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeParenthesisChain), + "Expected a parenthesis chain here" + ); + + CXX_DEBUG_PRINT("Found if/for/while/switch/catch parenthesis chain"); + + // Extract variables from the parenthesis chain + + CXXTokenChain * pChain = g_cxx.pToken->pChain; + + CXX_DEBUG_ASSERT( + pChain->iCount >= 2, + "The parenthesis chain must have initial and final parenthesis" + ); + + // There are several constructs that can fool the parser here. + // + // The most frequent problems arise with + // + // if(a & b ...) + // if(a * b ...) + // if(a && b ...) + // + // which may or may not be variable declarations, depending on the + // meaning of identifier a. + // Other problems involve balanced operators that resemble templates: + // + // if(a < b || c > d ...) + // + // Here we attempt to rule out these special cases. + + // First try the easy "inclusive" cases. + + // catch() always contains variable declarations + + bool bOkToExtractVariables = eKeyword == CXXKeywordCATCH; + + if(!bOkToExtractVariables) + { + // Another easy one: try parenthesis contents that start with a keyword. + // + // if(const std::exception & e) + // if(int i ... + // + bOkToExtractVariables = cxxTokenTypeIs( + cxxTokenChainAt(pChain,1), + CXXTokenTypeKeyword + ); + + if(!bOkToExtractVariables) + { + // If there is &, && or * then we expect there to be also a = or + // a semicolon that comes after it. + // This is not 100% foolproof but works most of the time. 
+ + CXXToken * pToken = cxxTokenChainFirstTokenOfType( + pChain, + CXXTokenTypeAnd | CXXTokenTypeMultipleAnds | CXXTokenTypeStar | + CXXTokenTypeSmallerThanSign | + CXXTokenTypeAssignment | CXXTokenTypeSemicolon + ); + + if(pToken) + { + switch(pToken->eType) + { + case CXXTokenTypeAnd: + case CXXTokenTypeMultipleAnds: + case CXXTokenTypeStar: + case CXXTokenTypeSmallerThanSign: + // troublesome cases. + // Require an assignment or a semicolon to follow + bOkToExtractVariables = (cxxTokenChainFirstTokenOfType( + pChain, + CXXTokenTypeAssignment | CXXTokenTypeSemicolon + ) ? true : false); // ternary ?: needed because of MSVC + break; + case CXXTokenTypeAssignment: + case CXXTokenTypeSemicolon: + // looks ok + bOkToExtractVariables = true; + break; + default: + // should NOT happen! + CXX_DEBUG_ASSERT(false,"Unexpected token type"); + break; + } + } else { + // looks ok + bOkToExtractVariables = true; + } + } + } + + if(bOkToExtractVariables) + { + // Kill the initial parenthesis + cxxTokenChainDestroyFirst(pChain); + // Fake the final semicolon + CXXToken * t = cxxTokenChainLast(pChain); + t->eType = CXXTokenTypeSemicolon; + vStringClear(t->pszWord); + vStringPut(t->pszWord,';'); + + // and extract variable declarations if possible + cxxParserExtractVariableDeclarations(pChain,0); + } + + CXX_DEBUG_LEAVE(); + return true; +} + +static rescanReason cxxParserMain(const unsigned int passCount) +{ + cxxScopeClear(); + cxxTokenAPINewFile(); + cxxParserNewStatement(); + + int kind_for_define = CXXTagKindMACRO; + int kind_for_header = CXXTagKindINCLUDE; + int kind_for_macro_param = CXXTagKindMACROPARAM; + int role_for_macro_undef = CR_MACRO_UNDEF; + int role_for_header_system = CR_HEADER_SYSTEM; + int role_for_header_local = CR_HEADER_LOCAL; + + Assert(passCount < 3); + + cppInit( + (bool) (passCount > 1), + false, + true, // raw literals + false, + kind_for_define, + role_for_macro_undef, + kind_for_macro_param, + kind_for_header, + role_for_header_system, + 
role_for_header_local, + g_cxx.pFieldOptions[CXXTagFieldMacrodef].ftype + ); + + g_cxx.iChar = ' '; + + g_cxx.iNestingLevels = 0; + + bool bRet = cxxParserParseBlock(false); + + cppTerminate (); + + // Shut up coveralls: LCOV_EXCL_START + cxxTokenChainClear(g_cxx.pTokenChain); + if(g_cxx.pTemplateTokenChain) + cxxTokenChainClear(g_cxx.pTemplateTokenChain); + if(g_cxx.pTemplateSpecializationTokenChain) + cxxTokenChainClear(g_cxx.pTemplateSpecializationTokenChain); + // Restart coveralls: LCOV_EXCL_END + + if(!bRet && (passCount == 1)) + { + CXX_DEBUG_PRINT("Processing failed: trying to rescan"); + return RESCAN_FAILED; + } + + return RESCAN_NONE; +} + +rescanReason cxxCParserMain(const unsigned int passCount) +{ + CXX_DEBUG_ENTER(); + cxxTagInitForLanguage(g_cxx.eCLangType); + + g_cxx.bConfirmedCPPLanguage = false; + cxxKeywordEnablePublicProtectedPrivate(false); + + rescanReason r = cxxParserMain(passCount); + CXX_DEBUG_LEAVE(); + return r; +} + +rescanReason cxxCUDAParserMain(const unsigned int passCount) +{ + CXX_DEBUG_ENTER(); + cxxTagInitForLanguage(g_cxx.eCUDALangType); + + // CUDA is C. + g_cxx.bConfirmedCPPLanguage = false; + cxxKeywordEnablePublicProtectedPrivate(false); + + rescanReason r = cxxParserMain(passCount); + CXX_DEBUG_LEAVE(); + return r; +} + +rescanReason cxxCppParserMain(const unsigned int passCount) +{ + CXX_DEBUG_ENTER(); + cxxTagInitForLanguage(g_cxx.eCPPLangType); + + // In header files we disable processing of public/protected/private keywords + // until we either figure out that this is really C++ or we start parsing + // a struct/union. 
+ g_cxx.bConfirmedCPPLanguage = !isInputHeaderFile(); + cxxKeywordEnablePublicProtectedPrivate(g_cxx.bConfirmedCPPLanguage); + + rescanReason r = cxxParserMain(passCount); + CXX_DEBUG_LEAVE(); + return r; +} + +static void cxxParserFirstInit(void) +{ + memset(&g_cxx,0,sizeof(CXXParserState)); + + g_cxx.eCLangType = -1; + g_cxx.eCPPLangType = -1; + g_cxx.eCUDALangType = -1; + + cxxTokenAPIInit(); + + g_cxx.pTokenChain = cxxTokenChainCreate(); + + cxxScopeInit(); + + g_bFirstRun = false; +} + +void cxxCUDAParserInitialize(const langType language) +{ + CXX_DEBUG_PRINT("Parser initialize for language CUDA"); + if(g_bFirstRun) + cxxParserFirstInit(); + + g_cxx.eCUDALangType = language; + + cxxBuildKeywordHash(language,CXXLanguageCUDA); +} + +void cxxCppParserInitialize(const langType language) +{ + CXX_DEBUG_PRINT("Parser initialize for language C++"); + if(g_bFirstRun) + cxxParserFirstInit(); + + g_cxx.eCPPLangType = language; + + cxxBuildKeywordHash(language,CXXLanguageCPP); +} + +void cxxCParserInitialize(const langType language) +{ + CXX_DEBUG_PRINT("Parser initialize for language C"); + if(g_bFirstRun) + cxxParserFirstInit(); + + g_cxx.eCLangType = language; + + cxxBuildKeywordHash(language,CXXLanguageC); +} + +void cxxParserCleanup(langType language CTAGS_ATTR_UNUSED,bool initialized CTAGS_ATTR_UNUSED) +{ + if(g_bFirstRun) + return; // didn't run at all + + // This function is used as finalizer for all the sub-language parsers. 
+ // The next line forces this function to be called only once + g_bFirstRun = true; + + // Shut up coveralls: LCOV_EXCL_START + if(g_cxx.pUngetToken) + cxxTokenDestroy(g_cxx.pUngetToken); + if(g_cxx.pTokenChain) + cxxTokenChainDestroy(g_cxx.pTokenChain); + if(g_cxx.pTemplateTokenChain) + cxxTokenChainDestroy(g_cxx.pTemplateTokenChain); + if(g_cxx.pTemplateSpecializationTokenChain) + cxxTokenChainDestroy(g_cxx.pTemplateSpecializationTokenChain); + // Restart coveralls: LCOV_EXCL_END + + cxxScopeDone(); + + cxxTokenAPIDone(); +} diff --git a/ctags/parsers/cxx/cxx_parser.h b/ctags/parsers/cxx/cxx_parser.h new file mode 100644 index 0000000000..58bb910a0f --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser.h @@ -0,0 +1,27 @@ +#ifndef ctags_cxx_parser_h_ +#define ctags_cxx_parser_h_ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" + +#include "parse.h" + +// public parser api +rescanReason cxxCParserMain(const unsigned int passCount); +rescanReason cxxCppParserMain(const unsigned int passCount); +rescanReason cxxCUDAParserMain(const unsigned int passCount); + +void cxxCParserInitialize(const langType language); +void cxxCppParserInitialize(const langType language); +void cxxCUDAParserInitialize(const langType language); + +void cxxParserCleanup(langType language, bool initialized); + +#endif //!ctags_cxx_parser_h_ \ No newline at end of file diff --git a/ctags/parsers/cxx/cxx_parser_block.c b/ctags/parsers/cxx/cxx_parser_block.c new file mode 100644 index 0000000000..714026d216 --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser_block.c @@ -0,0 +1,804 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License 
version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_parser.h" +#include "cxx_parser_internal.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_scope.h" +#include "cxx_tag.h" + +#include "parse.h" +#include "vstring.h" +#include "../cpreprocessor.h" +#include "debug.h" +#include "keyword.h" +#include "read.h" + +#include "cxx_subparser_internal.h" + +#include <string.h> + +bool cxxParserParseBlockHandleOpeningBracket(void) +{ + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT( + g_cxx.pToken->eType == CXXTokenTypeOpeningBracket, + "This must be called when pointing at an opening bracket!" + ); + + enum CXXScopeType eScopeType = cxxScopeGetType(); + bool bIsCPP = cxxParserCurrentLanguageIsCPP(); + CXXToken * pAux; + + if( + ( + // something = {...} + (g_cxx.pToken->pPrev) && + cxxTokenTypeIs(g_cxx.pToken->pPrev,CXXTokenTypeAssignment) && + ( + (eScopeType == CXXScopeTypeFunction) || + (eScopeType == CXXScopeTypeClass) || + (eScopeType == CXXScopeTypeStruct) || + (eScopeType == CXXScopeTypeUnion) || + (eScopeType == CXXScopeTypeNamespace) + ) + ) || ( + bIsCPP && + (g_cxx.pToken->pPrev) && + ( + ( + // T { arg1, arg2, ... } (1) + // T object { arg1, arg2, ... } (2) + // new T { arg1, arg2, ... } (3) + // Class::Class() : member { arg1, arg2, ... 
} { (4) + cxxTokenTypeIs(g_cxx.pToken->pPrev,CXXTokenTypeIdentifier) && + ( + // case 1 + (!g_cxx.pToken->pPrev->pPrev) || + // case 4 + cxxTokenTypeIsOneOf( + g_cxx.pToken->pPrev->pPrev, + CXXTokenTypeSingleColon | CXXTokenTypeComma + ) || + // cases 1,2,3 but not 4 + ( + // more parts of typename or maybe the "new" keyword before the identifier + cxxTokenTypeIsOneOf( + g_cxx.pToken->pPrev->pPrev, + CXXTokenTypeIdentifier | CXXTokenTypeStar | CXXTokenTypeAnd | + CXXTokenTypeGreaterThanSign | CXXTokenTypeKeyword + ) && + // but no parenthesis (discard things like bool test() Q_DECL_NO_THROW { ... }) + (!(pAux = cxxTokenChainPreviousTokenOfType( + g_cxx.pToken->pPrev->pPrev, + CXXTokenTypeParenthesisChain + )) + ) + ) + ) && + // "override" is handled as identifier since it's a keyword only after function signatures + (strcmp(vStringValue(g_cxx.pToken->pPrev->pszWord),"override") != 0) + ) || ( + // type var[][][]..[] { ... } + // (but not '[] { ... }' which is a parameterless lambda) + cxxTokenTypeIs(g_cxx.pToken->pPrev,CXXTokenTypeSquareParenthesisChain) && + ( + pAux = cxxTokenChainPreviousTokenNotOfType( + g_cxx.pToken->pPrev, + CXXTokenTypeSquareParenthesisChain + ) + ) && + cxxTokenTypeIs(pAux,CXXTokenTypeIdentifier) + ) + ) + ) || ( + // return { } + (!g_cxx.pToken->pPrev) && + (g_cxx.uKeywordState & CXXParserKeywordStateSeenReturn) + ) + ) + { + // array or list-like initialisation + bool bRet = cxxParserParseAndCondenseCurrentSubchain( + CXXTokenTypeOpeningBracket | CXXTokenTypeOpeningParenthesis | + CXXTokenTypeOpeningSquareParenthesis, + false, + true + ); + + CXX_DEBUG_LEAVE_TEXT("Handled array or list-like initialisation or return"); + return bRet; + } + + // In C++ mode check for lambdas + CXXToken * pParenthesis; + + if( + bIsCPP && + (pParenthesis = cxxParserOpeningBracketIsLambda()) + ) + { + if(!cxxParserHandleLambda(pParenthesis)) + { + CXX_DEBUG_LEAVE_TEXT("Lambda handling failed"); + return false; + } + + // Note that here we're leaving the 
token chain "alive" so further parsing can be performed. + CXX_DEBUG_LEAVE_TEXT("Lambda handling succeeded"); + return true; + } + + int iScopes; + int iCorkQueueIndex = CORK_NIL; + int iCorkQueueIndexFQ = CORK_NIL; + + CXXFunctionSignatureInfo oInfo; + + if(eScopeType != CXXScopeTypeFunction) + { + // very likely a function definition + // (but may be also a toplevel block, like "extern "C" { ... }") + iScopes = cxxParserExtractFunctionSignatureBeforeOpeningBracket(&oInfo,&iCorkQueueIndex,&iCorkQueueIndexFQ); + + // FIXME: Handle syntax (5) of list initialization: + // Class::Class() : member { arg1, arg2, ... } {... + } else { + // some kind of other block: + // - anonymous block + // - block after for(),while(),foreach(),if() and other similar stuff + // (note that {}-style initializers have been handled above and thus are excluded) + + iScopes = 0; + } + + cxxParserNewStatement(); + + if(!cxxParserParseBlock(true)) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse nested block"); + return false; + } + + if(iScopes < 1) + { + CXX_DEBUG_LEAVE_TEXT("The block was not a function"); + return true; + } + + unsigned long uEndPosition = getInputLineNumber(); + + // If the function contained a "try" keyword before the opening bracket + // then it's likely to be a function-try-block and should be followed by a catch + // block that is in the same scope. + + if(oInfo.uFlags & CXXFunctionSignatureInfoFunctionTryBlock) + { + // look for the catch blocks. + CXX_DEBUG_PRINT("The function is a function-try-block: looking for catch blocks"); + + for(;;) + { + CXX_DEBUG_PRINT("Looking ahead for a catch block..."); + + if(!cxxParserParseNextToken()) + break; // EOF + + if(!cxxTokenIsKeyword(g_cxx.pToken,CXXKeywordCATCH)) + { + // No more catches. Unget and exit. + CXX_DEBUG_PRINT("No more catch blocks"); + cxxParserUngetCurrentToken(); + break; + } + + // assume it's a catch block. 
+ + CXX_DEBUG_PRINT("Found catch block"); + + if(!cxxParserParseIfForWhileSwitchCatchParenthesis()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse the catch parenthesis"); + return false; + } + + // the standard requires a bracket here (catch block is always a compound statement). + + cxxParserNewStatement(); + + if(!cxxParserParseNextToken()) + { + CXX_DEBUG_LEAVE_TEXT("Found EOF while looking for catch() block: playing nice"); + break; // EOF (would be a syntax error!) + } + + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningBracket)) + { + // Aargh... + CXX_DEBUG_LEAVE_TEXT("Found something unexpected while looking for catch() block: playing nice"); + cxxParserUngetCurrentToken(); + break; // (would be a syntax error!) + } + + if(!cxxParserParseBlock(true)) + return false; + + uEndPosition = getInputLineNumber(); + } + } + + if(iCorkQueueIndex > CORK_NIL) + { + cxxParserSetEndLineForTagInCorkQueue(iCorkQueueIndex,uEndPosition); + if(iCorkQueueIndexFQ > CORK_NIL) + cxxParserSetEndLineForTagInCorkQueue(iCorkQueueIndexFQ,uEndPosition); + } + while(iScopes > 0) + { + cxxScopePop(); + iScopes--; + } + + CXX_DEBUG_LEAVE(); + return true; +} + +static bool cxxParserParseBlockInternal(bool bExpectClosingBracket) +{ + CXX_DEBUG_ENTER(); + + //char * szScopeName = cxxScopeGetFullName(); + //CXX_DEBUG_PRINT("Scope name is '%s'",szScopeName ? szScopeName : ""); + + cxxParserNewStatement(); + + if(bExpectClosingBracket) + { + // FIXME: this cpp handling is kind of broken: + // it works only because the moon is in the correct phase. + cppBeginStatement(); + } + + for(;;) + { + if(!cxxParserParseNextToken()) + { +found_eof: + + if(bExpectClosingBracket) + { + CXX_DEBUG_LEAVE_TEXT( + "Syntax error: found EOF in block but a closing " \ + "bracket was expected!" 
+ ); + return false; + } + + CXX_DEBUG_LEAVE_TEXT("EOF in main block"); + return true; // EOF + } + +process_token: + + CXX_DEBUG_PRINT( + "Token '%s' of type 0x%02x", + vStringValue(g_cxx.pToken->pszWord), + g_cxx.pToken->eType + ); + + if (cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeIdentifier) + && cxxScopeGetType() == CXXScopeTypeClass + && cxxSubparserNewIdentifierAsHeadOfMemberNotify(g_cxx.pToken)) + { + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + continue; + } + + switch(g_cxx.pToken->eType) + { + case CXXTokenTypeKeyword: + { + switch(g_cxx.pToken->eKeyword) + { + case CXXKeywordNAMESPACE: + { + enum CXXScopeType eScopeType = cxxScopeGetType(); + + if( + ( + // toplevel or nested within a namespace + (eScopeType == CXXScopeTypeNamespace) || + // namespace X = Y inside a function + (eScopeType == CXXScopeTypeFunction) + ) && ( + // either certainly C++ + g_cxx.bConfirmedCPPLanguage || + // or a "sane" namespace syntax + ( + !cxxTokenChainPreviousTokenOfType( + g_cxx.pToken, + CXXTokenTypeStar | + CXXTokenTypeAnd | + CXXTokenTypeKeyword + ) + ) + ) + ) + { + if(!cxxParserParseNamespace()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse namespace"); + return false; + } + } else { + // If we're pretty sure this is C++ then this is a syntax error. + // If we're not sure (namely when we're in a *.h file) then + // let's try to be flexible: treat the namespace keyword as an identifier. + if(!g_cxx.bConfirmedCPPLanguage) + { + CXX_DEBUG_LEAVE_TEXT( + "Found namespace in unexpected place, but we're not sure it's really C++ " + "so we'll treat it as an identifier instead" + ); + g_cxx.pToken->eType = CXXTokenTypeIdentifier; + continue; + } + + CXX_DEBUG_LEAVE_TEXT( + "Found namespace in a wrong place: we're probably out of sync" + ); + return false; + } + + cxxParserNewStatement(); + } + break; + case CXXKeywordTEMPLATE: + if( + // beginning of the statement + (!g_cxx.pToken->pPrev) || + // previous token is not "." 
or "->", syntax that is found in + // p.template func(); + (!cxxTokenTypeIsOneOf( + g_cxx.pToken->pPrev, + CXXTokenTypeDotOperator | CXXTokenTypePointerOperator + ) + ) + ) + { + if(!cxxParserParseTemplatePrefix()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse template"); + return false; + } + // Here we are just after the "template" prefix. + } else { + CXX_DEBUG_LEAVE_TEXT("Template keyword that is not a prefix"); + } + break; + case CXXKeywordTYPEDEF: + // Mark the next declaration as a typedef + g_cxx.uKeywordState |= CXXParserKeywordStateSeenTypedef; + cxxTokenChainClear(g_cxx.pTokenChain); + break; + case CXXKeywordENUM: + if(!cxxParserParseEnum()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse enum"); + return false; + } + break; + case CXXKeywordCLASS: + if( + // do not trigger on X + (!g_cxx.pToken->pPrev) || + (!cxxTokenTypeIsOneOf(g_cxx.pToken->pPrev,CXXTokenTypeSmallerThanSign | CXXTokenTypeComma)) + ) + { + if(!cxxParserParseClassStructOrUnion(CXXKeywordCLASS,CXXTagCPPKindCLASS,CXXScopeTypeClass)) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union"); + return false; + } + } + break; + case CXXKeywordSTRUCT: + if( + // do not trigger on X + (!g_cxx.pToken->pPrev) || + (!cxxTokenTypeIsOneOf(g_cxx.pToken->pPrev,CXXTokenTypeSmallerThanSign | CXXTokenTypeComma)) + ) + { + if(!cxxParserParseClassStructOrUnion(CXXKeywordSTRUCT,CXXTagKindSTRUCT,CXXScopeTypeStruct)) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union"); + return false; + } + } + break; + case CXXKeywordUNION: + if( + // do not trigger on X + (!g_cxx.pToken->pPrev) || + (!cxxTokenTypeIsOneOf(g_cxx.pToken->pPrev,CXXTokenTypeSmallerThanSign | CXXTokenTypeComma)) + ) + { + if(!cxxParserParseClassStructOrUnion(CXXKeywordUNION,CXXTagKindUNION,CXXScopeTypeUnion)) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union"); + return false; + } + } + break; + case CXXKeywordPUBLIC: + case CXXKeywordPROTECTED: + case CXXKeywordPRIVATE: + // Note that the class keyword has its own 
handler + // so the only possibility here is an access specifier + if(!cxxParserParseAccessSpecifier()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse access specifier"); + return false; + } + break; + case CXXKeywordUSING: + if(!cxxParserParseUsingClause()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse using clause"); + return false; + } + cxxParserNewStatement(); + break; + case CXXKeywordIF: + case CXXKeywordFOR: + case CXXKeywordWHILE: + case CXXKeywordSWITCH: + case CXXKeywordCATCH: + if(!cxxParserParseIfForWhileSwitchCatchParenthesis()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse if/for/while/switch/catch parenthesis"); + return false; + } + // Now we're just before the block that follows the parenthesis. + cxxParserNewStatement(); + // Force the cpp preprocessor to think that we're in the middle of a statement. + cppBeginStatement(); + break; + case CXXKeywordTRY: + // We parse try in different ways depending on the context. + // Inside a function, and without preceding tokens it's assumed to be + // a plain try {} catch {} block. This is easy. + // Out of a function it's likely to be a function try block: + // int f(int n = 2) try { ... } catch { ... } + // Inside a function but with some preceding tokens it's likely to be a + // lambda expressed as function-try-block. + // auto f() -> void try { ... } catch { ... } + if((cxxScopeGetType() != CXXScopeTypeFunction) || g_cxx.pToken->pPrev) + { + CXX_DEBUG_PRINT("Found try that looks like a function-try-block"); + // Maybe function-try-block. + // Keep in the chain and continue parsing. + continue; + } + // Fall through. + case CXXKeywordELSE: + case CXXKeywordDO: + // parse as normal statement/block + cxxParserNewStatement(); + // Force the cpp preprocessor to think that we're in the middle of a statement. + cppBeginStatement(); + break; + case CXXKeywordRETURN: + if(cxxParserCurrentLanguageIsCPP()) + { + // may be followed by a lambda, otherwise it's not interesting. 
+ cxxParserNewStatement(); + g_cxx.uKeywordState |= CXXParserKeywordStateSeenReturn; + } else { + // ignore + if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF, + false)) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse return"); + return false; + } + cxxParserNewStatement(); + } + break; + case CXXKeywordCONTINUE: + case CXXKeywordBREAK: + case CXXKeywordGOTO: + // ignore + if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF, + false)) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse continue/break/goto"); + return false; + } + cxxParserNewStatement(); + break; + case CXXKeywordTHROW: + // We ignore whole "throw expressions" as they contain nothing useful + // and may confuse us. We keep "throw" when used as exception specification, + // and this is certainly outside of a function and when the token chain + // already contains at least a type, an identifier and a parenthesis. + // This check seems excessive but keep in mind that we deal with + // broken input and we might also be wrong about the current scope. + if((cxxScopeGetType() == CXXScopeTypeFunction) || (g_cxx.pTokenChain->iCount < 3)) + { + CXX_DEBUG_PRINT("Skipping throw statement"); + if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF, + false)) + { + CXX_DEBUG_LEAVE_TEXT("Failed to skip throw statement"); + return false; + } + cxxParserNewStatement(); + } + break; + case CXXKeywordCASE: + // ignore + if(!cxxParserParseUpToOneOf( + CXXTokenTypeSemicolon | CXXTokenTypeEOF | CXXTokenTypeSingleColon, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse case keyword"); + return false; + } + cxxParserNewStatement(); + break; + case CXXKeywordEXTERN: + g_cxx.uKeywordState |= CXXParserKeywordStateSeenExtern; + + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + if(!cxxParserParseNextToken()) + goto found_eof; + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeStringConstant)) + { + // assume extern "language" + + // Strictly speaking this is a C++ only syntax. 
+ // However we allow it also in C as it doesn't really hurt. + + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + // Note that extern "C" may be followed by a block with declarations + // + // extern "C" { ... } + // + // However in this case the declarations are ALSO definitions + // and extern "C" is used only to specify the name mangling mode. + // + // extern "C" int x; <-- a declaration and not a definition + // extern "C" { int x; } <-- a declaration and definition: x IS defined + // here and is NOT extern. + // + // A variable in an extern "C" block has to be re-declared extern again + // to be really treated as declaration only. + // + // extern "C" { extern int x; } + // + // So in this case we do NOT treat the inner declarations as extern + // and we don't need specific handling code for this case. + } else { + // something else: handle it the normal way + goto process_token; + } + break; + case CXXKeywordSTATIC: + g_cxx.uKeywordState |= CXXParserKeywordStateSeenStatic; + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + break; + case CXXKeywordINLINE: + case CXXKeyword__INLINE: + case CXXKeyword__INLINE__: + case CXXKeyword__FORCEINLINE: + case CXXKeyword__FORCEINLINE__: + g_cxx.uKeywordState |= CXXParserKeywordStateSeenInline; + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + break; + case CXXKeywordEXPLICIT: + g_cxx.uKeywordState |= CXXParserKeywordStateSeenExplicit; + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + break; + case CXXKeywordOPERATOR: + g_cxx.uKeywordState |= CXXParserKeywordStateSeenOperator; + break; + case CXXKeywordVIRTUAL: + g_cxx.uKeywordState |= CXXParserKeywordStateSeenVirtual; + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + break; + case CXXKeywordMUTABLE: + g_cxx.uKeywordState |= CXXParserKeywordStateSeenMutable; + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + break; + case CXXKeywordFRIEND: + g_cxx.uKeywordState |= CXXParserKeywordStateSeenFriend; + break; + // "const" and "volatile" are part of the type. 
Don't treat them specially + // and don't attempt to extract an eventual typedef yet, + // as there might be a struct/class/union keyword following. + case CXXKeywordVOLATILE: + g_cxx.uKeywordState |= CXXParserKeywordStateSeenVolatile; + break; + case CXXKeywordCONST: + g_cxx.uKeywordState |= CXXParserKeywordStateSeenConst; + break; + default: + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef) + { + g_cxx.uKeywordState &= ~CXXParserKeywordStateSeenTypedef; + if(!cxxParserParseGenericTypedef()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse generic typedef"); + return false; + } + cxxParserNewStatement(); + } + break; + } + } + break; + case CXXTokenTypeSemicolon: + { + if( + (cxxParserCurrentLanguageIsC()) && + cxxScopeIsGlobal() && + (!(g_cxx.uKeywordState & CXXParserKeywordStateSeenExtern)) && + (!(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef)) + ) + { + // Special handling of K&R style function declarations. + // We might be in the following situation: + // + // type whatever fname(par1,par2) int par1; int par2; { + // ^ + // + switch(cxxParserMaybeParseKnRStyleFunctionDefinition()) + { + case 1: + // K&R parser did the job and started a new statement + break; + case 0: + // something else + cxxParserAnalyzeOtherStatement(); + break; + default: + CXX_DEBUG_LEAVE_TEXT("Failed to check for K&R style function definition"); + return false; + break; + } + } else { + // K&R style function declarations not allowed here. + cxxParserAnalyzeOtherStatement(); + } + cxxParserNewStatement(); + } + break; + case CXXTokenTypeSingleColon: + { + // label ? + if( + (g_cxx.pTokenChain->iCount == 2) && + cxxTokenTypeIs( + cxxTokenChainFirst(g_cxx.pTokenChain), + CXXTokenTypeIdentifier + ) + ) + { + CXXToken * pFirst = cxxTokenChainFirst(g_cxx.pTokenChain); + // assume it's label + tagEntryInfo * tag = cxxTagBegin(CXXTagKindLABEL,pFirst); + + if(tag) + { + tag->isFileScope = true; + cxxTagCommit(NULL); + } + } else { + // what is this? 
(default: and similar things have been handled at keyword level) + } + } + break; + case CXXTokenTypeOpeningBracket: + if(!cxxParserParseBlockHandleOpeningBracket()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to handle opening bracket"); + return false; + } + break; + case CXXTokenTypeClosingBracket: + // scope finished + if(!bExpectClosingBracket) + { + CXX_DEBUG_LEAVE_TEXT( + "Found unexpected closing bracket: probably preprocessing problem" + ); + return false; + } + CXX_DEBUG_LEAVE_TEXT("Closing bracket!"); + cxxParserNewStatement(); + return true; + break; + case CXXTokenTypeOpeningParenthesis: + case CXXTokenTypeOpeningSquareParenthesis: + if(!cxxParserParseAndCondenseCurrentSubchain( + CXXTokenTypeOpeningBracket | CXXTokenTypeOpeningParenthesis | + CXXTokenTypeOpeningSquareParenthesis, + true, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Parsing the parenthesis failed"); + return false; + } + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF)) + { + if(bExpectClosingBracket) + { + CXX_DEBUG_LEAVE_TEXT( + "Syntax error: found EOF in block but a closing bracket was expected!" + ); + return false; + } + return true; // EOF + } + break; + case CXXTokenTypeIdentifier: + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef) + { + g_cxx.uKeywordState &= ~CXXParserKeywordStateSeenTypedef; + if(!cxxParserParseGenericTypedef()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse generic typedef"); + return false; + } + cxxParserNewStatement(); + } + else if (cxxScopeGetType() == CXXScopeTypeClass) + cxxSubparserUnknownIdentifierInClassNotify(g_cxx.pToken); + break; + default: + // something else we didn't handle + break; + } + } + + CXX_DEBUG_LEAVE_TEXT("WARNING: Not reached"); + return true; +} + +// +// This is the toplevel scanning function. It's a forward-only scanner that keeps +// accumulating tokens in the chain until either a characteristic token is found +// or the statement ends. 
When a characteristic token is found it usually enters +// a specialized scanning routine (e.g. for classes, namespaces, structs...). +// When the statement ends without finding any characteristic token the chain +// is passed to an analysis routine which does a second scan pass. +// +// bExpectClosingBracket is forwarded unchanged to cxxParserParseBlockInternal() +// and the value returned by that call is handed back to the caller as-is. +// +bool cxxParserParseBlock(bool bExpectClosingBracket) +{ + // Tell the subparsers, then the preprocessor, that a block is being entered. + // The matching pop/leave calls below run in reverse order once the scan + // returns, whether it succeeded or not. + cxxSubparserNotifyEnterBlock (); + + cppPushExternalParserBlock(); + bool bRet = cxxParserParseBlockInternal(bExpectClosingBracket); + cppPopExternalParserBlock(); + + cxxSubparserNotifyLeaveBlock (); + + return bRet; +} diff --git a/ctags/parsers/cxx/cxx_parser_function.c b/ctags/parsers/cxx/cxx_parser_function.c new file mode 100644 index 0000000000..dcb09906ee --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser_function.c @@ -0,0 +1,2281 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_parser.h" +#include "cxx_parser_internal.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_scope.h" + +#include "parse.h" +#include "vstring.h" +#include "../cpreprocessor.h" +#include "debug.h" +#include "keyword.h" +#include "read.h" +#include "trashbox.h" + +// NOTE(review): the #include below has no header name in this copy of the +// patch (presumably lost in extraction) - restore it before applying. +#include + +// +// This is called upon encountering a semicolon, when current language is +// C and we are in global scope. +// +// Try to handle the special case of C K&R style function declarations. +// +// The possible return values are: +// 1: The parser has moved forward, the statement has been parsed and cleared. +// A K&R function declaration has possibly been extracted (but not necessarily). +// Anyway, a new statement has been started.
+// 0: The parser has NOT moved forward and the current statement hasn't been cleared: +// other options may be evaluated. +// -1: unrecoverable error (cxxParserParseUpToOneOf() or cxxParserParseBlock() failed) +// +int cxxParserMaybeParseKnRStyleFunctionDefinition(void) +{ +#ifdef CXX_DO_DEBUGGING + vString * pChain = cxxTokenChainJoin(g_cxx.pTokenChain,NULL,0); + CXX_DEBUG_PRINT( + "Looking for K&R-style function in '%s'", + vStringValue(pChain) + ); + vStringDelete(pChain); +#endif + + // Check if we are in the following situation: + // + // type1 function(arg1,arg2,...) type2 arg1; type3 arg2; { + // ^ + // we're here + + CXX_DEBUG_ASSERT( + cxxParserCurrentLanguageIsC(), + "Should be called only when parsing C" + ); + CXX_DEBUG_ASSERT( + cxxTokenChainLast(g_cxx.pTokenChain), + "At least one token should be there" + ); + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(cxxTokenChainLast(g_cxx.pTokenChain),CXXTokenTypeSemicolon), + "Only upon encountering a semicolon" + ); + + // The minimum possible case is: + // + // func(arg) type2 arg; + // + // where (arg) is a condensed parenthesis chain.
+ // So the minimum number of tokens required is 5: func, (arg), type2, arg, ; + if(g_cxx.pTokenChain->iCount < 5) + return 0; // no way + + // There must be a parenthesis chain + CXXToken * pParenthesis = cxxTokenChainFirstTokenOfType( + g_cxx.pTokenChain, + CXXTokenTypeParenthesisChain + ); + if(!pParenthesis) + return 0; // no parenthesis chain + + // The parenthesis chain must have an identifier before it + CXXToken * pIdentifier = pParenthesis->pPrev; + if(!pIdentifier) + return 0; + if(!cxxTokenTypeIs(pIdentifier,CXXTokenTypeIdentifier)) + return 0; + + // And at least three tokens after it + CXXToken * x = pParenthesis->pNext; + if(!x) + return 0; + x = x->pNext; + if(!x) + return 0; + x = x->pNext; + if(!x) + return 0; + + // The tokens following must be only things allowed in a variable declaration + x = cxxTokenChainNextTokenNotOfType( + pParenthesis, + CXXTokenTypeIdentifier | CXXTokenTypeKeyword | + CXXTokenTypeSquareParenthesisChain | CXXTokenTypeStar | + CXXTokenTypeComma | CXXTokenTypeSingleColon | CXXTokenTypeNumber + ); + + CXX_DEBUG_ASSERT(x,"There should be at least the terminator here!"); + if(!x) + return 0; + + if(!cxxTokenTypeIs(x,CXXTokenTypeSemicolon)) + return 0; // does not look like a variable declaration. + + // Step back from the semicolon over the tokens that may trail the declared + // name (square parenthesis chains, single colons and numbers - presumably + // array suffixes and bit-field widths). + x = cxxTokenChainPreviousTokenNotOfType( + x, + CXXTokenTypeSquareParenthesisChain | CXXTokenTypeSingleColon | + CXXTokenTypeNumber + ); + + CXX_DEBUG_ASSERT(x,"We should have found an identifier here"); + if(!x) + return 0; + + if(!cxxTokenTypeIs(x,CXXTokenTypeIdentifier)) + return 0; // does not look like a variable declaration. + + CXX_DEBUG_ASSERT( + pParenthesis->pChain, + "The parenthesis should be condensed here!" + ); + + // The chain that directly holds pParenthesis (changes in the __ARGS(()) case + // below) and the first token of the declarations that follow the signature. + CXXTokenChain * pParenthesisTokenChain = g_cxx.pTokenChain; + + CXXToken * pFirstArgumentToken = pParenthesis->pNext; + + // Special case inside special case.
+ // Check if we're at something like func __ARGS(()) + if( + (pParenthesis->pChain->iCount == 3) && + cxxTokenTypeIs( + cxxTokenChainAt(pParenthesis->pChain,1), + CXXTokenTypeParenthesisChain + ) && + (pIdentifier->pPrev) && + cxxTokenTypeIs(pIdentifier->pPrev,CXXTokenTypeIdentifier) + ) + { + // Looks exactly like our special case. + // The function name is the identifier before the macro name and the + // parameter list is the inner parenthesis chain. + pIdentifier = pIdentifier->pPrev; + pParenthesisTokenChain = pParenthesis->pChain; + pParenthesis = cxxTokenChainAt(pParenthesis->pChain,1); + } + + // Now check if the contents of the parenthesis chain look like a K&R signature + + // This is something like identifier,identifier,identifier,... + if(pParenthesis->pChain->iCount < 3) + return 0; // no way + + x = pParenthesis->pChain->pHead->pNext; + CXX_DEBUG_ASSERT(x,"We should have found something in the parenthesis chain"); + + int iParameterCount = 0; + bool bGotMultipleDots = false; + + // Accept only a comma separated list of identifiers; multiple dots are + // tolerated only as the very last entry before the closing parenthesis. + for(;;) + { + if(cxxTokenTypeIs(x,CXXTokenTypeIdentifier)) + iParameterCount++; + else if(cxxTokenTypeIs(x,CXXTokenTypeMultipleDots)) + bGotMultipleDots = true; + else { + // not valid (note that (void) is not allowed here since we + // wouldn't have a following variable declaration) + return 0; + } + + x = x->pNext; + CXX_DEBUG_ASSERT(x,"We should have found at least the closing parenthesis"); + if(cxxTokenTypeIs(x,CXXTokenTypeClosingParenthesis)) + break; + if(bGotMultipleDots) + return 0; // not valid + if(!cxxTokenTypeIs(x,CXXTokenTypeComma)) + return 0; + x = x->pNext; + CXX_DEBUG_ASSERT(x,"We should have found at least the closing parenthesis"); + } + + if(iParameterCount < 1) + { + // we should have found at least one parameter + // (the one that we found before the ;) + return 0; + } + + // Detach the function name and the parameter list from their chains: + // from here on every early exit below destroys them explicitly. + cxxTokenChainTake(g_cxx.pTokenChain,pIdentifier); + cxxTokenChainTake(pParenthesisTokenChain,pParenthesis); + + // remove the whole signature from the chain + while(g_cxx.pTokenChain->pHead != pFirstArgumentToken) + cxxTokenChainDestroyFirst(g_cxx.pTokenChain); + + CXX_DEBUG_ASSERT( +
g_cxx.pTokenChain->pHead, + "We should have the variable declaration in the chain now!" + ); + + // There is exactly one statement in chain now. + + // Extra here means "following the first" +#define MAX_EXTRA_KNR_PARAMETERS 10 + + CXXToken * aExtraParameterStarts[MAX_EXTRA_KNR_PARAMETERS]; + int iExtraStatementsInChain = 0; + + // From here we should never return 0 as the parser is going to move forward. + + // Now we should have no more than iParameterCount-1 parameters before + // an opening bracket. There may be fewer declarations as each one may + // declare multiple variables and C89 supports the implicit "int" type rule. + // Note that we parse up to iParameterCount statements (which will be lost + // if we can't find an opening bracket). + while(iParameterCount > 0) + { + CXXToken * pCurrentTail = g_cxx.pTokenChain->pTail; + + if(!cxxParserParseUpToOneOf( + CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket | CXXTokenTypeEOF, + false + )) + { + cxxTokenDestroy(pIdentifier); + cxxTokenDestroy(pParenthesis); + return -1; + } + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF)) + { + cxxTokenDestroy(pIdentifier); + cxxTokenDestroy(pParenthesis); + cxxParserNewStatement(); + return 1; // tolerate syntax error + } + + // Remember where the statement just parsed starts; statements beyond + // MAX_EXTRA_KNR_PARAMETERS are still parsed but their start is not recorded. + if(iExtraStatementsInChain < MAX_EXTRA_KNR_PARAMETERS) + { + CXX_DEBUG_ASSERT( + pCurrentTail->pNext, + "We should have parsed an additional statement here" + ); + aExtraParameterStarts[iExtraStatementsInChain] = pCurrentTail->pNext; + iExtraStatementsInChain++; + } + + if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningBracket)) + break; // gotcha + + iParameterCount--; + } + + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningBracket)) + { + cxxTokenDestroy(pIdentifier); + cxxTokenDestroy(pParenthesis); + // Didn't find an opening bracket. + // This probably wasn't a K&R style function declaration after all.
+ cxxParserNewStatement(); + return 1; + } + + tagEntryInfo * tag = cxxTagBegin(CXXTagKindFUNCTION,pIdentifier); + + int iCorkQueueIndex = CORK_NIL; + int iCorkQueueIndexFQ = CORK_NIL; + + if(tag) + { + if(pParenthesis->pChain->pTail) + { + // normalize signature + cxxTokenChainNormalizeTypeNameSpacing(pParenthesis->pChain); + // make sure we don't emit the trailing space + pParenthesis->pChain->pTail->bFollowedBySpace = false; + } + + tag->isFileScope = (g_cxx.uKeywordState & CXXParserKeywordStateSeenStatic) && + !isInputHeaderFile(); + + vString * pszSignature = cxxTokenChainJoin(pParenthesis->pChain,NULL,0); + + // FIXME: Return type! + // FIXME: Properties? + + if(pszSignature) + tag->extensionFields.signature = vStringValue(pszSignature); + + iCorkQueueIndex = cxxTagCommit(&iCorkQueueIndexFQ); + + // The tag only points into the signature string: it is released here, + // right after the commit. + if(pszSignature) + vStringDelete(pszSignature); + } + + cxxTokenDestroy(pParenthesis); + + CXX_DEBUG_PRINT( + "Found K&R-style function '%s'", + vStringValue(pIdentifier->pszWord) + ); + + // NOTE(review): pIdentifier is handed to cxxScopePush() and is not destroyed + // anywhere below - presumably the scope takes ownership of it; confirm. + cxxScopePush(pIdentifier,CXXScopeTypeFunction,CXXScopeAccessUnknown); + + // emit parameters + if(cxxTagKindEnabled(CXXTagKindPARAMETER)) + { + // The chain contains 1 + iExtraStatementsInChain statements now + int iIdx = 0; + for(;;) + { + cxxParserExtractVariableDeclarations( + g_cxx.pTokenChain, + CXXExtractVariableDeclarationsKnRStyleParameters + ); + if(iIdx >= iExtraStatementsInChain) + break; + + // kill everything up to the next start + while(g_cxx.pTokenChain->pHead != aExtraParameterStarts[iIdx]) + cxxTokenChainDestroyFirst(g_cxx.pTokenChain); + + iIdx++; + } + } + + cxxParserNewStatement(); + + // The opening bracket has already been consumed: parse the function body + // up to the matching closing bracket. + if(!cxxParserParseBlock(true)) + { + // NOTE(review): the scope pushed above is not popped on this path. + CXX_DEBUG_PRINT("Failed to parse K&R function block"); + return -1; + } + + if(iCorkQueueIndex > CORK_NIL) + { + cxxParserMarkEndLineForTagInCorkQueue(iCorkQueueIndex); + if(iCorkQueueIndexFQ > CORK_NIL) + cxxParserMarkEndLineForTagInCorkQueue(iCorkQueueIndexFQ); + } + + cxxScopePop(); + return 1; +} + +// +// This function attempts to verify
that the specified chain _looks like_ +// a set of parameters to a function call. It's quite fuzzy and thus not +// 100% accurate, but it tries to exclude the obvious cases. If it says +// "no" then the specified chain CAN'T be a set of parameters to +// a function call. If it says "yes" then the result has to be considered +// a guess: the chain *might* be a set of parameters to a function call. +// +// This function is used to check both () and {} parenthesis chains. +// function(...) +// variable(...) +// variable{...} +// +// pChain must include its opening and closing tokens (asserted below). +// +bool cxxParserTokenChainLooksLikeFunctionCallParameterSet( + CXXTokenChain * pChain + ) +{ + CXXToken * t = pChain->pHead; + CXXToken * pLast = pChain->pTail; + + CXX_DEBUG_ASSERT( + cxxTokenTypeIsOneOf(t,CXXTokenTypeOpeningParenthesis | CXXTokenTypeOpeningBracket), + "The token chain should start with an opening parenthesis/bracket" + ); + CXX_DEBUG_ASSERT( + cxxTokenTypeIsOneOf(pLast,CXXTokenTypeClosingParenthesis | CXXTokenTypeClosingBracket), + "The token chain should end with an closing parenthesis/bracket" + ); + + // NOTE(review): presumably each closing token type is the matching opening + // type shifted left by 4 bits in the CXXTokenType bitmask - confirm in cxx_token.h + unsigned int uTerminator = t->eType << 4; + + // Dealing with (...) type chain and not {...} one + bool bDealingWithParenthesisChain = (uTerminator == CXXTokenTypeClosingParenthesis); + + t = t->pNext; + + while(t != pLast) + { + if( + bDealingWithParenthesisChain && + cxxTokenTypeIsOneOf(t, + CXXTokenTypeNumber | CXXTokenTypeStringConstant | + CXXTokenTypeCharacterConstant | CXXTokenTypePointerOperator | + CXXTokenTypeDotOperator | CXXTokenTypeOperator | CXXTokenTypeMultipleDots + )) + { + // Not allowed in a function signature before an equal sign (which + // we haven't encountered yet).
+ // assume this looks like a function call + return true; + } + + if(cxxTokenTypeIs(t,CXXTokenTypeKeyword)) + { + if(cxxKeywordMayBePartOfTypeName(t->eKeyword)) + { + // parts of type name (not inside a parenthesis + // which is assumed to be condensed) + return false; + } + + if( + bDealingWithParenthesisChain && + ( + cxxKeywordIsConstant(t->eKeyword) || + (t->eKeyword == CXXKeywordNEW) + ) + ) + { + // Not allowed in a function signature before an equal sign (which + // we haven't encountered yet). + // assume this looks like a function call + return true; + } + + if( + (t->eKeyword != CXXKeywordNEW) && + cxxTokenTypeIsOneOf( + t->pNext, + CXXTokenTypeKeyword | CXXTokenTypeStar | CXXTokenTypeAnd | + CXXTokenTypeMultipleAnds | CXXTokenTypeIdentifier + ) + ) + { + // this is something like: + // (int a... + // (void *... + // (unsigned int... + return false; + } + + } else if(cxxTokenTypeIs(t,CXXTokenTypeIdentifier)) + { + if(cxxTokenTypeIsOneOf(t->pNext,CXXTokenTypeKeyword | CXXTokenTypeIdentifier)) + { + // this is something like: + // (a b...
+ return false; + } + } else if(cxxTokenTypeIs(t,CXXTokenTypeGreaterThanSign)) + { + if(cxxTokenTypeIsOneOf( + t->pNext, + CXXTokenTypeAnd | CXXTokenTypeStar | + CXXTokenTypeMultipleAnds | CXXTokenTypeComma | uTerminator + )) + { + // > & + // > * + // > && + // >, + // >) or >} + return false; + } + + if(cxxTokenTypeIsOneOf(t->pPrev,CXXTokenTypeKeyword)) + { + // int> + // + return false; + } + } else if( + cxxTokenTypeIs(t,CXXTokenTypeParenthesisChain) && + cxxTokenTypeIsOneOf( + t->pPrev, + CXXTokenTypeIdentifier | CXXTokenTypeKeyword | + CXXTokenTypeStar | CXXTokenTypeAnd | CXXTokenTypeGreaterThanSign + ) && + cxxTokenTypeIs(t->pNext,CXXTokenTypeParenthesisChain) && + cxxTokenTypeIs(cxxTokenChainAt(t->pChain,1),CXXTokenTypeStar) && + cxxParserTokenChainLooksLikeFunctionParameterList(t->pNext->pChain,NULL) + ) + { + // looks like a function pointer + // someType (*p)(int) + return false; + } + + if(cxxTokenTypeIs(t,CXXTokenTypeAssignment)) + { + // after an assignment prototypes and constructor + // declarations may look the same, skip to next comma or end + t = cxxTokenChainNextTokenOfType( + t, + uTerminator | CXXTokenTypeComma + ); + CXX_DEBUG_ASSERT(t,"We should have found the terminator here!"); + if(cxxTokenTypeIs(t,CXXTokenTypeComma)) + t = t->pNext; + } else { + t = t->pNext; + } + } + + // We must assume that it might be... + return true; +} + +// +// Try to tell if the specified token chain is valid as a parameter list +// for a constructor. It's used to check if something like type name(args) +// belongs to a variable declaration. +// +// This is more of a guess for now: tries to exclude trivial cases. +// +// Returns false for an empty "()" chain, true for an empty "{}" chain and +// otherwise the answer of cxxParserTokenChainLooksLikeFunctionCallParameterSet(). +// +bool cxxParserTokenChainLooksLikeConstructorParameterSet( + CXXTokenChain * pChain + ) +{ + // We assume that the chain has a starting parenthesis and an + // ending parenthesis.
+ + // fewer than 3 tokens: only the opening and the closing token, nothing inside + if(pChain->iCount < 3) + { + CXX_DEBUG_ASSERT( + pChain->iCount == 2, + "This function should be called only on parenthesis and bracket chains" + ); + + if(cxxTokenTypeIs(cxxTokenChainFirst(pChain),CXXTokenTypeOpeningBracket)) + { + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(cxxTokenChainLast(pChain),CXXTokenTypeClosingBracket), + "The last token should have been a closing bracket here" + ); + return true; // type var {} is valid in C++11 + } + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(cxxTokenChainFirst(pChain),CXXTokenTypeOpeningParenthesis), + "This function should be called only on parenthesis and bracket chains" + ); + + return false; // type var() is NOT valid C++ + } + + return cxxParserTokenChainLooksLikeFunctionCallParameterSet(pChain); +} + +// +// Check the parenthesis chain and the identifier found by +// cxxParserLookForFunctionSignature() to determine if it's valid for +// a function signature. +// +// On success returns true and fills the pParenthesis, pParenthesisContainerChain, +// pIdentifierStart, pIdentifierEnd and pIdentifierChain members of pInfo. +// On failure returns false and does not write to pInfo. +// pParamInfo may be NULL: it is only forwarded to +// cxxParserTokenChainLooksLikeFunctionParameterList(). +// +static bool cxxParserLookForFunctionSignatureCheckParenthesisAndIdentifier( + CXXToken * pParenthesis, + CXXTokenChain * pIdentifierChain, + CXXToken * pIdentifierStart, + CXXToken * pIdentifierEnd, + CXXFunctionSignatureInfo * pInfo, + CXXTypedVariableSet * pParamInfo + ) +{ + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT( + pParenthesis && pIdentifierChain && pIdentifierStart && pIdentifierEnd && pInfo, + "All parameters other than `pParamInfo' must be non null here" + ); + + // Even if we have found a parenthesis and proper identifier we still + // continue looping until a termination condition is found. + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(pParenthesis,CXXTokenTypeParenthesisChain), + "Must have found a parenthesis chain here" + ); + + // looks almost fine + + CXXToken * pInner = cxxTokenChainAt(pParenthesis->pChain,1); + + // Look for the __ARGS(()) macro pattern. + if( + // nested parentheses + (pParenthesis->pChain->iCount == 3) && + cxxTokenTypeIs(pInner,CXXTokenTypeParenthesisChain) && + // FIXME: This actually excludes operator!
+ cxxTokenTypeIs(pIdentifierEnd,CXXTokenTypeIdentifier) && + // an identifier right before the identifier we found + pIdentifierEnd->pPrev && + cxxTokenTypeIs(pIdentifierEnd->pPrev,CXXTokenTypeIdentifier) && + cxxParserTokenChainLooksLikeFunctionParameterList( + pInner->pChain, + pParamInfo + ) + ) + { + // __ARGS() case: the identifier right before the parenthesis is taken to be + // the macro name, so the function name is the identifier before it and the + // parameter list is the inner parenthesis chain. + pInfo->pParenthesisContainerChain = pParenthesis->pChain; + pInfo->pIdentifierEnd = pIdentifierEnd->pPrev; + pInfo->pIdentifierStart = pInfo->pIdentifierEnd; + pInfo->pIdentifierChain = pIdentifierChain; + pInfo->pParenthesis = pInner; + + CXX_DEBUG_LEAVE_TEXT("Looks like an __ARGS() case parenthesis chain"); + return true; + } + + if(cxxParserTokenChainLooksLikeFunctionParameterList( + pParenthesis->pChain, + pParamInfo + )) + { + // non __ARGS() + pInfo->pParenthesisContainerChain = pIdentifierChain; + pInfo->pIdentifierStart = pIdentifierStart; + pInfo->pIdentifierEnd = pIdentifierEnd; + pInfo->pIdentifierChain = pIdentifierChain; + pInfo->pParenthesis = pParenthesis; + + CXX_DEBUG_LEAVE_TEXT("Looks like valid parenthesis chain"); + return true; + } + + CXX_DEBUG_LEAVE_TEXT("Doesn't look like a valid parenthesis chain"); + return false; +} + +// +// Look for a function signature in the specified chain. +// +// If a proper function signature is found then also standardize the spacing +// of the identifier so we always get it as "operator ()" and never as +// "operator() or operator ( ) ". +// +// Note that this function does NOT handle the special case of K&R-style +// declarations. +// +// If pParamInfo is not null, it is passed to +// cxxParserTokenChainLooksLikeFunctionParameterList() which will eventually +// fill it up.
+// +// +bool cxxParserLookForFunctionSignature( + CXXTokenChain * pChain, + CXXFunctionSignatureInfo * pInfo, + CXXTypedVariableSet * pParamInfo + ) +{ + CXX_DEBUG_ENTER(); + + if(pChain->iCount < 1) + { + CXX_DEBUG_LEAVE_TEXT("Chain is empty"); + return false; + } + +#ifdef CXX_DO_DEBUGGING + vString * pJoinedChain = cxxTokenChainJoin(pChain,NULL,0); + CXX_DEBUG_PRINT( + "Looking for function signature in '%s'", + vStringValue(pJoinedChain) + ); + vStringDelete(pJoinedChain); +#endif + + if(pParamInfo) + pParamInfo->uCount = 0; + + CXX_DEBUG_ASSERT(pChain,"Null chain"); + + CXXToken * pToken = cxxTokenChainFirst(pChain); + + pInfo->uFlags = 0; + pInfo->pParenthesis = NULL; + pInfo->pTrailingComma = NULL; + pInfo->pTemplateSpecializationStart = NULL; + + CXXToken * pIdentifierStart = NULL; + CXXToken * pIdentifierEnd = NULL; + CXXToken * pTopLevelParenthesis = NULL; + + bool bSkippedAngleBrackets = false; + + // Strategy: + // + // Scan the toplevel token chain and look for the first identifier immediately + // followed by a parenthesis chain that looks like a (possibly empty) + // list of function parameters. + // + // Since the identifier may be hidden within a parenthesis chain (and thus NOT be toplevel) + // we must scan the inner parenthesis chains in a sequence of special cases. + // + // (Mainly) for this reason this loop first looks for a parenthesis chain (which is always + // present at toplevel) and then looks for a suitable identifier near or inside it. + // + // Once we have found a suitable parenthesis-chain/identifier pair we continue + // scanning until one of { ; EOF : , is found. + // + // We bail out if anything suspicious is found in the middle of the scan. 
+ // + + while(pToken) + { + CXX_DEBUG_PRINT( + "Token '%s' of type 0x%02x (%s)", + vStringValue(pToken->pszWord), + pToken->eType, + cxxDebugTypeDecode(pToken->eType) + ); + + // Check exit conditions first + + if(cxxTokenTypeIsOneOf( + pToken, + CXXTokenTypeOpeningBracket | CXXTokenTypeSemicolon | CXXTokenTypeEOF + )) + { + // reached end + CXX_DEBUG_PRINT("Found opening bracket, semicolon or EOF"); + break; + } + + if(cxxTokenTypeIs(pToken,CXXTokenTypeComma)) + { + // reached end, but we have a trailing comma. + pInfo->pTrailingComma = pToken; + CXX_DEBUG_PRINT("Found trailing comma"); + break; + } + + if( + cxxParserCurrentLanguageIsCPP() && + cxxTokenTypeIsOneOf( + pToken, + CXXTokenTypeSingleColon | CXXTokenTypeAssignment | + CXXTokenTypePointerOperator + ) + ) + { + // With a single colon it might be a constructor. + // With assignment it might be virtual type func(..) = 0; + // With a pointer operator it might be trailing return type + CXX_DEBUG_PRINT("Found single colon"); + break; + } + + // Check for tokens that should never appear at top level of a function signature + + if(cxxTokenTypeIsOneOf( + pToken, + CXXTokenTypeOperator | CXXTokenTypePointerOperator | + CXXTokenTypeBracketChain | CXXTokenTypeStringConstant | + CXXTokenTypeCharacterConstant | CXXTokenTypeMultipleDots | + CXXTokenTypeClosingBracket | CXXTokenTypeClosingParenthesis | + CXXTokenTypeClosingSquareParenthesis + ) + ) + { + // Nope. 
+ CXX_DEBUG_LEAVE_TEXT("Found token that should never appear at toplevel of a signature"); + return false; + } + + // Explicitly skip template-like angle brackets, which are not condensed here and may confuse us + + if(cxxTokenTypeIs(pToken,CXXTokenTypeSmallerThanSign)) + { + pToken = cxxTokenChainSkipToEndOfTemplateAngleBracket(pToken); + if(!pToken) + { + CXX_DEBUG_LEAVE_TEXT("Couldn't skip past angle bracket chain"); + return false; + } + bSkippedAngleBrackets = true; + CXX_DEBUG_PRINT("Skipped angle bracket chain"); + goto next_token; + } + + // If we have already found a parenthesis+identifier just continue scanning + // until an exit condition is found. Do not look for parenthesis+identifier again. + + if(pInfo->pParenthesis) + { + CXX_DEBUG_PRINT("Already have a proper parenthesis: continuing loop to find terminator"); + goto next_token; + } + + // Parenthesis+identifier hasn't been found yet, look for it. + // Several specialized cases follow. + + if(cxxTokenIsKeyword(pToken,CXXKeywordOPERATOR)) + { + // Special case for operator (...), where can be + // either a simple thing like &, +, =, a keyword or a full fledged type + // with scoping and template parts. + + // void operator = () + // int operator + (...) 
+ // void * operator new[] () + // template cv::Affine3::operator Eigen::Transform() const + + CXX_DEBUG_PRINT("operator token found: looking for proper identifier"); + + pIdentifierStart = pToken; + pToken = pToken->pNext; + + while(pToken) + { + CXX_DEBUG_PRINT( + "Candidate token '%s' of type 0x%02x (%s)", + vStringValue(pToken->pszWord), + pToken->eType, + cxxDebugTypeDecode(pToken->eType) + ); + + if(cxxTokenTypeIs(pToken,CXXTokenTypeParenthesisChain)) + { + // check for operator ()() + if( + pToken->pNext && + cxxTokenTypeIs(pToken->pNext,CXXTokenTypeParenthesisChain) + ) + pToken = pToken->pNext; + + break; + } else if(cxxTokenTypeIs(pToken,CXXTokenTypeKeyword)) + { + if( + (!cxxTokenIsKeyword(pToken,CXXKeywordNEW)) && + (!cxxTokenIsKeyword(pToken,CXXKeywordDELETE)) && + (!cxxKeywordMayBePartOfTypeName(pToken->eKeyword)) && + (!cxxTokenIsKeyword(pToken,CXXKeywordVOLATILE)) + ) + { + CXX_DEBUG_LEAVE_TEXT("Unexpected token after the operator keyword"); + return false; + } + } else if(cxxTokenTypeIs(pToken,CXXTokenTypeSmallerThanSign)) + { + if(pToken->pPrev == pIdentifierStart) + { + // operator < + } else if(cxxTokenTypeIs(pToken->pPrev,CXXTokenTypeIdentifier)) + { + // assume template, which is generally uncondensed at this level + CXX_DEBUG_LEAVE_TEXT("Trying to handle uncondensed template"); + + pToken = cxxTokenChainSkipToEndOfTemplateAngleBracket(pToken); + if(!pToken) + { + CXX_DEBUG_LEAVE_TEXT("Failed to skip to end of template"); + return false; + } + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(pToken,CXXTokenTypeGreaterThanSign), + "Should have found a >" + ); + + } else { + CXX_DEBUG_LEAVE_TEXT("Unexpected token after the operator keyword"); + return false; + } + } else if(cxxTokenTypeIs(pToken,CXXTokenTypeStringConstant)) { + // check for operator "" _fn () + if (strcmp (vStringValue(pToken->pszWord), "\"\"") != 0) + { + CXX_DEBUG_LEAVE_TEXT("Non-empty string after operator"); + return false; + } + } else if(!cxxTokenTypeIsOneOf( + pToken, + 
CXXTokenTypeAnd | CXXTokenTypeAssignment | + CXXTokenTypeComma | CXXTokenTypeDotOperator | + CXXTokenTypeAngleBracketChain | + CXXTokenTypeGreaterThanSign | CXXTokenTypeOperator | + CXXTokenTypePointerOperator | CXXTokenTypeSingleColon | + CXXTokenTypeSquareParenthesisChain | + CXXTokenTypeAngleBracketChain | CXXTokenTypeMultipleColons | + CXXTokenTypeStar | CXXTokenTypeMultipleAnds | CXXTokenTypeIdentifier + ) + ) + { + CXX_DEBUG_LEAVE_TEXT("Unexpected token after the operator keyword"); + return false; + } + + pToken = pToken->pNext; + } + + if(!pToken) + { + CXX_DEBUG_LEAVE_TEXT("Didn't find a parenthesis subchain after operator keyword"); + return false; + } + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(pToken,CXXTokenTypeParenthesisChain), + "Must have found a parenthesis chain here" + ); + + pTopLevelParenthesis = pToken; + pIdentifierEnd = pToken->pPrev; + + cxxParserLookForFunctionSignatureCheckParenthesisAndIdentifier( + pTopLevelParenthesis, + pChain, + pIdentifierStart, + pIdentifierEnd, + pInfo, + pParamInfo + ); + + // Even if the check above failed we have nothing more to do with this token + goto next_token; + } + + // Now we look only at parenthesis chains + + if(!cxxTokenTypeIs(pToken,CXXTokenTypeParenthesisChain)) + { + CXX_DEBUG_PRINT("Not a parenthesis chain: assume this can be skipped"); + goto next_token; + } + + // parentheses at position 0 are always meaningless for us + + if(!pToken->pPrev) + { + CXX_DEBUG_PRINT("Parenthesis at position 0, meaningless"); + goto next_token; + } + + CXX_DEBUG_PRINT("Found interesting parenthesis chain: check for identifier"); + + // parentheses at position 1 they are likely to be macro invocations... + // but we still handle them in case we find nothing else. + + pTopLevelParenthesis = pToken; + + if(cxxTokenTypeIs(pToken->pPrev,CXXTokenTypeIdentifier)) + { + // identifier before + + // This is the most common case. 
+ + CXX_DEBUG_PRINT("Got identifier before parenthesis chain"); + + pIdentifierStart = pToken->pPrev; + pIdentifierEnd = pToken->pPrev; + + if( + cxxParserLookForFunctionSignatureCheckParenthesisAndIdentifier( + pTopLevelParenthesis, + pChain, + pIdentifierStart, + pIdentifierEnd, + pInfo, + pParamInfo + ) + ) + { + // This looks like a good candidate for a function name + parenthesis. + // The scanning process will skip all the following tokens until + // an exit condition is found. + // + // However, there are a couple of very common special cases that is nice to + // handle automatically. + // + // Case 1: + // MACRO(return_type) function(...) + // Case 2: + // MACRO(return_type) variable; + // + // These *could* be handled by the user with -D 'MACRO(x) x' but since + // they are quite common we can't expect the user to look up and define + // all macros for a large project. For this reason we use some heuristics + // to handle these special cases automatically. + if( + // Identifier is the first token of the chain + (!pIdentifierStart->pPrev) && + // The token following the parenthesis is an identifier + pInfo->pParenthesis->pNext && + cxxTokenTypeIs(pInfo->pParenthesis->pNext,CXXTokenTypeIdentifier) && + // There is something after the identifier + pInfo->pParenthesis->pNext->pNext && + ( + // The token following the identifier is again a parenthesis chain + cxxTokenTypeIs(pInfo->pParenthesis->pNext->pNext,CXXTokenTypeParenthesisChain) || + // The token following the identifier is a semicolon + cxxTokenTypeIs(pInfo->pParenthesis->pNext->pNext,CXXTokenTypeSemicolon) + ) && + // The current parenthesis does not contain commas + // (...maybe this check is too much?) 
+ (!cxxTokenChainFirstTokenOfType(pInfo->pParenthesis->pChain,CXXTokenTypeComma)) + ) + { + CXX_DEBUG_PRINT("Found special case of MACRO(return_type) function()/variable"); + pInfo->pParenthesis = NULL; + } + + goto next_token; + } + + // If the check above failed, try different identifier possibilities + CXX_DEBUG_PRINT("Checks for common case failed: trying other options"); + } + + if( + // The previous token is > + cxxTokenTypeIs(pToken->pPrev,CXXTokenTypeGreaterThanSign) && + // We extracted an initial template<*> token chain + // (which has been removed from the currently examined chain) + g_cxx.pTemplateTokenChain && + // We skipped an additional <...> block in *this* chain + bSkippedAngleBrackets + ) + { + // look for template specialisation + CXX_DEBUG_PRINT("Maybe template specialisation?"); + + CXXToken * pSpecBegin = cxxTokenChainSkipBackToStartOfTemplateAngleBracket( + pToken->pPrev + ); + + if( + pSpecBegin && + pSpecBegin->pPrev && + cxxTokenTypeIs(pSpecBegin->pPrev,CXXTokenTypeIdentifier) + ) + { + // template specialisation + + CXX_DEBUG_PRINT("Template specialization looks quite right"); + + pIdentifierStart = pSpecBegin->pPrev; + pIdentifierEnd = pSpecBegin->pPrev; + pInfo->uFlags |= CXXFunctionSignatureInfoTemplateSpecialization; + + pInfo->pTemplateSpecializationStart = pSpecBegin; + pInfo->pTemplateSpecializationEnd = pToken->pPrev; + + if( + cxxParserLookForFunctionSignatureCheckParenthesisAndIdentifier( + pTopLevelParenthesis, + pChain, + pIdentifierStart, + pIdentifierEnd, + pInfo, + pParamInfo + ) + ) + goto next_token; + + } + + CXX_DEBUG_PRINT("Checks for template spec failed: trying other options"); + } + + CXXTokenChain * pIdentifierChain; + + if( + // check for complex parenthesized declarations. + // Keep functions, discard everything else. 
+ // + // Possible cases: + // ret type (*baz)(params) <-- function pointer (variable) + // ret type (*(baz))(params) <-- function pointer (variable) + // ret type (* const (baz))(params) <-- function pointer (variable) + // ret type (*baz())() <-- function returning function pointer + // ret type (*baz(params))(params) <-- function returning function pointer + // ret type (*baz(params)) <-- function returning a pointer + // ret type (*baz(params))[2] <-- function returning a pointer to array + (pIdentifierStart = cxxParserFindFirstPossiblyNestedAndQualifiedIdentifier( + pToken->pChain, + &pIdentifierChain + )) + ) + { + CXX_DEBUG_PRINT( + "Got identifier '%s' inside parenthesis chain", + vStringValue(pIdentifierStart->pszWord) + ); + + // Now pIdentifierStart points at the innermost identifier + // Check if it's followed by a parameter list + if( + pIdentifierStart->pNext && + cxxTokenTypeIs(pIdentifierStart->pNext,CXXTokenTypeParenthesisChain) && + cxxParserTokenChainLooksLikeFunctionParameterList( + pIdentifierStart->pNext->pChain, + NULL + ) + ) + { + CXX_DEBUG_PRINT("Identifier followed by a parameter-like parenthesis chain"); + pIdentifierEnd = pIdentifierStart; + // correct our guess for parenthesis + pTopLevelParenthesis = pIdentifierStart->pNext; + + if( + cxxParserLookForFunctionSignatureCheckParenthesisAndIdentifier( + pTopLevelParenthesis, + pIdentifierChain, + pIdentifierStart, + pIdentifierEnd, + pInfo, + pParamInfo + ) + ) + goto next_token; + + } else { + // Looks more like a function pointer or something else we can't figure out + CXX_DEBUG_LEAVE_TEXT("Identifier NOT followed by a parameter-like parenthesis chain"); + } + + // If the check above failed, try different identifier possibilities + CXX_DEBUG_PRINT("Checks for nested () failed: trying other options"); + } + +next_token: + pToken = pToken->pNext; + } + + if(!pInfo->pParenthesis) + { + CXX_DEBUG_LEAVE_TEXT("No suitable parenthesis chain found"); + return false; // no function, no party + } 
+ + // parenthesis + identifier has been found, this is a function signature. + + // Figure out the remaining parameters. + + CXX_DEBUG_ASSERT(pTopLevelParenthesis,"This should have been set"); + + if(pInfo->pIdentifierStart != pInfo->pIdentifierEnd) + { + // operator case + pInfo->pIdentifierStart->bFollowedBySpace = true; // force proper spacing + CXXToken * t = pInfo->pIdentifierStart->pNext; + while(t != pInfo->pIdentifierEnd) + { + // If a keyword or an identifier followed by another keyword + // or an identifier need a space. + t->bFollowedBySpace = ( + (cxxTokenTypeIsOneOf(t,CXXTokenTypeIdentifier|CXXTokenTypeKeyword)) + && cxxTokenTypeIsOneOf(t->pNext,CXXTokenTypeIdentifier|CXXTokenTypeKeyword) + ) + ? true + : false; + t = t->pNext; + } + } else { + // non operator + pInfo->pIdentifierStart->bFollowedBySpace = false; // force proper spacing + } + + pInfo->pIdentifierEnd->bFollowedBySpace = false; // force proper spacing + + pInfo->pScopeStart = NULL; + + if(cxxParserCurrentLanguageIsCPP()) + { + // Look for scope prefix + CXXToken * pAux = pInfo->pIdentifierStart->pPrev; + + CXX_DEBUG_PRINT("Looking for scope prefix"); + + while(pAux) + { + CXX_DEBUG_PRINT( + "Token '%s' of type 0x%02x", + vStringValue(pAux->pszWord), + pAux->eType + ); + + if(!cxxTokenTypeIs(pAux,CXXTokenTypeMultipleColons)) + break; + pAux = pAux->pPrev; + if(!pAux) + break; + if(!cxxTokenTypeIs(pAux,CXXTokenTypeIdentifier)) + { + // check for template specialization + if(cxxTokenTypeIs(pAux,CXXTokenTypeGreaterThanSign)) + { + // might be something like type X::func() + // (explicit specialization of template class X). + CXXToken * pSmallerThan = cxxTokenChainSkipBackToStartOfTemplateAngleBracket( + pAux + ); + if(!pSmallerThan) + break; // nope + if(!pSmallerThan->pPrev) + break; // nope + if(!cxxTokenTypeIs(pSmallerThan->pPrev,CXXTokenTypeIdentifier)) + break; // nope + // hmm.. 
probably a template specialisation + pAux = pSmallerThan->pPrev; + pInfo->uFlags |= CXXFunctionSignatureInfoScopeTemplateSpecialization; + } else if(pAux->eType == CXXTokenTypeAngleBracketChain) + { + // same as above, but already condensed (though it should never happen) + if(!pAux->pPrev) + break; // nope + if(!cxxTokenTypeIs(pAux->pPrev,CXXTokenTypeIdentifier)) + break; // nope + // hmm.. probably a template specialisation + pAux = pAux->pPrev; + pInfo->uFlags |= CXXFunctionSignatureInfoScopeTemplateSpecialization; + } else { + // no more scope names + break; + } + } + + CXX_DEBUG_PRINT("Shifting scope start to '%s'",vStringValue(pAux->pszWord)); + + pInfo->pScopeStart = pAux; + + pAux = pAux->pPrev; + } + + CXX_DEBUG_PRINT("Scope prefix search finished"); + + // Look for trailing const and other interesting things that may come after the parenthesis. + + if(pTopLevelParenthesis->pNext) + { + CXX_DEBUG_PRINT( + "Top level parenthesis is followed by '%s' (%s)", + vStringValue(pTopLevelParenthesis->pNext->pszWord), + cxxDebugTypeDecode(pTopLevelParenthesis->pNext->eType) + ); + + if(cxxTokenIsKeyword(pTopLevelParenthesis->pNext,CXXKeywordCONST)) + pInfo->pSignatureConst = pTopLevelParenthesis->pNext; + else + pInfo->pSignatureConst = NULL; + + // Look for = 0 for "pure" modifier + CXXToken * pAssignment = cxxTokenChainNextTokenOfType( + pTopLevelParenthesis, + CXXTokenTypeAssignment + ); + + if(pAssignment && pAssignment->pNext) + { + if( + cxxTokenTypeIs(pAssignment->pNext,CXXTokenTypeNumber) && + (strcmp(vStringValue(pAssignment->pNext->pszWord),"0") == 0) + ) + pInfo->uFlags |= CXXFunctionSignatureInfoPure; + else if(cxxTokenTypeIs(pAssignment->pNext,CXXTokenTypeKeyword)) + { + if(pAssignment->pNext->eKeyword == CXXKeywordDEFAULT) + pInfo->uFlags |= CXXFunctionSignatureInfoDefault; + if(pAssignment->pNext->eKeyword == CXXKeywordDELETE) + pInfo->uFlags |= CXXFunctionSignatureInfoDelete; + } + } + + CXXToken * pIdentOrKeyword = cxxTokenChainNextTokenOfType( + 
pTopLevelParenthesis, + CXXTokenTypeIdentifier | CXXTokenTypeKeyword + ); + + while(pIdentOrKeyword) + { + // override is a keyword only in specific contexts so we handle it as identifier + if(cxxTokenTypeIs(pIdentOrKeyword,CXXTokenTypeKeyword)) + { + if(pIdentOrKeyword->eKeyword == CXXKeywordVOLATILE) + pInfo->uFlags |= CXXFunctionSignatureInfoVolatile; + else if(pIdentOrKeyword->eKeyword == CXXKeywordTRY) + pInfo->uFlags |= CXXFunctionSignatureInfoFunctionTryBlock; + } else { + // The "final" keyword is actually disabled in most contexts so we handle + // it as identifier. "override is always handled as identifier. + if(strcmp(vStringValue(pIdentOrKeyword->pszWord),"final") == 0) + pInfo->uFlags |= CXXFunctionSignatureInfoFinal; + else if(strcmp(vStringValue(pIdentOrKeyword->pszWord),"override") == 0) + pInfo->uFlags |= CXXFunctionSignatureInfoOverride; + } + + pIdentOrKeyword = cxxTokenChainNextTokenOfType( + pIdentOrKeyword, + CXXTokenTypeIdentifier | CXXTokenTypeKeyword + ); + } + } else { + pInfo->pSignatureConst = NULL; + } + } else { + pInfo->pSignatureConst = NULL; + } + + // Check return type + if(pInfo->pIdentifierChain != pChain) + { + // Nested parentheses. In this case the type name is the whole chain (excluding + // the identifier and the signature). + CXX_DEBUG_PRINT("Nested parentheses, probably a function returning pointers"); + pInfo->pTypeStart = cxxTokenChainFirst(pChain); + pInfo->pTypeEnd = pToken ? pToken->pPrev : cxxTokenChainLast(pChain); + pInfo->bTypeContainsIdentifierScopeAndSignature = true; + } else { + pToken = pInfo->pScopeStart ? pInfo->pScopeStart : pInfo->pIdentifierStart; + + if(pToken->pPrev) + { + CXXToken * pParenthesisOrConst = pInfo->pSignatureConst ? 
+ pInfo->pSignatureConst : pInfo->pParenthesis; + if( + cxxParserCurrentLanguageIsCPP() && + cxxTokenTypeIs(pToken->pPrev,CXXTokenTypeKeyword) && + (pToken->pPrev->eKeyword == CXXKeywordAUTO) && + pParenthesisOrConst->pNext && + cxxTokenTypeIs( + pParenthesisOrConst->pNext, + CXXTokenTypePointerOperator + ) && + pParenthesisOrConst->pNext->pNext && + (!cxxTokenTypeIsOneOf( + pParenthesisOrConst->pNext->pNext, + CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket + )) + ) + { + // looks like trailing return type + // auto f() -> int; + // auto f() -> int { + pInfo->pTypeStart = pParenthesisOrConst->pNext->pNext; + pInfo->pTypeEnd = pInfo->pTypeStart; + while( + pInfo->pTypeEnd->pNext && + (!cxxTokenTypeIsOneOf( + pInfo->pTypeEnd->pNext, + CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket + )) + ) + pInfo->pTypeEnd = pInfo->pTypeEnd->pNext; + } else { + // probably normal return type + pInfo->pTypeEnd = pToken->pPrev; + pInfo->pTypeStart = cxxTokenChainFirst(pChain); + + // Handle the common special case of + // + // MACRO(return_type) function() + // + + if( + cxxTokenTypeIs(pInfo->pTypeEnd,CXXTokenTypeParenthesisChain) && + (pInfo->pTypeEnd->pChain->iCount >= 3) && + (pInfo->pTypeEnd->pPrev == pInfo->pTypeStart) && + cxxTokenTypeIs(pInfo->pTypeStart,CXXTokenTypeIdentifier) + ) + { + CXX_DEBUG_PRINT("Return type seems to be embedded in a macro"); + pInfo->pTypeStart = cxxTokenChainFirst(pInfo->pTypeEnd->pChain)->pNext; + pInfo->pTypeEnd = cxxTokenChainLast(pInfo->pTypeEnd->pChain)->pPrev; + } + } + } else { + pInfo->pTypeEnd = NULL; + pInfo->pTypeStart = NULL; + } + pInfo->bTypeContainsIdentifierScopeAndSignature = false; + } + +#if 0 + while( + (pInfo->pTypeStart != pInfo->pTypeEnd) && + cxxTokenTypeIs(pInfo->pTypeStart,CXXTokenTypeKeyword) && + cxxKeywordExcludeFromTypeNames(pInfo->pTypeStart->eKeyword) + ) + pInfo->pTypeStart = pInfo->pTypeStart->pNext; +#endif + + CXX_DEBUG_LEAVE_TEXT("Found function signature"); + return true; +} + + +// +// Emit a function 
tag. +// +// WARNING: This function is destructive: it removes the scope and +// identifier tokens from the chain. It will also move the parenthesis +// around (but will keep it as it contains the parameter definitions). +// +// Returns the number of scopes pushed if CXXEmitFunctionTagsPushScopes +// is present in uOptions and 0 otherwise. +// +int cxxParserEmitFunctionTags( + CXXFunctionSignatureInfo * pInfo, + unsigned int uTagKind, + unsigned int uOptions, + int * piCorkQueueIndex, + int * piCorkQueueIndexFQ + ) +{ + CXX_DEBUG_ENTER(); + + int iScopesPushed = 0; + + if(piCorkQueueIndex) + *piCorkQueueIndex = CORK_NIL; + if(piCorkQueueIndexFQ) + *piCorkQueueIndexFQ = CORK_NIL; + + enum CXXScopeType eOuterScopeType = cxxScopeGetType(); + + bool bPushScopes = uOptions & CXXEmitFunctionTagsPushScopes; + + CXX_DEBUG_PRINT("Scope start is %x, push scope is %d",pInfo->pScopeStart,bPushScopes); + + // We'll be removing the scope and identifier, fix type + if( + pInfo->pTypeStart && + ( + (pInfo->pTypeStart == pInfo->pScopeStart) || + (pInfo->pTypeStart == pInfo->pIdentifierStart) + ) + ) + pInfo->pTypeStart = pInfo->pIdentifierEnd->pNext; + + CXX_DEBUG_ASSERT(pInfo->pTypeEnd != pInfo->pIdentifierEnd,"The type should never end at identifier"); + + if(pInfo->pScopeStart) + { + if(bPushScopes) + { + CXX_DEBUG_PRINT("There is a scope and we're requested to push scopes"); + + // there is a scope + while(pInfo->pScopeStart != pInfo->pIdentifierStart) + { + CXXToken * pScopeId = pInfo->pScopeStart; + + pInfo->pScopeStart = cxxTokenChainNextTokenOfType( + pInfo->pScopeStart, + CXXTokenTypeMultipleColons + ); + + CXX_DEBUG_ASSERT(pInfo->pScopeStart,"We should have found a next token here"); + + pInfo->pScopeStart = pInfo->pScopeStart->pNext; + + cxxTokenChainDestroyRange( + pInfo->pIdentifierChain, + pScopeId->pNext, + pInfo->pScopeStart->pPrev + ); + + cxxTokenChainTake(pInfo->pIdentifierChain,pScopeId); + + + CXX_DEBUG_PRINT("Pushing scope %s",vStringValue(pScopeId->pszWord)); 
+ + cxxScopePush( + pScopeId, + CXXScopeTypeClass, + // WARNING: We don't know if it's really a class! (FIXME?) + CXXScopeAccessUnknown + ); + iScopesPushed++; + } + } else { + cxxTokenChainDestroyRange( + pInfo->pIdentifierChain, + pInfo->pScopeStart, + pInfo->pIdentifierStart->pPrev + ); + } + } + + CXXToken * pIdentifier = cxxTokenChainExtractRange( + pInfo->pIdentifierStart, + pInfo->pIdentifierEnd, + // proper spacing has been already ensured + // by cxxParserLookForFunctionSignature() + 0 + ); + + cxxTokenChainDestroyRange(pInfo->pIdentifierChain,pInfo->pIdentifierStart,pInfo->pIdentifierEnd); + + CXX_DEBUG_ASSERT( + pIdentifier, + "The identifier should have been non null since the " \ + "indices point inside this chain" + ); + pIdentifier->eType = CXXTokenTypeIdentifier; // force it + + CXX_DEBUG_PRINT("Identifier is '%s'",vStringValue(pIdentifier->pszWord)); + + tagEntryInfo * tag; + CXXToken * pSavedScope; + + if( + (uTagKind == CXXTagKindFUNCTION) && + (g_cxx.uKeywordState & CXXParserKeywordStateSeenFriend) && + (!cxxScopeIsGlobal()) + ) + { + // When "friend" has been seen, and uTagKind == CXXTagKindFUNCTION + // the scope we're using is off by one level. This is the "friend definition" + // trick: + // + // class X + // { + // inline friend void y(){ ... } + // } + // + // Here y() is implicitly defined as a function in the namespace contaning X + // (so it is NOT X::y()). + + pSavedScope = cxxScopeTakeTop(); + tag = cxxTagBegin(uTagKind,pIdentifier); + + // We shouldn't really push back the last scope while the function is being + // parsed, but this is hard to do with the current implementation. We would need + // to store this scope somewhere and push it back after the body of the function + // has been parsed. It can be done, but it's expensive. + // + // Friend declarations are very rare and are implicitly inlined per C++ standard + // so "sane" such declarations are short and usually don't have meaningful tags inside. 
+ + } else { + pSavedScope = NULL; + tag = cxxTagBegin(uTagKind,pIdentifier); + } + + bool bGotTemplate = g_cxx.pTemplateTokenChain && + (g_cxx.pTemplateTokenChain->iCount > 0) && + cxxParserCurrentLanguageIsCPP(); + + if(tag) + { + if(pInfo->pParenthesis->pChain->pTail) + { + // normalize signature + cxxTokenChainNormalizeTypeNameSpacing(pInfo->pParenthesis->pChain); + // make sure we don't emit the trailing space + pInfo->pParenthesis->pChain->pTail->bFollowedBySpace = false; + } + + if(uTagKind == CXXTagKindPROTOTYPE) + { + tag->isFileScope = !isInputHeaderFile(); + } else { + // function definitions + if(eOuterScopeType == CXXScopeTypeNamespace) + { + // in a namespace only static stuff declared in cpp files is file scoped + tag->isFileScope = ( + g_cxx.uKeywordState & CXXParserKeywordStateSeenStatic + ) && ( + !isInputHeaderFile() + ); + } else { + // in a class/struct/union file scope stuff is only in cpp files + tag->isFileScope = !isInputHeaderFile(); + } + } + + vString * pszSignature = cxxTokenChainJoin(pInfo->pParenthesis->pChain,NULL,0); + if(pInfo->pSignatureConst) + { + vStringPut (pszSignature, ' '); + cxxTokenAppendToString(pszSignature,pInfo->pSignatureConst); + } + + CXXToken * pTypeName; + + if(pInfo->pTypeStart) + { + if(pInfo->bTypeContainsIdentifierScopeAndSignature) + { + CXX_DEBUG_PRINT("Type contains identifier and scope"); + // Special case: the type contains the identifier and parenthesis + // (generally things like int (*foo(void))[2] or similar). + + // Scope and identifier have already been removed. + // Remove the parenthesis, temporarily. 
+ if(pInfo->pTypeStart == pInfo->pParenthesis) + pInfo->pTypeStart = pInfo->pParenthesis->pNext; + if(pInfo->pTypeEnd == pInfo->pParenthesis) + pInfo->pTypeEnd = pInfo->pParenthesis->pPrev; + + if(pInfo->pTypeStart && pInfo->pTypeEnd) + { + CXXToken * pTokenBeforeParenthesis = pInfo->pParenthesis->pPrev; + cxxTokenChainTake(pInfo->pParenthesisContainerChain,pInfo->pParenthesis); + + pTypeName = cxxTagCheckAndSetTypeField(pInfo->pTypeStart,pInfo->pTypeEnd); + + cxxTokenChainInsertAfter( + pInfo->pParenthesisContainerChain, + pTokenBeforeParenthesis, + pInfo->pParenthesis + ); + } else { + pTypeName = NULL; + } + } else { + pTypeName = cxxTagCheckAndSetTypeField(pInfo->pTypeStart,pInfo->pTypeEnd); + } + } else { + pTypeName = NULL; + } + + if(pszSignature) + tag->extensionFields.signature = vStringValue(pszSignature); + + bool bIsEmptyTemplate; + + if(bGotTemplate) + { + bIsEmptyTemplate = g_cxx.pTemplateTokenChain->iCount == 2; + + if(pInfo->pTemplateSpecializationStart) + { + CXX_DEBUG_ASSERT(pInfo->pTemplateSpecializationEnd,"Bug"); + cxxTokenChainNormalizeTypeNameSpacingInRange( + pInfo->pTemplateSpecializationStart, + pInfo->pTemplateSpecializationEnd + ); + // make sure we don't emit the trailing space + pInfo->pTemplateSpecializationStart->bFollowedBySpace = false; + + CXXToken * pToken = cxxTokenChainExtractRange( + pInfo->pTemplateSpecializationStart, + pInfo->pTemplateSpecializationEnd, + 0 + ); + + // Tricky. 
We append it to the specialization chain which will + // be then used by cxxTagHandleTemplateFileds() + if(pToken) + { + if(g_cxx.pTemplateSpecializationTokenChain) + cxxTokenChainClear(g_cxx.pTemplateSpecializationTokenChain); + else + g_cxx.pTemplateSpecializationTokenChain = cxxTokenChainCreate(); + cxxTokenChainAppend(g_cxx.pTemplateSpecializationTokenChain,pToken); + } + } + + cxxTagHandleTemplateFields(); + } else { + bIsEmptyTemplate = false; + } + + vString * pszProperties = NULL; + + if(cxxTagFieldEnabled(CXXTagFieldProperties)) + { + unsigned int uProperties = 0; + + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenVirtual) + uProperties |= CXXTagPropertyVirtual; + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenStatic) + uProperties |= CXXTagPropertyStatic; + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenInline) + uProperties |= CXXTagPropertyInline; + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenExplicit) + uProperties |= CXXTagPropertyExplicit; // FIXME: Handle "CXXTagPropertyConstructor"? 
+ if(g_cxx.uKeywordState & CXXParserKeywordStateSeenExtern) + uProperties |= CXXTagPropertyExtern; + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenAttributeDeprecated) + uProperties |= CXXTagPropertyDeprecated; + if(pInfo->pSignatureConst) + uProperties |= CXXTagPropertyConst; + if(pInfo->uFlags & CXXFunctionSignatureInfoPure) + uProperties |= CXXTagPropertyPure | CXXTagPropertyVirtual; + if(pInfo->uFlags & CXXFunctionSignatureInfoOverride) + uProperties |= CXXTagPropertyOverride | CXXTagPropertyVirtual; + if(pInfo->uFlags & CXXFunctionSignatureInfoFinal) + uProperties |= CXXTagPropertyFinal | CXXTagPropertyVirtual; + if(pInfo->uFlags & CXXFunctionSignatureInfoDefault) + uProperties |= CXXTagPropertyDefault; + if(pInfo->uFlags & CXXFunctionSignatureInfoDelete) + uProperties |= CXXTagPropertyDelete; + if(pInfo->uFlags & CXXFunctionSignatureInfoVolatile) + uProperties |= CXXTagPropertyVolatile; + if(pInfo->uFlags & CXXFunctionSignatureInfoFunctionTryBlock) + uProperties |= CXXTagPropertyFunctionTryBlock; + if(pInfo->uFlags & CXXFunctionSignatureInfoScopeTemplateSpecialization) + uProperties |= CXXTagPropertyScopeTemplateSpecialization | + CXXTagPropertyTemplateSpecialization; + if((pInfo->uFlags & CXXFunctionSignatureInfoTemplateSpecialization) || bIsEmptyTemplate) + uProperties |= CXXTagPropertyTemplateSpecialization; + + pszProperties = cxxTagSetProperties(uProperties); + } + + int iCorkQueueIndex = cxxTagCommit(piCorkQueueIndexFQ); + + if(piCorkQueueIndex) + *piCorkQueueIndex = iCorkQueueIndex; + + if(pszSignature) + vStringDelete(pszSignature); + + if(pszProperties) + vStringDelete(pszProperties); + + if(pTypeName) + cxxTokenDestroy(pTypeName); + } + + if(pSavedScope) + cxxScopePushTop(pSavedScope); + +#ifdef CXX_DO_DEBUGGING + if(tag) + { + if(uTagKind == CXXTagKindFUNCTION) + CXX_DEBUG_PRINT("Emitted function '%s'",vStringValue(pIdentifier->pszWord)); + else + CXX_DEBUG_PRINT("Emitted prototype '%s'",vStringValue(pIdentifier->pszWord)); + } +#endif + + 
if(bPushScopes) + { + cxxScopePush(pIdentifier, + (uTagKind == CXXTagKindPROTOTYPE)? CXXScopeTypePrototype: CXXScopeTypeFunction, + CXXScopeAccessUnknown); + iScopesPushed++; + } else { + cxxTokenDestroy(pIdentifier); + } + + if( + tag && + bGotTemplate && + cxxTagKindEnabled(CXXTagCPPKindTEMPLATEPARAM) + ) + cxxParserEmitTemplateParameterTags(); + + CXX_DEBUG_LEAVE(); + return iScopesPushed; +} + +// +// This is called at block level upon encountering an opening bracket, +// when we are not in a function. The current block chain almost certainly +// contains a function signature. +// +// This function attempts to extract the function name, emit it as a tag +// and push all the necessary scopes for the next block. It returns the number +// of scopes pushed. +// +// When the returned number of scopes is 0 then no function has been found. +// +int cxxParserExtractFunctionSignatureBeforeOpeningBracket( + CXXFunctionSignatureInfo * pInfo, + int * piCorkQueueIndex, + int * piCorkQueueIndexFQ + ) +{ + CXX_DEBUG_ENTER(); + +#ifdef CXX_DO_DEBUGGING + vString * pChain = cxxTokenChainJoin(g_cxx.pTokenChain,NULL,0); + CXX_DEBUG_PRINT("Looking for function in '%s'",vStringValue(pChain)); + vStringDelete(pChain); +#endif + + // Note that the token chain ALWAYS contains the final delimiter here. + + CXX_DEBUG_ASSERT( + g_cxx.pTokenChain->iCount > 0, + "There should be at least the terminator here!" + ); + CXX_DEBUG_ASSERT( + cxxTokenChainLast(g_cxx.pTokenChain)->eType == CXXTokenTypeOpeningBracket, + "We should have been called when pointing on an opening bracket!" 
+ ); + + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + CXXTypedVariableSet oParamInfo; + bool bParams = cxxTagKindEnabled(CXXTagKindPARAMETER); + + if(!cxxParserLookForFunctionSignature(g_cxx.pTokenChain,pInfo,bParams?&oParamInfo:NULL)) + { + CXX_DEBUG_LEAVE_TEXT("No parenthesis found: no function"); + return 0; + } + + // Note that emitting the tag is ok even if 'friend' has been seen, + // but the scope will be adjusted inside cxxParserEmitFunctionTags() + + int iScopesPushed = cxxParserEmitFunctionTags( + pInfo, + CXXTagKindFUNCTION, + CXXEmitFunctionTagsPushScopes, + piCorkQueueIndex, + piCorkQueueIndexFQ + ); + + if(bParams) + cxxParserEmitFunctionParameterTags(&oParamInfo); + + CXX_DEBUG_LEAVE(); + return iScopesPushed; +} + +// This function *may* change the token chain +void cxxParserEmitFunctionParameterTags(CXXTypedVariableSet * pInfo) +{ + // emit parameters + CXX_DEBUG_ENTER(); + + unsigned int i = 0; + while(i < pInfo->uCount) + { + tagEntryInfo * tag = cxxTagBegin( + CXXTagKindPARAMETER, + pInfo->aIdentifiers[i] + ); + + if(!tag) + break; + + CXXToken * pTypeName; + + if(pInfo->aTypeStarts[i] && pInfo->aTypeEnds[i]) + { + // This is tricky. + // We know that the declaration contains the identifier. + // We don't want the identifier to appear in the type name. + // So we have to remove it from the chain (eventually recursively if there + // are nested parentheses). + // However the declaration might start or end with the identifier + // and in that case we would be effectively breaking the type chain. + // Work around it. 
+ + CXXToken * pTypeStart = pInfo->aTypeStarts[i]; + CXXToken * pTypeEnd = pInfo->aTypeEnds[i]; + + if(pTypeStart != pTypeEnd) + { + if(pTypeStart == pInfo->aIdentifiers[i]) + pTypeStart = pTypeStart->pNext; + else if(pTypeEnd == pInfo->aIdentifiers[i]) + pTypeEnd = pTypeEnd->pPrev; + + cxxTokenChainTakeRecursive(pInfo->pChain,pInfo->aIdentifiers[i]); + + pTypeName = cxxTagCheckAndSetTypeField( + pTypeStart, + pTypeEnd + ); + } else { + // The declaration contains only the identifier! + pTypeName = NULL; + } + } else { + pTypeName = NULL; + } + tag->extensionFields.nth = i; + + tag->isFileScope = true; + + if (pInfo->uAnonymous & (0x1u << i)) + markTagExtraBit(tag, XTAG_ANONYMOUS); + + cxxTagCommit(NULL); + + if(pTypeName) + { + cxxTokenDestroy(pInfo->aIdentifiers[i]); + cxxTokenDestroy(pTypeName); + } + + i++; + } + CXX_DEBUG_LEAVE(); +} + + + +// +// This function checks if the specified token chain looks like a +// non K&R style function parameter list, eventually with default arguments +// and such. +// +// If pParamInfo is non NULL then the function will also gather +// informations about the parameters and store them. +// +bool cxxParserTokenChainLooksLikeFunctionParameterList( + CXXTokenChain * tc, + CXXTypedVariableSet * pParamInfo + ) +{ + CXX_DEBUG_ENTER(); + CXX_DEBUG_ASSERT( + tc->iCount >= 2, + "At least initial and final parenthesis should be there" + ); + + CXX_DEBUG_ASSERT( + (cxxTokenChainFirst(tc)->eType == CXXTokenTypeOpeningParenthesis) && + (cxxTokenChainLast(tc)->eType == CXXTokenTypeClosingParenthesis), + "The first and last token should be parentheses here" + ); + + if(pParamInfo) + { + pParamInfo->uCount = 0; + pParamInfo->pChain = tc; + } + + if(tc->iCount == 2) + { + CXX_DEBUG_LEAVE_TEXT("Empty signature is valid for a function"); + return true; + } + + CXXToken * t = cxxTokenChainAt(tc,1); + + bool bIsCPP = cxxParserCurrentLanguageIsCPP(); + + for(;;) + { + // Check every parameter. 
+ // + // Possibilities: + // + // type variable + // type /* variable omitted */ + // type variable[..] + // type variable:bits + // type (*variable)(args) + // type ((*variable)(args)) + // = default <-- C++ only + // ... <-- vararg + // + + CXXToken * pStart = t; + + // First token must be identifier/keyword, :: or ... + if(!cxxTokenTypeIsOneOf( + t, + CXXTokenTypeIdentifier | CXXTokenTypeKeyword | + CXXTokenTypeMultipleDots | CXXTokenTypeMultipleColons + )) + { + CXX_DEBUG_LEAVE_TEXT( + "Token '%s' is something that is not a identifier, keyword, :: or ...", + vStringValue(t->pszWord) + ); + return false; + } + +#define TOKENS_THAT_SHOULD_NOT_APPEAR_IN_SIGNATURE_BEFORE_ASSIGNMENT \ + ( \ + CXXTokenTypePointerOperator | \ + CXXTokenTypeOperator | \ + CXXTokenTypeDotOperator | \ + CXXTokenTypeNumber | \ + CXXTokenTypeStringConstant | \ + CXXTokenTypeCharacterConstant | \ + CXXTokenTypeAngleBracketChain | \ + CXXTokenTypeSingleColon \ + ) + +try_again: + t = cxxTokenChainNextTokenOfType( + t, + CXXTokenTypeClosingParenthesis | CXXTokenTypeComma | + CXXTokenTypeAssignment | CXXTokenTypeSmallerThanSign | + CXXTokenTypeGreaterThanSign | CXXTokenTypeParenthesisChain | + TOKENS_THAT_SHOULD_NOT_APPEAR_IN_SIGNATURE_BEFORE_ASSIGNMENT + ); + + CXX_DEBUG_ASSERT(t,"We should have found the closing parenthesis here"); + + if(cxxTokenTypeIs(t,CXXTokenTypeParenthesisChain)) + { + CXX_DEBUG_PRINT("Found parenthesis chain"); + // Either part of function pointer declaration or a very ugly variable decl + // Examples are: + // type (*name)(args) + // type ((*name)(args)) + // type (*name) + // type (&name) + // type (&name)[something] + // ... + // + // FIXME: This check should be stricter (?) 
+ if( + ( + !cxxTokenChainFirstTokenOfType( + t->pChain, + TOKENS_THAT_SHOULD_NOT_APPEAR_IN_SIGNATURE_BEFORE_ASSIGNMENT + ) + ) && ( + cxxTokenChainFirstTokenOfType( + t->pChain, + CXXTokenTypeStar | CXXTokenTypeAnd + ) || // part of (*name) or (&name) + cxxParserTokenChainLooksLikeFunctionParameterList( + t->pChain, + NULL + ) || // (args) + !cxxTokenChainFirstTokenNotOfType( + t->pChain, + CXXTokenTypeOpeningParenthesis | + CXXTokenTypeParenthesisChain | + CXXTokenTypeClosingParenthesis + ) // ((whatever)(whatever)) + ) + ) + goto try_again; + + CXX_DEBUG_LEAVE_TEXT( + "Found a parenthesis chain that doesn't belong to a function parameters list" + ); + return false; + } + + if(cxxTokenTypeIs(t,CXXTokenTypeSmallerThanSign)) + { + CXX_DEBUG_PRINT("Maybe template?"); + + t = cxxTokenChainSkipToEndOfTemplateAngleBracket(t); + + if(!t) + { + CXX_DEBUG_LEAVE_TEXT( + "Either not a function declaration or unbalanced " \ + "template angle brackets" + ); + return false; + } + + goto try_again; + } + + if(cxxTokenTypeIs(t,CXXTokenTypeGreaterThanSign)) + { + CXX_DEBUG_LEAVE_TEXT("Unbalanced > (a < should have been found before)"); + return false; + } + + if(cxxTokenTypeIsOneOf( + t, + TOKENS_THAT_SHOULD_NOT_APPEAR_IN_SIGNATURE_BEFORE_ASSIGNMENT + )) + { + CXX_DEBUG_LEAVE_TEXT( + "Token '%s' is something that doesn't belong to a function " \ + "parameter list", + vStringValue(t->pszWord) + ); + return false; + } + + // closing parenthesis, assignment or comma + + if(pParamInfo && (t->pPrev != pStart)) + { + // FIXME: This may break in some special macro cases? 
+ if(pParamInfo->uCount < CXX_TYPED_VARIABLE_SET_ITEM_COUNT) + { + // locate identifier + + CXXToken * pIdentifier = NULL; + + if(cxxTokenTypeIs(t->pPrev,CXXTokenTypeIdentifier)) + { + // type var + pIdentifier = t->pPrev; + } else if(t->pPrev->pPrev) + { + CXXToken *pNonSquareParenthesis = cxxTokenChainPreviousTokenNotOfType( + t, + CXXTokenTypeSquareParenthesisChain + ); + + bool bPrevIsSquareParenthesis = ( + pNonSquareParenthesis && + (pNonSquareParenthesis != t->pPrev) + ); + + if( + bPrevIsSquareParenthesis && + cxxTokenTypeIs(pNonSquareParenthesis,CXXTokenTypeIdentifier) + ) + { + // type var[] + // type var[]...[] + pIdentifier = pNonSquareParenthesis; + } else if( + bPrevIsSquareParenthesis && + cxxTokenTypeIs(pNonSquareParenthesis,CXXTokenTypeParenthesisChain) && + (pIdentifier = cxxTokenChainFirstTokenOfType( + pNonSquareParenthesis->pChain, + CXXTokenTypeIdentifier + )) + ) + { + // type (...var)[] + } else if( + cxxTokenTypeIs(t->pPrev,CXXTokenTypeNumber) && + cxxTokenTypeIs(t->pPrev->pPrev,CXXTokenTypeIdentifier) + ) + { + // type var:bits + pIdentifier = t->pPrev->pPrev; + } else if( + cxxTokenTypeIs(t->pPrev,CXXTokenTypeParenthesisChain) && + ( + ( + // type (*name)(args) + cxxTokenTypeIs( + t->pPrev->pPrev, + CXXTokenTypeParenthesisChain + ) && + (pIdentifier = cxxTokenChainLastPossiblyNestedTokenOfType( + t->pPrev->pPrev->pChain, + CXXTokenTypeIdentifier, NULL + )) && + pIdentifier->pPrev && + cxxTokenTypeIs(pIdentifier->pPrev,CXXTokenTypeStar) + ) || ( + // type (*&name) + (pIdentifier = cxxTokenChainLastPossiblyNestedTokenOfType( + t->pPrev->pChain, + CXXTokenTypeIdentifier, NULL + )) && + pIdentifier->pPrev && + cxxTokenTypeIsOneOf( + pIdentifier->pPrev, + CXXTokenTypeStar | CXXTokenTypeAnd + ) + ) + ) + ) + { + // type (*ptr)(args) + // pIdentifier already set above + // FIXME: Check this better? 
+ } + } + + if(pIdentifier || isXtagEnabled(XTAG_ANONYMOUS)) + { + pParamInfo->aTypeStarts[pParamInfo->uCount] = pStart; + pParamInfo->aTypeEnds[pParamInfo->uCount] = t->pPrev; + pParamInfo->uAnonymous &= ~(0x1u << pParamInfo->uCount); + if(!pIdentifier) + { + /* This block handles parameter having no name lie + * + * void f(int *); + */ + pIdentifier = cxxTokenCreateAnonymousIdentifier(CXXTagKindPARAMETER); + pIdentifier->iLineNumber = t->pPrev->iLineNumber; + pIdentifier->oFilePosition = t->pPrev->oFilePosition; + pParamInfo->uAnonymous |= (0x1u << pParamInfo->uCount); + } + pParamInfo->aIdentifiers[pParamInfo->uCount] = pIdentifier; + pParamInfo->uCount++; + +#ifdef CXX_DO_DEBUGGING + CXXToken * pDecl = cxxTokenChainExtractRange(pStart,t->pPrev,0); + CXX_DEBUG_PRINT( + "Found parameter '%s' in '%s'", + vStringValue(pIdentifier->pszWord), + vStringValue(pDecl->pszWord) + ); + cxxTokenDestroy(pDecl); + CXX_DEBUG_ASSERT( + cxxTokenChainFindToken(pParamInfo->pChain,pStart) >= 0, + "The start token must be in the chain" + ); + CXX_DEBUG_ASSERT( + cxxTokenChainFindToken(pParamInfo->pChain,t->pPrev) >= 0, + "The end token must be in the chain" + ); +#endif + } + } else { + pParamInfo = NULL; // reset so condition will be faster to check + } + } else if (pParamInfo + && (pParamInfo->uCount < CXX_TYPED_VARIABLE_SET_ITEM_COUNT) + && (!cxxTokenIsKeyword(pStart, CXXKeywordVOID)) + && (!cxxTokenTypeIs(pStart,CXXTokenTypeMultipleDots)) + && isXtagEnabled(XTAG_ANONYMOUS)) { + /* This block handles parameter having no name like + * + * int f (int); + * + * In C language, you will find such a thing in a prototype. + * In C++ language, you will find it even in a function definition. 
+ * + */ + CXXToken * pFakeStart = cxxTokenCopy(pStart); + CXXToken * pFakeId = cxxTokenCreateAnonymousIdentifier(CXXTagKindPARAMETER); + pFakeId->iLineNumber = pStart->iLineNumber; + pFakeId->oFilePosition = pStart->oFilePosition; + + pFakeStart->pNext = pFakeId; + pFakeId->pPrev = pFakeStart; + + pParamInfo->aTypeStarts[pParamInfo->uCount] = pFakeStart; + pParamInfo->aTypeEnds[pParamInfo->uCount] = pFakeId; + pParamInfo->aIdentifiers[pParamInfo->uCount] = pFakeId; + pParamInfo->uAnonymous |= (0x1u << pParamInfo->uCount); + pParamInfo->uCount++; + + PARSER_TRASH_BOX (pFakeStart, cxxTokenDestroy); + /* pFakeId may be destroyed via pParamInfo->aIdentifiers[i]. */ + } + + if(cxxTokenTypeIs(t,CXXTokenTypeClosingParenthesis)) + { + CXX_DEBUG_LEAVE_TEXT("Found closing parenthesis, it's OK"); + return true; + } + + if(cxxTokenTypeIs(t,CXXTokenTypeComma)) + { + // ok, go ahead + CXX_DEBUG_PRINT("Found comma"); + t = t->pNext; + continue; + } + + // assignment. + if(!bIsCPP) + { + CXX_DEBUG_LEAVE_TEXT( + "Found assignment, this doesn't look like valid C function parameter list" + ); + return false; + } + + CXX_DEBUG_PRINT("Found assignment"); + + t = cxxTokenChainNextTokenOfType(t,CXXTokenTypeClosingParenthesis | CXXTokenTypeComma); + + CXX_DEBUG_ASSERT(t,"We should have found the closing parenthesis here"); + + if(cxxTokenTypeIs(t,CXXTokenTypeClosingParenthesis)) + { + CXX_DEBUG_LEAVE_TEXT("Found closing parenthesis, it's OK"); + return true; + } + + // ok, comma + t = t->pNext; + } + + // not reached + CXX_DEBUG_LEAVE(); + return true; +} diff --git a/ctags/parsers/cxx/cxx_parser_internal.h b/ctags/parsers/cxx/cxx_parser_internal.h new file mode 100644 index 0000000000..78ed5f237d --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser_internal.h @@ -0,0 +1,388 @@ +#ifndef ctags_cxx_parser_internal_h_ +#define ctags_cxx_parser_internal_h_ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU 
General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" + +#include "parse.h" +#include "ptrarray.h" + +#include "cxx_tag.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" + +// +// CXX parser internal declarations. +// This file is included only by cxx_parser_*.c +// + +// CXX parser language. We use a specific enum and not langType +// since we want to be able to use it in bit fields. +typedef enum _CXXLanguage +{ + CXXLanguageC = 1, + CXXLanguageCPP = (1 << 1), + CXXLanguageCUDA = (1 << 2) +} CXXLanguage; + +// cxx_parser_tokenizer.c +bool cxxParserParseNextToken(void); +void cxxParserUngetCurrentToken(void); + +// cxx_parser_lambda.c +CXXToken * cxxParserOpeningBracketIsLambda(void); +bool cxxParserHandleLambda(CXXToken * pParenthesis); + +// cxx_parser_block.c +bool cxxParserParseBlock(bool bExpectClosingBracket); +bool cxxParserParseBlockHandleOpeningBracket(void); + +enum CXXExtractVariableDeclarationsFlags +{ + // We are parsing K&R style parameter declarations. 
+ CXXExtractVariableDeclarationsKnRStyleParameters = 1 +}; + +// cxx_parser_variable.c +bool cxxParserExtractVariableDeclarations( + CXXTokenChain * pChain, + unsigned int uFlags + ); + +CXXToken * cxxParserFindFirstPossiblyNestedAndQualifiedIdentifier( + CXXTokenChain * pChain, + CXXTokenChain ** pParentChain + ); + +// cxx_parser_function.c + +bool cxxParserTokenChainLooksLikeFunctionCallParameterSet( + CXXTokenChain * pChain + ); +bool cxxParserTokenChainLooksLikeConstructorParameterSet( + CXXTokenChain * pChain + ); + +typedef enum _CXXFunctionSignatureInfoFlag +{ + // Followed by = 0 + CXXFunctionSignatureInfoPure = 1, + // Followed by = default + CXXFunctionSignatureInfoDefault = (1 << 1), + // Followed by "override" + CXXFunctionSignatureInfoOverride = (1 << 2), + // Followed by "final" + CXXFunctionSignatureInfoFinal = (1 << 3), + // Followed by = delete + CXXFunctionSignatureInfoDelete = (1 << 4), + // Followed by volatile + CXXFunctionSignatureInfoVolatile = (1 << 5), + // Is template specialization a() + CXXFunctionSignatureInfoTemplateSpecialization = (1 << 6), + // Is scope template specialization a::b() + // (implies that this is a template specialization too) + CXXFunctionSignatureInfoScopeTemplateSpecialization = (1 << 7), + // function-try-block: int f() try { ... } catch { ... } + CXXFunctionSignatureInfoFunctionTryBlock = (1 << 8), +} CXXFunctionSignatureInfoFlag; + +// +// Description of a function signature. +// +typedef struct _CXXFunctionSignatureInfo +{ + // The parenthesis token. + // It is always contained in the chain pointed by pParenthesisContainerChain + CXXToken * pParenthesis; + + // The token chain that contains the parenthesis above. May or may not + // be the toplevel chain. + CXXTokenChain * pParenthesisContainerChain; + + // The identifier. It's either a single token (so both pIdentifierStart + // and pIdentifierEnd point to the same token) or multiple tokens starting + // with the "operator" keyword. 
Spacing of the tokens is adjusted. + // The identifier is always contained in the chain pointed by pIdentifierChain. + CXXToken * pIdentifierStart; + CXXToken * pIdentifierEnd; + + // The chain that pIdentifierStart, pIdentifierEnd and pScopeStart + // belong to. It MAY be a nested chain and it may even be included in the + // range specified by pTypeStart / pTypeEnd below! + CXXTokenChain * pIdentifierChain; + + // Non-NULL if the signature is followed by the "const" keyword + CXXToken * pSignatureConst; + + // Non-NULL if there is a scope before the identifier. + // The scope ends at pIdentifierStart. + // The scope start is always in the chain pointed by pIdentifierChain. + CXXToken * pScopeStart; + + // Non-NULL if a return type has been identified. + CXXToken * pTypeStart; + CXXToken * pTypeEnd; + // There are cases in which the type range defined above may + // contain the identifier, scope and signature ranges. + // This happens, for example, with functions returning + // nasty things, like: + // int (*foo(void))[2] + // It is guaranteed that the scope and identifier are either + // completely included or completely excluded from the type range. + bool bTypeContainsIdentifierScopeAndSignature; + + // Non-NULL if there is a trailing comma after the function. + // This is used for the special case of multiple prototypes in a single + // declaration: + // RetType functionA(...), functionB(...); + CXXToken * pTrailingComma; + + // Template specialization token range, if any. + CXXToken * pTemplateSpecializationStart; + CXXToken * pTemplateSpecializationEnd; + + // Additional information + unsigned int uFlags; + +} CXXFunctionSignatureInfo; + +int cxxParserMaybeParseKnRStyleFunctionDefinition(void); +int cxxParserExtractFunctionSignatureBeforeOpeningBracket( + CXXFunctionSignatureInfo * pInfo, + int * piCorkQueueIndex, + int * piCorkQueueIndexFQ + ); + +/* This must be smaller than (sizeof(unsigned int) * 8). + * See CXXTypedVariableSet::uAnonymous. 
*/ +#define CXX_TYPED_VARIABLE_SET_ITEM_COUNT 24 + +typedef struct _CXXTypedVariableSet +{ + // The number of parameters found + unsigned int uCount; + + // All the tokens are references to the source chain (do not delete) + CXXTokenChain * pChain; + // The initial tokens of the type + CXXToken * aTypeStarts[CXX_TYPED_VARIABLE_SET_ITEM_COUNT]; + // The final tokens of the type + CXXToken * aTypeEnds[CXX_TYPED_VARIABLE_SET_ITEM_COUNT]; + // The identifier tokens + CXXToken * aIdentifiers[CXX_TYPED_VARIABLE_SET_ITEM_COUNT]; + + unsigned int uAnonymous; +} CXXTypedVariableSet; + +bool cxxParserTokenChainLooksLikeFunctionParameterList( + CXXTokenChain * tc, + CXXTypedVariableSet * pParamInfo + ); +bool cxxParserLookForFunctionSignature( + CXXTokenChain * pChain, + CXXFunctionSignatureInfo * pInfo, + CXXTypedVariableSet * pParamInfo + ); + +enum CXXEmitFunctionTagsOptions +{ + // Push the scopes defined by the function + CXXEmitFunctionTagsPushScopes = 1 +}; + +int cxxParserEmitFunctionTags( + CXXFunctionSignatureInfo * pInfo, + unsigned int uTagKind, + unsigned int uOptions, + int * piCorkQueueIndex, + int * piCorkQueueIndexFQ + ); + +void cxxParserEmitFunctionParameterTags(CXXTypedVariableSet * pInfo); + +// cxx_parser_typedef.c +bool cxxParserParseGenericTypedef(void); +void cxxParserExtractTypedef( + CXXTokenChain * pChain, + bool bExpectTerminatorAtEnd, + bool bGotTemplate + ); + +// cxx_parser_namespace.c +bool cxxParserParseNamespace(void); + +// cxx_parser.c +void cxxParserNewStatement(void); +bool cxxParserSkipToSemicolonOrEOF(void); +bool cxxParserParseToEndOfQualifedName(void); +bool cxxParserParseEnum(void); +bool cxxParserParseClassStructOrUnion( + CXXKeyword eKeyword, + unsigned int uTagKind, + unsigned int uScopeType + ); +bool cxxParserParseAndCondenseCurrentSubchain( + unsigned int uInitialSubchainMarkerTypes, + bool bAcceptEOF, + bool bCanReduceInnerElements + ); +bool cxxParserParseUpToOneOf(unsigned int uTokenTypes, + bool bCanReduceInnerElements); 
+bool cxxParserParseIfForWhileSwitchCatchParenthesis(void); +bool cxxParserParseTemplatePrefix(void); +CXXTokenChain * cxxParserParseTemplateAngleBracketsToSeparateChain(bool bCaptureTypeParameters); +bool cxxParserParseTemplateAngleBracketsToTemplateChain(void); +void cxxParserEmitTemplateParameterTags(void); +bool cxxParserParseUsingClause(void); +bool cxxParserParseAccessSpecifier(void); +void cxxParserAnalyzeOtherStatement(void); +bool cxxParserParseAndCondenseSubchainsUpToOneOf( + unsigned int uTokenTypes, + unsigned int uInitialSubchainMarkerTypes, + bool bCanReduceInnerElements + ); +void cxxParserMarkEndLineForTagInCorkQueue(int iCorkQueueIndex); +void cxxParserSetEndLineForTagInCorkQueue(int iCorkQueueIndex,unsigned long lEndLine); + +typedef enum _CXXParserKeywordState +{ + // We are parsing a statement that comes right after + // a typedef keyword (so we're parsing the type being typedef'd). + CXXParserKeywordStateSeenTypedef = 1, + // We are parsing a statement that comes right after + // an inline keyword + CXXParserKeywordStateSeenInline = (1 << 1), + // We are parsing a statement that comes right after + // an extern keyword + CXXParserKeywordStateSeenExtern = (1 << 2), + // We are parsing a statement that comes right after + // a static keyword + CXXParserKeywordStateSeenStatic = (1 << 3), + // an "explicit" keyword has been seen + CXXParserKeywordStateSeenExplicit = (1 << 4), + // an "operator" keyword has been seen + CXXParserKeywordStateSeenOperator = (1 << 5), + // "virtual" has been seen + CXXParserKeywordStateSeenVirtual = (1 << 6), + // "return" has been seen + CXXParserKeywordStateSeenReturn = (1 << 7), + // "mutable" has been seen + CXXParserKeywordStateSeenMutable = (1 << 8), + // "const" has been seen at block level + CXXParserKeywordStateSeenConst = (1 << 9), + // "volatile" has been seen at block level + CXXParserKeywordStateSeenVolatile = (1 << 10), + // __attribute__((deprecated)) has been seen + 
CXXParserKeywordStateSeenAttributeDeprecated = (1 << 11), + // "friend" has been seen at block level + CXXParserKeywordStateSeenFriend = (1 << 12), +} CXXParserKeywordState; + +#define CXX_PARSER_MAXIMUM_NESTING_LEVELS 1024 + +typedef struct _CXXParserState +{ + // The current language + CXXLanguage eLanguage; + + // The current language as langType + langType eLangType; + + // The identifier of the CPP language, as indicated by ctags core + langType eCPPLangType; + // The identifier of the C language, as indicated by ctags core + langType eCLangType; + // The identifier of the CUDA language, as indicated by ctags core + langType eCUDALangType; + + // The kind options associated to the current language + kindDefinition * pKindDefinitions; + // The number of kind options, used mainly for checking/debug purposes + unsigned int uKindDefinitionCount; + + // The fields associated to the current language + fieldDefinition * pFieldOptions; + // The number of field options, used mainly for checking/debug purposes + unsigned int uFieldOptionCount; + + // The current token chain + CXXTokenChain * pTokenChain; + + // The last template token chain we found + // This remains valid within the statement, so it can be used slightly + // after the template has been parsed (i.e. in the class coming after) + CXXTokenChain * pTemplateTokenChain; + + // The last template specialization token chain we found. May be null. + // This pointer, if non null, is valid only if pTemplateTokenChain is non null. + CXXTokenChain * pTemplateSpecializationTokenChain; + + // The array of CXXToken objects that are found to be template + // type parameters and belong to the pTemplateTokenChain above. + // The validity of this array is tied to the validity of + // pTemplateTokenChain above. If there is no pTemplateTokenChain + // then this array is simply invalid (even if not empty) + CXXTypedVariableSet oTemplateParameters; + + // The last token we have extracted. 
This is always pushed to + // the token chain tail (which will take care of deletion) + CXXToken * pToken; // the token chain tail + + // The parser internally supports a look-ahead of one token. + // It is rarely needed though. + // This is the token that has been "unget" from the token chain tail. + CXXToken * pUngetToken; + + // The last char we have extracted from input + int iChar; + + // Toplevel keyword state. A combination of CXXParserKeywordState flags. + // Please note that the keywords appearing inside a () subchain are NOT marked. + unsigned int uKeywordState; + + // This is set to true when we're parsing a *.cpp file (cpp extension!) + // or we're parsing a header but we have encountered valid C++ constructs that + // definitely confirm we're parsing C++. + bool bConfirmedCPPLanguage; + + // The nesting levels our parser is in. + // + // Note that this is really a kind-of arbitrary measure as the counter + // is increased in certain parser code paths that often lead to recursion. + // It does not necessarily match the real number of stack frames or nested + // brackets/parentheses in the input. + // + // The counter is used to avoid stack overflow when nesting grows too much. + // This usually happens only with erroneous macro usage or broken input. 
+ int iNestingLevels; + +} CXXParserState; + + +// defined in cxx_parser.c +extern CXXParserState g_cxx; + +#define cxxParserCurrentLanguageIs(_eLanguage) \ + (g_cxx.eLanguage == _eLanguage) + +#define cxxParserCurrentLanguageIsOneOf(_eLanguageMask) \ + (((int)(g_cxx.eLanguage)) & (_eLanguageMask)) + +#define cxxParserCurrentLanguageIsCPP() \ + cxxParserCurrentLanguageIs(CXXLanguageCPP) + +#define cxxParserCurrentLanguageIsC() \ + cxxParserCurrentLanguageIs(CXXLanguageC) + +#define cxxParserCurrentLanguageIsCUDA() \ + cxxParserCurrentLanguageIs(CXXLanguageCUDA) + +#endif //!ctags_cxx_parser_internal_h_ diff --git a/ctags/parsers/cxx/cxx_parser_lambda.c b/ctags/parsers/cxx/cxx_parser_lambda.c new file mode 100644 index 0000000000..e12991551d --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser_lambda.c @@ -0,0 +1,332 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_parser.h" +#include "cxx_parser_internal.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_scope.h" +#include "cxx_tag.h" + +#include "parse.h" +#include "vstring.h" +#include "../cpreprocessor.h" +#include "debug.h" +#include "keyword.h" +#include "read.h" + +#include + +// +// This has to be called when pointing at an opening bracket. +// Returns NULL if it does not look to be a lambda invocation. +// Otherwise it returns the parameter parenthesis token or the +// capture list square parenthesis token if the lambda is +// parameterless. 
+// +CXXToken * cxxParserOpeningBracketIsLambda(void) +{ + CXX_DEBUG_ENTER(); + + // Lambda syntax variants: + // + // 1) [ capture-list ] ( params ) mutable(opt) exception attr -> ret { body } + // 2) [ capture-list ] ( params ) -> ret { body } + // 3) [ capture-list ] ( params ) { body } + // 4) [ capture-list ] { body } + + // Similar, but not lambda: + // + // 5) type var[] { ... } + // 6) operator [] ( params ) { ... } + + CXX_DEBUG_ASSERT(cxxParserCurrentLanguageIsCPP(),"C++ only"); + + CXXToken * t = g_cxx.pToken->pPrev; + + if(!t) + { + CXX_DEBUG_LEAVE_TEXT("Not a lambda: no token before bracket"); + return NULL; // not a lambda + } + + // Check simple cases first + + // case 4? + if(cxxTokenTypeIs(t,CXXTokenTypeSquareParenthesisChain)) + { + if( + t->pPrev && + cxxTokenTypeIs(t->pPrev,CXXTokenTypeIdentifier) + ) + { + // case 5 + CXX_DEBUG_LEAVE_TEXT("Not a lambda: looks like type var[] { ... }"); + return NULL; + } + + // very likely parameterless lambda + CXX_DEBUG_LEAVE_TEXT("Likely a parameterless lambda"); + return t; + } + + // case 3? + if(cxxTokenTypeIs(t,CXXTokenTypeParenthesisChain)) + { + t = t->pPrev; + if(!t) + { + CXX_DEBUG_LEAVE_TEXT("Not a lambda: nothing before ()"); + return NULL; // can't be + } + + if(!cxxTokenTypeIs(t,CXXTokenTypeSquareParenthesisChain)) + { + CXX_DEBUG_LEAVE_TEXT("Not a lambda: no [] before ()"); + return NULL; // can't be + } + + if( + t->pPrev && + // namely: operator [], operator new[], operator delete[] + cxxTokenTypeIs(t->pPrev,CXXTokenTypeKeyword) + ) + { + // case 6 + CXX_DEBUG_LEAVE_TEXT("Not a lambda: keyword before []"); + return NULL; + } + + CXX_DEBUG_LEAVE_TEXT("Looks like a lambda with parameters"); + return t->pNext; + } + + // Handle the harder cases. + // Look backwards for the square parenthesis chain, but stop at + // tokens that shouldn't be present between the bracket and the + // parenthesis. 
+ t = cxxTokenChainPreviousTokenOfType( + t, + CXXTokenTypeSquareParenthesisChain | + CXXTokenTypeBracketChain | + CXXTokenTypeAssignment | + CXXTokenTypeOperator + ); + + if(!t) + { + CXX_DEBUG_LEAVE_TEXT("Not a lambda: no []"); + return NULL; + } + + if(!cxxTokenTypeIs(t,CXXTokenTypeSquareParenthesisChain)) + { + CXX_DEBUG_LEAVE_TEXT("Not a lambda: no [] before assignment or operator"); + return NULL; + } + + if( + t->pPrev && + // namely: operator [], operator new[], operator delete[] + cxxTokenTypeIs(t->pPrev,CXXTokenTypeKeyword) + ) + { + // case 6 + CXX_DEBUG_LEAVE_TEXT("Not a lambda: keyword before []"); + return NULL; + } + + t = t->pNext; + + if(cxxTokenTypeIs(t,CXXTokenTypeParenthesisChain)) + { + CXX_DEBUG_LEAVE_TEXT("Looks like a lambda (got () after [])"); + return t; + } + + CXX_DEBUG_LEAVE_TEXT("Not a lambda: no () after []"); + return NULL; +} + +// In case of a parameterless lambda (that has no parenthesis) the parameter +// is the capture list token. +bool cxxParserHandleLambda(CXXToken * pParenthesis) +{ + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT(cxxParserCurrentLanguageIsCPP(),"C++ only"); + + CXXToken * pIdentifier = cxxTokenCreateAnonymousIdentifier(CXXTagKindFUNCTION); + + CXXTokenChain * pSave = g_cxx.pTokenChain; + CXXTokenChain * pNew = cxxTokenChainCreate(); + g_cxx.pTokenChain = pNew; + + tagEntryInfo * tag = cxxTagBegin(CXXTagKindFUNCTION,pIdentifier); + + CXXToken * pAfterParenthesis = pParenthesis ? pParenthesis->pNext : NULL; + + CXXToken * pCaptureList = NULL; + + if(pParenthesis) + { + if(cxxTokenTypeIs(pParenthesis,CXXTokenTypeSquareParenthesisChain)) + { + // form (4) of lambda (see cxxParserOpeningBracketIsLambda()). + pCaptureList = pParenthesis; + } else if( + pParenthesis->pPrev && + cxxTokenTypeIs(pParenthesis->pPrev,CXXTokenTypeSquareParenthesisChain) + ) + { + // other forms of lambda (see cxxParserOpeningBracketIsLambda()). 
+ pCaptureList = pParenthesis->pPrev; + } + } + + if( + pAfterParenthesis && + cxxTokenTypeIs(pAfterParenthesis,CXXTokenTypeKeyword) && + (pAfterParenthesis->eKeyword == CXXKeywordCONST) + ) + pAfterParenthesis = pAfterParenthesis->pNext; + + CXXToken * pTypeStart = NULL; + CXXToken * pTypeEnd; + + if( + pAfterParenthesis && + cxxTokenTypeIs(pAfterParenthesis,CXXTokenTypePointerOperator) && + pAfterParenthesis->pNext && + !cxxTokenTypeIs(pAfterParenthesis->pNext,CXXTokenTypeOpeningBracket) + ) + { + pTypeStart = pAfterParenthesis->pNext; + pTypeEnd = pTypeStart; + while( + pTypeEnd->pNext && + (!cxxTokenTypeIs(pTypeEnd->pNext,CXXTokenTypeOpeningBracket)) + ) + pTypeEnd = pTypeEnd->pNext; + +#if 0 + while( + (pTypeStart != pTypeEnd) && + cxxTokenTypeIs(pTypeStart,CXXTokenTypeKeyword) && + cxxKeywordExcludeFromTypeNames(pTypeStart->eKeyword) + ) + pTypeStart = pTypeStart->pNext; +#endif + } + + int iCorkQueueIndex = CORK_NIL; + int iCorkQueueIndexFQ = CORK_NIL; + + if(tag) + { + tag->isFileScope = true; + + CXXToken * pTypeName; + + markTagExtraBit (tag, XTAG_ANONYMOUS); + + if(pTypeStart) + pTypeName = cxxTagCheckAndSetTypeField(pTypeStart,pTypeEnd); + else + pTypeName = NULL; + + if(pCaptureList && cxxTagFieldEnabled(CXXTagCPPFieldLambdaCaptureList)) + { + CXX_DEBUG_ASSERT(pCaptureList->pChain,"The capture list must be a chain"); + cxxTokenChainCondense(pCaptureList->pChain,0); + CXX_DEBUG_ASSERT( + cxxTokenChainFirst(pCaptureList->pChain), + "Condensation should have created a single token in the chain" + ); + cxxTagSetField( + CXXTagCPPFieldLambdaCaptureList, + vStringValue(cxxTokenChainFirst(pCaptureList->pChain)->pszWord), + false + ); + } + + // FIXME: Properties? 
+ + vString * pszSignature = NULL; + if(cxxTokenTypeIs(pParenthesis,CXXTokenTypeParenthesisChain)) + pszSignature = cxxTokenChainJoin(pParenthesis->pChain,NULL,0); + + if(pszSignature) + tag->extensionFields.signature = vStringValue(pszSignature); + + iCorkQueueIndex = cxxTagCommit(&iCorkQueueIndexFQ); + + if(pTypeName) + cxxTokenDestroy(pTypeName); + + if(pszSignature) + vStringDelete(pszSignature); + } + + cxxScopePush( + pIdentifier, + CXXScopeTypeFunction, + CXXScopeAccessUnknown + ); + + if( + pParenthesis && + cxxTokenTypeIs(pParenthesis,CXXTokenTypeParenthesisChain) && + cxxTagKindEnabled(CXXTagKindPARAMETER) + ) + { + CXXTypedVariableSet oParamInfo; + if(cxxParserTokenChainLooksLikeFunctionParameterList( + pParenthesis->pChain,&oParamInfo + )) + cxxParserEmitFunctionParameterTags(&oParamInfo); + } + + bool bRet = cxxParserParseBlock(true); + + if(iCorkQueueIndex > CORK_NIL) + { + cxxParserMarkEndLineForTagInCorkQueue(iCorkQueueIndex); + if(iCorkQueueIndexFQ > CORK_NIL) + cxxParserMarkEndLineForTagInCorkQueue(iCorkQueueIndexFQ); + } + + cxxScopePop(); + + pNew = g_cxx.pTokenChain; // May have been destroyed and re-created + + g_cxx.pTokenChain = pSave; + g_cxx.pToken = pSave->pTail; + + // change the type of token so following parsing code is not confused too much + g_cxx.pToken->eType = CXXTokenTypeAngleBracketChain; + g_cxx.pToken->pChain = pNew; + + cxxTokenChainClear(pNew); + + CXXToken * t = cxxTokenCreate(); + t->eType = CXXTokenTypeOpeningBracket; + vStringPut (t->pszWord, '{'); + cxxTokenChainAppend(pNew,t); + + t = cxxTokenCreate(); + t->eType = CXXTokenTypeClosingBracket; + vStringPut (t->pszWord, '}'); + cxxTokenChainAppend(pNew,t); + + CXX_DEBUG_LEAVE(); + return bRet; +} diff --git a/ctags/parsers/cxx/cxx_parser_namespace.c b/ctags/parsers/cxx/cxx_parser_namespace.c new file mode 100644 index 0000000000..0eb4bdcfd4 --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser_namespace.c @@ -0,0 +1,345 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* 
+* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_parser.h" +#include "cxx_parser_internal.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_scope.h" + +#include "parse.h" +#include "vstring.h" +#include "../cpreprocessor.h" +#include "debug.h" +#include "keyword.h" +#include "read.h" + + +#define MAX_NESTED_NAMESPACES 16 + + +bool cxxParserParseNamespace(void) +{ + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT(cxxParserCurrentLanguageIsCPP(),"This should be called only in C++"); + + /* + Spec is: + + namespace ns_name { declarations } (1) + inline namespace ns_name { declarations } (2) (since C++11) + namespace { declarations } (3) + namespace name = qualified-namespace ; (7) + namespace ns_name::name { (8) (since C++17) + + Note that the using clauses have their own parsing routine and do not end up here. 
+ */ + + // namespace { + // namespace :::: { + // namespace ::::; + // namespace ; + // namespace; + + unsigned int uProperties = 0; + + if(cxxTagFieldEnabled(CXXTagFieldProperties)) + { + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenInline) + uProperties |= CXXTagPropertyInline; + } + + cxxParserNewStatement(); // always a new statement + cppBeginStatement(); // but we're in the middle of it + + int iScopeCount = 0; + + int i; + + struct { int leafnm, fqnm; } aCorkQueueIndices[MAX_NESTED_NAMESPACES]; + for(i=0;ipszWord->buffer); + + CXXToken * pFirstIdentifier = g_cxx.pToken; + CXXToken * pLastIdentifier = g_cxx.pToken; + + if(!cxxParserParseNextToken()) + { + // syntax error, but we tolerate this + CXX_DEBUG_LEAVE_TEXT("EOF in cxxParserParseNextToken"); + return true; // EOF + } + + switch(g_cxx.pToken->eType) + { + case CXXTokenTypeAssignment: + { + // probably namespace alias + CXX_DEBUG_PRINT("Found assignment"); + + if(!cxxParserParseNextToken()) + { + // syntax error, but we tolerate this + CXX_DEBUG_LEAVE_TEXT("EOF in cxxParserParseNextToken"); + return true; // EOF + } + + if(!cxxTokenTypeIsOneOf( + g_cxx.pToken, + CXXTokenTypeIdentifier | CXXTokenTypeMultipleColons + )) + { + CXX_DEBUG_LEAVE_TEXT("Some kind of syntax error here"); + return cxxParserSkipToSemicolonOrEOF(); + } + + CXXToken * pAlias = pFirstIdentifier; + pFirstIdentifier = g_cxx.pToken; + + if(!cxxParserParseToEndOfQualifedName()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse the aliased name"); + return cxxParserSkipToSemicolonOrEOF(); + } + + pLastIdentifier = g_cxx.pToken->pPrev; + + tagEntryInfo * tag = cxxTagBegin(CXXTagCPPKindALIAS,pAlias); + + if(tag) + { + // This is highly questionable but well.. it's how old ctags did, so we do. 
+ tag->isFileScope = !isInputHeaderFile(); + + CXXToken * pAliasedName = cxxTokenChainExtractRange( + pFirstIdentifier, + pLastIdentifier, + CXXTokenChainExtractRangeNoTrailingSpaces + ); + + cxxTagSetField( + CXXTagCPPFieldAliasedName, + vStringValue(pAliasedName->pszWord), + false + ); + + cxxTagCommit(NULL); + + cxxTokenDestroy(pAliasedName); + } + + CXX_DEBUG_LEAVE_TEXT("Finished parsing namespace alias"); + return cxxParserSkipToSemicolonOrEOF(); + } + break; + case CXXTokenTypeMultipleColons: + // multi-namespace + CXX_DEBUG_PRINT("Found multiple colons"); + + if(!cxxParserParseToEndOfQualifedName()) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse the namespace name"); + return cxxParserSkipToSemicolonOrEOF(); + } + + pLastIdentifier = g_cxx.pToken->pPrev; + + CXX_DEBUG_ASSERT( + pFirstIdentifier != pLastIdentifier, + "We expected multiple identifiers here" + ); + + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningBracket)) + { + if(!cxxParserParseUpToOneOf( + CXXTokenTypeOpeningBracket | CXXTokenTypeSemicolon | CXXTokenTypeEOF, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse up to an opening bracket"); + return false; + } + + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningBracket)) + { + // tolerate syntax error + CXX_DEBUG_LEAVE_TEXT("Found semicolon/EOF just after namespace declaration"); + return true; + } + } + break; + case CXXTokenTypeOpeningBracket: + // single name namespace + CXX_DEBUG_PRINT("Found opening bracket"); + break; + case CXXTokenTypeSemicolon: + // tolerate syntax error + CXX_DEBUG_LEAVE_TEXT("Found semicolon just after namespace declaration"); + return true; + break; + case CXXTokenTypeIdentifier: + // Probably some kind of macro + if(!cxxParserParseUpToOneOf( + CXXTokenTypeOpeningBracket | CXXTokenTypeSemicolon | CXXTokenTypeEOF, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse up to an opening bracket"); + return false; + } + + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningBracket)) + { + // tolerate syntax 
error + CXX_DEBUG_LEAVE_TEXT("Found semicolon/EOF just after namespace declaration"); + return true; + } + break; + default: + CXX_DEBUG_LEAVE_TEXT("Some kind of syntax error here"); + return cxxParserSkipToSemicolonOrEOF(); + break; + } + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningBracket), + "Should have an opening bracket here!" + ); + + CXX_DEBUG_PRINT("Found regular namespace start"); + + CXXToken * t = pFirstIdentifier; + + while(t) + { + tagEntryInfo * tag = cxxTagBegin(CXXTagCPPKindNAMESPACE,t); + + if(tag) + { + // This is highly questionable but well.. it's how old ctags did, so we do. + tag->isFileScope = !isInputHeaderFile(); + + vString * pszProperties = uProperties ? cxxTagSetProperties(uProperties) : NULL; + + int iCorkQueueIndexFQ; + int iCorkQueueIndex = cxxTagCommit(&iCorkQueueIndexFQ); + if(iScopeCount < MAX_NESTED_NAMESPACES) + { + aCorkQueueIndices[iScopeCount].leafnm = iCorkQueueIndex; + aCorkQueueIndices[iScopeCount].fqnm = iCorkQueueIndexFQ; + } + + if(pszProperties) + vStringDelete(pszProperties); + } + + CXXToken * pNext = (t == pLastIdentifier) ? NULL : t->pNext->pNext; + + cxxTokenChainTake(g_cxx.pTokenChain,t); + + cxxScopePush( + t, + CXXScopeTypeNamespace, + CXXScopeAccessUnknown + ); + + iScopeCount++; + + t = pNext; + } + + } else if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningBracket)) + { + // anonymous namespace + CXX_DEBUG_PRINT("Found anonymous namespace start"); + + CXXToken * t = cxxTokenCreateAnonymousIdentifier(CXXTagCPPKindNAMESPACE); + tagEntryInfo * tag = cxxTagBegin(CXXTagCPPKindNAMESPACE,t); + if(tag) + { + tag->isFileScope = !isInputHeaderFile(); + + markTagExtraBit (tag, XTAG_ANONYMOUS); + + vString * pszProperties = uProperties ? 
cxxTagSetProperties(uProperties) : NULL; + + int iCorkQueueIndexFQ; + aCorkQueueIndices[0].leafnm = cxxTagCommit(&iCorkQueueIndexFQ); + aCorkQueueIndices[0].fqnm = iCorkQueueIndexFQ; + + if(pszProperties) + vStringDelete(pszProperties); + } + cxxScopePush(t,CXXScopeTypeNamespace,CXXScopeAccessUnknown); + + iScopeCount++; + + } else { + + CXX_DEBUG_LEAVE_TEXT("Some kind of syntax error after namespace declaration"); + return cxxParserSkipToSemicolonOrEOF(); + } + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningBracket), + "Should have an opening bracket here!" + ); + + if(!g_cxx.bConfirmedCPPLanguage) + { + CXX_DEBUG_PRINT( + "Succeeded in parsing a C++ namespace: this really seems to be C++" + ); + g_cxx.bConfirmedCPPLanguage = true; + } + + // Here we certainly got an opening bracket: namespace block + + if(!cxxParserParseBlock(true)) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse namespace block"); + return false; + } + + while(iScopeCount > 0) + { + cxxScopePop(); + iScopeCount--; + + if(iScopeCount < MAX_NESTED_NAMESPACES) + { + if(aCorkQueueIndices[iScopeCount].leafnm > CORK_NIL) + cxxParserMarkEndLineForTagInCorkQueue(aCorkQueueIndices[iScopeCount].leafnm); + if(aCorkQueueIndices[iScopeCount].fqnm > CORK_NIL) + cxxParserMarkEndLineForTagInCorkQueue(aCorkQueueIndices[iScopeCount].fqnm); + } + } + + CXX_DEBUG_LEAVE_TEXT("Finished parsing namespace"); + return true; +} diff --git a/ctags/parsers/cxx/cxx_parser_template.c b/ctags/parsers/cxx/cxx_parser_template.c new file mode 100644 index 0000000000..02ecbca2a2 --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser_template.c @@ -0,0 +1,858 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_parser.h" +#include "cxx_parser_internal.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_scope.h" + +#include "parse.h" +#include "vstring.h" +#include "../cpreprocessor.h" +#include "debug.h" +#include "keyword.h" +#include "read.h" +#include "strlist.h" + +#include + +typedef enum _CXXParserParseTemplateAngleBracketsResult +{ + // Succeeded parsing the template angle bracket, everything looks fine + CXXParserParseTemplateAngleBracketsSucceeded, + // Succeeded, but the parsing was unbalanced and was terminated by an 'unexpected' condition + // that detected the end of the template. If this function has been called + // from an upper template parsing level, the caller should exit too. + CXXParserParseTemplateAngleBracketsFinishedPrematurely, + // Failed miserably, continuing parsing is not possible + CXXParserParseTemplateAngleBracketsFailed, + // Failed miserably, but it may be possible to continue parsing + CXXParserParseTemplateAngleBracketsFailedRecoverable +} CXXParserParseTemplateAngleBracketsResult; + + +static bool cxxTemplateTokenCheckIsNonTypeAndCompareWord(const CXXToken *t,void * szWord) +{ + // To be non type the token must NOT be preceded by class/struct/union + if(!t->pPrev) + return false; + if(cxxTokenTypeIs(t->pPrev,CXXTokenTypeKeyword)) + { + if(cxxKeywordIsTypeRefMarker(t->pPrev->eKeyword)) + return false; // preceded by a type ref marker + + // otherwise it's probably something like "int" + } + const char * w = (const char *)szWord; + return strcmp(vStringValue(t->pszWord),w) == 0; +} + +static bool cxxTokenIsPresentInTemplateParametersAsNonType(CXXToken * t) +{ + CXX_DEBUG_ASSERT( + cxxTokenTypeIsOneOf(t,CXXTokenTypeIdentifier), + "Token must be identifier" + ); + + for(unsigned int u=0;upszWord) + ) + ) + return true; + } + + return false; +} + +static bool 
cxxTemplateTokenCheckIsTypeAndCompareWord(const CXXToken * t,void * szWord) +{ + // To be a type the token must be preceded by class/struct/union + if(!t->pPrev) + return false; + if(!cxxTokenTypeIs(t->pPrev,CXXTokenTypeKeyword)) + return false; + if(!cxxKeywordIsTypeRefMarker(t->pPrev->eKeyword)) + return false; + const char * w = (const char *)szWord; + return strcmp(vStringValue(t->pszWord),w) == 0; +} + +static bool cxxTokenIsPresentInTemplateParametersAsType(CXXToken * t) +{ + CXX_DEBUG_ASSERT( + cxxTokenTypeIsOneOf(t,CXXTokenTypeIdentifier), + "Token must be identifier" + ); + + for(unsigned int u=0;upszWord) + ) + ) + return true; + } + + return false; +} + +// Attempt to capture a template parameter that is between the +// specified tokens. pParameterStart points to the first token +// of the parameter (just after < or ,). pAfterParameter points +// somewhere after the end of the parameter, usually to the next +// , or > but it may happen that it points to an unbalanced > +// because of broken input or because we screwed up parsing a bit. 
+static void cxxParserParseTemplateAngleBracketsCaptureTypeParameter( + CXXToken * pParameterStart, + CXXToken * pAfterParameter + ) +{ + CXX_DEBUG_ENTER(); + + if(g_cxx.oTemplateParameters.uCount >= CXX_TYPED_VARIABLE_SET_ITEM_COUNT) + { + CXX_DEBUG_LEAVE_TEXT("No space for more parameters"); + return; + } + + CXX_DEBUG_ASSERT( + pParameterStart && + pParameterStart->pPrev && + cxxTokenTypeIsOneOf( + pParameterStart->pPrev, + CXXTokenTypeSmallerThanSign | CXXTokenTypeComma + ), + "pParameterStart should point to the parameter start" + ); + + CXX_DEBUG_ASSERT( + pAfterParameter && + pAfterParameter->pPrev && + cxxTokenTypeIsOneOf( + pAfterParameter, + CXXTokenTypeGreaterThanSign | CXXTokenTypeComma + ), + "pAfterParameter should point after the parameter" + ); + + CXX_DEBUG_ASSERT( + pParameterStart != pAfterParameter, + "The tokens should not be the same" + ); + + // We're OK with: + // + // typename X + // class X + // int X + // unsigned int X + // typeName * X + // typeName X + // typeName ...X + // + // but not + // + // typename boost::enable_if... + // + + if(pParameterStart->pNext == g_cxx.pToken) + { + // Only one token in the parameter. Can't be. + CXX_DEBUG_LEAVE_TEXT("Single parameter token"); + return; + } + + // Strategy: run to the first , > or =. 
+ + CXXToken * t = pParameterStart; + + for(;;) + { + CXX_DEBUG_PRINT( + "Token '%s' [%s]", + vStringValue(t->pszWord), + cxxDebugTypeDecode(t->eType) + ); + + if(cxxTokenTypeIsOneOf( + t, + CXXTokenTypeComma | CXXTokenTypeGreaterThanSign | + CXXTokenTypeAssignment + )) + { + CXX_DEBUG_PRINT("Found terminator, stopping"); + break; + } + + if(!(( + cxxTokenTypeIs(t,CXXTokenTypeKeyword) && + cxxKeywordMayBePartOfTypeName(t->eKeyword) + ) || ( + cxxTokenTypeIsOneOf( + t, + CXXTokenTypeIdentifier | CXXTokenTypeStar | + CXXTokenTypeAnd | CXXTokenTypeMultipleAnds | + CXXTokenTypeMultipleDots + ) + ))) + { + // something we don't like + CXX_DEBUG_LEAVE_TEXT("This is something we don't like"); + return; + } + + t = t->pNext; + } + + if(!cxxTokenTypeIs(t->pPrev,CXXTokenTypeIdentifier)) + { + CXX_DEBUG_LEAVE_TEXT("The previous token is not an identifier"); + return; // bad + } + + CXX_DEBUG_PRINT( + "Adding %s to template parameters", + vStringValue(t->pPrev->pszWord) + ); + + unsigned int c = g_cxx.oTemplateParameters.uCount; + + g_cxx.oTemplateParameters.aIdentifiers[c] = t->pPrev; + g_cxx.oTemplateParameters.aTypeStarts[c] = pParameterStart; + g_cxx.oTemplateParameters.aTypeEnds[c] = t->pPrev->pPrev; + + g_cxx.oTemplateParameters.uCount++; + + CXX_DEBUG_LEAVE(); +} + + +// +// Parses the part of a template specification. +// Here we are pointing at the initial <. +// +static CXXParserParseTemplateAngleBracketsResult +cxxParserParseTemplateAngleBracketsInternal(bool bCaptureTypeParameters,int iNestedTemplateLevel) +{ + CXX_DEBUG_ENTER(); + + // Here we have the big problem of <> characters which may be + // template argument delimiters, less than/greater than operators, + // shift left/right operators. + // + // A well written code will have parentheses around all the ambiguous cases. + // We handle that and we permit any kind of syntax inside a parenthesis. 
+ // + // Without parentheses we still try to handle the << and >> shift operator cases: + // - << is always recognized as shift operator + // - >> is recognized as shift unless it's non-nested. This is what C++11 + // spec says and theoretically it should be also pseudo-compatible with C++03 + // which treats this case as a syntax error. + // + // The 'less-than' and 'greater-than' operators are hopeless in the general + // case: gcc is smart enough to figure them out by looking at the identifiers + // around but without proper state (include headers, macro expansion, full type + // database etc) we simply can't do the same. However, we try to recover if we + // figure out we (or the programmer?) screwed up. + // + // + // Like gcc, if this function knows identifiers in a template prefix more, + // the quality of parsing becomes better. + // Introducing `pslTypeParams` is the first step to know identifiers. + // pslTypeParams list keeps all type parameters introduced in templated prefixes. + // When deciding whether ">>" is an end marker of template prefixes or a shift + // operator, this function looks up pslTypeParams list. + // If this function can find A of "A >>" in the list, we can say ">>" is not an + // operator. + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(cxxTokenChainLast(g_cxx.pTokenChain),CXXTokenTypeSmallerThanSign), + "We should be pointing at the opening angle bracket here" + ); + + int iNestedAngleBracketLevel = 1; + + // This points to the token before the current parameter start: < or a comma. + CXXToken * pBeforeParameterStart = g_cxx.pToken; + + for(;;) + { + // Within parentheses everything is permitted. 
+ if(!cxxParserParseAndCondenseSubchainsUpToOneOf( + CXXTokenTypeGreaterThanSign | CXXTokenTypeSmallerThanSign | + CXXTokenTypeOpeningBracket | CXXTokenTypeSemicolon | + CXXTokenTypeComma | CXXTokenTypeEOF | CXXTokenTypeKeyword, + CXXTokenTypeOpeningParenthesis | + CXXTokenTypeOpeningSquareParenthesis, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse up to '<>{EOF'"); + return CXXParserParseTemplateAngleBracketsFailed; + } + + // note that g_cxx.pToken->pPrev here is always non null. + + switch(g_cxx.pToken->eType) + { + case CXXTokenTypeComma: + if( + bCaptureTypeParameters && + (iNestedTemplateLevel == 0) && + (iNestedAngleBracketLevel == 1) && + (pBeforeParameterStart->pNext != g_cxx.pToken) + ) + cxxParserParseTemplateAngleBracketsCaptureTypeParameter( + pBeforeParameterStart->pNext, + g_cxx.pToken + ); + + if(iNestedAngleBracketLevel == 1) + pBeforeParameterStart = g_cxx.pToken; + break; + case CXXTokenTypeSmallerThanSign: + { + // blah < + + CXXToken * pSmallerThan = g_cxx.pToken; + + if(!cxxParserParseNextToken()) + { + CXX_DEBUG_LEAVE_TEXT("Syntax error, but tolerate it at this level"); + return CXXParserParseTemplateAngleBracketsFailedRecoverable; + } + + CXX_DEBUG_PRINT( + "< followed by token '%s' of type 0x%02x (%s)", + vStringValue(g_cxx.pToken->pszWord), + g_cxx.pToken->eType, + cxxDebugTypeDecode(g_cxx.pToken->eType) + ); + + // Check less than operator for the special conditions + // we can be sure of: + // ... 1 < whatever ... + // ... typeParam < 1 ... where ident is a type parameter + // The other cases can't be handled safely. We expect the user + // to use parentheses. + if( + // ... 1 < whatever ... + cxxTokenTypeIs(pSmallerThan->pPrev,CXXTokenTypeNumber) || + // ... nonTypeParam < whatever ... 
+ cxxTokenIsPresentInTemplateParametersAsNonType(pSmallerThan->pPrev) + ) + { + CXX_DEBUG_PRINT("Treating < as less-than operator"); + + } else { + + CXX_DEBUG_PRINT("Increasing angle bracket level by one"); + iNestedAngleBracketLevel++; + } + + if(cxxTokenTypeIsOneOf( + g_cxx.pToken, + CXXTokenTypeOpeningParenthesis | + CXXTokenTypeOpeningSquareParenthesis + )) + { + // would need to be condensed: unget and try again above + cxxParserUngetCurrentToken(); + } + + // anything else is OK + } + break; + case CXXTokenTypeGreaterThanSign: + { + // > : is it a part of a shift operator? + + bool bFirstFollowedBySpace = g_cxx.pToken->bFollowedBySpace; + + int iGreaterThanCount = 1; + + // Here we skip all of the greater than signs we find + // up to the number we need to exit the template, but at least three. + // The minimum of three means that in the following loop we skip + // at least two greater than signs AND at least another token. + + int iMaxGreaterThanCount = iNestedAngleBracketLevel; + if(iMaxGreaterThanCount < 3) + iMaxGreaterThanCount = 3; + + while(iGreaterThanCount < iMaxGreaterThanCount) + { + if(!cxxParserParseNextToken()) + { + CXX_DEBUG_LEAVE_TEXT("Syntax error, but tolerate it at this level"); + return CXXParserParseTemplateAngleBracketsFailedRecoverable; + } + + CXX_DEBUG_PRINT( + "> followed by token '%s' of type 0x%02x (%s)", + vStringValue(g_cxx.pToken->pszWord), + g_cxx.pToken->eType, + cxxDebugTypeDecode(g_cxx.pToken->eType) + ); + + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeGreaterThanSign)) + break; + + iGreaterThanCount++; + } + + CXX_DEBUG_PRINT("Found %d greater-than signs",iGreaterThanCount); + + // check greater than operator: very narrow conditions + if( + (iGreaterThanCount == 1) && + ( + // whatever op 2 [C++03 allows this without parens] + // whatever op (...) 
[C++03 allows this without parens] + cxxTokenTypeIsOneOf( + g_cxx.pToken, + CXXTokenTypeNumber | CXXTokenTypeOpeningParenthesis + ) || + // whatever op nonTypeParameter [C++03 allows this without parens] + ( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeIdentifier) && + cxxTokenIsPresentInTemplateParametersAsNonType(g_cxx.pToken) + ) + // WARNING: don't be tempted to add a loose condition that has + // (!cxxTokenIsPresentInTemplateParametersAsType()) on the right. + // It's unsafe. + ) + ) + { + CXX_DEBUG_PRINT("Treating as greater-than sign"); + + if(cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeOpeningParenthesis)) + cxxParserUngetCurrentToken(); // needs to be condensed + + continue; + } + + + // check right shift operator: a bit broader conditions + if( + ( + (!bFirstFollowedBySpace) && + (iGreaterThanCount == 2) + ) && ( + // whatever op 2 [C++03 allows this without parens] + // whatever op (...) [C++03 allows this without parens] + cxxTokenTypeIsOneOf( + g_cxx.pToken, + CXXTokenTypeNumber | CXXTokenTypeOpeningParenthesis + ) || + // whatever op nonTypeParameter [C++03 allows this without parens] + ( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeIdentifier) && + cxxTokenIsPresentInTemplateParametersAsNonType(g_cxx.pToken) + ) || + // a broader condition that kind-of-works at top level + ( + // topmost template nesting level + (iNestedTemplateLevel == 0) && + // Only one level of angle brackets. + // This means that: + // - >> has one angle bracket too much to exit the template + // - we have screwed up the parsing of an opening angle bracket + // In the first case it's very likely that we're over a shift + // operator. In the other case we're screwed anyway. 
+ (iNestedAngleBracketLevel == 1) && + // identifier on the right that is not clearly identified as type + ( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeIdentifier) && + (!cxxTokenIsPresentInTemplateParametersAsType(g_cxx.pToken)) + ) + ) + ) + ) + { + CXX_DEBUG_PRINT("Treating as right shift operator"); + if(cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeOpeningParenthesis)) + cxxParserUngetCurrentToken(); // needs to be condensed + + continue; + } + + if(!cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeGreaterThanSign)) + { + // The loop above stopped because of a non > token. + CXX_DEBUG_ASSERT(iGreaterThanCount < iMaxGreaterThanCount,"Bug"); + + // Handle gracefully some special cases + if( + cxxTokenIsNonConstantKeyword(g_cxx.pToken) || + ( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeIdentifier) && + cxxTokenIsPresentInTemplateParametersAsType(g_cxx.pToken) + ) + ) + { + // We found something like + // ... > void ... + // ... > static ... + // ... > typeParameter ... + // The part on the right of > does not seem to be a constant + // so this is not a comparison. + CXX_DEBUG_PRINT( + "Found '> %s': assuming end of template", + vStringValue(g_cxx.pToken->pszWord) + ); + + cxxParserUngetCurrentToken(); + + if( + bCaptureTypeParameters && + (iNestedTemplateLevel == 0) && + (pBeforeParameterStart->pNext != g_cxx.pToken) + ) + cxxParserParseTemplateAngleBracketsCaptureTypeParameter( + pBeforeParameterStart->pNext, + g_cxx.pToken + ); + + if(iGreaterThanCount > iNestedAngleBracketLevel) + { + // Most likely explanation: + // We screwed up the parsing of the template. + // However we can still attempt to emit a symbol here. 
+ CXX_DEBUG_LEAVE_TEXT("Found (broken) end of template"); + return CXXParserParseTemplateAngleBracketsFinishedPrematurely; + } + + CXX_DEBUG_LEAVE_TEXT("Found end of template"); + return CXXParserParseTemplateAngleBracketsSucceeded; + } + + cxxParserUngetCurrentToken(); + } + + while(iGreaterThanCount > iNestedAngleBracketLevel) + { + CXX_DEBUG_PRINT("Going back one >"); + iGreaterThanCount--; + cxxParserUngetCurrentToken(); + } + + CXX_DEBUG_PRINT("Decreasing angle bracket level by %d",iGreaterThanCount); + iNestedAngleBracketLevel -= iGreaterThanCount; + + if(iNestedAngleBracketLevel == 0) + { + if( + bCaptureTypeParameters && + (iNestedTemplateLevel == 0) && + (pBeforeParameterStart->pNext != g_cxx.pToken) + ) + cxxParserParseTemplateAngleBracketsCaptureTypeParameter( + pBeforeParameterStart->pNext, + g_cxx.pToken + ); + + CXX_DEBUG_LEAVE_TEXT("Found end of template"); + return CXXParserParseTemplateAngleBracketsSucceeded; + } + } + break; + case CXXTokenTypeKeyword: + + if(cxxTokenIsKeyword(g_cxx.pToken,CXXKeywordTEMPLATE)) + { + CXX_DEBUG_PRINT("Found nested template keyword"); + + // nested nastiness. + if(!cxxParserParseNextToken()) + { + CXX_DEBUG_LEAVE_TEXT("Syntax error, but tolerate it at this level"); + return CXXParserParseTemplateAngleBracketsFailedRecoverable; + } + + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSmallerThanSign)) + { + if(!cxxTokenTypeIs(g_cxx.pToken->pPrev->pPrev,CXXTokenTypeMultipleColons)) + { + // aaargh... + CXX_DEBUG_PRINT( + "Found unexpected token '%s' of type 0x%02x", + vStringValue(g_cxx.pToken->pszWord), + g_cxx.pToken->eType + ); + + CXX_DEBUG_LEAVE_TEXT("No smaller than sign after template keyword"); + return CXXParserParseTemplateAngleBracketsFailed; + } + + // + // Possibly X::template Something disambiguation syntax. 
+ // See https://en.cppreference.com/w/cpp/language/dependent_name + // + CXX_DEBUG_PRINT("But it's not followed by a < and has leading ::"); + continue; + } + + switch( + cxxParserParseTemplateAngleBracketsInternal( + false, + iNestedTemplateLevel+1 + ) + ) + { + case CXXParserParseTemplateAngleBracketsFailed: + CXX_DEBUG_LEAVE_TEXT("Nested template parsing failed"); + return CXXParserParseTemplateAngleBracketsFailed; + break; + case CXXParserParseTemplateAngleBracketsFailedRecoverable: + CXX_DEBUG_LEAVE_TEXT("Nested template parsing recovered"); + return CXXParserParseTemplateAngleBracketsFailedRecoverable; + break; + case CXXParserParseTemplateAngleBracketsFinishedPrematurely: + CXX_DEBUG_LEAVE_TEXT("Nested template finished prematurely"); + return CXXParserParseTemplateAngleBracketsFinishedPrematurely; + break; + case CXXParserParseTemplateAngleBracketsSucceeded: + // ok + CXX_DEBUG_PRINT("Nested template parsing succeeded"); + break; + default: + CXX_DEBUG_ASSERT(false,"Should never end up here"); + return CXXParserParseTemplateAngleBracketsFailed; + break; + } + + continue; + } + // other keyword + break; + case CXXTokenTypeEOF: + CXX_DEBUG_LEAVE_TEXT("Syntax error, but tolerate it at this level"); + return CXXParserParseTemplateAngleBracketsFailedRecoverable; + break; + case CXXTokenTypeSemicolon: + cxxParserNewStatement(); + CXX_DEBUG_LEAVE_TEXT("Broken template arguments, attempting to continue"); + return CXXParserParseTemplateAngleBracketsFailedRecoverable; + break; + case CXXTokenTypeOpeningBracket: + CXX_DEBUG_PRINT( + "Found opening bracket: either syntax error or we screwed up parsing " \ + "the template parameters (some kind of ugly C++11 syntax?), " \ + "but we try to recover..." + ); + // skip the whole bracketed part. 
+ if(!cxxParserParseUpToOneOf(CXXTokenTypeClosingBracket | CXXTokenTypeEOF, false)) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse up to '}EOF'"); + return CXXParserParseTemplateAngleBracketsFailed; + } + cxxParserNewStatement(); + CXX_DEBUG_LEAVE_TEXT("Broken template arguments recovery complete"); + return CXXParserParseTemplateAngleBracketsFailedRecoverable; + break; + default: + CXX_DEBUG_ASSERT(false,"Found unexpected token type 0x%02x",g_cxx.pToken->eType); + CXX_DEBUG_LEAVE_TEXT("Found unexpected token type 0x%02x",g_cxx.pToken->eType); + return CXXParserParseTemplateAngleBracketsFailed; + break; + } + } + + // never reached + CXX_DEBUG_LEAVE_TEXT("This should be never reached!"); + return CXXParserParseTemplateAngleBracketsFailed; +} + +// +// Parses the part of a template specification. +// Here we are pointing at the initial <. +// +static bool cxxParserParseTemplateAngleBrackets(bool bCaptureTypeParameters) +{ + CXX_DEBUG_ENTER(); + + CXXParserParseTemplateAngleBracketsResult r; + r = cxxParserParseTemplateAngleBracketsInternal(bCaptureTypeParameters,0); + + switch(r) + { + case CXXParserParseTemplateAngleBracketsFailed: + CXX_DEBUG_LEAVE(); + return false; + break; + // TODO: We could signal failure+recovery to upper levels + // so the caller could take recovery actions too. 
+ //case CXXParserParseTemplateAngleBracketsFailedRecoverable: + //case CXXParserParseTemplateAngleBracketsFinishedPrematurely: + //case CXXParserParseTemplateAngleBracketsSucceeded: + default: + CXX_DEBUG_LEAVE(); + return true; + break; + } + CXX_DEBUG_ASSERT(false,"Never here"); +} + +CXXTokenChain * cxxParserParseTemplateAngleBracketsToSeparateChain(bool bCaptureTypeParameters) +{ + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT(cxxParserCurrentLanguageIsCPP(),"This should be called only in C++"); + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(cxxTokenChainLast(g_cxx.pTokenChain),CXXTokenTypeSmallerThanSign), + "We should be pointing at the opening angle bracket here" + ); + + CXXTokenChain * pOut = cxxTokenChainCreate(); + cxxTokenChainAppend(pOut,cxxTokenChainTakeLast(g_cxx.pTokenChain)); + + CXXTokenChain * pSave = g_cxx.pTokenChain; + g_cxx.pTokenChain = pOut; + + bool bRet = cxxParserParseTemplateAngleBrackets(bCaptureTypeParameters); + + g_cxx.pTokenChain = pSave; + + if(!bRet) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse angle brackets"); + cxxTokenChainDestroy(pOut); + return NULL; + } + + CXX_DEBUG_LEAVE(); + return pOut; +} + +// +// Parses the template angle brackets and puts them in g_cxx.pTemplateTokenChain. +// Also captures the template type parameters in g_cxx.oTemplateParameters. 
+// +bool cxxParserParseTemplateAngleBracketsToTemplateChain(void) +{ + CXX_DEBUG_ENTER(); + + g_cxx.oTemplateParameters.uCount = 0; + + CXXTokenChain * pOut = cxxParserParseTemplateAngleBracketsToSeparateChain(true); + + if(!pOut) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse angle brackets"); + return false; + } + + if(g_cxx.pTemplateTokenChain) + cxxTokenChainDestroy(g_cxx.pTemplateTokenChain); + + g_cxx.pTemplateTokenChain = pOut; + + g_cxx.oTemplateParameters.pChain = pOut; + + // make sure we have no stale specializations + // (note that specializations always come AFTER the main template) + if(g_cxx.pTemplateSpecializationTokenChain) + { + cxxTokenChainDestroy(g_cxx.pTemplateSpecializationTokenChain); + g_cxx.pTemplateSpecializationTokenChain = NULL; + } + + CXX_DEBUG_LEAVE(); + return true; +} + +// +// Parses a template prefix. +// The parsed template parameter definition is stored in a separate token chain. +// +bool cxxParserParseTemplatePrefix(void) +{ + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT(cxxParserCurrentLanguageIsCPP(),"This should be called only in C++"); + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(cxxTokenChainLast(g_cxx.pTokenChain),CXXTokenTypeKeyword), + "We should be pointing at the template keyword here" + ); + + cxxTokenChainDestroyLast(g_cxx.pTokenChain); // kill the template keyword + + if(!cxxParserParseUpToOneOf( + CXXTokenTypeSmallerThanSign | CXXTokenTypeEOF | CXXTokenTypeSemicolon, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse up to the < sign"); + return false; + } + + if(cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeEOF | CXXTokenTypeSemicolon)) + { + CXX_DEBUG_LEAVE_TEXT("Found EOF or semicolon: assuming this is unparseable"); + cxxParserNewStatement(); + return true; // tolerate syntax error + } + + bool bRet = cxxParserParseTemplateAngleBracketsToTemplateChain(); + + CXX_DEBUG_LEAVE(); + return bRet; +} + +void cxxParserEmitTemplateParameterTags(void) +{ + CXX_DEBUG_ASSERT( + g_cxx.pTemplateTokenChain && + 
(g_cxx.pTemplateTokenChain->iCount > 0) && + cxxParserCurrentLanguageIsCPP() && + cxxTagKindEnabled(CXXTagCPPKindTEMPLATEPARAM), + "Template existence must be checked before calling this function" + ); + + unsigned int c = g_cxx.oTemplateParameters.uCount; + + for(unsigned int i=0;ipszWord) + ); + + tagEntryInfo * tag = cxxTagBegin( + CXXTagCPPKindTEMPLATEPARAM, + g_cxx.oTemplateParameters.aIdentifiers[i] + ); + + if(!tag) + continue; + + tag->extensionFields.nth = (short)i; + + CXXToken * pTypeToken = cxxTagCheckAndSetTypeField( + g_cxx.oTemplateParameters.aTypeStarts[i], + g_cxx.oTemplateParameters.aTypeEnds[i] + ); + + cxxTagCommit(NULL); + if (pTypeToken) + cxxTokenDestroy(pTypeToken); + } +} diff --git a/ctags/parsers/cxx/cxx_parser_tokenizer.c b/ctags/parsers/cxx/cxx_parser_tokenizer.c new file mode 100644 index 0000000000..056537d447 --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser_tokenizer.c @@ -0,0 +1,1689 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_parser.h" +#include "cxx_parser_internal.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" + +#include "parse.h" +#include "vstring.h" +#include "../cpreprocessor.h" +#include "debug.h" +#include "keyword.h" +#include "read.h" +#include "options.h" + +#include + +#define UINFO(c) (((c) < 0x80 && (c) >= 0) ? 
g_aCharTable[c].uType : 0) + +static void cxxParserSkipToNonWhiteSpace(void) +{ + while(cppIsspace(g_cxx.iChar)) + g_cxx.iChar = cppGetc(); +} + +enum CXXCharType +{ + // Start of an identifier a-z A-Z _ and ~ since + // it's part of the destructor name + CXXCharTypeStartOfIdentifier = 1, + // Part of identifier a-z a-Z 0-9 _ + CXXCharTypePartOfIdentifier = (1 << 1), + // A decimal digit + CXXCharTypeDecimalDigit = (1 << 2), + // A hexadecimal digit + CXXCharTypeHexadecimalDigit = (1 << 3), + // Hex digits x X u U l L and . + CXXCharTypeValidInNumber = (1 << 4), + // A named single char token. + CXXCharTypeNamedSingleCharToken = (1 << 5), + // A named single or repeated char token. + CXXCharTypeNamedSingleOrRepeatedCharToken = (1 << 6), + // An operator (we merge them) + CXXCharTypeOperator = (1 << 7), + // Full custom handling. Mostly operators or brackets. + CXXCharTypeCustomHandling = (1 << 8) +}; + +typedef struct _CXXCharTypeData +{ + unsigned int uType; + unsigned int uSingleTokenType; + unsigned int uMultiTokenType; +} CXXCharTypeData; + + +static CXXCharTypeData g_aCharTable[128] = +{ + // 000 (0x00) NUL + { + 0, + 0, + 0 + }, + // 001 (0x01) SOH + { + 0, + 0, + 0 + }, + // 002 (0x02) STX + { + 0, + 0, + 0 + }, + // 003 (0x03) ETX + { + 0, + 0, + 0 + }, + // 004 (0x04) EOT + { + 0, + 0, + 0 + }, + // 005 (0x05) ENQ + { + 0, + 0, + 0 + }, + // 006 (0x06) ACK + { + 0, + 0, + 0 + }, + // 007 (0x07) BEL + { + 0, + 0, + 0 + }, + // 008 (0x08) BS + { + 0, + 0, + 0 + }, + // 009 (0x09) '\t' HT + { + 0, + 0, + 0 + }, + // 010 (0x0a) '\n' LF + { + 0, + 0, + 0 + }, + // 011 (0x0b) '\v' VT + { + 0, + 0, + 0 + }, + // 012 (0x0c) FF + { + 0, + 0, + 0 + }, + // 013 (0x0d) '\r' CR + { + 0, + 0, + 0 + }, + // 014 (0x0e) 'SO' + { + 0, + 0, + 0 + }, + // 015 (0x0f) 'SI' + { + 0, + 0, + 0 + }, + // 016 (0x10) DLE + { + 0, + 0, + 0 + }, + // 017 (0x11) DC1 + { + 0, + 0, + 0 + }, + // 018 (0x12) DC2 + { + 0, + 0, + 0 + }, + // 019 (0x13) DC3 + { + 0, + 0, + 0 + }, + // 020 
(0x14) DC4 + { + 0, + 0, + 0 + }, + // 021 (0x15) NAK + { + 0, + 0, + 0 + }, + // 022 (0x16) SYN + { + 0, + 0, + 0 + }, + // 023 (0x17) ETB + { + 0, + 0, + 0 + }, + // 024 (0x18) CAN + { + 0, + 0, + 0 + }, + // 025 (0x19) EM + { + 0, + 0, + 0 + }, + // 026 (0x1a) SUB + { + 0, + 0, + 0 + }, + // 027 (0x1b) ESC + { + 0, + 0, + 0 + }, + // 028 (0x1c) FS + { + 0, + 0, + 0 + }, + // 029 (0x1d) GS + { + 0, + 0, + 0 + }, + // 030 (0x1e) RS + { + 0, + 0, + 0 + }, + // 031 (0x1f) US + { + 0, + 0, + 0 + }, + // 032 (0x20) ' ' + { + 0, + 0, + 0 + }, + // 033 (0x21) '!' + { + CXXCharTypeOperator, + 0 , + 0 + }, + // 034 (0x22) '"' + { + 0, + 0, + 0 + }, + // 035 (0x23) '#' + { + 0, + 0, + 0 + }, + // 036 (0x24) '$' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0, + 0 + }, + // 037 (0x25) '%' + { + CXXCharTypeOperator, + 0 , + 0 + }, + // 038 (0x26) '&' + { + CXXCharTypeNamedSingleOrRepeatedCharToken, + CXXTokenTypeAnd, + CXXTokenTypeMultipleAnds + }, + // 039 (0x27) ''' + { + 0, + 0, + 0 + }, + // 040 (0x28) '(' + { + CXXCharTypeNamedSingleCharToken, + CXXTokenTypeOpeningParenthesis, + 0 + }, + // 041 (0x29) ')' + { + CXXCharTypeNamedSingleCharToken, + CXXTokenTypeClosingParenthesis, + 0 + }, + // 042 (0x2a) '*' + { + CXXCharTypeNamedSingleCharToken, + CXXTokenTypeStar, + 0 + }, + // 043 (0x2b) '+' + { + CXXCharTypeOperator, + 0 , + 0 + }, + // 044 (0x2c) ',' + { + CXXCharTypeNamedSingleCharToken, + CXXTokenTypeComma, + 0 + }, + // 045 (0x2d) '-' + { + CXXCharTypeOperator, + 0 , + 0 + }, + // 046 (0x2e) '.' 
+ { + CXXCharTypeValidInNumber | CXXCharTypeNamedSingleOrRepeatedCharToken, + CXXTokenTypeDotOperator, + CXXTokenTypeMultipleDots + }, + // 047 (0x2f) '/' + { + CXXCharTypeOperator, + 0 , + 0 + }, + // 048 (0x30) '0' + { + CXXCharTypePartOfIdentifier | CXXCharTypeDecimalDigit | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 049 (0x31) '1' + { + CXXCharTypePartOfIdentifier | CXXCharTypeDecimalDigit | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 050 (0x32) '2' + { + CXXCharTypePartOfIdentifier | CXXCharTypeDecimalDigit | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 051 (0x33) '3' + { + CXXCharTypePartOfIdentifier | CXXCharTypeDecimalDigit | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 052 (0x34) '4' + { + CXXCharTypePartOfIdentifier | CXXCharTypeDecimalDigit | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 053 (0x35) '5' + { + CXXCharTypePartOfIdentifier | CXXCharTypeDecimalDigit | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 054 (0x36) '6' + { + CXXCharTypePartOfIdentifier | CXXCharTypeDecimalDigit | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 055 (0x37) '7' + { + CXXCharTypePartOfIdentifier | CXXCharTypeDecimalDigit | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 056 (0x38) '8' + { + CXXCharTypePartOfIdentifier | CXXCharTypeDecimalDigit | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 057 (0x39) '9' + { + CXXCharTypePartOfIdentifier | CXXCharTypeDecimalDigit | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 058 (0x3a) ':' + { + CXXCharTypeNamedSingleOrRepeatedCharToken, + CXXTokenTypeSingleColon, + CXXTokenTypeMultipleColons + }, + // 059 (0x3b) ';' + { + CXXCharTypeNamedSingleCharToken, + CXXTokenTypeSemicolon, + 0 + }, + // 060 (0x3c) '<' + { + 
CXXCharTypeCustomHandling, + CXXTokenTypeSmallerThanSign, + 0 + }, + // 061 (0x3d) '=' + { + CXXCharTypeOperator | CXXCharTypeNamedSingleOrRepeatedCharToken, + CXXTokenTypeAssignment, + CXXTokenTypeOperator + }, + // 062 (0x3e) '>' // We never merge two >> + { + CXXCharTypeNamedSingleCharToken, + CXXTokenTypeGreaterThanSign, + 0 + }, + // 063 (0x3f) '?' + { + CXXCharTypeOperator, + 0 , + 0 + }, + // 064 (0x40) '@' + { + 0, + 0, + 0 + }, + // 065 (0x41) 'A' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 066 (0x42) 'B' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 067 (0x43) 'C' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 068 (0x44) 'D' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 069 (0x45) 'E' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 070 (0x46) 'F' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 071 (0x47) 'G' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 072 (0x48) 'H' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeValidInNumber, + 0, + 0 + }, + // 073 (0x49) 'I' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 074 (0x4a) 'J' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 075 (0x4b) 'K' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 076 (0x4c) 'L' + { + CXXCharTypeStartOfIdentifier | 
CXXCharTypePartOfIdentifier | + CXXCharTypeValidInNumber, + 0, + 0 + }, + // 077 (0x4d) 'M' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 078 (0x4e) 'N' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 079 (0x4f) 'O' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 080 (0x50) 'P' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 081 (0x51) 'Q' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0, + 0 + }, + // 082 (0x52) 'R' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 083 (0x53) 'S' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 084 (0x54) 'T' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 085 (0x55) 'U' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeValidInNumber, + 0 , + 0 + }, + // 086 (0x56) 'V' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 087 (0x57) 'W' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 088 (0x58) 'X' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeValidInNumber, + 0 , + 0 + }, + // 089 (0x59) 'Y' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 090 (0x5a) 'Z' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 091 (0x5b) '[' + { + CXXCharTypeCustomHandling, + CXXTokenTypeOpeningSquareParenthesis, + 0 + }, + // 092 (0x5c) '\' + { + 0, + 0, + 0 + }, + // 093 (0x5d) ']' + { + CXXCharTypeNamedSingleCharToken, + CXXTokenTypeClosingSquareParenthesis, + 0 + }, + // 094 (0x5e) '^' + { + CXXCharTypeOperator, + 0, + 0 + }, + // 095 (0x5f) '_' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 096 (0x60) '`' + { + 0, + 0, + 0 + 
}, + // 097 (0x61) 'a' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0, + 0 + }, + // 098 (0x62) 'b' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0 , + 0 + }, + // 099 (0x63) 'c' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0 , + 0 + }, + // 100 (0x64) 'd' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0 , + 0 + }, + // 101 (0x65) 'e' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0 , + 0 + }, + // 102 (0x66) 'f' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeHexadecimalDigit | CXXCharTypeValidInNumber, + 0 , + 0 + }, + // 103 (0x67) 'g' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 104 (0x68) 'h' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeValidInNumber, + 0 , + 0 + }, + // 105 (0x69) 'i' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 106 (0x6a) 'j' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 107 (0x6b) 'k' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 108 (0x6c) 'l' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeValidInNumber, + 0 , + 0 + }, + // 109 (0x6d) 'm' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 110 (0x6e) 'n' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 111 (0x6f) 'o' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 112 (0x70) 'p' + { + CXXCharTypeStartOfIdentifier | 
CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 113 (0x71) 'q' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 114 (0x72) 'r' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 115 (0x73) 's' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 116 (0x74) 't' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 117 (0x75) 'u' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeValidInNumber, + 0 , + 0 + }, + // 118 (0x76) 'v' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 119 (0x77) 'w' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 120 (0x78) 'x' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier | + CXXCharTypeValidInNumber, + 0 , + 0 + }, + // 121 (0x79) 'y' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 122 (0x7a) 'z' + { + CXXCharTypeStartOfIdentifier | CXXCharTypePartOfIdentifier, + 0 , + 0 + }, + // 123 (0x7b) '{' + { + CXXCharTypeNamedSingleCharToken, + CXXTokenTypeOpeningBracket, + 0 + }, + // 124 (0x7c) '|' + { + CXXCharTypeOperator, + 0 , + 0 + }, + // 125 (0x7d) '}' + { + CXXCharTypeNamedSingleCharToken, + CXXTokenTypeClosingBracket, + 0 + }, + // 126 (0x7e) '~' + { + CXXCharTypeStartOfIdentifier, + 0 , + 0 + }, + // 127 (0x7f) '' + { 0, 0, 0 } +}; + +// Parse the contents of an attribute chain. 
+// The input is the innermost chain of __attribute__((...)) or [[...]] +static void cxxParserAnalyzeAttributeChain(CXXTokenChain * pChain) +{ + CXXToken * pToken = cxxTokenChainFirst(pChain); + + while(pToken) + { + if(cxxTokenTypeIs(pToken,CXXTokenTypeIdentifier)) + { + CXX_DEBUG_PRINT("Analyzing attribute %s",vStringValue(pToken->pszWord)); + if( + (strcmp(vStringValue(pToken->pszWord),"always_inline") == 0) || + (strcmp(vStringValue(pToken->pszWord),"__always_inline__") == 0) + ) + { + CXX_DEBUG_PRINT("Found attribute 'always_inline'"); + // assume "inline" has been seen. + g_cxx.uKeywordState |= CXXParserKeywordStateSeenInline; + } else if( + (strcmp(vStringValue(pToken->pszWord),"deprecated") == 0) || + (strcmp(vStringValue(pToken->pszWord),"__deprecated__") == 0) + ) + { + CXX_DEBUG_PRINT("Found attribute 'deprecated'"); + // assume "inline" has been seen. + g_cxx.uKeywordState |= CXXParserKeywordStateSeenAttributeDeprecated; + } + } + + pToken = pToken->pNext; + } +} + +// +// The __attribute__((...)) sequence complicates parsing quite a lot. +// For this reason we attempt to "hide" it from the rest of the parser +// at tokenizer level. +// +// Returns false if it finds an EOF. This is an important invariant required by +// cxxParserParseNextToken(), the only caller. +// +static bool cxxParserParseNextTokenCondenseAttribute(void) +{ + // Since cxxParserParseNextToken() returns false only when it has found + // an EOF, this function must do the same. + // This means that any broken input must be discarded here. + + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT( + cxxTokenIsKeyword(g_cxx.pToken,CXXKeyword__ATTRIBUTE__), + "This function should be called only after we have parsed __attribute__" + ); + + // Kill it + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + // And go ahead. 
+ + if(!cxxParserParseNextToken()) + { + CXX_DEBUG_LEAVE_TEXT("No next token after __attribute__"); + return false; + } + + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningParenthesis)) + { + CXX_DEBUG_LEAVE_TEXT("Something that is not an opening parenthesis"); + return true; + } + + // Do NOT accept EOF as a valid terminator as it implies broken input. + if(!cxxParserParseAndCondenseCurrentSubchain( + CXXTokenTypeOpeningParenthesis | + CXXTokenTypeOpeningSquareParenthesis | + CXXTokenTypeOpeningBracket, + false, + false + )) + { + // Parsing and/or condensation of the subchain failed. This implies broken + // input (mismatched parenthesis/bracket, early EOF). + + CXX_DEBUG_LEAVE_TEXT("Failed to parse subchains. The input is broken..."); + + // However our invariant (see comment at the beginning of the function) + // forbids us to return false if we didn't find an EOF. So we attempt + // to resume parsing anyway. If there is an EOF, cxxParserParseNextToken() + // will report it. + + // Kill the token chain + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + return cxxParserParseNextToken(); + } + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeParenthesisChain), + "Should have a parenthesis chain as last token!" + ); + + // Try to make sense of certain kinds of __attribute__. + // the proper syntax is __attribute__(()), so look at the inner chain + + CXXToken * pInner = cxxTokenChainFirst(g_cxx.pToken->pChain); + if(pInner) + { + if(pInner->pNext && cxxTokenTypeIs(pInner->pNext,CXXTokenTypeParenthesisChain)) + cxxParserAnalyzeAttributeChain(pInner->pNext->pChain); + } + + // Now just kill the chain. + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + // And finally extract yet another token. + bool bRet = cxxParserParseNextToken(); + + CXX_DEBUG_LEAVE(); + return bRet; +} + +// +// We handle the attribute [[...]] sequence introduced in c++11 in the same way +// as __attribute__((...)). We move it out of the parser's way as it complicates parsing. 
+// +// Returns false if it finds an EOF. This is an important invariant required by +// cxxParserParseNextToken(), the only caller. +// +static bool cxxParserParseNextTokenCondenseCXX11Attribute(void) +{ + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(g_cxx.pToken, CXXTokenTypeOpeningSquareParenthesis), + "This function should be called only after we have parsed [" + ); + + // Input stream: [[... + // If the syntax is correct then this is an attribute sequence [[foo]] + // + // g_cxx.pToken points the first '['. + // g_cxx.iChar points the second '['. + // + // A caller calls this function only when the second '[' is found. + + if(!cxxParserParseAndCondenseCurrentSubchain( + CXXTokenTypeOpeningParenthesis | + CXXTokenTypeOpeningSquareParenthesis | + CXXTokenTypeOpeningBracket, + false, + false + )) + { + // Parsing and/or condensation of the subchain failed. This implies broken + // input (mismatched parenthesis/bracket, early EOF). + + CXX_DEBUG_LEAVE_TEXT("Failed to parse subchains. The input is broken..."); + + // However our invariant + // forbids us to return false if we didn't find an EOF. So we attempt + // to resume parsing anyway. If there is an EOF, cxxParserParseNextToken() + // will report it. + + // Kill the token chain + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + return cxxParserParseNextToken(); + } + + // Now the current token should be replaced by a square parenthesis chain + // that contains another square parenthesis chain. + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSquareParenthesisChain), + "Should have a parenthesis chain as last token!" + ); + CXX_DEBUG_ASSERT( + // at least [ + [*] + ] + (g_cxx.pToken->pChain->iCount >= 3) && + cxxTokenTypeIs( + cxxTokenChainAt(g_cxx.pToken->pChain,1), + CXXTokenTypeSquareParenthesisChain + ), + "Should have a nested parenthesis chain inside the last token!" 
+ ); + + cxxParserAnalyzeAttributeChain( + cxxTokenChainAt(g_cxx.pToken->pChain,1)->pChain + ); + + // Now just kill it. + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + // And finally extract yet another token. + bool bRet = cxxParserParseNextToken(); + + CXX_DEBUG_LEAVE(); + return bRet; +} + +// A macro token was encountered and it expects a parameter list. +// The routine has to check if there is a following parenthesis +// and eventually skip it but it MUST NOT parse the next token +// if it is not a parenthesis. This is because the macro token +// may have a replacement and is that one that has to be returned +// back to the caller from cxxParserParseNextToken(). +static bool cxxParserParseNextTokenSkipMacroParenthesis(CXXToken ** ppChain) +{ + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT(ppChain,"ppChain should not be null here"); + + cxxParserSkipToNonWhiteSpace(); + + if(g_cxx.iChar != '(') + { + *ppChain = NULL; + return true; // no parenthesis + } + + if(!cxxParserParseNextToken()) + { + CXX_DEBUG_LEAVE_TEXT("No next token after ignored identifier"); + return false; + } + + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningParenthesis)) + { + CXX_DEBUG_ASSERT(false,"Should have found an open parenthesis token here!"); + CXX_DEBUG_LEAVE_TEXT("Internal error"); + return false; + } + + if(!cxxParserParseAndCondenseCurrentSubchain( + CXXTokenTypeOpeningParenthesis, + false, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse and condense subchains"); + return false; + } + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeParenthesisChain), + "Should have a parenthesis chain as last token!" + ); + + // Now just kill the chain. 
+ *ppChain = cxxTokenChainTakeLast(g_cxx.pTokenChain); + + CXX_DEBUG_LEAVE(); + return true; +} + +static void cxxParserParseNextTokenApplyReplacement( + cppMacroInfo * pInfo, + CXXToken * pParameterChainToken + ) +{ + CXX_DEBUG_ENTER(); + + CXX_DEBUG_ASSERT(pInfo,"Info must be not null"); + CXX_DEBUG_ASSERT(pInfo->replacements,"There should be a replacement"); + + if(!pInfo->hasParameterList) + { + CXX_DEBUG_ASSERT(!pParameterChainToken,"This shouldn't have been extracted"); + } + + CXXTokenChain * pParameters = NULL; + const char ** aParameters = NULL; + int iParameterCount = 0; + + if(pInfo->hasParameterList && pParameterChainToken && (pParameterChainToken->pChain->iCount >= 3)) + { + // kill parenthesis + cxxTokenChainDestroyFirst(pParameterChainToken->pChain); + cxxTokenChainDestroyLast(pParameterChainToken->pChain); + + pParameters = cxxTokenChainSplitOnComma( + pParameterChainToken->pChain + ); + + aParameters = (const char **)eMalloc(sizeof(const char *) * pParameters->iCount); + CXXToken * pParam = cxxTokenChainFirst(pParameters); + while(pParam) + { + aParameters[iParameterCount] = vStringValue(pParam->pszWord); + iParameterCount++; + pParam = pParam->pNext; + } + + CXX_DEBUG_ASSERT(iParameterCount == pParameters->iCount,"Bad number of parameters found"); + } + + vString * pReplacement = cppBuildMacroReplacement(pInfo,aParameters,iParameterCount); + + if(pParameters) + { + cxxTokenChainDestroy(pParameters); + eFree((char**)aParameters); + } + + CXX_DEBUG_PRINT("Applying complex replacement '%s'",vStringValue(pReplacement)); + + cppUngetStringBuiltByMacro(vStringValue(pReplacement),vStringLength(pReplacement), pInfo); + + vStringDelete(pReplacement); + + CXX_DEBUG_LEAVE(); +} + +void cxxParserUngetCurrentToken(void) +{ + CXX_DEBUG_ASSERT( + g_cxx.pToken && + g_cxx.pTokenChain && + (g_cxx.pTokenChain->iCount > 0), + "There should be at least one token to unget" + ); + + if(g_cxx.pUngetToken) + { + if(g_cxx.pUngetToken->bFollowedBySpace) + cppUngetc(' '); + 
cppUngetString(vStringValue(g_cxx.pUngetToken->pszWord),vStringLength(g_cxx.pUngetToken->pszWord)); + cxxTokenDestroy(g_cxx.pUngetToken); + } + + g_cxx.pUngetToken = cxxTokenChainTakeLast(g_cxx.pTokenChain); + + CXX_DEBUG_ASSERT(g_cxx.pUngetToken == g_cxx.pToken,"Oops.. ungot a token that was not the chain tail"); + + g_cxx.pToken = cxxTokenChainLast(g_cxx.pTokenChain); +} + + +#define CXX_PARSER_MAXIMUM_TOKEN_CHAIN_SIZE 16384 + +// We stop applying macro replacements if the unget buffer gets too big +// as it is a sign of recursive macro expansion +#define CXX_PARSER_MAXIMUM_UNGET_BUFFER_SIZE_FOR_MACRO_REPLACEMENTS 65536 + +// We stop applying macro replacements if a macro is used so many +// times in a recursive macro expansion. +#define CXX_PARSER_MAXIMUM_MACRO_USE_COUNT 8 + +// Returns false if it finds an EOF. Returns true otherwise. +// +// In some special cases this function may parse more than one token, +// however only a single token will always be returned. +bool cxxParserParseNextToken(void) +{ + // The token chain should not be allowed to grow arbitrarily large. + // The token structures are quite big and it's easy to grow up to + // 5-6GB or memory usage. However this limit should be large enough + // to accommodate all the reasonable statements that could have some + // information in them. This includes multiple function prototypes + // in a single statement (ImageMagick has some examples) but probably + // does NOT include large data tables. + int iInitialTokenChainSize = g_cxx.pTokenChain->iCount; + if(iInitialTokenChainSize >= CXX_PARSER_MAXIMUM_TOKEN_CHAIN_SIZE) + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + if(g_cxx.pUngetToken) + { + // got some tokens in the unget chain. 
+ cxxTokenChainAppend(g_cxx.pTokenChain,g_cxx.pUngetToken); + + g_cxx.pToken = g_cxx.pUngetToken; + + g_cxx.pUngetToken = NULL; + + return !cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF); + } + + CXXToken * t = cxxTokenCreate(); + + cxxTokenChainAppend(g_cxx.pTokenChain,t); + + g_cxx.pToken = t; + + cxxParserSkipToNonWhiteSpace(); + + // FIXME: this cpp handling is kind of broken: + // it works only because the moon is in the correct phase. + cppBeginStatement(); + + // This must be done after getting char from input + t->iLineNumber = getInputLineNumber(); + t->oFilePosition = getInputFilePosition(); + + if(g_cxx.iChar == EOF) + { + t->eType = CXXTokenTypeEOF; + t->bFollowedBySpace = false; + return false; + } + + unsigned int uInfo = UINFO(g_cxx.iChar); + + //fprintf(stderr,"Char %c %02x info %u\n",g_cxx.iChar,g_cxx.iChar,uInfo); + + if(uInfo & CXXCharTypeStartOfIdentifier) + { + // word + t->eType = CXXTokenTypeIdentifier; + t->bFollowedBySpace = false; + + vStringPut(t->pszWord,g_cxx.iChar); + + // special case for tile, which may actually be an operator + if(g_cxx.iChar == '~') + { + // may be followed by space! 
+ g_cxx.iChar = cppGetc(); + if(cppIsspace(g_cxx.iChar)) + { + t->bFollowedBySpace = true; + g_cxx.iChar = cppGetc(); + while(cppIsspace(g_cxx.iChar)) + g_cxx.iChar = cppGetc(); + } + + // non space + uInfo = UINFO(g_cxx.iChar); + if(!(uInfo & CXXCharTypeStartOfIdentifier)) + { + // this is not an identifier after all + t->eType = CXXTokenTypeOperator; + if((!t->bFollowedBySpace) && g_cxx.iChar == '=') + { + // make ~= single token so it's not handled as + // a separate assignment + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + } + return true; + } + } else { + g_cxx.iChar = cppGetc(); + } + + for(;;) + { + uInfo = UINFO(g_cxx.iChar); + if(!(uInfo & CXXCharTypePartOfIdentifier)) + break; + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + } + + int iCXXKeyword = lookupKeyword(t->pszWord->buffer,g_cxx.eLangType); + if(iCXXKeyword >= 0) + { + if(cxxKeywordIsDisabled((CXXKeyword)iCXXKeyword)) + { + t->eType = CXXTokenTypeIdentifier; + } else { + + t->eType = CXXTokenTypeKeyword; + t->eKeyword = (CXXKeyword)iCXXKeyword; + + if(iCXXKeyword == CXXKeyword__ATTRIBUTE__) + { + // special handling for __attribute__ + return cxxParserParseNextTokenCondenseAttribute(); + } + } + } else { + + cppMacroInfo * pMacro = cppFindMacro(vStringValue(t->pszWord)); + +#ifdef DEBUG + if(pMacro && (pMacro->useCount >= CXX_PARSER_MAXIMUM_MACRO_USE_COUNT)) + { + /* If the macro is overly used, report it here. */ + CXX_DEBUG_PRINT("Overly uesd macro %s <%p> useCount: %d (> %d)", + vStringValue(t->pszWord), + pMacro, pMacro? pMacro->useCount: -1, + CXX_PARSER_MAXIMUM_MACRO_USE_COUNT); + } +#endif + + if(pMacro && (pMacro->useCount < CXX_PARSER_MAXIMUM_MACRO_USE_COUNT)) + { + CXX_DEBUG_PRINT("Macro %s <%p> useCount: %d", vStringValue(t->pszWord), + pMacro, pMacro? 
pMacro->useCount: -1); + + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + CXXToken * pParameterChain = NULL; + + if(pMacro->hasParameterList) + { + CXX_DEBUG_PRINT("Macro has parameter list"); + if(!cxxParserParseNextTokenSkipMacroParenthesis(&pParameterChain)) + return false; + } + + // This is used to avoid infinite recursion in substitution + // (things like -D foo=foo or similar) + + if(pMacro->replacements) + { + CXX_DEBUG_PRINT("The token has replacements: applying"); + + if( + // Exclude possible cases of recursive macro expansion that + // causes level nesting + // -D'x=y(x)' + (g_cxx.iNestingLevels < CXX_PARSER_MAXIMUM_NESTING_LEVELS) && + // Exclude possible cases of recursive macro expansion that + // causes a single token chain to grow too big + // -D'x=y.x' + (iInitialTokenChainSize < CXX_PARSER_MAXIMUM_TOKEN_CHAIN_SIZE) && + // Detect other cases of nasty macro expansion that cause + // the unget buffer to grow fast (but the token chain to grow slowly) + // -D'p=a' -D'a=p+p' + (cppUngetBufferSize() < CXX_PARSER_MAXIMUM_UNGET_BUFFER_SIZE_FOR_MACRO_REPLACEMENTS) + ) + { + // unget last char + cppUngetc(g_cxx.iChar); + // unget the replacement + cxxParserParseNextTokenApplyReplacement( + pMacro, + pParameterChain + ); + + g_cxx.iChar = cppGetc(); + } else { + // Possibly a recursive macro + CXX_DEBUG_PRINT( + "Token has replacement but either nesting level is too " + "big (%d), the token chain (%d) or the unget buffer (%d) " + "have grown too large", + g_cxx.iNestingLevels, + g_cxx.pTokenChain->iCount, + cppUngetBufferSize() + ); + } + } + + if(pParameterChain) + cxxTokenDestroy(pParameterChain); + + g_cxx.iNestingLevels++; + // Have no token to return: parse it + CXX_DEBUG_PRINT("Parse inner token"); + bool bRet = cxxParserParseNextToken(); + CXX_DEBUG_PRINT("Parsed inner token: %s type %d",g_cxx.pToken->pszWord->buffer,g_cxx.pToken->eType); + g_cxx.iNestingLevels--; + return bRet; + } + } + + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + + return 
true; + } + + if(g_cxx.iChar == '-') + { + // special case for pointer + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + if(g_cxx.iChar == '>') + { + t->eType = CXXTokenTypePointerOperator; + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + } else { + t->eType = CXXTokenTypeOperator; + if(g_cxx.iChar == '-') + { + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + } + } + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + return true; + } + +#if 0 + // As long as we use cppGetc() we don't need this + + if(g_cxx.iChar == '"') + { + // special case for strings + t->eType = CXXTokenTypeStringConstant; + vStringPut(t->pszWord,g_cxx.iChar); + // We don't even care of storing the other chars: we don't need + // them for parsing + // FIXME: We might need them in signature:() tag.. maybe add + // them up to a certain length only? + for(;;) + { + g_cxx.iChar = cppGetc(); + if(g_cxx.iChar == EOF) + { + t->bFollowedBySpace = false; + return true; + } + if(g_cxx.iChar == '\\') + { + // escape + g_cxx.iChar = cppGetc(); + if(g_cxx.iChar == EOF) + { + t->bFollowedBySpace = false; + return true; + } + } else if(g_cxx.iChar == '"') + { + g_cxx.iChar = cppGetc(); + break; + } + } + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + return true; + } +#else + if(g_cxx.iChar == STRING_SYMBOL) + { + t->eType = CXXTokenTypeStringConstant; + vStringPut(t->pszWord,'"'); + vStringCat(t->pszWord,cppGetLastCharOrStringContents()); + vStringPut(t->pszWord,'"'); + g_cxx.iChar = cppGetc(); + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + return true; + } +#endif + +#if 0 + // As long as we use cppGetc() we don't need this + if(g_cxx.iChar == '\'') + { + // special case for strings + t->eType = CXXTokenTypeCharacterConstant; + vStringPut(t->pszWord,g_cxx.iChar); + // We don't even care storing the other chars: we don't + // need them for parsing + for(;;) + { + g_cxx.iChar = cppGetc(); + if(g_cxx.iChar == EOF) + { + t->bFollowedBySpace = false; + 
return true; + } + if(g_cxx.iChar == '\\') + { + // escape + g_cxx.iChar = cppGetc(); + if(g_cxx.iChar == EOF) + { + t->bFollowedBySpace = false; + return true; + } + } else if(g_cxx.iChar == '\'') + { + g_cxx.iChar = cppGetc(); + break; + } + } + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + return true; + } +#else + if(g_cxx.iChar == CHAR_SYMBOL) + { + t->eType = CXXTokenTypeCharacterConstant; + vStringPut(t->pszWord,'\''); + vStringCat(t->pszWord,cppGetLastCharOrStringContents()); + vStringPut(t->pszWord,'\''); + g_cxx.iChar = cppGetc(); + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + return true; + } +#endif + + if(uInfo & CXXCharTypeDecimalDigit) + { + // number + t->eType = CXXTokenTypeNumber; + vStringPut(t->pszWord,g_cxx.iChar); + + for(;;) + { + g_cxx.iChar = cppGetc(); + uInfo = UINFO(g_cxx.iChar); + if(!(uInfo & CXXCharTypeValidInNumber)) + break; + vStringPut(t->pszWord,g_cxx.iChar); + } + + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + return true; + } + + if(uInfo & CXXCharTypeNamedSingleOrRepeatedCharToken) + { + t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; + vStringPut(t->pszWord,g_cxx.iChar); + int iChar = g_cxx.iChar; + g_cxx.iChar = cppGetc(); + if(g_cxx.iChar == iChar) + { + t->eType = g_aCharTable[g_cxx.iChar].uMultiTokenType; + // We could signal a syntax error with more than two colons + // or equal signs...but we're tolerant + do { + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + } while(g_cxx.iChar == iChar); + } + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + return true; + } + + if(uInfo & CXXCharTypeCustomHandling) + { + t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + switch(t->eType) + { + case CXXTokenTypeSmallerThanSign: + // The < sign is used in templates and is problematic if parsed incorrectly. 
+ // We must exctract only the valid operator types: <, <<, <<=, <= <=> + switch(g_cxx.iChar) + { + case '<': + // << + t->eType = CXXTokenTypeOperator; + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + if(g_cxx.iChar == '=') + { + // <<= + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + } + break; + case '=': + // <= + t->eType = CXXTokenTypeOperator; + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + if(g_cxx.iChar == '>') + { + // <=> + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + } + break; + default: + // fall down + break; + } + + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + break; + case CXXTokenTypeOpeningSquareParenthesis: + // special handling for [[ attribute ]] which can appear almost anywhere + // in the source code and is kind of annoying for the parser. + + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + + if(t->bFollowedBySpace) + { + // The tokens can be separated by a space, at least according to gcc. 
+ do { + g_cxx.iChar = cppGetc(); + } while(cppIsspace(g_cxx.iChar)); + } + + if(g_cxx.iChar == '[') + return cxxParserParseNextTokenCondenseCXX11Attribute(); + break; + default: + CXX_DEBUG_ASSERT(false,"There should be a custom handler for this token type"); + // treat as single token type in non debug builds + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + break; + } + + return true; + } + + if(uInfo & CXXCharTypeNamedSingleCharToken) + { + t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + return true; + } + + if(uInfo & CXXCharTypeOperator) + { + t->eType = CXXTokenTypeOperator; + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + uInfo = UINFO(g_cxx.iChar); + while(uInfo & CXXCharTypeOperator) + { + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + uInfo = UINFO(g_cxx.iChar); + } + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + return true; + } + + t->eType = CXXTokenTypeUnknown; + vStringPut(t->pszWord,g_cxx.iChar); + g_cxx.iChar = cppGetc(); + t->bFollowedBySpace = cppIsspace(g_cxx.iChar); + + return true; +} diff --git a/ctags/parsers/cxx/cxx_parser_typedef.c b/ctags/parsers/cxx/cxx_parser_typedef.c new file mode 100644 index 0000000000..61fa5ba225 --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser_typedef.c @@ -0,0 +1,491 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_parser.h" +#include "cxx_parser_internal.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_scope.h" + +#include "parse.h" +#include "vstring.h" +#include "debug.h" +#include "read.h" + +// +// This is used to pre-parse non struct/class/union/enum typedefs. +// Please note that struct/class/union/enum has its own pre-parsing routine. +// +bool cxxParserParseGenericTypedef(void) +{ + CXX_DEBUG_ENTER(); + + for(;;) + { + if(!cxxParserParseUpToOneOf( + CXXTokenTypeSemicolon | CXXTokenTypeEOF | + CXXTokenTypeClosingBracket | CXXTokenTypeKeyword, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse fast statement"); + return false; + } + + // This fixes bug reported by Emil Rojas in 2002. + // Though it's quite debatable if we really *should* do this. + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeKeyword)) + { + // not a keyword + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSemicolon)) + { + // not semicolon + CXX_DEBUG_LEAVE_TEXT("Found EOF/closing bracket at typedef"); + return true; // EOF + } + // semicolon: exit + break; + } + + // keyword + if( + (g_cxx.pToken->eKeyword == CXXKeywordEXTERN) || + (g_cxx.pToken->eKeyword == CXXKeywordTYPEDEF) || + (g_cxx.pToken->eKeyword == CXXKeywordSTATIC) + ) + { + CXX_DEBUG_LEAVE_TEXT("Found a terminating keyword inside typedef"); + return true; // treat as semicolon but don't dare to emit a tag + } + } + + cxxParserExtractTypedef(g_cxx.pTokenChain,true,false); + CXX_DEBUG_LEAVE(); + return true; +} + +// +// This function attempts to extract a typedef from the +// specified chain. +// The typedef keyword should already have been removed (!) +// The function expects a terminator to be present at the end +// unless bExpectTerminatorAtEnd is set to false +// The token chain may be condensed/destroyed upon exit. 
+// +// Samples: +// [typedef] int x; +// x = int +// +// [typedef] struct y x; +// x = struct y +// +// [typedef] some complex type x, *y, **z; +// x = some complex type +// y = some complex type * +// z = some complex type ** +// +// [typedef] int x[2]; +// x = int[2] +// +// [typedef] int (*x)[2]; +// x = int (*)[2] <-- pointer to an array of two integers +// +// [typedef] int *x[2]; +// x = int * [2] <-- array of two pointers to integer +// +// [typedef] int (*x)(void); +// x = int (*)(void) <--- function pointer +// +// [typedef] int (x)(void) +// x = int ()(void) <--- function type (not a pointer!) +// +// [typedef] int x(void) +// x = int ()(void) <--- still function type +// +// [typedef] int (MACRO *x)(void); +// x = int (MACRO *)(void) <--- function pointer +// (WINAPI is an example of MACRO.) +// +// [typedef] int (MACRO x)(void) +// x = int (MACRO)(void) <--- function type (not a pointer!) +// (WINAPI is an example of MACRO.) +// +// [typedef] int ((x))(void) +// x = int ()(void) <--- same as above +// +// [typedef] int (*(*x)(int))[2]; +// x = int (*(*)(int))[2] <-- which is a function pointer taking an int and +// returning a pointer to an array of two integers... +// +// [typedef] blah (*x(k (*)(y *)))(z *); +// x = blah (*(k (*)(y *)))(z *) <-- which is a function (!not a function pointer!) +// taking a function pointer A as argument and returning +// a function pointer B. A = k (*)(y *) +// and B = blah (*)(z *) +// +// Note that not all syntaxes involving parentheses are valid. 
+// Examples of what is NOT valid: +// +// [typedef] unsigned (int)(*x)() +// [typedef] int[] x; +// +// So: +// - if there is an identifier at the end, we use that +// - if there are round parentheses then the identifier seems to be the first +// one found in the nested parentheses chain +// - if there are no round parentheses then the identifier is the last one +// found in the toplevel chain +// +// In case of multiple declarations it seems that only the first part with identifiers +// and keywords is kept across types. +// Ex: +// [typedef] int (*int2ptr)[2], baz; +// int2ptr = int (*)[2] +// baz = int +// +void cxxParserExtractTypedef( + CXXTokenChain * pChain, + bool bExpectTerminatorAtEnd, + bool bGotTemplate + ) +{ + CXX_DEBUG_ENTER(); + +#ifdef CXX_DO_DEBUGGING + vString * pX = cxxTokenChainJoin(pChain,NULL,0); + CXX_DEBUG_PRINT("Extracting typedef from '%s'",vStringValue(pX)); + vStringDelete(pX); +#endif + + // At least something like + // a b; + + if(pChain->iCount < (bExpectTerminatorAtEnd ? 3 : 2)) + { + CXX_DEBUG_LEAVE_TEXT("Not enough tokens for a type definition"); + return; + } + + CXXToken * t; + + if(bExpectTerminatorAtEnd) + { + t = cxxTokenChainLast(pChain); + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(t,CXXTokenTypeSemicolon), + "The terminator should be present here" + ); + cxxTokenChainDestroyLast(pChain); + } + + // There may be multiple typedefs inside a single declaration. + + // [typedef] x y, *z; + // -> y is x + // -> z is x * + + // The angle brackets are not necessarily condensed in chains here + // since we were parsing a generic statement which expected less-than + // and greater-than operators to be present. We need to take care of that. + + while(pChain->iCount >= 2) + { + // + // Skip either to a comma or to the end, but keep track of the first parenthesis. 
+ // + + t = cxxTokenChainFirst(pChain); + + CXX_DEBUG_ASSERT(t,"There should be a token here!"); + + CXXToken * pFirstParenthesis = NULL; + int iSearchTypes = CXXTokenTypeComma | CXXTokenTypeSmallerThanSign | + CXXTokenTypeParenthesisChain; + CXXToken * pComma; + +skip_to_comma_or_end: + + pComma = cxxTokenChainNextTokenOfType(t,iSearchTypes); + + if(pComma) + { + // , < or ( + if(cxxTokenTypeIs(pComma,CXXTokenTypeSmallerThanSign)) + { + CXX_DEBUG_PRINT("Found angle bracket, trying to skip it"); + + t = cxxTokenChainSkipToEndOfTemplateAngleBracket(pComma); + if(!t) + { + CXX_DEBUG_LEAVE_TEXT("Mismatched < sign inside typedef: giving up on it"); + return; + } + // and go ahead + goto skip_to_comma_or_end; + } + + if(cxxTokenTypeIs(pComma,CXXTokenTypeParenthesisChain)) + { + // We keep track only of the first one + CXX_DEBUG_ASSERT( + !pFirstParenthesis, + "We should have stopped only at the first parenthesis" + ); + + iSearchTypes &= ~CXXTokenTypeParenthesisChain; + pFirstParenthesis = pComma; + // and go ahead + goto skip_to_comma_or_end; + } + + CXX_DEBUG_ASSERT(cxxTokenTypeIs(pComma,CXXTokenTypeComma),"Oops, expected a comma!"); + + // Really a comma! 
+ CXX_DEBUG_PRINT("Found comma"); + + if((!pComma->pPrev) || (!pComma->pPrev->pPrev)) + { + CXX_DEBUG_LEAVE_TEXT("Found comma but not enough tokens before it"); + return; + } + + t = pComma->pPrev; + } else { + CXX_DEBUG_PRINT("Found no comma, pointing at end of declaration"); + + t = cxxTokenChainLast(pChain); + } + + CXX_DEBUG_ASSERT(t,"We should have found a token here!"); + + // + // Now look for the identifier + // + CXXTokenChain * pTParentChain; + CXXToken * pLookupStart = t; + + if(cxxTokenTypeIs(t,CXXTokenTypeIdentifier)) + { + // Use the identifier at end, whatever comes before + CXX_DEBUG_PRINT("Identifier seems to be at end: %s",vStringValue(t->pszWord)); + pTParentChain = pChain; + } else if(pFirstParenthesis) + { + CXX_DEBUG_PRINT("Identifier not at end, but got parenthesis chain"); + // + // Possibly function pointer or function type definition. + // + // typedef blah (*(foo))(baz) + // typedef blah (*foo)(baz) + // typedef blah (*foo)[...] + // typedef blah (foo)(baz) + // typedef blah ((foo))(baz) + // typedef blah foo(baz) + // + // So we have two cases: either there are at least two parentheses at the + // top level or there is only one. If there are at least two then + // we expect foo we want to capture to be at the end of the first nested + // parenthesis chain. If there is only one then we expect it to be the + // last identifier before the parenthesis. + // + if( + pFirstParenthesis->pNext && + cxxTokenTypeIsOneOf( + pFirstParenthesis->pNext, + CXXTokenTypeParenthesisChain | + CXXTokenTypeSquareParenthesisChain + ) + ) + { + CXX_DEBUG_PRINT("There are two parenthesis chains. Looking in the first one"); + t = cxxTokenChainLastPossiblyNestedTokenOfType( + pFirstParenthesis->pChain, + CXXTokenTypeIdentifier, + &pTParentChain + ); + } else { + CXX_DEBUG_PRINT("There is one parenthesis chain. 
Looking just before"); + + if( + pFirstParenthesis->pPrev && + cxxTokenTypeIs(pFirstParenthesis->pPrev,CXXTokenTypeIdentifier) + ) + { + // Nasty "typedef blah foo(baz)" case. + t = pFirstParenthesis->pPrev; + + // Let's have a consistent typeref too. We correct user + // input so it becomes "typedef blah (foo)(baz)". + + pTParentChain = cxxTokenChainCreate(); + + CXXToken * par = cxxTokenCreate(); + par->eType = CXXTokenTypeOpeningParenthesis; + par->iLineNumber = t->iLineNumber; + par->oFilePosition = t->oFilePosition; + vStringPut(par->pszWord,'('); + par->pChain = NULL; + cxxTokenChainAppend(pTParentChain,par); + + par = cxxTokenCreate(); + par->eType = CXXTokenTypeIdentifier; + par->iLineNumber = t->iLineNumber; + par->oFilePosition = t->oFilePosition; + vStringCopy(par->pszWord,t->pszWord); + par->pChain = NULL; + cxxTokenChainAppend(pTParentChain,par); + + t->eType = CXXTokenTypeParenthesisChain; + t->pChain = pTParentChain; + vStringClear(t->pszWord); + + pFirstParenthesis = t; + t = par; + + par = cxxTokenCreate(); + par->eType = CXXTokenTypeClosingParenthesis; + par->iLineNumber = t->iLineNumber; + par->oFilePosition = t->oFilePosition; + vStringPut(par->pszWord,')'); + par->pChain = NULL; + cxxTokenChainAppend(pTParentChain,par); + + } else { + CXX_DEBUG_LEAVE_TEXT("Parenthesis but no identifier: no clue"); + return; + } + } + } else { + // just scan backwards to the last identifier + CXX_DEBUG_PRINT("No identifier and no parenthesis chain, trying to scan backwards"); + pTParentChain = pChain; + t = cxxTokenChainPreviousTokenOfType(t,CXXTokenTypeIdentifier); + } + + if(!t) + { + // Not found yet. + // If we're in C++ mode but we haven't confirmed that the language is really C++ + // then we might try to look for a C++ identifier here. 
+ if( + cxxParserCurrentLanguageIsCPP() && + (!g_cxx.bConfirmedCPPLanguage) && + cxxTokenTypeIs(pLookupStart,CXXTokenTypeKeyword) && + cxxKeywordIsCPPSpecific(pLookupStart->eKeyword) + ) + { + // treat as identifier + pLookupStart->eType = CXXTokenTypeIdentifier; + t = pLookupStart; + } + + if(!t) + { + CXX_DEBUG_LEAVE_TEXT("Didn't find an identifier: something nasty is going on"); + return; + } + } + + tagEntryInfo * tag = cxxTagBegin(CXXTagKindTYPEDEF,t); + + if(tag) + { + CXXToken * pTypeName = NULL; + + cxxTokenChainTake(pTParentChain,t); + + // Avoid emitting typerefs for strange things like + // typedef MACRO(stuff,stuff) X; + // or parsing errors we might make in ugly cases like + // typedef WHATEVER struct x { ... } y; + if( + (pTParentChain == pChain) && // not function pointer (see above) + ( + pComma ? + cxxTokenChainPreviousTokenOfType( + pComma, + CXXTokenTypeParenthesisChain | CXXTokenTypeAngleBracketChain + ) : + cxxTokenChainLastTokenOfType( + pChain, + CXXTokenTypeParenthesisChain | CXXTokenTypeAngleBracketChain + ) + ) + ) + { + CXX_DEBUG_LEAVE_TEXT( + "Wild parenthesis in type definition: not emitting typeref" + ); + } else { + // other kind of typeref, use typename here. + CXX_DEBUG_ASSERT( + pChain->iCount > 0, + "There should be at least another token here!" + ); + + pTypeName = cxxTagCheckAndSetTypeField( + cxxTokenChainFirst(pChain), + pComma ? 
pComma->pPrev : cxxTokenChainLast(pChain) + ); + } + + tag->isFileScope = !isInputHeaderFile(); + + if(bGotTemplate) + cxxTagHandleTemplateFields(); + + cxxTagCommit(NULL); + + if ( + bGotTemplate && + cxxTagKindEnabled(CXXTagCPPKindTEMPLATEPARAM) + ) + { + cxxScopePush(t,CXXScopeTypeTypedef,CXXScopeAccessUnknown); + cxxParserEmitTemplateParameterTags(); + cxxScopePop(); + } else { + cxxTokenDestroy(t); + } + if(pTypeName) + cxxTokenDestroy(pTypeName); + } + + if(!pComma) + break; + + // We must kill anything up to either an identifier, a keyword (type) or > which is + // assumed to be part of a template. + + while( + pComma->pPrev && + ( + !cxxTokenTypeIsOneOf( + pComma->pPrev, + CXXTokenTypeIdentifier | CXXTokenTypeKeyword | + CXXTokenTypeGreaterThanSign | CXXTokenTypeAngleBracketChain + ) + ) + ) + { + CXXToken * pAux = pComma->pPrev; + cxxTokenChainTake(pChain,pAux); + cxxTokenDestroy(pAux); + } + + // got a comma. + cxxTokenChainTake(pChain,pComma); + cxxTokenDestroy(pComma); + } + + CXX_DEBUG_LEAVE(); + return; +} diff --git a/ctags/parsers/cxx/cxx_parser_using.c b/ctags/parsers/cxx/cxx_parser_using.c new file mode 100644 index 0000000000..dd133fd2e0 --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser_using.c @@ -0,0 +1,170 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_parser.h" +#include "cxx_parser_internal.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_scope.h" + +#include "parse.h" +#include "vstring.h" +#include "read.h" + +bool cxxParserParseUsingClause(void) +{ + CXX_DEBUG_ENTER(); + + // using-directives for namespaces and using-declarations + // for namespace members + // using-declarations for class members + // type alias and alias template declaration (since C++11) + + // using namespace ns_name; (5) // whole namespace + // using ns_name::name; (6) // only symbol name + // using B::g; // inside class, using method g from base class B + // using identifier attr(optional) = type-id ; <-- equivalent to a typedef! + + cxxTokenChainClear(g_cxx.pTokenChain); + + // skip to the next ; without leaving scope. + if(!cxxParserParseUpToOneOf( + CXXTokenTypeSemicolon | CXXTokenTypeClosingBracket | CXXTokenTypeEOF, + false + )) + { + CXX_DEBUG_LEAVE_TEXT("Failed to parse up to the next ;"); + return false; + } + + if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSemicolon)) + { + CXX_DEBUG_LEAVE_TEXT("This is a syntax error but we tolerate it"); + return true; + } + + cxxTokenChainDestroyLast(g_cxx.pTokenChain); + + if(g_cxx.pTokenChain->iCount < 1) + { + CXX_DEBUG_LEAVE_TEXT("This is a syntax error but we tolerate it"); + return true; + } + + CXXToken * pAssignment = cxxTokenChainFirstTokenOfType( + g_cxx.pTokenChain, + CXXTokenTypeAssignment + ); + + if(pAssignment) + { + CXXToken * pFirst = cxxTokenChainFirst(g_cxx.pTokenChain); + bool bGotTemplate = g_cxx.pTemplateTokenChain && + (g_cxx.pTemplateTokenChain->iCount > 0) && + cxxParserCurrentLanguageIsCPP(); + + if(cxxTokenTypeIs(pFirst,CXXTokenTypeIdentifier)) + { + CXX_DEBUG_PRINT( + "Found using clause '%s' which defines a type", + vStringValue(pFirst->pszWord) + ); + + // It's a typedef. 
Reorder the tokens in the chain + // so it really looks like a typedef + // and pass it to the specialized extraction routine + cxxTokenChainTake(g_cxx.pTokenChain,pFirst); + + while(cxxTokenChainFirst(g_cxx.pTokenChain) != pAssignment) + cxxTokenChainDestroyFirst(g_cxx.pTokenChain); + // kill assignment itself + cxxTokenChainDestroyFirst(g_cxx.pTokenChain); + + // in typedefs it's at the end + cxxTokenChainAppend(g_cxx.pTokenChain,pFirst); + + cxxParserExtractTypedef(g_cxx.pTokenChain,false,bGotTemplate); + } + } else { + CXX_DEBUG_ASSERT( + g_cxx.pTokenChain->iCount > 0, + "The token chain should be non empty at this point" + ); + + CXXToken * t = cxxTokenChainFirst(g_cxx.pTokenChain); + + bool bUsingNamespace = false; + + if(cxxTokenTypeIs(t,CXXTokenTypeKeyword)) + { + if(t->eKeyword == CXXKeywordNAMESPACE) + { + bUsingNamespace = true; + cxxTokenChainDestroyFirst(g_cxx.pTokenChain); + } else if(t->eKeyword == CXXKeywordTYPENAME) + { + cxxTokenChainDestroyFirst(g_cxx.pTokenChain); + } + } + + if(g_cxx.pTokenChain->iCount > 0) + { + tagEntryInfo * tag; + + if(bUsingNamespace) + { + cxxTokenChainCondense(g_cxx.pTokenChain,0); + + t = cxxTokenChainFirst(g_cxx.pTokenChain); + CXX_DEBUG_ASSERT( + t, + "Condensation of a non empty chain should produce a token!" + ); + + + CXX_DEBUG_PRINT( + "Found using clause '%s' which extends scope", + vStringValue(t->pszWord) + ); + tag = cxxTagBegin(CXXTagCPPKindUSING,t); + } else { + + t = cxxTokenChainLast(g_cxx.pTokenChain); + + CXX_DEBUG_PRINT( + "Found using clause '%s' which imports a name", + vStringValue(t->pszWord) + ); + tag = cxxTagBegin(CXXTagCPPKindNAME,t); + + // FIXME: We need something like "nameref:" here! 
+ } + + if(tag) + { + tag->isFileScope = (cxxScopeGetType() == CXXScopeTypeNamespace) && + (!isInputHeaderFile()); + cxxTagCommit(NULL); + } + } + } + + if(!g_cxx.bConfirmedCPPLanguage) + { + CXX_DEBUG_PRINT( + "Succeeded in parsing C++ using: this really seems to be C++" + ); + g_cxx.bConfirmedCPPLanguage = true; + } + + CXX_DEBUG_LEAVE(); + return true; +} diff --git a/ctags/parsers/cxx/cxx_parser_variable.c b/ctags/parsers/cxx/cxx_parser_variable.c new file mode 100644 index 0000000000..54c4975aa8 --- /dev/null +++ b/ctags/parsers/cxx/cxx_parser_variable.c @@ -0,0 +1,917 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_parser.h" +#include "cxx_parser_internal.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_scope.h" + +#include "vstring.h" +#include "read.h" + + +static CXXToken * cxxParserVardefInParenthesis (CXXToken *pToken, int depth); + +// +// This is used to find the first identifier in stuff like +// +// ret type (*variable)(params) +// ret type (* const (variable[4]))(params) +// ret type (*baz)(params) <-- function pointer (variable) +// ret type (*(baz))(params) <-- function pointer (variable) +// ret type (* const (baz))(params) <-- function pointer (variable) +// ret type (*baz())() <-- function returning function pointer +// ret type (*baz(params))(params) <-- function returning function pointer +// ret type (*baz(params)) <-- function returning a pointer +// ret type (*baz(params))[2] <-- function returning a pointer to array +// +CXXToken * cxxParserFindFirstPossiblyNestedAndQualifiedIdentifier( + CXXTokenChain * pChain, + CXXTokenChain ** pParentChain + ) +{ + CXXToken * pId = 
cxxTokenChainFirstPossiblyNestedTokenOfType( + pChain, + CXXTokenTypeIdentifier, + pParentChain + ); + + if(!pId) + return NULL; + + if(!cxxParserCurrentLanguageIsCPP()) + return pId; + + // In the case of CPP we also handle qualifications. + + if(!pId->pNext) + return pId; + + if(!cxxTokenTypeIs(pId->pNext,CXXTokenTypeMultipleColons)) + return pId; + + // identifier:: ... + + // Look for the LAST identifier in the same chain. + // baz::foo::something <-- + + return cxxTokenChainNextTokenOfType(pId,CXXTokenTypeIdentifier); +} + +// +// Attempt to extract variable declarations from the chain. +// Returns true if at least one variable was extracted. +// Returns false if a variable declaration could not be identified. +// +// Recognized variable declarations are of the following kinds: +// +// type var; +// type var1,var2; +// type var[]; +// type var(constructor args); +// type var{list initializer}; +// type var = ...; +// type (*ident)(); +// type (var[]); <-- type (var); is also valid in C syntax. +// type *(var); However, it is handled as macro expansion. +// type var:bits; +// type var: range declaration <-- (FIXME: this is only inside for!) +// very complex type with modifiers() namespace::namespace::var = ...; +// type namespace::var[] = { +// ... +// +// Assumptions: +// - there is a terminator at the end: either ; or { +// +// Notes: +// - Be aware that if this function returns true then the pChain very likely has been modified +// (partially destroyed) as part of the type extraction algorithm. +// If the function returns false the chain has not been modified (and +// to extract something else from it). +// +// - This function is quite tricky. 
+// +bool cxxParserExtractVariableDeclarations(CXXTokenChain * pChain,unsigned int uFlags) +{ + int iCorkIndex = CORK_NIL; + int iCorkIndexFQ = CORK_NIL; + + CXX_DEBUG_ENTER(); + + if(pChain->iCount < 1) + { + CXX_DEBUG_LEAVE_TEXT("Chain is empty"); + return false; + } + +#ifdef CXX_DO_DEBUGGING + vString * pJoinedChain = cxxTokenChainJoin(pChain,NULL,0); + CXX_DEBUG_PRINT( + "Looking for variable declarations in '%s'", + vStringValue(pJoinedChain) + ); + vStringDelete(pJoinedChain); +#endif + + // + // Strategy: + // - verify that the chain starts with an identifier or keyword (always present) + // - run to one of : ; [] () {} = , + // - ensure that the previous token is an identifier (except for special cases) + // - go back to skip the eventual scope + // - ensure that there is a leading type + // - if we are at : [], () or {} then run to the next ; = or , + // - once we have determined that a variable declaration is there + // modify the chain to contain only the type name + // - emit variable tag + // - if we are at , then check if there are more declarations + // + + CXXToken * t = cxxTokenChainFirst(pChain); + + enum CXXScopeType eScopeType = cxxScopeGetType(); + + CXX_DEBUG_ASSERT(t,"There should be an initial token here"); + + if(!cxxTokenTypeIsOneOf(t,CXXTokenTypeIdentifier | CXXTokenTypeKeyword)) + { + CXX_DEBUG_LEAVE_TEXT("Statement does not start with identifier or keyword"); + return false; + } + + // Only keywords that can appear in a variable declaration. + // + // TODO?: We might add this check also on the other tokens here + // However it's unclear if it would provide some advantages + // It would certainly be a small overhead. + if( + cxxTokenTypeIs(t,CXXTokenTypeKeyword) && + (!cxxKeywordMayAppearInVariableDeclaration(t->eKeyword)) + ) + { + CXX_DEBUG_LEAVE_TEXT("Initial keyword can't appear in a variable declaration"); + return false; + } + + bool bGotVariable = false; + + // Loop over the whole statement. 
+ + while(t) + { + // Scan up to a notable token: ()[]{}=,;:{ + + while(t) + { + if(cxxTokenTypeIsOneOf( + t, + CXXTokenTypeSingleColon | CXXTokenTypeParenthesisChain | + CXXTokenTypeSquareParenthesisChain | CXXTokenTypeBracketChain | + CXXTokenTypeAssignment | CXXTokenTypeComma | + CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket + )) + { + // Notable token reached. + break; + } + + if( + cxxTokenTypeIsOneOf( + t, + CXXTokenTypeOperator | CXXTokenTypeMultipleAnds | + CXXTokenTypePointerOperator | CXXTokenTypeStringConstant | + CXXTokenTypeAngleBracketChain | CXXTokenTypeCharacterConstant | + CXXTokenTypeMultipleDots | CXXTokenTypeClosingBracket | + CXXTokenTypeClosingParenthesis | CXXTokenTypeClosingSquareParenthesis | + CXXTokenTypeGreaterThanSign + ) + ) + { + // Something that should not appear in a variable declaration + CXX_DEBUG_LEAVE_TEXT( + "Found token '%s' of type 0x%02x that should " \ + "not appear in the initial part of a variable declaration", + vStringValue(t->pszWord), + t->eType + ); + return bGotVariable; + } + + if(t->eType == CXXTokenTypeSmallerThanSign) + { + // Must be part of template type name (so properly balanced). + t = cxxTokenChainSkipToEndOfTemplateAngleBracket(t); + if(!t) + { + CXX_DEBUG_LEAVE_TEXT("Failed to skip past angle bracket chain"); + return bGotVariable; + } + } + + t = t->pNext; + } + + // Notable token reached? + + if(!t) + { + CXX_DEBUG_LEAVE_TEXT("Nothing interesting here"); + return bGotVariable; + } + + CXX_DEBUG_PRINT( + "Found notable token '%s' of type 0x%02x(%s)", + vStringValue(t->pszWord), + t->eType, + cxxDebugTypeDecode(t->eType) + ); + + // Now before the notable token there MUST be an identifier + // (eventually hidden in a parenthesis chain) and also a typename. 
+ if(!t->pPrev) + { + CXX_DEBUG_LEAVE_TEXT("Nothing interesting before notable token"); + return bGotVariable; + } + + CXXToken * pIdentifier = NULL; + CXXToken * pTokenBefore = NULL; + + // If we have to continue scanning we'll remove the tokens from here + // so they don't end up being part of the type name. + // If this is set to NULL then it means that we cannot determine properly + // what to remove and we should stop scanning after the current variable. + CXXToken * pRemoveStart = t; + + switch(t->eType) + { + case CXXTokenTypeParenthesisChain: + + // At a parenthesis chain we need some additional checks. + if( + // check for function pointers or nasty arrays + // Possible cases: + // ret type (*variable)(params) + // ret type (* const (variable[4]))(params) + t->pNext && + ( + ( + cxxTokenTypeIs(t->pNext,CXXTokenTypeParenthesisChain) && + cxxParserTokenChainLooksLikeFunctionParameterList( + t->pNext->pChain, + NULL + ) + ) || + cxxTokenTypeIs(t->pNext,CXXTokenTypeSquareParenthesisChain) + ) && + (pIdentifier = cxxParserFindFirstPossiblyNestedAndQualifiedIdentifier( + t->pChain, + NULL + )) && + // Discard function declarations with function return types + // like void (*A(B))(C); + ( + (!pIdentifier->pNext) || + (!cxxTokenTypeIs(pIdentifier->pNext,CXXTokenTypeParenthesisChain)) + ) + ) + { + // A function pointer. + // There are two parentheses, skip the second too. 
+ pTokenBefore = t->pPrev; + t = t->pNext->pNext; + pRemoveStart = t; + goto got_identifier; + } + + if( + (t->pChain->iCount == 3) && + cxxTokenTypeIs( + cxxTokenChainAt(t->pChain,1), + CXXTokenTypeParenthesisChain + ) && + t->pPrev && + cxxTokenTypeIs(t->pPrev,CXXTokenTypeIdentifier) && + t->pPrev->pPrev && + cxxTokenTypeIs(t->pPrev->pPrev,CXXTokenTypeIdentifier) + ) + { + CXX_DEBUG_LEAVE_TEXT("Parenthesis seems to define an __ARGS style prototype"); + return bGotVariable; + } + + if( + cxxTokenTypeIs(t->pPrev,CXXTokenTypeIdentifier) && + ( + (eScopeType == CXXScopeTypeNamespace) || + (eScopeType == CXXScopeTypeFunction) + ) && + cxxParserCurrentLanguageIsCPP() && + cxxParserTokenChainLooksLikeConstructorParameterSet(t->pChain) + ) + { + // ok, *might* be variable instantiation + pIdentifier = t->pPrev; + pTokenBefore = pIdentifier->pPrev; + goto got_identifier; + } + + if ( + // Variable declaration in parenthesis. + // We handle only cases that are clearly variable declarations. + // We leave alone the stuff that may also be a function call. + // type *(var) + // type &(var) + // type const (var) + // type (var[]) + ( + cxxTokenTypeIsOneOf(t->pPrev,CXXTokenTypeStar | CXXTokenTypeAnd) || + ( + cxxTokenTypeIs(t->pPrev,CXXTokenTypeKeyword) && + cxxKeywordMayBePartOfTypeName(t->pPrev->eKeyword) && + // but not decltype(var)! 
+ (t->pPrev->eKeyword != CXXKeywordDECLTYPE) + ) + ) && + ( + // Inspect the inside parenthesis + pIdentifier = cxxParserVardefInParenthesis(cxxTokenChainFirst(t->pChain), 0) + ) + ) + { + CXX_DEBUG_LEAVE_TEXT("Parenthesis seems to surround a variable definition"); + pTokenBefore = t->pPrev; + t = t->pNext; + goto got_identifier; + } + + if( + cxxTokenIsKeyword(t->pPrev,CXXKeywordDECLTYPE) && + t->pNext + ) + { + // part of typename -> skip ahead + CXX_DEBUG_LEAVE_TEXT("Parenthesis follows decltype(), skipping"); + t = t->pNext; + continue; + } + + CXX_DEBUG_LEAVE_TEXT("No recognizable parenthesis form for a variable"); + return bGotVariable; + break; + case CXXTokenTypeBracketChain: + if( + cxxTokenTypeIs(t->pPrev,CXXTokenTypeIdentifier) && + cxxParserCurrentLanguageIsCPP() && + cxxParserTokenChainLooksLikeConstructorParameterSet(t->pChain) + ) + { + // ok, *might* be new C++ style variable initialization + pIdentifier = t->pPrev; + pTokenBefore = pIdentifier->pPrev; + goto got_identifier; + } + + CXX_DEBUG_LEAVE_TEXT("Bracket chain that doesn't look like a C++ var init"); + return bGotVariable; + break; + case CXXTokenTypeSingleColon: + // check for bitfield + if( + t->pNext && + cxxTokenTypeIsOneOf(t->pNext,CXXTokenTypeNumber | CXXTokenTypeIdentifier) + ) + { + // ok, looks like a bit field + if( + cxxTokenTypeIs(t->pNext,CXXTokenTypeNumber) && + t->pNext->pNext && + cxxTokenTypeIsOneOf( + t->pNext->pNext, + CXXTokenTypeComma | CXXTokenTypeSemicolon | + CXXTokenTypeAssignment + ) + ) + { + // keep bitfield width specification as part of type + pIdentifier = t->pPrev; + pTokenBefore = pIdentifier->pPrev; + t = t->pNext->pNext; + } else { + // Too complex: strip width specification (the best we can do) + pIdentifier = t->pPrev; + pTokenBefore = pIdentifier->pPrev; + } + + goto got_identifier; + + } + + CXX_DEBUG_LEAVE_TEXT("Single colon that doesn't look like a bit field"); + return bGotVariable; + break; + case CXXTokenTypeSquareParenthesisChain: + // check for 
array + // Keep the array specifier as part of type + + pIdentifier = t->pPrev; + pTokenBefore = pIdentifier->pPrev; + + while(t->pNext && cxxTokenTypeIs(t->pNext,CXXTokenTypeSquareParenthesisChain)) + t = t->pNext; + + if(!t->pNext) + { + CXX_DEBUG_LEAVE_TEXT("No token after []"); + return bGotVariable; + } + + // skip identifies attached to the array as attributes + while(t->pNext && cxxTokenTypeIs(t->pNext, CXXTokenTypeIdentifier)) + { + t = t->pNext; + // skip macro argument(s) + if (t->pNext && cxxTokenTypeIs(t->pNext, CXXTokenTypeParenthesisChain)) + t = t->pNext; + } + + if (!t->pNext) + { + CXX_DEBUG_LEAVE_TEXT("No token after attribute(s) attached to []"); + return bGotVariable; + } + + if(!cxxTokenTypeIsOneOf( + t->pNext, + CXXTokenTypeComma | CXXTokenTypeSemicolon | + CXXTokenTypeAssignment | CXXTokenTypeBracketChain + )) + { + CXX_DEBUG_LEAVE_TEXT("No comma, semicolon, = or {} after [] (\"%s\", %s)", + vStringValue (t->pNext->pszWord), + cxxDebugTypeDecode (t->pNext->eType)); + return bGotVariable; + } + + t = t->pNext; + break; + default: + // Must be identifier + if(t->pPrev->eType != CXXTokenTypeIdentifier) + { + CXX_DEBUG_LEAVE_TEXT("No identifier before the notable token"); + return bGotVariable; + } + + pIdentifier = t->pPrev; + pTokenBefore = pIdentifier->pPrev; + break; + } + +got_identifier: + CXX_DEBUG_ASSERT(pIdentifier,"We should have found an identifier here"); + + if(!pTokenBefore) + { + CXX_DEBUG_LEAVE_TEXT("Identifier not preceded by a type"); + // Here we can handle yet another one of the gazillion of special cases. 
+ // + // MACRO(whatever) variable; + // + if( + cxxTokenTypeIs(t,CXXTokenTypeParenthesisChain) && + t->pNext && + cxxTokenTypeIs(t->pNext,CXXTokenTypeIdentifier) && + t->pNext->pNext && + cxxTokenTypeIs(t->pNext->pNext,CXXTokenTypeSemicolon) + ) + { + CXX_DEBUG_PRINT("Looks like the 'MACRO(whatever) variable;' special case"); + pIdentifier = t->pNext; + pTokenBefore = t; + t = t->pNext->pNext; + } else { + return bGotVariable; + } + } + + CXXToken * pScopeEnd = pTokenBefore->pNext; + CXXToken * pScopeStart = NULL; + + // Skip back to the beginning of the scope, if any + while(pTokenBefore->eType == CXXTokenTypeMultipleColons) + { + if(!cxxParserCurrentLanguageIsCPP()) + { + CXX_DEBUG_LEAVE_TEXT("Syntax error: found multiple colons in C language"); + return false; + } + + pTokenBefore = pTokenBefore->pPrev; + if(!pTokenBefore) + { + CXX_DEBUG_LEAVE_TEXT( + "Identifier preceded by multiple colons " \ + "but not preceded by a type" + ); + return bGotVariable; + } + + if(cxxTokenTypeIs(pTokenBefore,CXXTokenTypeGreaterThanSign)) + { + CXXToken * pAux = cxxTokenChainSkipBackToStartOfTemplateAngleBracket(pTokenBefore); + if((!pAux) || (!pAux->pPrev)) + { + CXX_DEBUG_LEAVE_TEXT( + "Identifier preceded by multiple colons " \ + "and by a >, but failed to skip back to starting <" + ); + return bGotVariable; + } + + pTokenBefore = pAux->pPrev; + } + + if(!cxxTokenTypeIs(pTokenBefore,CXXTokenTypeIdentifier)) + { + CXX_DEBUG_LEAVE_TEXT( + "Identifier preceded by multiple colons " \ + "with probable syntax error" + ); + return bGotVariable; + } + + pScopeStart = pTokenBefore; + + pTokenBefore = pTokenBefore->pPrev; + if(!pTokenBefore) + { + CXX_DEBUG_LEAVE_TEXT( + "Identifier preceded by multiple colons " \ + "but not preceded by a type" + ); + return bGotVariable; + } + } + + if(!bGotVariable) + { + // now pTokenBefore should be part of a type (either the variable type or return + // type of a function in case of a function pointer) + if(!cxxTokenTypeIsOneOf( + pTokenBefore, + 
CXXTokenTypeIdentifier | CXXTokenTypeKeyword | + CXXTokenTypeStar | CXXTokenTypeAnd + )) + { + if(cxxTokenTypeIs(pTokenBefore,CXXTokenTypeGreaterThanSign)) + { + // the < > must be balanced + CXXToken * t2 = pTokenBefore->pPrev; + int iLevel = 1; + while(t2) + { + if(cxxTokenTypeIs(t2,CXXTokenTypeGreaterThanSign)) + iLevel++; + else if(cxxTokenTypeIs(t2,CXXTokenTypeSmallerThanSign)) + iLevel--; + t2 = t2->pPrev; + } + if(iLevel != 0) + { + CXX_DEBUG_LEAVE_TEXT( + "The > token is unbalanced and does not " \ + "seem to be part of type name" + ); + return bGotVariable; + } + } else if( + // Possibly one of: + // MACRO(whatever) variable; + // decltype(whatever) variable; + cxxTokenTypeIs(pTokenBefore,CXXTokenTypeParenthesisChain) && + pTokenBefore->pPrev && + !pTokenBefore->pPrev->pPrev && + ( + // macro + cxxTokenTypeIs(pTokenBefore->pPrev,CXXTokenTypeIdentifier) || + // decltype + cxxTokenIsKeyword(pTokenBefore->pPrev,CXXKeywordDECLTYPE) + ) + ) + { + CXX_DEBUG_PRINT("Type seems to be hidden in a macro or defined by decltype"); + } else { + CXX_DEBUG_LEAVE_TEXT( + "Token '%s' of type 0x%02x does not seem " \ + "to be part of type name", + vStringValue(pTokenBefore->pszWord), + pTokenBefore->eType + ); + return bGotVariable; + } + } + + bGotVariable = true; + } + + // Goodie. We have an identifier and almost certainly a type here. + + // From now on we start destroying the chain: mark the return value as true + // so nobody else will try to extract stuff from it + + int iScopesPushed = 0; + + if(pScopeStart) + { + // Push the scopes and remove them from the chain so they are not in the way + while(pScopeStart != pScopeEnd) + { + // This is the scope id START. It might contain + // also other tokens like in ...::A::... + + CXXToken * pPartEnd = cxxTokenChainNextTokenOfType( + pScopeStart, + CXXTokenTypeMultipleColons + ); + CXX_DEBUG_ASSERT( + pPartEnd, + "We should have found multiple colons here!" 
+ ); + CXX_DEBUG_ASSERT( + pPartEnd->pPrev, + "And there should be a previous token too" + ); + + CXXToken * pScopeId = cxxTokenChainExtractRange(pScopeStart,pPartEnd->pPrev,0); + cxxScopePush( + pScopeId, + CXXScopeTypeClass, + // WARNING: We don't know if it's really a class! (FIXME?) + CXXScopeAccessUnknown + ); + + CXXToken * pAux = pPartEnd->pNext; + + cxxTokenChainDestroyRange(pChain,pScopeStart,pPartEnd); + + pScopeStart = pAux; + + iScopesPushed++; + } + } + + CXX_DEBUG_ASSERT(t != pIdentifier,"This should not happen"); + + // remove the identifier + cxxTokenChainTakeRecursive(pChain,pIdentifier); + + bool bGotTemplate = g_cxx.pTemplateTokenChain && + (g_cxx.pTemplateTokenChain->iCount > 0) && + cxxParserCurrentLanguageIsCPP(); + + bool bKnRStyleParameters = + (uFlags & CXXExtractVariableDeclarationsKnRStyleParameters); + + tagEntryInfo * tag = cxxTagBegin( + bKnRStyleParameters ? + CXXTagKindPARAMETER : + ((g_cxx.uKeywordState & CXXParserKeywordStateSeenExtern) ? + CXXTagKindEXTERNVAR : cxxScopeGetVariableKind()), + pIdentifier + ); + + if(tag) + { + // Fix square parentheses: if they contain something that is not a numeric + // constant then empty them up + CXXToken * pPartOfType = t->pPrev; // identifier has been removed + CXX_DEBUG_ASSERT(pPartOfType,"There should be a part of type name here"); + + while(pPartOfType && cxxTokenTypeIs(pPartOfType,CXXTokenTypeSquareParenthesisChain)) + { + CXXTokenChain * pAuxChain = pPartOfType->pChain; + + if(pAuxChain->iCount > 2) + { + if( + (pAuxChain->iCount > 3) || + (!cxxTokenTypeIs(cxxTokenChainAt(pAuxChain,1),CXXTokenTypeNumber)) + ) + { + cxxTokenChainDestroyRange( + pAuxChain, + cxxTokenChainFirst(pAuxChain)->pNext, + cxxTokenChainLast(pAuxChain)->pPrev + ); + } + } + pPartOfType = pPartOfType->pPrev; + } + + // anything that remains is part of type + CXXToken * pTypeToken = cxxTagCheckAndSetTypeField(cxxTokenChainFirst(pChain),t->pPrev); + + tag->isFileScope = bKnRStyleParameters ? 
+ true : + ( + ( + (eScopeType == CXXScopeTypeNamespace) && + (g_cxx.uKeywordState & CXXParserKeywordStateSeenStatic) && + (!isInputHeaderFile()) + ) || + // locals are always hidden + (eScopeType == CXXScopeTypeFunction) || + ( + (eScopeType != CXXScopeTypeNamespace) && + (eScopeType != CXXScopeTypeFunction) && + (!isInputHeaderFile()) + ) + ); + + vString * pszProperties = NULL; + + if(cxxTagFieldEnabled(CXXTagFieldProperties)) + { + unsigned int uProperties = 0; + + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenStatic) + uProperties |= CXXTagPropertyStatic; + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenExtern) + uProperties |= CXXTagPropertyExtern; + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenMutable) + uProperties |= CXXTagPropertyMutable; + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenInline) + uProperties |= CXXTagPropertyInline; + if(g_cxx.uKeywordState & CXXParserKeywordStateSeenAttributeDeprecated) + uProperties |= CXXTagPropertyDeprecated; + // Volatile is part of the type, so we don't mark it as a property + //if(g_cxx.uKeywordState & CXXParserKeywordStateSeenVolatile) + // uProperties |= CXXTagPropertyVolatile; + + pszProperties = cxxTagSetProperties(uProperties); + } + + if(bGotTemplate) + cxxTagHandleTemplateFields(); + + iCorkIndex = cxxTagCommit(&iCorkIndexFQ); + + if(pTypeToken) + cxxTokenDestroy(pTypeToken); + if(pszProperties) + vStringDelete(pszProperties); + } + + if( + bGotTemplate && + cxxTagKindEnabled(CXXTagCPPKindTEMPLATEPARAM) + ) + { + cxxScopePush(pIdentifier,CXXScopeTypeVariable,CXXScopeAccessPublic); + cxxParserEmitTemplateParameterTags(); + cxxScopePop(); + } else { + cxxTokenDestroy(pIdentifier); + } + + while(iScopesPushed > 0) + { + cxxScopePop(); + iScopesPushed--; + } + + if(!t) + { + CXX_DEBUG_LEAVE_TEXT("Nothing more"); + return bGotVariable; + } + + if(!cxxTokenTypeIsOneOf( + t, + CXXTokenTypeComma | CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket + )) + { + // look for it, but also check for "<" 
signs: these usually indicate an uncondensed + // template. We give up on them as they are too complicated in this context. + // It's rather unlikely to have multiple declarations with templates after the first one + t = cxxTokenChainNextTokenOfType( + t, + CXXTokenTypeComma | CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket | + CXXTokenTypeSmallerThanSign + ); + if(!t) + { + CXX_DEBUG_LEAVE_TEXT("Didn't find a comma, semicolon or {"); + return bGotVariable; + } + if(cxxTokenTypeIs(t,CXXTokenTypeSmallerThanSign)) + { + CXX_DEBUG_LEAVE_TEXT("Found '<': probably a template on the right side of declaration"); + return bGotVariable; + } + } + + if(cxxTokenTypeIsOneOf(t,CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket)) + { + if (iCorkIndex != CORK_NIL) + { + cxxParserSetEndLineForTagInCorkQueue (iCorkIndex, t->iLineNumber); + iCorkIndex = CORK_NIL; + if(iCorkIndexFQ != CORK_NIL) + { + cxxParserSetEndLineForTagInCorkQueue (iCorkIndexFQ, t->iLineNumber); + iCorkIndexFQ = CORK_NIL; + } + } + CXX_DEBUG_LEAVE_TEXT("Noting else"); + return bGotVariable; + } + + // Comma. Might have other declarations here. 
+ if (iCorkIndex != CORK_NIL) + { + cxxParserSetEndLineForTagInCorkQueue (iCorkIndex, t->iLineNumber); + iCorkIndex = CORK_NIL; + if(iCorkIndexFQ != CORK_NIL) + { + cxxParserSetEndLineForTagInCorkQueue (iCorkIndexFQ, t->iLineNumber); + iCorkIndexFQ = CORK_NIL; + } + } + + CXX_DEBUG_PRINT("At a comma, might have other declarations here"); + + t = t->pNext; + + CXX_DEBUG_ASSERT(t,"There should be something after the comma here!"); + + if(!pRemoveStart) + { + CXX_DEBUG_LEAVE_TEXT("Could not properly fix type name for next token: stopping here"); + return bGotVariable; + } + + // *, &, && and similar stuff do not "propagate" to the next type + while( + pRemoveStart->pPrev && + cxxTokenTypeIsOneOf( + pRemoveStart->pPrev, + CXXTokenTypeStar | CXXTokenTypeAnd | + CXXTokenTypeMultipleAnds | CXXTokenTypeSquareParenthesisChain + ) + ) + pRemoveStart = pRemoveStart->pPrev; + + cxxTokenChainDestroyRange(pChain,pRemoveStart,t->pPrev); + } + + CXX_DEBUG_LEAVE_TEXT("Reached end"); + return bGotVariable; +} + +static bool isConstVolatileOrStar (CXXToken *t, void *data) +{ + if (cxxTokenTypeIs (t, CXXTokenTypeStar)) + return true; + + if (cxxTokenTypeIs (t, CXXTokenTypeKeyword) && + ((t->eKeyword == CXXKeywordCONST) || + (t->eKeyword == CXXKeywordVOLATILE))) + return true; + + return false; +} + +static CXXToken * cxxParserVardefInParenthesis (CXXToken *pToken, int depth) +{ + // ( const volatile * foo [] ) <- ended with CXXTokenTypeSquareParenthesisChain + // ( const volatile * foo ) <- ended with CXXTokenTypeClosingParenthesis + // ((const volatile * foo)) <- depth > 0 + + CXXToken *t; + + t = cxxTokenChainNextTokenNotOfGeneric (pToken, isConstVolatileOrStar, NULL); + if (!t) + return NULL; + + if (cxxTokenTypeIs(t, CXXTokenTypeParenthesisChain)) + return cxxParserVardefInParenthesis (cxxTokenChainFirst(t->pChain), + depth + 1); + else if (cxxTokenTypeIs (t, CXXTokenTypeIdentifier) && + ((t->pNext && + (cxxTokenTypeIsOneOf (t->pNext, + CXXTokenTypeSquareParenthesisChain | 
CXXTokenTypeClosingParenthesis)))|| + (t->pNext && depth > 0))) + return t; + return NULL; +} diff --git a/ctags/parsers/cxx/cxx_qtmoc.c b/ctags/parsers/cxx/cxx_qtmoc.c new file mode 100644 index 0000000000..d02f227e29 --- /dev/null +++ b/ctags/parsers/cxx/cxx_qtmoc.c @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2017, Red Hat, Inc. +* Copyright (c) 2017, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for handling Qt Moc tokens +*/ + +#include "general.h" + +#include "types.h" + +#include "debug.h" +#include "cxx_debug.h" + +#include "cxx_scope.h" +#include "cxx_parser_internal.h" + +#include "cxx_subparser.h" + +#include "keyword.h" +#include "read.h" + +#include + + +typedef enum { + K_SLOT, + K_SIGNAL, + K_PROPERTY, +} qtMocKind; + +static kindDefinition QtMocKinds [] = { + { true, 's', "slot", "slots" }, + { true, 'S', "signal", "signals" }, + { true, 'p', "property", "properties" }, +}; + +enum { + KEYWORD_QOBJECT, + KEYWORD_SIGNALS, + KEYWORD_SLOTS, + KEYWORD_PROPERTY, +}; + +typedef int keywordId; /* to allow KEYWORD_NONE */ + +static const keywordTable QtMocKeywordTable[] = { + /* keyword keyword ID */ + { "Q_OBJECT", KEYWORD_QOBJECT }, + { "Q_SIGNALS", KEYWORD_SIGNALS }, + { "signals", KEYWORD_SIGNALS }, + { "Q_SLOTS", KEYWORD_SLOTS }, + { "slots", KEYWORD_SLOTS }, + { "Q_PROPERTY", KEYWORD_PROPERTY }, +}; + +enum QtMocMemberMarker +{ + QtMocMemberMarkerNone = 0, + QtMocMemberMarkerSlot, + QtMocMemberMarkerSignal, +}; + +struct sQtMocSubparser { + struct sCxxSubparser cxx; + int iBlockDepth; + int iDepthOfQtClass; + enum QtMocMemberMarker eMemberMarker; +}; + +static langType Lang_QtMoc; + +static bool cxxParserSkipToClosingParenthesisOrEOF(void) +{ + if(cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeClosingParenthesis | CXXTokenTypeEOF)) + return true; + + return 
cxxParserParseUpToOneOf(CXXTokenTypeClosingParenthesis | CXXTokenTypeEOF, + false); +} + +static void qtMocMakeTagForProperty (CXXToken * pToken, const char *pszType) +{ + tagEntryInfo tag; + + initTagEntry(&tag, + vStringValue(pToken->pszWord), + K_PROPERTY); + tag.lineNumber = pToken->iLineNumber; + tag.filePosition = pToken->oFilePosition; + tag.isFileScope = false; + + if(!cxxScopeIsGlobal()) + { + tag.extensionFields.scopeLangType = getNamedLanguage ("C++", 0); /* ??? */ + tag.extensionFields.scopeKindIndex = cxxScopeGetKind(); + tag.extensionFields.scopeName = cxxScopeGetFullName(); + } + + tag.extensionFields.typeRef[0] = "typename"; + tag.extensionFields.typeRef[1] = pszType; + + makeTagEntry(&tag); +} + +static bool qtMocParseProperty(void) +{ + char *pszPropType; + + CXX_DEBUG_ENTER(); + + if(!cxxParserParseNextToken()) + { + CXX_DEBUG_LEAVE_TEXT("EOF in cxxParserParseNextToken"); + return false; + } + if (!cxxTokenTypeIs(g_cxx.pToken, CXXTokenTypeOpeningParenthesis)) + { + CXX_DEBUG_LEAVE_TEXT("Found no Opening Parenthesis after Q_PROPERTY"); + return false; + } + + if (!cxxParserParseNextToken()) + { + CXX_DEBUG_LEAVE_TEXT("EOF in cxxParserParseNextToken"); + return false; + } + if (!(cxxTokenTypeIs(g_cxx.pToken, CXXTokenTypeIdentifier) + || (cxxTokenTypeIs(g_cxx.pToken, CXXTokenTypeKeyword) + && cxxKeywordMayBePartOfTypeName (g_cxx.pToken->eKeyword)))) + + { + CXX_DEBUG_LEAVE_TEXT("Found no identifier after Q_PROPERTY("); + + cxxParserSkipToClosingParenthesisOrEOF (); + return false; + } + + pszPropType = vStringStrdup (g_cxx.pToken->pszWord); + if(!cxxParserParseNextToken()) + { + CXX_DEBUG_LEAVE_TEXT("EOF in cxxParserParseNextToken"); + eFree (pszPropType); + return false; + } + + if (!cxxTokenTypeIs(g_cxx.pToken, CXXTokenTypeIdentifier)) + { + CXX_DEBUG_LEAVE_TEXT("Found no identifier after Q_PROPERTY(%s", pszPropType); + cxxParserSkipToClosingParenthesisOrEOF (); + eFree (pszPropType); + return false; + } + + qtMocMakeTagForProperty (g_cxx.pToken, 
pszPropType); + + eFree (pszPropType); + cxxParserSkipToClosingParenthesisOrEOF (); + + CXX_DEBUG_LEAVE(); + return true; +} + +static void inputStart(subparser *s) +{ + struct sQtMocSubparser *pQtMoc = (struct sQtMocSubparser*)s; + + pQtMoc->iBlockDepth = 0; + pQtMoc->iDepthOfQtClass = 0; + pQtMoc->eMemberMarker = QtMocMemberMarkerNone; +} + +static void makeTagEntryNotify (subparser *s, const tagEntryInfo *entry, int corkIndex) +{ + struct sQtMocSubparser *pQtMoc = (struct sQtMocSubparser*)s; + + if (pQtMoc->iDepthOfQtClass == 0) + return; + + if ((pQtMoc->eMemberMarker != QtMocMemberMarkerNone) && + entry->kindIndex == CXXTagKindPROTOTYPE) + { + tagEntryInfo parasiteTag = *entry; + parasiteTag.langType = getInputLanguage (); + parasiteTag.kindIndex = (pQtMoc->eMemberMarker == QtMocMemberMarkerSlot) + ? K_SLOT + : K_SIGNAL; + + parasiteTag.extensionFields.scopeLangType = entry->langType; + makeTagEntry (&parasiteTag); + } +} + +static void enterBlockNotify (struct sCxxSubparser *pSubparser) +{ + struct sQtMocSubparser *pQtMoc = (struct sQtMocSubparser *)pSubparser; + + pQtMoc->iBlockDepth++; +} + +static void leaveBlockNotify (struct sCxxSubparser *pSubparser) +{ + struct sQtMocSubparser *pQtMoc = (struct sQtMocSubparser *)pSubparser; + + if (pQtMoc->iDepthOfQtClass == pQtMoc->iBlockDepth) + pQtMoc->iDepthOfQtClass = 0; + + pQtMoc->iBlockDepth--; +} + +static bool newIdentifierAsHeadOfMemberNotify (struct sCxxSubparser *pSubparser, + CXXToken *pToken) +{ + struct sQtMocSubparser *pQtMoc = (struct sQtMocSubparser *)pSubparser; + keywordId keyword = lookupKeyword (vStringValue (pToken->pszWord), Lang_QtMoc); + + if (keyword == KEYWORD_QOBJECT) + { + if (pQtMoc->iDepthOfQtClass == 0) + pQtMoc->iDepthOfQtClass = pQtMoc->iBlockDepth; + CXX_DEBUG_PRINT("Found \"Q_OBJECT\" Qt Object Marker in depth: %d", + pQtMoc->iDepthOfQtClass); + return true; + } + return false; +} + +static bool unknownIdentifierInClassNotify (struct sCxxSubparser *pSubparser, + CXXToken *pToken) +{ + 
struct sQtMocSubparser *pQtMoc = (struct sQtMocSubparser *)pSubparser; + + if (pQtMoc->iDepthOfQtClass == 0) + return false; + + keywordId keyword = lookupKeyword (vStringValue (pToken->pszWord), Lang_QtMoc); + + switch (keyword) + { + case KEYWORD_SIGNALS: + CXX_DEBUG_PRINT("Found \"signals\" QtMoc Keyword"); + pToken->eType = CXXTokenTypeKeyword; + pToken->eKeyword = CXXKeywordPUBLIC; + cxxParserParseAccessSpecifier(); + pQtMoc->eMemberMarker = QtMocMemberMarkerSignal; + return true; + case KEYWORD_SLOTS: + CXX_DEBUG_PRINT("Found \"slots\" QtMoc Keyword"); + pToken->eType = CXXTokenTypeKeyword; + g_cxx.pToken->eKeyword = CXXKeywordPUBLIC; /* ??? */ + cxxParserParseAccessSpecifier(); + pQtMoc->eMemberMarker = QtMocMemberMarkerSlot; + return true; + case KEYWORD_PROPERTY: + CXX_DEBUG_PRINT("Found \"Q_PROPERTY\" QtMoc Keyword"); + qtMocParseProperty (); + return true; + default: + break; + } + + return false; +} + +static bool parseAccessSpecifierNotify(struct sCxxSubparser *pSubparser) +{ + struct sQtMocSubparser *pQtMoc = (struct sQtMocSubparser *)pSubparser; + + if (pQtMoc->iBlockDepth > 0) + { + CXX_DEBUG_PRINT("Reset QtMoc member marker state"); + pQtMoc->eMemberMarker = QtMocMemberMarkerNone; + return true; + } + return false; +} + +static void foundExtraIdentifierAsAccessSpecifier(struct sCxxSubparser *pSubparser, + CXXToken *pToken) +{ + struct sQtMocSubparser *pQtMoc = (struct sQtMocSubparser *)pSubparser; + keywordId keyword = lookupKeyword (vStringValue (pToken->pszWord), Lang_QtMoc); + + if (keyword == KEYWORD_SLOTS) + { + CXX_DEBUG_PRINT("Found \"slots\" QtMoc Keyword"); + pQtMoc->eMemberMarker = QtMocMemberMarkerSlot; + } +} + +static void findQtMocTags(void) +{ + scheduleRunningBaseparser (0); +} + +static void initialize (langType lang) +{ + Lang_QtMoc = lang; +} + +extern parserDefinition* QtMocParser (void) +{ + parserDefinition* const def = parserNew("QtMoc"); + + static struct sQtMocSubparser qtMocSubparser = { + .cxx = { + .subparser = { + 
.direction = SUBPARSER_BI_DIRECTION, + .inputStart = inputStart, + .makeTagEntryNotify = makeTagEntryNotify, + }, + .enterBlockNotify = enterBlockNotify, + .leaveBlockNotify = leaveBlockNotify, + .newIdentifierAsHeadOfMemberNotify = newIdentifierAsHeadOfMemberNotify, + .unknownIdentifierInClassNotify = unknownIdentifierInClassNotify, + .parseAccessSpecifierNotify = parseAccessSpecifierNotify, + .foundExtraIdentifierAsAccessSpecifier = foundExtraIdentifierAsAccessSpecifier, + } + /* The rest fields are initialized in inputStart(). */ + }; + static parserDependency dependencies [] = { + [0] = { DEPTYPE_SUBPARSER, "C++", &qtMocSubparser }, + }; + + def->dependencies = dependencies; + def->dependencyCount = ARRAY_SIZE (dependencies); + + def->kindTable = QtMocKinds; + def->kindCount = ARRAY_SIZE(QtMocKinds); + + def->keywordTable = QtMocKeywordTable; + def->keywordCount = ARRAY_SIZE (QtMocKeywordTable); + + def->parser = findQtMocTags; + def->initialize = initialize; + def->useCork = CORK_QUEUE; + + return def; +} diff --git a/ctags/parsers/cxx/cxx_scope.c b/ctags/parsers/cxx/cxx_scope.c new file mode 100644 index 0000000000..f370c6efce --- /dev/null +++ b/ctags/parsers/cxx/cxx_scope.c @@ -0,0 +1,276 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "cxx_scope.h" + +#include "vstring.h" +#include "debug.h" + +#include "cxx_tag.h" +#include "cxx_debug.h" +#include "cxx_token_chain.h" + +#ifdef CXX_DO_DEBUGGING +#include "cxx_parser_internal.h" +#endif + +// The tokens defining current scope +static CXXTokenChain * g_pScope = NULL; +static vString * g_szScopeName = NULL; +static bool g_bScopeNameDirty = true; + +void cxxScopeInit(void) +{ + g_pScope = cxxTokenChainCreate(); +} + +void cxxScopeDone(void) +{ + cxxTokenChainDestroy(g_pScope); + if(g_szScopeName) + { + vStringDelete(g_szScopeName); + g_szScopeName = NULL; + } +} + +void cxxScopeClear(void) +{ + if(g_pScope) + cxxTokenChainClear(g_pScope); + if(g_szScopeName) + { + vStringDelete(g_szScopeName); + g_szScopeName = NULL; + } +} + +bool cxxScopeIsGlobal(void) +{ + return (g_pScope->iCount < 1); +} + +enum CXXScopeType cxxScopeGetType(void) +{ + if(g_pScope->iCount < 1) + return CXXScopeTypeNamespace; + return (enum CXXScopeType)g_pScope->pTail->uInternalScopeType; +} + +unsigned int cxxScopeGetVariableKind(void) +{ + switch(cxxScopeGetType()) + { + case CXXScopeTypeClass: + case CXXScopeTypeUnion: + case CXXScopeTypeStruct: + return CXXTagKindMEMBER; + break; + case CXXScopeTypeFunction: + return CXXTagKindLOCAL; + break; + //case CXXScopeTypePrototype: + //case CXXScopeTypeNamespace: + //case CXXScopeTypeEnum: + default: + // fall down + break; + } + return CXXTagKindVARIABLE; +} + + +unsigned int cxxScopeGetKind(void) +{ + CXX_DEBUG_ASSERT(g_pScope->iCount >= 0,"Must not be called in global scope"); + + switch(g_pScope->pTail->uInternalScopeType) + { + case CXXScopeTypeNamespace: + CXX_DEBUG_ASSERT(cxxParserCurrentLanguageIsCPP(),"C++ only"); + return CXXTagCPPKindNAMESPACE; + case CXXScopeTypeClass: + CXX_DEBUG_ASSERT(cxxParserCurrentLanguageIsCPP(),"C++ only"); + return CXXTagCPPKindCLASS; + case CXXScopeTypeEnum: + return CXXTagKindENUM; + case 
CXXScopeTypeFunction: + return CXXTagKindFUNCTION; + case CXXScopeTypePrototype: + return CXXTagKindPROTOTYPE; + case CXXScopeTypeStruct: + return CXXTagKindSTRUCT; + case CXXScopeTypeUnion: + return CXXTagKindUNION; + case CXXScopeTypeVariable: + return CXXTagKindVARIABLE; + case CXXScopeTypeTypedef: + return CXXTagKindTYPEDEF; + default: + CXX_DEBUG_ASSERT(false,"Unhandled scope type!"); + break; + } + + return CXXTagKindFUNCTION; +} + + +enum CXXScopeAccess cxxScopeGetAccess(void) +{ + if(g_pScope->iCount < 1) + return CXXScopeAccessUnknown; + return (enum CXXScopeAccess)g_pScope->pTail->uInternalScopeAccess; +} + +const char * cxxScopeGetName(void) +{ + if(g_pScope->iCount < 1) + return NULL; + return vStringValue(g_pScope->pTail->pszWord); +} + +int cxxScopeGetSize(void) +{ + return g_pScope->iCount; +} + +const char * cxxScopeGetFullName(void) +{ + if(!g_bScopeNameDirty) + return g_szScopeName ? g_szScopeName->buffer : NULL; + + if(g_pScope->iCount < 1) + { + g_bScopeNameDirty = false; + return NULL; + } + + if(g_szScopeName) + vStringClear(g_szScopeName); + else + g_szScopeName = vStringNew(); + + cxxTokenChainJoinInString( + g_pScope, + g_szScopeName, + "::", + CXXTokenChainJoinNoTrailingSpaces + ); + + g_bScopeNameDirty = false; + return g_szScopeName->buffer; +} + +vString * cxxScopeGetFullNameAsString(void) +{ + vString * ret; + + if(!g_bScopeNameDirty) + { + ret = g_szScopeName; + g_szScopeName = NULL; + g_bScopeNameDirty = true; + return ret; + } + + if(g_pScope->iCount < 1) + return NULL; + + if(g_szScopeName) + vStringClear(g_szScopeName); + else + g_szScopeName = vStringNew(); + + cxxTokenChainJoinInString( + g_pScope, + g_szScopeName, + "::", + CXXTokenChainJoinNoTrailingSpaces + ); + + ret = g_szScopeName; + g_szScopeName = NULL; + return ret; +} + +vString * cxxScopeGetFullNameExceptLastComponentAsString(void) +{ + if(g_pScope->iCount < 2) + return NULL; + + return cxxTokenChainJoinRange( + g_pScope->pHead, + g_pScope->pTail->pPrev, + "::", + 
CXXTokenChainJoinNoTrailingSpaces + ); +} + + +void cxxScopeSetAccess(enum CXXScopeAccess eAccess) +{ + if(g_pScope->iCount < 1) + return; // warning? + g_pScope->pTail->uInternalScopeAccess = (unsigned char)eAccess; +} + +void cxxScopePushTop(CXXToken * t) +{ + CXX_DEBUG_ASSERT( + t->eType == CXXTokenTypeIdentifier, + "The scope name must be an identifier" + ); + CXX_DEBUG_ASSERT( + t->pszWord, + "The scope name should have a text" + ); + + cxxTokenChainAppend(g_pScope,t); + g_bScopeNameDirty = true; + +#ifdef CXX_DO_DEBUGGING + const char * szScopeName = cxxScopeGetFullName(); + + CXX_DEBUG_PRINT("Pushed scope: '%s'",szScopeName ? szScopeName : ""); +#endif +} + +CXXToken * cxxScopeTakeTop(void) +{ + CXX_DEBUG_ASSERT( + g_pScope->iCount > 0, + "When popping as scope there must be a scope to pop" + ); + + CXXToken * t = cxxTokenChainTakeLast(g_pScope); + g_bScopeNameDirty = true; + +#ifdef CXX_DO_DEBUGGING + const char * szScopeName = cxxScopeGetFullName(); + + CXX_DEBUG_PRINT("Popped scope: '%s'",szScopeName ? szScopeName : ""); +#endif + return t; +} + +void cxxScopePush( + CXXToken * t, + enum CXXScopeType eScopeType, + enum CXXScopeAccess eInitialAccess + ) +{ + t->uInternalScopeType = (unsigned char)eScopeType; + t->uInternalScopeAccess = (unsigned char)eInitialAccess; + cxxScopePushTop(t); +} + +void cxxScopePop(void) +{ + cxxTokenDestroy(cxxScopeTakeTop()); +} diff --git a/ctags/parsers/cxx/cxx_scope.h b/ctags/parsers/cxx/cxx_scope.h new file mode 100644 index 0000000000..dc7cddd20b --- /dev/null +++ b/ctags/parsers/cxx/cxx_scope.h @@ -0,0 +1,86 @@ +#ifndef ctags_cxx_scope_h_ +#define ctags_cxx_scope_h_ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" + +#include "cxx_token.h" + +enum CXXScopeAccess +{ + CXXScopeAccessUnknown, + CXXScopeAccessPublic, + CXXScopeAccessPrivate, + CXXScopeAccessProtected +}; + +enum CXXScopeType +{ + CXXScopeTypeFunction, + CXXScopeTypeNamespace, + CXXScopeTypeClass, + CXXScopeTypeEnum, + CXXScopeTypeUnion, + CXXScopeTypeStruct, + CXXScopeTypeVariable, // template variables, mainly + CXXScopeTypePrototype, + CXXScopeTypeTypedef, // template variables used in "using A = B" + CXXScopeTypeLAST +}; + +void cxxScopeInit(void); +void cxxScopeDone(void); +void cxxScopeClear(void); + +// Returns the full current scope name or NULL if there +// is no scope currently. +const char * cxxScopeGetFullName(void); + +// Returns the current scope name or NULL if there is no +// scope currently. This name does not include namespaces so +// it is always a single identifier. +const char * cxxScopeGetName(void); + +// Return the number of components of the scope name. +int cxxScopeGetSize(void); + +// Returns the current scope name or NULL if there is no scope +// currently. Ownership of the string is transferred. +vString * cxxScopeGetFullNameAsString(void); + +// Returns the current scope name with the exception of the +// last component or NULL if there is either no scope or there +// are fewer than two components. Ownership of the string is transferred. +vString * cxxScopeGetFullNameExceptLastComponentAsString(void); + +enum CXXScopeType cxxScopeGetType(void); +// Returns the current scope kind +unsigned int cxxScopeGetKind(void); +unsigned int cxxScopeGetVariableKind(void); +enum CXXScopeAccess cxxScopeGetAccess(void); +// Are we in global scope? +bool cxxScopeIsGlobal(void); + +// Add a token to the scope chain. The token ownership is transferred. 
+void cxxScopePush( + CXXToken * t, + enum CXXScopeType eScopeType, + enum CXXScopeAccess eInitialAccess + ); +void cxxScopeSetAccess(enum CXXScopeAccess eAccess); +// Remove the last token from the scope chain +void cxxScopePop(void); + +// Special management: pop one scope level but keep it so it can be pushed back +CXXToken * cxxScopeTakeTop(void); +// Special management: push back a scope taken earlier via cxxScopeTakeTop() +void cxxScopePushTop(CXXToken * t); + +#endif //!ctags_cxx_scope_h_ diff --git a/ctags/parsers/cxx/cxx_subparser.c b/ctags/parsers/cxx/cxx_subparser.c new file mode 100644 index 0000000000..8d65d9d620 --- /dev/null +++ b/ctags/parsers/cxx/cxx_subparser.c @@ -0,0 +1,125 @@ +/* +* Copyright (c) 2017, Red Hat, Inc. +* Copyright (c) 2017, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" + +#include "cxx_subparser_internal.h" +#include "cxx_token_chain.h" + + +bool cxxSubparserNotifyParseAccessSpecifier (ptrArray *pSubparsers) +{ + bool bR = false; + subparser *pSubparser; + + foreachSubparser (pSubparser, false) + { + cxxSubparser *pS = (cxxSubparser *)pSubparser; + if (pS->parseAccessSpecifierNotify) + { + enterSubparser(pSubparser); + if (pS->parseAccessSpecifierNotify (pS)) + { + ptrArrayAdd(pSubparsers, pS); + bR = true; + } + leaveSubparser(); + } + } + return bR; +} + +void cxxSubparserNotifyfoundExtraIdentifierAsAccessSpecifier(ptrArray *pSubparsers, + CXXToken *pToken) +{ + unsigned int c = ptrArrayCount(pSubparsers); + for (unsigned int i = 0; i < c; i++) + { + cxxSubparser *pS = ptrArrayItem (pSubparsers, i); + if (pS->foundExtraIdentifierAsAccessSpecifier) + { + enterSubparser((subparser*)pS); + pS->foundExtraIdentifierAsAccessSpecifier(pS, pToken); + leaveSubparser(); + } + } +} + +bool 
cxxSubparserNewIdentifierAsHeadOfMemberNotify(CXXToken *pToken) +{ + subparser *pSubparser; + bool handled = false; + + foreachSubparser (pSubparser, false) + { + cxxSubparser *pS = (cxxSubparser *)pSubparser; + if (pS->newIdentifierAsHeadOfMemberNotify) + { + enterSubparser(pSubparser); + if (pS->newIdentifierAsHeadOfMemberNotify (pS, pToken)) + handled = true; + leaveSubparser(); + if (handled) + break; + } + } + return handled; +} + +void cxxSubparserUnknownIdentifierInClassNotify(CXXToken *pToken) +{ + subparser *pSubparser; + bool handled = false; + + foreachSubparser (pSubparser, false) + { + cxxSubparser *pS = (cxxSubparser *)pSubparser; + if (pS->unknownIdentifierInClassNotify) + { + enterSubparser(pSubparser); + if (pS->unknownIdentifierInClassNotify (pS, pToken)) + handled = true; + leaveSubparser(); + if (handled) + break; + } + + } +} + +void cxxSubparserNotifyEnterBlock (void) +{ + subparser *pSubparser; + foreachSubparser (pSubparser, false) + { + cxxSubparser *pS = (cxxSubparser *)pSubparser; + if (pS->enterBlockNotify) + { + enterSubparser(pSubparser); + pS->enterBlockNotify (pS); + leaveSubparser(); + } + } +} + +void cxxSubparserNotifyLeaveBlock (void) +{ + subparser *pSubparser; + foreachSubparser (pSubparser, false) + { + cxxSubparser *pS = (cxxSubparser *)pSubparser; + if (pS->leaveBlockNotify) + { + enterSubparser(pSubparser); + pS->leaveBlockNotify (pS); + leaveSubparser(); + } + } +} diff --git a/ctags/parsers/cxx/cxx_subparser.h b/ctags/parsers/cxx/cxx_subparser.h new file mode 100644 index 0000000000..d97ec6c094 --- /dev/null +++ b/ctags/parsers/cxx/cxx_subparser.h @@ -0,0 +1,45 @@ +#ifndef ctags_cxx_subparser_h_ +#define ctags_cxx_subparser_h_ +/* + * Copyright (c) 2017, Red Hat, Inc. +* Copyright (c) 2017, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" + +#include "subparser.h" + +#include "cxx_token.h" + + +typedef struct sCxxSubparser cxxSubparser; +struct sCxxSubparser { + subparser subparser; + + void (* enterBlockNotify) (struct sCxxSubparser *pSubparser); + void (* leaveBlockNotify) (struct sCxxSubparser *pSubparser); + + /* Return true if the base parser should delete the token. */ + bool (* newIdentifierAsHeadOfMemberNotify) (struct sCxxSubparser *pSubparser, + CXXToken * pToken); + + /* Return true if the subparser consumes the token and the base + parser should not call the other subparsers. */ + bool (* unknownIdentifierInClassNotify) (struct sCxxSubparser *pSubparser, + CXXToken * pToken); + + /* Return true from parseAccessSpecifierNotify () if a subparser + has an interest in an extra identifier in the place where an access + specifier is written. The token holding the extra identifier + is passed via the foundExtraIdentifierAsAccessSpecifier method. */ + bool (* parseAccessSpecifierNotify) (struct sCxxSubparser *pSubparser); + void (* foundExtraIdentifierAsAccessSpecifier) (struct sCxxSubparser *pSubparser, + CXXToken * pToken); +}; + +#endif //!ctags_cxx_subparser_h_ diff --git a/ctags/parsers/cxx/cxx_subparser_internal.h b/ctags/parsers/cxx/cxx_subparser_internal.h new file mode 100644 index 0000000000..58b90f9a10 --- /dev/null +++ b/ctags/parsers/cxx/cxx_subparser_internal.h @@ -0,0 +1,27 @@ +#ifndef ctags_cxx_subparser_interanl_h_ +#define ctags_cxx_subparser_interanl_h_ +/* +* Copyright (c) 2017, Red Hat, Inc. +* Copyright (c) 2017, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" + +#include "cxx_subparser.h" +#include "ptrarray.h" + +bool cxxSubparserNotifyParseAccessSpecifier (ptrArray *pSubparsers); +void cxxSubparserNotifyfoundExtraIdentifierAsAccessSpecifier(ptrArray *pSubparsers, + CXXToken *pToken); + +bool cxxSubparserNewIdentifierAsHeadOfMemberNotify(CXXToken *pToken); +void cxxSubparserUnknownIdentifierInClassNotify(CXXToken *pToken); +void cxxSubparserNotifyEnterBlock (void); +void cxxSubparserNotifyLeaveBlock (void); + +#endif //!ctags_cxx_subparser_interanl_h_ diff --git a/ctags/parsers/cxx/cxx_tag.c b/ctags/parsers/cxx/cxx_tag.c new file mode 100644 index 0000000000..c579408f6f --- /dev/null +++ b/ctags/parsers/cxx/cxx_tag.c @@ -0,0 +1,700 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_tag.h" + +#include "cxx_scope.h" +#include "cxx_debug.h" +#include "cxx_token_chain.h" +#include "cxx_parser_internal.h" + +#include "entry.h" +#include "../cpreprocessor.h" +#include "routines.h" +#include "trashbox.h" +#include "xtag.h" + +#define CXX_COMMON_MACRO_ROLES(__langPrefix) \ + static roleDefinition __langPrefix##MacroRoles [] = { \ + RoleTemplateUndef, \ + } + +CXX_COMMON_MACRO_ROLES(C); +CXX_COMMON_MACRO_ROLES(CXX); +CXX_COMMON_MACRO_ROLES(CUDA); + +#define CXX_COMMON_HEADER_ROLES(__langPrefix) \ + static roleDefinition __langPrefix##HeaderRoles [] = { \ + RoleTemplateSystem, \ + RoleTemplateLocal, \ + } + +CXX_COMMON_HEADER_ROLES(C); +CXX_COMMON_HEADER_ROLES(CXX); +CXX_COMMON_HEADER_ROLES(CUDA); + + +#define CXX_COMMON_KINDS(_langPrefix, _szMemberDescription, _syncWith) \ + { true, 'd', "macro", "macro definitions", \ + .referenceOnly = false, ATTACH_ROLES(_langPrefix##MacroRoles), .syncWith = _syncWith \ + }, \ + { true, 'e', "enumerator", "enumerators (values inside an enumeration)", .syncWith = _syncWith }, \ + { true, 'f', "function", "function definitions", .syncWith = _syncWith }, \ + { true, 'g', "enum", "enumeration names", .syncWith = _syncWith }, \ + { true, 'h', "header", "included header files", \ + .referenceOnly = true, ATTACH_ROLES(_langPrefix##HeaderRoles), .syncWith = _syncWith \ + }, \ + { false, 'l', "local", "local variables", .syncWith = _syncWith }, \ + { true, 'm', "member", _szMemberDescription, .syncWith = _syncWith }, \ + { false, 'p', "prototype", "function prototypes", .syncWith = _syncWith }, \ + { true, 's', "struct", "structure names", .syncWith = _syncWith }, \ + { true, 't', "typedef", "typedefs", .syncWith = _syncWith }, \ + { true, 'u', "union", "union names", .syncWith = _syncWith }, \ + { true, 'v', "variable", "variable definitions", .syncWith = _syncWith }, \ + { false, 'x', "externvar", "external and forward 
variable declarations", .syncWith = _syncWith }, \ + { false, 'z', "parameter", "function parameters inside function or prototype definitions", .syncWith = _syncWith }, \ + { false, 'L', "label", "goto labels", .syncWith = _syncWith }, \ + { false, 'D', "macroparam", "parameters inside macro definitions", .syncWith = _syncWith } + +static kindDefinition g_aCXXCKinds [] = { + /* All other than LANG_AUTO are ignored. + LANG_IGNORE is specified as a just placeholder for the macro, + and is not needed. */ + CXX_COMMON_KINDS(C,"struct, and union members", LANG_IGNORE) +}; + +static kindDefinition g_aCXXCPPKinds [] = { + CXX_COMMON_KINDS(CXX,"class, struct, and union members", LANG_AUTO), + { true, 'c', "class", "classes" }, + { true, 'n', "namespace", "namespaces" }, + { false, 'A', "alias", "namespace aliases" }, + { false, 'N', "name", "names imported via using scope::symbol" }, + { false, 'U', "using", "using namespace statements", + .referenceOnly = true }, + { false, 'Z', "tparam", "template parameters" }, +}; + +static kindDefinition g_aCXXCUDAKinds [] = { + CXX_COMMON_KINDS(CUDA,"struct, and union members", LANG_IGNORE) +}; + +static const char * g_aCXXAccessStrings [] = { + NULL, + "public", + "private", + "protected", +}; + +#define CXX_COMMON_FIELDS \ + { \ + .name = "properties", \ + .description = "properties (static, inline, mutable,...)", \ + .enabled = false \ + }, { \ + .name = "macrodef", \ + .description = "macro definition", \ + .enabled = false \ + } + +static fieldDefinition g_aCXXCFields [] = { + CXX_COMMON_FIELDS +}; + +static fieldDefinition g_aCXXCPPFields [] = { + CXX_COMMON_FIELDS, + { + .name = "template", + .description = "template parameters", + .enabled = false, + }, + { + .name = "captures", + .description = "lambda capture list", + .enabled = false + }, + { + .name = "name", + .description = "aliased names", + .enabled = true + }, + { + .name = "specialization", + .description = "template specialization parameters", + .enabled = false, + 
}, +}; + +static fieldDefinition g_aCXXCUDAFields [] = { + CXX_COMMON_FIELDS +}; + +void cxxTagInitForLanguage(langType eLangType) +{ + g_cxx.eLangType = eLangType; + + if(g_cxx.eLangType == g_cxx.eCLangType) + { + g_cxx.eLanguage = CXXLanguageC; + g_cxx.pKindDefinitions = g_aCXXCKinds; + g_cxx.uKindDefinitionCount = sizeof(g_aCXXCKinds) / sizeof(kindDefinition); + g_cxx.pFieldOptions = g_aCXXCFields; + g_cxx.uFieldOptionCount = sizeof(g_aCXXCFields) / sizeof(fieldDefinition); + } else if(g_cxx.eLangType == g_cxx.eCPPLangType) + { + g_cxx.eLanguage = CXXLanguageCPP; + g_cxx.pKindDefinitions = g_aCXXCPPKinds; + g_cxx.uKindDefinitionCount = sizeof(g_aCXXCPPKinds) / sizeof(kindDefinition); + g_cxx.pFieldOptions = g_aCXXCPPFields; + g_cxx.uFieldOptionCount = sizeof(g_aCXXCPPFields) / sizeof(fieldDefinition); + } else if(g_cxx.eLangType == g_cxx.eCUDALangType) + { + g_cxx.eLanguage = CXXLanguageCUDA; + g_cxx.pKindDefinitions = g_aCXXCUDAKinds; + g_cxx.uKindDefinitionCount = sizeof(g_aCXXCUDAKinds) / sizeof(kindDefinition); + g_cxx.pFieldOptions = g_aCXXCUDAFields; + g_cxx.uFieldOptionCount = sizeof(g_aCXXCUDAFields) / sizeof(fieldDefinition); + } else { + CXX_DEBUG_ASSERT(false,"Invalid language passed to cxxTagInitForLanguage()"); + } +} + +kindDefinition * cxxTagGetCKindDefinitions(void) +{ + return g_aCXXCKinds; +} + +int cxxTagGetCKindDefinitionCount(void) +{ + return sizeof(g_aCXXCKinds) / sizeof(kindDefinition); +} + +kindDefinition * cxxTagGetCUDAKindDefinitions(void) +{ + return g_aCXXCUDAKinds; +} + +int cxxTagGetCUDAKindDefinitionCount(void) +{ + return sizeof(g_aCXXCUDAKinds) / sizeof(kindDefinition); +} + +kindDefinition * cxxTagGetCPPKindDefinitions(void) +{ + return g_aCXXCPPKinds; +} + +int cxxTagGetCPPKindDefinitionCount(void) +{ + return sizeof(g_aCXXCPPKinds) / sizeof(kindDefinition); +} + +bool cxxTagKindEnabled(unsigned int uKind) +{ + CXX_DEBUG_ASSERT( + uKind < g_cxx.uKindDefinitionCount, + "The kind must be associated to the current language!" 
+ ); + return g_cxx.pKindDefinitions[uKind].enabled; +} + +fieldDefinition * cxxTagGetCPPFieldDefinitionifiers(void) +{ + return g_aCXXCPPFields; +} + +int cxxTagGetCPPFieldDefinitionifierCount(void) +{ + return sizeof(g_aCXXCPPFields) / sizeof(fieldDefinition); +} + +fieldDefinition * cxxTagGetCUDAFieldDefinitionifiers(void) +{ + return g_aCXXCUDAFields; +} + +int cxxTagGetCUDAFieldDefinitionifierCount(void) +{ + return sizeof(g_aCXXCUDAFields) / sizeof(fieldDefinition); +} + +fieldDefinition * cxxTagGetCFieldDefinitionifiers(void) +{ + return g_aCXXCFields; +} + +int cxxTagGetCFieldDefinitionifierCount(void) +{ + return sizeof(g_aCXXCFields) / sizeof(fieldDefinition); +} + +bool cxxTagFieldEnabled(unsigned int uField) +{ + CXX_DEBUG_ASSERT( + uField < g_cxx.uFieldOptionCount, + "The field must be associated to the current language!" + ); + return g_cxx.pFieldOptions[uField].enabled; +} + + +static tagEntryInfo g_oCXXTag; + + +tagEntryInfo * cxxTagBegin(unsigned int uKind,CXXToken * pToken) +{ + kindDefinition * pKindDefinitions = g_cxx.pKindDefinitions; + + if(!pKindDefinitions[uKind].enabled) + { + //CXX_DEBUG_PRINT("Tag kind %s is not enabled",g_aCXXKinds[eKind].name); + return NULL; + } + + initTagEntry( + &g_oCXXTag, + vStringValue(pToken->pszWord), + uKind + ); + + g_oCXXTag.lineNumber = pToken->iLineNumber; + g_oCXXTag.filePosition = pToken->oFilePosition; + g_oCXXTag.isFileScope = false; + + if(!cxxScopeIsGlobal()) + { + g_oCXXTag.extensionFields.scopeKindIndex = cxxScopeGetKind(); + g_oCXXTag.extensionFields.scopeName = cxxScopeGetFullName(); + } + + // FIXME: meaning of "is file scope" is quite debatable... 
+ g_oCXXTag.extensionFields.access = g_aCXXAccessStrings[cxxScopeGetAccess()]; + + return &g_oCXXTag; +} + +vString * cxxTagSetProperties(unsigned int uProperties) +{ + if(uProperties == 0) + return NULL; + + if(!cxxTagFieldEnabled(CXXTagFieldProperties)) + return NULL; + + vString * pszProperties = vStringNew(); + + bool bFirst = true; + +#define ADD_PROPERTY(_szProperty) \ + do { \ + if(bFirst) \ + bFirst = false; \ + else \ + vStringPut(pszProperties,','); \ + vStringCatS(pszProperties,_szProperty); \ + } while(0) + + if(uProperties & CXXTagPropertyConst) + ADD_PROPERTY("const"); + if(uProperties & CXXTagPropertyDefault) + ADD_PROPERTY("default"); + if(uProperties & CXXTagPropertyDelete) + ADD_PROPERTY("delete"); + if(uProperties & CXXTagPropertyExplicit) + ADD_PROPERTY("explicit"); + if(uProperties & CXXTagPropertyExtern) + ADD_PROPERTY("extern"); + if(uProperties & CXXTagPropertyFinal) + ADD_PROPERTY("final"); + if(uProperties & CXXTagPropertyInline) + ADD_PROPERTY("inline"); + if(uProperties & CXXTagPropertyMutable) + ADD_PROPERTY("mutable"); + if(uProperties & CXXTagPropertyOverride) + ADD_PROPERTY("override"); + if(uProperties & CXXTagPropertyPure) + ADD_PROPERTY("pure"); + if(uProperties & CXXTagPropertyScopeTemplateSpecialization) + ADD_PROPERTY("scopespecialization"); + if(uProperties & CXXTagPropertyStatic) + ADD_PROPERTY("static"); + if(uProperties & CXXTagPropertyTemplateSpecialization) + ADD_PROPERTY("specialization"); + if(uProperties & CXXTagPropertyVirtual) + ADD_PROPERTY("virtual"); + if(uProperties & CXXTagPropertyVolatile) + ADD_PROPERTY("volatile"); + if(uProperties & CXXTagPropertyDeprecated) + ADD_PROPERTY("deprecated"); + if(uProperties & CXXTagPropertyScopedEnum) + ADD_PROPERTY("scopedenum"); + if(uProperties & CXXTagPropertyFunctionTryBlock) + ADD_PROPERTY("fntryblock"); + + cxxTagSetField(CXXTagFieldProperties,vStringValue(pszProperties),false); + + return pszProperties; +} + +static bool cxxTagCheckTypeField( + CXXToken * pTypeStart, + 
CXXToken * pTypeEnd + ) +{ + CXX_DEBUG_ENTER(); + if(!pTypeStart || !pTypeEnd) + { + CXX_DEBUG_LEAVE_TEXT("One of the pointers is NULL"); + return false; + } + + int iTotalCount = 0; + int iParenthesisCount = 0; + int iIdentifierOrKeywordCount = 0; + int iConsecutiveIdentifiers = 0; + + while(pTypeStart) + { + iTotalCount++; + if(iTotalCount > 30) + { + CXX_DEBUG_LEAVE_TEXT("The chain is really too long to be a type name"); + return false; + } + + if(cxxTokenTypeIs(pTypeStart,CXXTokenTypeIdentifier)) + { + iConsecutiveIdentifiers++; + iIdentifierOrKeywordCount++; + if(iConsecutiveIdentifiers > 4) + { + // Probably many macros inside. Too many. + CXX_DEBUG_LEAVE_TEXT("Too many consecutive identifiers for a type name"); + return false; + } + } else { + iConsecutiveIdentifiers = 0; + + if(cxxTokenTypeIs(pTypeStart,CXXTokenTypeParenthesisChain)) + { + iParenthesisCount++; + if(iParenthesisCount > 3) + { + CXX_DEBUG_LEAVE_TEXT("Too many non-nested parentheses for a type name"); + return false; + } + + if( + (iTotalCount > 1) && + cxxTokenTypeIs(pTypeStart->pPrev,CXXTokenTypeIdentifier) && + pTypeStart != pTypeEnd && + pTypeStart->pNext && + cxxTokenTypeIs(pTypeStart->pNext,CXXTokenTypeIdentifier) + ) + { + // identifier () identifier + // Looks suspicious, might be macros gathered by mistake + CXX_DEBUG_LEAVE_TEXT("Identifier-parenthesis-identifier pattern: looks suspicious"); + return false; + } + } else if(cxxTokenTypeIs(pTypeStart,CXXTokenTypeKeyword)) + { + iIdentifierOrKeywordCount++; + } + } + + if(pTypeStart == pTypeEnd) + break; + + pTypeStart = pTypeStart->pNext; + } + + if(iIdentifierOrKeywordCount < 1) + { + CXX_DEBUG_LEAVE_TEXT("Type does not seem to contains identifiers or keywords, can't be a type name"); + return false; + } + + if(!pTypeStart) + { + CXX_DEBUG_LEAVE_TEXT("Type tokens do not belong to the same chain!"); + return false; + } + + CXX_DEBUG_LEAVE(); + return true; +} + +CXXToken * cxxTagCheckAndSetTypeField( + CXXToken * pTypeStart, + CXXToken 
* pTypeEnd + ) +{ + CXX_DEBUG_ASSERT(pTypeStart,"Non null type start is expected here"); + CXX_DEBUG_ASSERT(pTypeEnd,"Non null type end is expected here"); + + const char * szTypeRef0; + + // "typename" is debatable since it's not really + // allowed by C++ for unqualified types. However I haven't been able + // to come up with something better... so "typename" it is for now. + + // FIXME: The typeRef forma with two fields should be dropped. + // It has been created with specific use cases in mind + // and we are pushing it way beyond them. + // We should have a plain "type" field instead. + + static const char * szTypename = "typename"; + static const char * szMeta = "meta"; // for type template arguments + + // Filter out initial keywords that need to be excluded from typenames + for(;;) + { + if(!cxxTokenTypeIs(pTypeStart,CXXTokenTypeKeyword)) + break; + if(!cxxKeywordExcludeFromTypeNames(pTypeStart->eKeyword)) + break; + // must be excluded + if(pTypeStart == pTypeEnd) + { + CXX_DEBUG_PRINT("Type name composed only of ignored keywords"); + return NULL; // only excluded keywords + } + pTypeStart = pTypeStart->pNext; + } + + if(pTypeStart != pTypeEnd) + { + // Note that this does not work for types like "const enum X" + // But that's not backward compatible anyway, so we live with it. + if( + cxxTokenTypeIs(pTypeStart,CXXTokenTypeKeyword) && + cxxKeywordIsTypeRefMarker(pTypeStart->eKeyword) + ) + { + szTypeRef0 = cxxKeywordName(pTypeStart->eKeyword); + pTypeStart = pTypeStart->pNext; + } else { + szTypeRef0 = szTypename; + } + } else { + if( + cxxTokenTypeIs(pTypeStart,CXXTokenTypeKeyword) && + cxxKeywordIsTypeRefMarker(pTypeStart->eKeyword) + ) + { + // A lone "typename", "class", "struct" or similar. + // This almost certainly comes from a template. 
+ szTypeRef0 = szMeta; + } else { + szTypeRef0 = szTypename; + } + } + + if(!cxxTagCheckTypeField(pTypeStart,pTypeEnd)) + { + CXX_DEBUG_PRINT("Type name looks suspicious: refusing to emit it"); + return NULL; + } + + cxxTokenChainNormalizeTypeNameSpacingInRange(pTypeStart,pTypeEnd); + CXXToken * pTypeName = cxxTokenChainExtractRangeFilterTypeName(pTypeStart,pTypeEnd); + + if(!pTypeName) + { + CXX_DEBUG_PRINT("Can't extract type name"); + return NULL; + } + + CXX_DEBUG_PRINT("Type name is '%s'",vStringValue(pTypeName->pszWord)); + + g_oCXXTag.extensionFields.typeRef[0] = szTypeRef0; + g_oCXXTag.extensionFields.typeRef[1] = vStringValue(pTypeName->pszWord); + + return pTypeName; +} + +void cxxTagSetField(unsigned int uField,const char * szValue,bool bCopyValue) +{ + CXX_DEBUG_ASSERT( + uField < g_cxx.uFieldOptionCount, + "The field must be associated to the current language!" + ); + + if(!g_cxx.pFieldOptions[uField].enabled) + return; + + /* If we make a copy for the value, the copy must be freed after + * calling cxxTagCommit() for g_oCXXTag. The parser trash box + * allows us to delay freeing the copy. */ + attachParserField(&g_oCXXTag,false,g_cxx.pFieldOptions[uField].ftype, + bCopyValue?parserTrashBoxPut(eStrdup(szValue),eFree):szValue); +} + +void cxxTagSetCorkQueueField( + int iIndex, + unsigned int uField, + const char * szValue + ) +{ + CXX_DEBUG_ASSERT( + uField < g_cxx.uFieldOptionCount, + "The field must be associated to the current language!" 
+ ); + + CXX_DEBUG_ASSERT(g_cxx.pFieldOptions[uField].enabled,"The field must be enabled!"); + + attachParserFieldToCorkEntry(iIndex,g_cxx.pFieldOptions[uField].ftype,szValue); +} + +void cxxTagHandleTemplateFields() +{ + CXX_DEBUG_ASSERT( + g_cxx.pTemplateTokenChain && + (g_cxx.pTemplateTokenChain->iCount > 0) && + cxxParserCurrentLanguageIsCPP(), + "Template existence must be checked before calling this function" + ); + + if(cxxTagFieldEnabled(CXXTagCPPFieldTemplate)) + { + cxxTokenChainNormalizeTypeNameSpacing(g_cxx.pTemplateTokenChain); + + CXXToken * t = cxxTokenChainCondenseIntoToken(g_cxx.pTemplateTokenChain,0); + + if(t) + { + cxxTagSetField( + CXXTagCPPFieldTemplate, + vStringValue(t->pszWord), + true + ); + + cxxTokenDestroy(t); + } + } + + if( + g_cxx.pTemplateSpecializationTokenChain && + cxxTagFieldEnabled(CXXTagCPPFieldTemplateSpecialization) + ) + { + cxxTokenChainNormalizeTypeNameSpacing(g_cxx.pTemplateSpecializationTokenChain); + + CXXToken * tx = cxxTokenChainCondenseIntoToken(g_cxx.pTemplateSpecializationTokenChain,0); + + if(tx) + { + cxxTagSetField( + CXXTagCPPFieldTemplateSpecialization, + vStringValue(tx->pszWord), + true + ); + + cxxTokenDestroy(tx); + } + } + +} + +int cxxTagCommit(int *piCorkQueueIndexFQ) +{ + if(piCorkQueueIndexFQ) + *piCorkQueueIndexFQ = CORK_NIL; + + if(g_oCXXTag.isFileScope) + { + if(!isXtagEnabled(XTAG_FILE_SCOPE)) + return CORK_NIL; + + markTagExtraBit(&g_oCXXTag,XTAG_FILE_SCOPE); + } + +#ifdef CXX_DO_DEBUGGING + CXX_DEBUG_PRINT( + "Emitting tag for symbol '%s', kind '%s', line %d", + g_oCXXTag.name, + getLanguageKindName(g_oCXXTag.langType, g_oCXXTag.kindIndex), + g_oCXXTag.lineNumber + ); + if( + g_oCXXTag.extensionFields.typeRef[0] && + g_oCXXTag.extensionFields.typeRef[1] + ) + CXX_DEBUG_PRINT( + "Tag has typeref %s %s", + g_oCXXTag.extensionFields.typeRef[0], + g_oCXXTag.extensionFields.typeRef[1] + ); +#endif + + int iCorkQueueIndex = makeTagEntry(&g_oCXXTag); + + // Handle --extra=+q + 
if(!isXtagEnabled(XTAG_QUALIFIED_TAGS)) + return iCorkQueueIndex; + + markTagExtraBit(&g_oCXXTag,XTAG_QUALIFIED_TAGS); + + if(!g_oCXXTag.extensionFields.scopeName) + return iCorkQueueIndex; + + // WARNING: The following code assumes that the scope + // didn't change between cxxTagBegin() and cxxTagCommit(). + + enum CXXScopeType eScopeType = cxxScopeGetType(); + + if(eScopeType == CXXScopeTypeFunction || eScopeType == CXXScopeTypePrototype) + { + // old ctags didn't do this, and --extra=+q is mainly + // for backward compatibility so... + return iCorkQueueIndex; + } + + // Same tag. Only the name changes. + + vString * x; + + if(eScopeType == CXXScopeTypeEnum) + { + // If the scope kind is enumeration then we need to remove the + // last scope part. This is what old ctags did. + if(cxxScopeGetSize() < 2) + return -1; // toplevel enum + + x = cxxScopeGetFullNameExceptLastComponentAsString(); + CXX_DEBUG_ASSERT(x,"Scope with size >= 2 should have returned a value here"); + } else { + x = vStringNewInit(g_oCXXTag.extensionFields.scopeName); + } + + vStringCatS(x,"::"); + vStringCatS(x,g_oCXXTag.name); + + g_oCXXTag.name = vStringValue(x); + + CXX_DEBUG_PRINT( + "Emitting extra tag for symbol '%s', kind '%s', line %d", + g_oCXXTag.name, + getLanguageKindName(g_oCXXTag.langType, g_oCXXTag.kindIndex), + g_oCXXTag.lineNumber + ); + + int iCorkQueueIndexFQ = makeTagEntry(&g_oCXXTag); + if(piCorkQueueIndexFQ) + *piCorkQueueIndexFQ = iCorkQueueIndexFQ; + + vStringDelete(x); + + return iCorkQueueIndex; +} + +void cxxTag(unsigned int uKind,CXXToken * pToken) +{ + if(cxxTagBegin(uKind,pToken) != NULL) + cxxTagCommit(NULL); +} diff --git a/ctags/parsers/cxx/cxx_tag.h b/ctags/parsers/cxx/cxx_tag.h new file mode 100644 index 0000000000..78f89e91ef --- /dev/null +++ b/ctags/parsers/cxx/cxx_tag.h @@ -0,0 +1,202 @@ +#ifndef _cxxTag_h_ +#define _cxxTag_h_ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the 
+* GNU General Public License version 2 or (at your option) any later version.
+*
+* This module declares the tag kinds, tag fields and tag emission helpers of the cxx parser
+*/
+
+#include "general.h"
+
+#include "kind.h"
+#include "entry.h"
+
+#include "cxx_token.h"
+
+// Tag kinds common to all (sub)languages this parser supports
+enum CXXTagCommonKind
+{
+	CXXTagKindMACRO,
+	CXXTagKindENUMERATOR,
+	CXXTagKindFUNCTION,
+	CXXTagKindENUM,
+	CXXTagKindINCLUDE,
+	CXXTagKindLOCAL,
+	CXXTagKindMEMBER,
+	CXXTagKindPROTOTYPE,
+	CXXTagKindSTRUCT,
+	CXXTagKindTYPEDEF,
+	CXXTagKindUNION,
+	CXXTagKindVARIABLE,
+	CXXTagKindEXTERNVAR,
+	CXXTagKindPARAMETER,
+	CXXTagKindLABEL,
+	CXXTagKindMACROPARAM,
+
+	CXXTagCommonKindCount // number of common kinds; CXXTagCPPKind starts at this value
+};
+
+// Tag kinds specific to the CPP language: numbering continues after the common kinds.
+enum CXXTagCPPKind
+{
+	CXXTagCPPKindCLASS = CXXTagCommonKindCount,
+	CXXTagCPPKindNAMESPACE,
+	CXXTagCPPKindALIAS,
+	CXXTagCPPKindNAME,
+	CXXTagCPPKindUSING,
+	CXXTagCPPKindTEMPLATEPARAM
+};
+
+// The fields common to all (sub)languages this parser supports.
+enum CXXTagCommonField
+{
+	CXXTagFieldProperties,
+	CXXTagFieldMacrodef,
+
+	CXXTagCommonFieldCount // number of common fields
+};
+
+// The fields specific to the CPP language.
+enum CXXTagCPPField
+{
+	CXXTagCPPFieldTemplate = CXXTagCommonFieldCount, // numbering continues after the common fields
+	CXXTagCPPFieldLambdaCaptureList,
+	CXXTagCPPFieldAliasedName,
+	CXXTagCPPFieldTemplateSpecialization
+};
+
+// Per-language field tables and their sizes (NOTE(review): inferred from the names, definitions not visible here)
+fieldDefinition * cxxTagGetCPPFieldDefinitionifiers(void);
+int cxxTagGetCPPFieldDefinitionifierCount(void);
+
+fieldDefinition * cxxTagGetCUDAFieldDefinitionifiers(void);
+int cxxTagGetCUDAFieldDefinitionifierCount(void);
+
+fieldDefinition * cxxTagGetCFieldDefinitionifiers(void);
+int cxxTagGetCFieldDefinitionifierCount(void);
+
+bool cxxTagFieldEnabled(unsigned int uField);
+
+kindDefinition * cxxTagGetCKindDefinitions(void);
+int cxxTagGetCKindDefinitionCount(void);
+
+kindDefinition * cxxTagGetCUDAKindDefinitions(void);
+int cxxTagGetCUDAKindDefinitionCount(void);
+
+kindDefinition * cxxTagGetCPPKindDefinitions(void);
+int cxxTagGetCPPKindDefinitionCount(void);
+
+// Returns true if the specified tag kind is enabled in the current language
+bool cxxTagKindEnabled(unsigned int uTagKind);
+
+// Begin composing a tag. The tag kind must correspond to the current language.
+// Returns NULL if the tag should *not* be included in the output,
+// otherwise the tag entry info that can be filled up with extension fields.
+// Must be followed by cxxTagCommit() if it returns a non-NULL value.
+// The pToken ownership is NOT transferred.
+tagEntryInfo * cxxTagBegin(unsigned int uKind,CXXToken * pToken);
+
+// Set the type of the current tag from the specified token sequence
+// (which must belong to the same chain!).
+// Before setting the type this function will check that the specified
+// range of tokens looks reasonable for a type name and if it looks
+// suspicious will refuse to emit it (NULL is returned in that case).
+// If the type is actually set then the return value is a token that must
+// be destroyed after cxxTagCommit() has been called.
+CXXToken * cxxTagCheckAndSetTypeField(
+		CXXToken * pTypeStart,
+		CXXToken * pTypeEnd
+	);
+
+typedef enum _CXXTagProperty
+{
+	// Function is virtual
+	CXXTagPropertyVirtual = 1,
+	// Function/variable is static
+	CXXTagPropertyStatic = (1 << 1),
+	// Function is inline
+	CXXTagPropertyInline = (1 << 2),
+	// Function is explicit
+	CXXTagPropertyExplicit = (1 << 3),
+	// Function/variable is extern
+	CXXTagPropertyExtern = (1 << 4),
+	// Function is const
+	CXXTagPropertyConst = (1 << 5),
+	// Function is pure virtual
+	CXXTagPropertyPure = (1 << 6),
+	// Function is marked as override
+	CXXTagPropertyOverride = (1 << 7),
+	// Function is marked as default
+	CXXTagPropertyDefault = (1 << 8),
+	// Function is marked as final
+	CXXTagPropertyFinal = (1 << 9),
+	// Function is marked as delete
+	CXXTagPropertyDelete = (1 << 10),
+	// Variable is marked as mutable
+	// (C++ treats "mutable" as storage class)
+	CXXTagPropertyMutable = (1 << 11),
+	// Function (note: NOT variable) is marked as volatile as in "int a() volatile"
+	// (Because for variables it's treated as part of type)
+	CXXTagPropertyVolatile = (1 << 12),
+	// Template specialization a<x>()
+	CXXTagPropertyTemplateSpecialization = (1 << 13),
+	// Template specialization of scope a<x>::b() (which implies TemplateSpec too)
+	CXXTagPropertyScopeTemplateSpecialization = (1 << 14),
+	// __attribute__((deprecated)) has been seen
+	CXXTagPropertyDeprecated = (1 << 15),
+	// scoped enum (C++11)
+	CXXTagPropertyScopedEnum = (1 << 16),
+	// function-try-block: int f() try { ... } catch { ... }
+	CXXTagPropertyFunctionTryBlock = (1 << 17)
+} CXXTagProperty;
+
+// Set the properties field of the tag from a mask of CXXTagProperty flags.
+// Returns a string that you must destroy after the call to cxxTagCommit()
+// or NULL if the properties weren't set for some reason (no properties, field
+// not enabled or similar...)
+vString * cxxTagSetProperties(unsigned int uProperties);
+
+// Set a parser-local field.
+// If bCopyValue is set to false then szValue is not copied and it must
+// persist in memory until cxxTagCommit() is called.
+// If bCopyValue is set to true then szValue is copied and it can be
+// safely destroyed before cxxTagCommit() is called.
+// bCopyValue == false is faster: use it whenever possible.
+void cxxTagSetField(unsigned int uField,const char * szValue,bool bCopyValue);
+
+// Set a parser-local CPP field for a tag in cork queue.
+// The szValue pointer is copied.
+// Make sure that the field is enabled before calling this function.
+void cxxTagSetCorkQueueField(
+		int iIndex,
+		unsigned int uField,
+		const char * szValue
+	);
+
+// Handle the template-related parts of the tag (class, function, variable)
+void cxxTagHandleTemplateFields(void);
+
+// Commit the composed tag. Must follow a successful cxxTagBegin() call.
+// Returns the cork queue index of the tag; *piCorkQueueIndexFQ (if non-NULL) gets the index of the extra qualified tag or CORK_NIL.
+int cxxTagCommit(int *piCorkQueueIndexFQ);
+
+// Same as cxxTagBegin() followed by cxxTagCommit(NULL) when cxxTagBegin() returns non-NULL
+void cxxTag(unsigned int uKind,CXXToken * pToken);
+
+typedef enum {
+	CR_MACRO_UNDEF,
+} cMacroRole;
+
+typedef enum {
+	CR_HEADER_SYSTEM,
+	CR_HEADER_LOCAL,
+} cHeaderRole;
+
+// Initialize the parser state for the specified language.
+// Must be called before attempting to access the kind options.
+void cxxTagInitForLanguage(langType eLangType);
+
+#endif //!_cxxTag_h_
diff --git a/ctags/parsers/cxx/cxx_token.c b/ctags/parsers/cxx/cxx_token.c
new file mode 100644
index 0000000000..547dc15073
--- /dev/null
+++ b/ctags/parsers/cxx/cxx_token.c
@@ -0,0 +1,185 @@
+/*
+* Copyright (c) 2016, Szymon Tomasz Stefanek
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+*
+* This module implements creation, pooling, copying and destruction of CXXToken objects
+*/
+
+#include "cxx_token.h"
+
+#include "routines.h"
+#include "vstring.h"
+#include "read.h"
+#include "objpool.h"
+
+#include "cxx_token_chain.h"
+#include "cxx_debug.h"
+#include "cxx_keyword.h"
+#include "cxx_tag.h"
+
+#define CXX_TOKEN_POOL_MAXIMUM_SIZE 8192
+
+static objPool * g_pTokenPool = NULL; // created by cxxTokenAPIInit(), deleted by cxxTokenAPIDone()
+
+void cxxTokenForceDestroy(CXXToken * t);
+
+static CXXToken *createToken(void *createArg CTAGS_ATTR_UNUSED)
+{
+	CXXToken *t = xMalloc(1, CXXToken);
+	// we almost always want a string, and since this token
+	// is being reused..well.. we always want it
+	t->pszWord = vStringNew();
+	return t;
+}
+
+static void deleteToken(CXXToken *token)
+{
+	vStringDelete(token->pszWord);
+	eFree(token);
+}
+
+static void clearToken(CXXToken *t)
+{
+	CXX_DEBUG_ASSERT(t->pszWord,"The string shouldn't have been destroyed");
+
+	// this won't actually release memory (but we're taking care
+	// not to create very large strings)
+	vStringClear(t->pszWord);
+
+	t->bFollowedBySpace = false;
+
+	t->pChain = NULL;
+	t->pNext = NULL;
+	t->pPrev = NULL;
+}
+
+void cxxTokenAPIInit(void)
+{
+	g_pTokenPool = objPoolNew(CXX_TOKEN_POOL_MAXIMUM_SIZE,
+		(objPoolCreateFunc)createToken, (objPoolDeleteFunc)deleteToken,
+		(objPoolClearFunc)clearToken,
+		NULL);
+}
+
+void cxxTokenAPINewFile(void)
+{
+	/* Stub */
+}
+
+void cxxTokenAPIDone(void)
+{
+	objPoolDelete (g_pTokenPool);
+}
+
+CXXToken * cxxTokenCreate(void)
+{
+	return objPoolGet (g_pTokenPool);
+}
+
+void cxxTokenDestroy(CXXToken * t)
+{
+	if(!t)
+		return;
+
+	if(t->pChain)
+	{
+		cxxTokenChainDestroy(t->pChain); // the nested chain is owned by the token
+		t->pChain = NULL;
+	}
+
+	objPoolPut (g_pTokenPool, t); // hand the token back to the pool instead of freeing it
+}
+
+void cxxTokenForceDestroy(CXXToken * t)
+{
+	if(!t)
+		return;
+
+	if(t->pChain)
+	{
+		cxxTokenChainDestroy(t->pChain);
+		t->pChain = NULL;
+	}
+
+	CXX_DEBUG_ASSERT(t->pszWord,"There should be a word here");
+
+	vStringDelete(t->pszWord);
+
+	eFree(t); // unlike cxxTokenDestroy() the memory is really released
+}
+
+CXXToken * cxxTokenCopy(CXXToken * pToken)
+{
+	CXXToken * pRetToken = cxxTokenCreate();
+	pRetToken->iLineNumber = pToken->iLineNumber;
+	pRetToken->oFilePosition = pToken->oFilePosition;
+	pRetToken->eType = pToken->eType;
+	pRetToken->eKeyword = pToken->eKeyword;
+	pRetToken->bFollowedBySpace = pToken->bFollowedBySpace; // the copy (not the source) must receive the flag
+	vStringCat(pRetToken->pszWord,pToken->pszWord);
+
+	return pRetToken;
+}
+
+CXXToken * cxxTokenCreateKeyword(int iLineNumber,MIOPos oFilePosition,CXXKeyword eKeyword)
+{
+	CXXToken * pToken = cxxTokenCreate();
+	pToken->iLineNumber = iLineNumber;
+	pToken->oFilePosition = oFilePosition;
+	pToken->eType = CXXTokenTypeKeyword;
+	pToken->eKeyword = eKeyword;
+	pToken->bFollowedBySpace = true;
+	vStringCatS(pToken->pszWord,cxxKeywordName(eKeyword));
+
+	return pToken;
+}
+
+// Creates an identifier token whose word is generated by anonGenerate() with the "__anon"
+// prefix; line number and file position are the current input ones.
+CXXToken * cxxTokenCreateAnonymousIdentifier(unsigned int uTagKind)
+{
+	CXXToken * t = cxxTokenCreate();
+
+	anonGenerate (t->pszWord, "__anon", uTagKind);
+	t->eType = CXXTokenTypeIdentifier;
+	t->bFollowedBySpace = true;
+	t->iLineNumber = getInputLineNumber();
+	t->oFilePosition = getInputFilePosition();
+
+	return t;
+}
+
+void cxxTokenAppendToString(vString * s,CXXToken * t)
+{
+	switch(t->eType)
+	{
+		case CXXTokenTypeParenthesisChain:
+		case CXXTokenTypeSquareParenthesisChain:
+		case CXXTokenTypeBracketChain:
+		case CXXTokenTypeAngleBracketChain:
+			CXX_DEBUG_ASSERT(t->pChain,"This token should have a nested chain!");
+			cxxTokenChainJoinInString(t->pChain,s,NULL,0); // subchain tokens contribute their nested chain
+		break;
+		default:
+			vStringCat(s,t->pszWord);
+		break;
+	}
+}
+
+void cxxTokenReduceBackward (CXXToken *pStart)
+{
+	enum CXXTokenType eSentinelType = pStart->eType >> 4; // closing token type >> 4 is the matching opening type
+	CXXToken *pTmp = pStart->pPrev;
+	CXXToken *pReducingCandidate;
+
+	while (pTmp && (!cxxTokenTypeIsOneOf (pTmp, eSentinelType)))
+	{
+		pReducingCandidate = pTmp;
+		pTmp = pTmp->pPrev; // NOTE(review): dereferenced below, so the sentinel is assumed to exist before pStart
+		pTmp->pNext = pReducingCandidate->pNext;
+		pReducingCandidate->pNext->pPrev = pTmp;
+		CXX_DEBUG_PRINT("reduce inner token: %p",pReducingCandidate);
+		cxxTokenDestroy (pReducingCandidate);
+	}
+}
diff --git a/ctags/parsers/cxx/cxx_token.h b/ctags/parsers/cxx/cxx_token.h
new file mode 100644
index 0000000000..b320f09735
--- /dev/null
+++ b/ctags/parsers/cxx/cxx_token.h
@@ -0,0 +1,124 @@
+#ifndef ctags_cxx_token_h_
+#define ctags_cxx_token_h_
+/*
+* Copyright (c) 2016, Szymon Tomasz Stefanek
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+*
+* This module declares the CXXToken structure, its type enumeration and the token API
+*/
+
+#include "general.h"
+#include "vstring.h"
+
+#include "cxx_keyword.h"
+
+// We assume that the compiler is capable of generating 32 bit wide enums
+// This is used as enumeration but also as mask in several functions.
+//
+// DON'T FORGET TO RUN misc/gencxxtypedumper.sh after updating the elements.
+//
+enum CXXTokenType
+{
+	CXXTokenTypeEOF = 1,
+	CXXTokenTypeIdentifier = (1 << 1),
+	CXXTokenTypeKeyword = (1 << 2),
+	CXXTokenTypeNumber = (1 << 3),
+	CXXTokenTypeSingleColon = (1 << 4),
+	CXXTokenTypeMultipleColons = (1 << 5),
+	CXXTokenTypeSemicolon = (1 << 6),
+	CXXTokenTypeComma = (1 << 7), // ,
+	CXXTokenTypeAssignment = (1 << 8), // =
+	CXXTokenTypeOperator = (1 << 9), // != == += ++ -= -- / whatever
+	CXXTokenTypeUnknown = (1 << 10),
+	CXXTokenTypeDotOperator = (1 << 11), // .
+	CXXTokenTypePointerOperator = (1 << 12), // ->
+	CXXTokenTypeStringConstant = (1 << 13),
+	CXXTokenTypeStar = (1 << 14), // *
+	CXXTokenTypeAnd = (1 << 15), // &
+	CXXTokenTypeMultipleAnds = (1 << 16), // &&
+	CXXTokenTypeCharacterConstant = (1 << 17),
+	CXXTokenTypeMultipleDots = (1 << 18), // ...
+
+	// These must come in pairs. Note that the opening
+	// tokens can be shifted left by 4 to get the matching closing tokens
+	// and shifted left by 8 to get the matching subchain marker below
+	CXXTokenTypeOpeningBracket = (1 << 19), // {
+	CXXTokenTypeOpeningParenthesis = (1 << 20), // (
+	CXXTokenTypeOpeningSquareParenthesis = (1 << 21), // [
+	CXXTokenTypeSmallerThanSign = (1 << 22), // <
+
+	CXXTokenTypeClosingBracket = (1 << 23), // }
+	CXXTokenTypeClosingParenthesis = (1 << 24), // )
+	CXXTokenTypeClosingSquareParenthesis = (1 << 25), // ]
+	CXXTokenTypeGreaterThanSign = (1 << 26), // >
+
+	// Subchains (caution: read the comment above about CXXTokenTypeOpeningBracket)
+	CXXTokenTypeBracketChain = (1 << 27), // {...}
+	CXXTokenTypeParenthesisChain = (1 << 28), // (...)
+	CXXTokenTypeSquareParenthesisChain = (1 << 29), // [...]
+	CXXTokenTypeAngleBracketChain = (1 << 30), // <...>
+};
+
+// Forward decl
+typedef struct _CXXTokenChain CXXTokenChain;
+
+
+
+typedef struct _CXXToken
+{
+	enum CXXTokenType eType;
+	vString * pszWord;
+	CXXKeyword eKeyword;
+	CXXTokenChain * pChain; // this is NOT the parent chain!
+	bool bFollowedBySpace;
+
+	int iLineNumber;
+	MIOPos oFilePosition;
+
+	struct _CXXToken * pNext;
+	struct _CXXToken * pPrev;
+
+	// These members are used by the scope management functions to store
+	// scope information. Only cxxScope* functions can make sense of it.
+	// In other contexts these are simply left
+	// uninitialized and must be treated as undefined.
+	unsigned char uInternalScopeType;
+	unsigned char uInternalScopeAccess;
+} CXXToken;
+
+CXXToken * cxxTokenCreate(void);
+void cxxTokenDestroy(CXXToken * t);
+
+// Creates a new token with the type, keyword, position, spacing flag and word of pToken (pChain is not copied).
+CXXToken * cxxTokenCopy(CXXToken *pToken);
+
+// A shortcut for quickly creating keyword tokens.
+CXXToken * cxxTokenCreateKeyword(int iLineNumber,MIOPos oFilePosition,CXXKeyword eKeyword);
+
+CXXToken * cxxTokenCreateAnonymousIdentifier(unsigned int uTagKind);
+// Type test macros: arguments are parenthesized so that expressions (e.g. "a + 1") expand correctly.
+#define cxxTokenTypeIsOneOf(_pToken,_uTypes) ((_pToken)->eType & (_uTypes))
+#define cxxTokenTypeIs(_pToken,_eType) ((_pToken)->eType == (_eType))
+#define cxxTokenIsKeyword(_pToken,_eKeyword) \
+	( \
+		((_pToken)->eType == CXXTokenTypeKeyword) && \
+		((_pToken)->eKeyword == (_eKeyword)) \
+	)
+#define cxxTokenIsNonConstantKeyword(_pToken) \
+	( \
+		cxxTokenTypeIs(_pToken,CXXTokenTypeKeyword) && \
+		(!cxxKeywordIsConstant((_pToken)->eKeyword)) \
+	)
+
+// FIXME: Bad argument order
+void cxxTokenAppendToString(vString * s,CXXToken * t);
+
+void cxxTokenAPIInit(void);
+void cxxTokenAPINewFile(void);
+void cxxTokenAPIDone(void);
+
+void cxxTokenReduceBackward (CXXToken *pStart);
+
+#endif //!ctags_cxx_token_h_
diff --git a/ctags/parsers/cxx/cxx_token_chain.c b/ctags/parsers/cxx/cxx_token_chain.c
new file mode 100644
index 0000000000..7cbe7055f4
--- /dev/null
+++ b/ctags/parsers/cxx/cxx_token_chain.c
@@ -0,0 +1,1220 @@
+/*
+* Copyright (c) 2016, Szymon Tomasz Stefanek
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "cxx_token_chain.h" + +#include "cxx_debug.h" + +#include "vstring.h" +#include "debug.h" +#include "routines.h" + +#include + +void cxxTokenChainInit(CXXTokenChain * tc) +{ + Assert(tc); + tc->pHead = NULL; + tc->pTail = NULL; + tc->iCount = 0; +} + +CXXTokenChain * cxxTokenChainCreate(void) +{ + CXXTokenChain * tc = xMalloc(1, CXXTokenChain); + cxxTokenChainInit(tc); + return tc; +} + +void cxxTokenChainDestroy(CXXTokenChain * tc) +{ + CXXToken * t; + CXXToken * t2; + + if(!tc) + return; + + t = tc->pHead; + while(t) + { + t2 = t->pNext; + cxxTokenDestroy(t); + t = t2; + } + + eFree(tc); +} + +CXXToken * cxxTokenChainTakeFirst(CXXTokenChain * tc) +{ + CXXToken * t; + + if(!tc) + return NULL; + if(!tc->pHead) + return NULL; + + t = tc->pHead; + if(t == tc->pTail) + { + tc->pHead = NULL; + tc->pTail = NULL; + tc->iCount = 0; + return t; + } + + tc->iCount--; + Assert(tc->iCount >= 0); + Assert(t->pNext); + + t->pNext->pPrev = NULL; + tc->pHead = t->pNext; + + return t; +} + +CXXToken * cxxTokenChainTakeLast(CXXTokenChain * tc) +{ + CXXToken * t; + + if(!tc) + return NULL; + if(!tc->pTail) + return NULL; + + t = tc->pTail; + if(t == tc->pHead) + { + tc->pHead = NULL; + tc->pTail = NULL; + tc->iCount = 0; + return t; + } + + tc->iCount--; + Assert(tc->iCount >= 0); + + t->pPrev->pNext = NULL; + tc->pTail = t->pPrev; + + return t; +} + +void cxxTokenChainTake(CXXTokenChain * tc,CXXToken * t) +{ + if(!tc) + return; + if(!tc->pHead) + return; + + /* + Debug with this: + + CXXToken * t2 = tc->pHead; + while(t2 && (t2 != t)) + t2 = t2->pNext; + + Assert(t2); + */ + + if(t == tc->pHead) + { + cxxTokenChainTakeFirst(tc); + return; + } + + if(t == tc->pTail) + { + cxxTokenChainTakeLast(tc); + return; + } + + // in the middle + + CXXToken * n = t->pNext; + CXXToken * p = t->pPrev; + + n->pPrev = p; + p->pNext = n; + + tc->iCount--; + + Assert(tc->iCount > 1); +} + +bool 
cxxTokenChainTakeRecursive(CXXTokenChain * tc,CXXToken * t) +{ + if(!tc) + return false; + + CXXToken * aux = tc->pHead; + while(aux) + { + if(t == aux) + { + cxxTokenChainTake(tc,aux); + return true; + } + + if(cxxTokenTypeIsOneOf( + aux, + CXXTokenTypeParenthesisChain | CXXTokenTypeAngleBracketChain | + CXXTokenTypeSquareParenthesisChain | CXXTokenTypeBracketChain + )) + { + if(cxxTokenChainTakeRecursive(aux->pChain,t)) + return true; + } + + aux = aux->pNext; + } + + return false; +} + +#if 0 +CXXToken * cxxTokenChainTakeAt(CXXTokenChain * tc,int index) +{ + if(!tc) + return NULL; + CXXToken * token = cxxTokenChainAt(tc,index); + if(!token) + return NULL; + cxxTokenChainTake(tc,token); + return token; +} +#endif + +void cxxTokenChainClear(CXXTokenChain * tc) +{ + CXXToken * t; + + if(!tc) + return; + + if(tc->iCount < 1) + return; + + while((t = cxxTokenChainTakeFirst(tc))) + cxxTokenDestroy(t); + + + Assert(tc->iCount == 0); + Assert(tc->pHead == NULL); + Assert(tc->pTail == NULL); +} + +void cxxTokenChainInsertAfter(CXXTokenChain * tc,CXXToken * before,CXXToken * t) +{ + if(!before) + { + cxxTokenChainPrepend(tc,t); + return; + } + + if(!before->pNext) + { + cxxTokenChainAppend(tc,t); + return; + } + + t->pNext = before->pNext; + t->pPrev = before; + before->pNext = t; + t->pNext->pPrev = t; +} + +void cxxTokenChainAppend(CXXTokenChain * tc,CXXToken * t) +{ + tc->iCount++; + + if(!tc->pTail) + { + tc->pHead = t; + tc->pTail = t; + t->pPrev = NULL; + t->pNext = NULL; + return; + } + + t->pPrev = tc->pTail; + t->pNext = NULL; + + tc->pTail->pNext = t; + tc->pTail = t; +} + +void cxxTokenChainPrepend(CXXTokenChain * tc,CXXToken * t) +{ + tc->iCount++; + + if(!tc->pHead) + { + tc->pHead = t; + tc->pTail = t; + t->pPrev = NULL; + t->pNext = NULL; + return; + } + + t->pNext = tc->pHead; + t->pPrev = NULL; + + tc->pHead->pPrev = t; + tc->pHead = t; +} + +void cxxTokenChainJoinRangeInString( + CXXToken * from, + CXXToken * to, + vString * s, + const char * 
szSeparator, + unsigned int uFlags + ) +{ + if(!from) + return; + + CXXToken * t = from; + + cxxTokenAppendToString(s,t); + + if((!(uFlags & CXXTokenChainJoinNoTrailingSpaces)) && t->bFollowedBySpace) + vStringPut (s, ' '); + + while(t && (t != to)) + { + t = t->pNext; + if(!t) + return; + + if(szSeparator) + vStringCatS(s,szSeparator); + + cxxTokenAppendToString(s,t); + + if( + (!(uFlags & CXXTokenChainJoinNoTrailingSpaces)) && + t->bFollowedBySpace + ) + vStringPut (s, ' '); + } +} + +vString * cxxTokenChainJoinRange( + CXXToken * from, + CXXToken * to, + const char * szSeparator, + unsigned int uFlags + ) +{ + if(!from) + return NULL; + + vString * s = vStringNew(); + + cxxTokenChainJoinRangeInString(from,to,s,szSeparator,uFlags); + + return s; +} + +void cxxTokenChainJoinInString( + CXXTokenChain * tc, + vString * s, + const char * szSeparator, + unsigned int uFlags + ) +{ + if(!tc) + return; + + if(tc->iCount == 0) + return; + + CXXToken * t = tc->pHead; + + cxxTokenAppendToString(s,t); + + if( + (!(uFlags & CXXTokenChainJoinNoTrailingSpaces)) && + t->bFollowedBySpace + ) + vStringPut (s, ' '); + + t = t->pNext; + while(t) + { + if(szSeparator) + vStringCatS(s,szSeparator); + + cxxTokenAppendToString(s,t); + + if( + (!(uFlags & CXXTokenChainJoinNoTrailingSpaces)) && + t->bFollowedBySpace + ) + vStringPut (s, ' '); + + t = t->pNext; + } +} + + +vString * cxxTokenChainJoin( + CXXTokenChain * tc, + const char * szSeparator, + unsigned int uFlags + ) +{ + if(!tc) + return NULL; + + if(tc->iCount == 0) + return NULL; + + vString * s = vStringNew(); + + cxxTokenChainJoinInString(tc,s,szSeparator,uFlags); + + return s; +} + +#if 0 +// currently unused +void cxxTokenChainMoveEntries(CXXTokenChain * src,CXXTokenChain * dest) +{ + if(dest->iCount > 0) + cxxTokenChainClear(dest); + + dest->iCount = src->iCount; + dest->pHead = src->pHead; + dest->pTail = src->pTail; + + src->iCount = 0; + src->pHead = NULL; + src->pTail = NULL; +} + +void cxxTokenChainMoveEntryRange( + 
CXXTokenChain * src, + CXXToken * start, + CXXToken * end, + CXXTokenChain * dest + ) +{ + if(!src || !dest || !start || !end) + return; + + CXX_DEBUG_ASSERT( + cxxTokenChainFindToken(src,start) >= 0, + "The start token must be in the source chain!" + ); + CXX_DEBUG_ASSERT( + cxxTokenChainFindToken(src,end) >= 0, + "The end token must be in the source chain!" + ); + CXX_DEBUG_ASSERT( + cxxTokenChainFindToken(src,start) <= cxxTokenChainFindToken(src,end), + "The start token must come before the end token" + ); + + // FIXME: We could have a more efficient version of this + CXXToken * t = start; + for(;;) + { + CXXToken * next = t->pNext; + + cxxTokenChainTake(src,t); + cxxTokenChainAppend(dest,t); + + if(t == end) + break; + + t = next; + } +} +#endif + +CXXTokenChain * cxxTokenChainSplitOnComma(CXXTokenChain * tc) +{ + if(!tc) + return NULL; + + CXXTokenChain * pRet = cxxTokenChainCreate(); + + CXXToken * pToken = cxxTokenChainFirst(tc); + + if(!pToken) + return pRet; + + CXXToken * pStart = pToken; + + while(pStart && pToken->pNext) + { + while(pToken->pNext && (!cxxTokenTypeIs(pToken->pNext,CXXTokenTypeComma))) + pToken = pToken->pNext; + + CXXToken * pNew = cxxTokenChainExtractRange(pStart,pToken,0); + if(pNew) + cxxTokenChainAppend(pRet,pNew); + + pToken = pToken->pNext; // comma or nothing + if(pToken) + pToken = pToken->pNext; // after comma + pStart = pToken; + } + + if(pStart) + { + // finished without comma + CXXToken * pNew = cxxTokenChainExtractRange(pStart,cxxTokenChainLast(tc),0); + if(pNew) + cxxTokenChainAppend(pRet,pNew); + } + + return pRet; +} + +CXXToken * cxxTokenChainCondenseIntoToken(CXXTokenChain * tc,unsigned int uFlags) +{ + if(!tc) + return NULL; + + CXXToken * t = tc->pHead; + if(!t) + return NULL; + + CXXToken * pCondensed = cxxTokenCreate(); + + pCondensed->eType = CXXTokenTypeUnknown; + pCondensed->iLineNumber = t->iLineNumber; + pCondensed->oFilePosition = t->oFilePosition; + + while(t) + { + 
cxxTokenAppendToString(pCondensed->pszWord,t); + + if( + (!(uFlags & CXXTokenChainCondenseNoTrailingSpaces)) && + t->bFollowedBySpace + ) + vStringPut (pCondensed->pszWord, ' '); + + pCondensed->bFollowedBySpace = t->bFollowedBySpace; + + t = t->pNext; + } + + return pCondensed; +} + +void cxxTokenChainCondense(CXXTokenChain * tc,unsigned int uFlags) +{ + CXXToken * pCondensed = cxxTokenChainCondenseIntoToken(tc,uFlags); + if(!pCondensed) + return; + + cxxTokenChainClear(tc); + + cxxTokenChainAppend(tc,pCondensed); +} + +CXXToken * cxxTokenChainAt(CXXTokenChain * tc,int index) +{ + if(!tc) + return NULL; + if(index < 0) + return NULL; + if(index >= tc->iCount) + return NULL; + CXXToken * pToken = tc->pHead; + while(pToken && index) + { + index--; + pToken = pToken->pNext; + } + + return pToken; +} + +CXXToken * cxxTokenChainSkipToEndOfTemplateAngleBracket(CXXToken * t) +{ + if(!t) + return NULL; + + CXX_DEBUG_ASSERT( + cxxTokenTypeIs(t,CXXTokenTypeSmallerThanSign), + "This function must be called when pointing to a <" + ); + + int iLevel = 1; + t = t->pNext; + while(t) + { + if(cxxTokenTypeIs(t,CXXTokenTypeSmallerThanSign)) + { + iLevel++; + } else if(cxxTokenTypeIs(t,CXXTokenTypeGreaterThanSign)) + { + if(iLevel == 1) + return t; + iLevel--; + } + t = t->pNext; + } + // invalid + return NULL; +} + +CXXToken * cxxTokenChainSkipBackToStartOfTemplateAngleBracket(CXXToken * t) +{ + if(!t) + return NULL; + CXX_DEBUG_ASSERT( + t->eType == CXXTokenTypeGreaterThanSign, + "This function must be called when pointing to a >" + ); + int iLevel = 1; + t = t->pPrev; + while(t) + { + if(cxxTokenTypeIs(t,CXXTokenTypeGreaterThanSign)) + { + iLevel++; + } else if(cxxTokenTypeIs(t,CXXTokenTypeSmallerThanSign)) + { + if(iLevel == 1) + return t; + iLevel--; + } + t = t->pPrev; + } + // invalid + return NULL; +} + +CXXToken * cxxTokenChainFirstTokenOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes + ) +{ + if(!tc) + return NULL; + CXXToken * t = tc->pHead; + while(t) + { + 
if(t->eType & uTokenTypes) + return t; + t = t->pNext; + } + return NULL; +} + +CXXToken * cxxTokenChainNextTokenOfType( + CXXToken * t, + unsigned int uTokenTypes + ) +{ + if(!t) + return NULL; + t = t->pNext; + while(t) + { + if(t->eType & uTokenTypes) + return t; + t = t->pNext; + } + return NULL; +} + +CXXToken * cxxTokenChainPreviousTokenOfType( + CXXToken * t, + unsigned int uTokenTypes + ) +{ + if(!t) + return NULL; + t = t->pPrev; + while(t) + { + if(t->eType & uTokenTypes) + return t; + t = t->pPrev; + } + return NULL; +} + +CXXToken * cxxTokenChainPreviousTokenNotOfType( + CXXToken * t, + unsigned int uTokenTypes + ) +{ + if(!t) + return NULL; + t = t->pPrev; + while(t) + { + if(!(t->eType & uTokenTypes)) + return t; + t = t->pPrev; + } + return NULL; +} + +CXXToken * cxxTokenChainLastTokenOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes + ) +{ + if(!tc) + return NULL; + CXXToken * t = tc->pTail; + while(t) + { + if(t->eType & uTokenTypes) + return t; + t = t->pPrev; + } + return NULL; +} + +CXXToken * cxxTokenChainLastPossiblyNestedTokenOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes, + CXXTokenChain ** ppParentChain + ) +{ + if(!tc) + return NULL; + CXXToken * t = tc->pTail; + while(t) + { + if(t->eType & uTokenTypes) + { + if(ppParentChain) + *ppParentChain = tc; + return t; + } + if(t->eType == CXXTokenTypeParenthesisChain) + { + CXXToken * tmp = cxxTokenChainLastPossiblyNestedTokenOfType( + t->pChain, + uTokenTypes, + ppParentChain + ); + if(tmp) + return tmp; + } + t = t->pPrev; + } + return NULL; + +} + +CXXToken * cxxTokenChainFirstPossiblyNestedTokenOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes, + CXXTokenChain ** ppParentChain + ) +{ + if(!tc) + return NULL; + CXXToken * t = tc->pHead; + while(t) + { + if(t->eType & uTokenTypes) + { + if(ppParentChain) + *ppParentChain = tc; + return t; + } + if(t->eType == CXXTokenTypeParenthesisChain) + { + CXXToken * tmp = cxxTokenChainFirstPossiblyNestedTokenOfType( + t->pChain, + 
uTokenTypes, + ppParentChain + ); + if(tmp) + return tmp; // ppParentChain is already set + } + t = t->pNext; + } + return NULL; + +} + + +CXXToken * cxxTokenChainFirstTokenNotOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes + ) +{ + if(!tc) + return NULL; + CXXToken * t = tc->pHead; + while(t) + { + if(!(t->eType & uTokenTypes)) + return t; + t = t->pNext; + } + return NULL; +} + +CXXToken * cxxTokenChainNextTokenNotOfGeneric( + CXXToken * t, + bool (* predicator) (CXXToken *, void *), + void *data + ) +{ + if(!t) + return NULL; + t = t->pNext; + while(t) + { + if(!predicator (t, data)) + return t; + t = t->pNext; + } + return NULL; +} + +CXXToken * cxxTokenChainNextTokenNotOfType( + CXXToken * t, + unsigned int uTokenTypes + ) +{ + if(!t) + return NULL; + t = t->pNext; + while(t) + { + if(!(t->eType & uTokenTypes)) + return t; + t = t->pNext; + } + return NULL; +} + +CXXToken * cxxTokenChainLastTokenNotOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes + ) +{ + if(!tc) + return NULL; + CXXToken * t = tc->pTail; + while(t) + { + if(!(t->eType & uTokenTypes)) + return t; + t = t->pPrev; + } + return NULL; +} + + +int cxxTokenChainFindToken( + CXXTokenChain * tc, + CXXToken * t + ) +{ + if(!tc) + return -1; + if(tc->iCount < 1) + return -1; + + CXXToken * pToken = tc->pHead; + int idx = 0; + while(pToken) + { + if(pToken == t) + return idx; + idx++; + pToken = pToken->pNext; + } + + return -1; +} + +CXXToken * cxxTokenChainPreviousKeyword( + CXXToken * from, + CXXKeyword eKeyword + ) +{ + if(!from) + return NULL; + + CXXToken * t = from->pPrev; + while(t) + { + if(cxxTokenIsKeyword(t,eKeyword)) + return t; + t = t->pPrev; + } + + return NULL; +} + +CXXToken * cxxTokenChainNextKeyword( + CXXToken * from, + CXXKeyword eKeyword + ) +{ + if(!from) + return NULL; + + CXXToken * t = from->pNext; + while(t) + { + if(cxxTokenIsKeyword(t,eKeyword)) + return t; + t = t->pNext; + } + + return NULL; +} + +int cxxTokenChainFirstKeywordIndex( + CXXTokenChain * tc, + 
CXXKeyword eKeyword + ) +{ + if(!tc) + return -1; + if(tc->iCount < 1) + return -1; + + CXXToken * pToken = tc->pHead; + int idx = 0; + while(pToken) + { + if(cxxTokenIsKeyword(pToken,eKeyword)) + return idx; + idx++; + pToken = pToken->pNext; + } + + return -1; +} + +#if 0 +// This is working code but it's unused and coveralls complains.. sigh. +// Remove the #if above if needed. +CXXToken * cxxTokenChainFirstKeyword( + CXXTokenChain * tc, + CXXKeyword eKeyword + ) +{ + if(!tc) + return NULL; + if(tc->iCount < 1) + return NULL; + + CXXToken * pToken = tc->pHead; + while(pToken) + { + if(cxxTokenIsKeyword(pToken,eKeyword)) + return pToken; + pToken = pToken->pNext; + } + + return NULL; +} +#endif + +CXXToken * cxxTokenChainNextIdentifier( + CXXToken * from, + const char * szIdentifier + ) +{ + if(!from) + return NULL; + + CXXToken * t = from->pNext; + while(t) + { + if( + cxxTokenTypeIs(t,CXXTokenTypeIdentifier) && + (strcmp(vStringValue(t->pszWord),szIdentifier) == 0) + ) + return t; + t = t->pNext; + } + + return NULL; +} + +void cxxTokenChainDestroyRange(CXXTokenChain * pChain,CXXToken * from,CXXToken * to) +{ + if(!from || !to) + return; + CXX_DEBUG_ASSERT(from,"Bad from pointer passed to cxxTokenChainDestroyRange"); + CXX_DEBUG_ASSERT(to,"Bad to pointer passed to cxxTokenChainDestroyRange"); + + for(;;) + { + CXXToken * next = from->pNext; + cxxTokenChainTake(pChain,from); + cxxTokenDestroy(from); + if(from == to) // may be compared even if invalid + return; + from = next; + CXX_DEBUG_ASSERT(from,"Should NOT have found chain termination here"); + } +} + + +CXXToken * cxxTokenChainExtractRange( + CXXToken * from, + CXXToken * to, + unsigned int uFlags + ) +{ + if(!from) + return NULL; + + CXXToken * pToken = from; + + CXXToken * pRet = cxxTokenCreate(); + pRet->iLineNumber = pToken->iLineNumber; + pRet->oFilePosition = pToken->oFilePosition; + pRet->eType = pToken->eType; + + cxxTokenAppendToString(pRet->pszWord,pToken); + if( + (!(uFlags & 
CXXTokenChainExtractRangeNoTrailingSpaces)) && + pToken->bFollowedBySpace + ) + vStringPut (pRet->pszWord, ' '); + pRet->bFollowedBySpace = pToken->bFollowedBySpace; + + while(pToken != to) + { + pToken = pToken->pNext; + if(!pToken) + return pRet; + cxxTokenAppendToString(pRet->pszWord,pToken); + if( + (!(uFlags & CXXTokenChainExtractRangeNoTrailingSpaces)) && + pToken->bFollowedBySpace + ) + vStringPut (pRet->pszWord, ' '); + pRet->bFollowedBySpace = pToken->bFollowedBySpace; + } + + return pRet; +} + +CXXToken * cxxTokenChainExtractRangeFilterTypeName( + CXXToken * from, + CXXToken * to + ) +{ + if(!from) + return NULL; + + CXXToken * pToken = from; + for(;;) + { + if(!cxxTokenTypeIs(pToken,CXXTokenTypeKeyword)) + break; + if(!cxxKeywordExcludeFromTypeNames(pToken->eKeyword)) + break; + // must be excluded + if(pToken == to) + return NULL; // only excluded keywords + pToken = pToken->pNext; + if(!pToken) + return NULL; // ... bug? + } + + // Got at least one non-excluded keyword + CXXToken * pRet = cxxTokenCreate(); + pRet->iLineNumber = pToken->iLineNumber; + pRet->oFilePosition = pToken->oFilePosition; + pRet->eType = pToken->eType; + + cxxTokenAppendToString(pRet->pszWord,pToken); + if(pToken->bFollowedBySpace) + vStringPut (pRet->pszWord, ' '); + pRet->bFollowedBySpace = pToken->bFollowedBySpace; + + while(pToken != to) + { + pToken = pToken->pNext; + if(!pToken) + return pRet; // ... bug? + + for(;;) + { + if(!cxxTokenTypeIs(pToken,CXXTokenTypeKeyword)) + break; + if(!cxxKeywordExcludeFromTypeNames(pToken->eKeyword)) + break; + // must be excluded + if(pToken == to) + return pRet; + pToken = pToken->pNext; + if(!pToken) + return pRet; // ... bug? 
+ } + + cxxTokenAppendToString(pRet->pszWord,pToken); + if(pToken->bFollowedBySpace) + vStringPut (pRet->pszWord, ' '); + pRet->bFollowedBySpace = pToken->bFollowedBySpace; + } + + return pRet; +} + + +CXXToken * cxxTokenChainExtractIndexRange( + CXXTokenChain * tc, + int iFirstIndex, + int iLastIndex, + unsigned int uFlags + ) +{ + if(!tc) + return NULL; + if(iFirstIndex < 0) + return NULL; + if(iFirstIndex >= tc->iCount) + return NULL; + + CXXToken * pToken = tc->pHead; + int idx = 0; + while(pToken && (idx < iFirstIndex)) + { + idx++; + pToken = pToken->pNext; + } + + if(!pToken) + return NULL; + + CXXToken * pRet = cxxTokenCreate(); + pRet->iLineNumber = pToken->iLineNumber; + pRet->oFilePosition = pToken->oFilePosition; + pRet->eType = pToken->eType; + + cxxTokenAppendToString(pRet->pszWord,pToken); + if( + (!(uFlags & CXXTokenChainExtractRangeNoTrailingSpaces)) && + pToken->bFollowedBySpace + ) + vStringPut (pRet->pszWord, ' '); + pRet->bFollowedBySpace = pToken->bFollowedBySpace; + + while(idx < iLastIndex) + { + pToken = pToken->pNext; + if(!pToken) + return pRet; + cxxTokenAppendToString(pRet->pszWord,pToken); + if( + (!(uFlags & CXXTokenChainExtractRangeNoTrailingSpaces)) && + pToken->bFollowedBySpace + ) + vStringPut (pRet->pszWord, ' '); + pRet->bFollowedBySpace = pToken->bFollowedBySpace; + idx++; + } + + return pRet; +} + +void cxxTokenChainNormalizeTypeNameSpacing(CXXTokenChain * pChain) +{ + if(!pChain) + return; + + if(pChain->iCount < 1) + return; + + cxxTokenChainNormalizeTypeNameSpacingInRange(pChain->pHead,pChain->pTail); +} + +void cxxTokenChainNormalizeTypeNameSpacingInRange(CXXToken * pFrom,CXXToken * pTo) +{ + if(!pFrom || !pTo) + return; + + // Goals: + + // int + // unsigned short int + // int * + // unsigned short int ** + // const Class & + // Class && + // int (*)(type &,type *) + // unsigned short int[3]; + // ClassA> <-- fixme: not sure about the trailing >> + // Class (*)(type[]) + // decltype(something) + + CXXToken * t = pFrom; + 
+ for(;;) + { + if(cxxTokenTypeIsOneOf( + t, + CXXTokenTypeParenthesisChain | CXXTokenTypeSquareParenthesisChain + )) + { + cxxTokenChainNormalizeTypeNameSpacing(t->pChain); + t->bFollowedBySpace = false; + } else if(cxxTokenTypeIs(t,CXXTokenTypeKeyword)) + { + t->bFollowedBySpace = t->pNext && + (t->eKeyword != CXXKeywordDECLTYPE) && + cxxTokenTypeIsOneOf( + t->pNext, + CXXTokenTypeParenthesisChain | CXXTokenTypeIdentifier | + CXXTokenTypeKeyword | CXXTokenTypeStar | + CXXTokenTypeAnd | CXXTokenTypeMultipleAnds + ); + } else if(cxxTokenTypeIsOneOf(t, + CXXTokenTypeIdentifier | + CXXTokenTypeGreaterThanSign | + CXXTokenTypeAnd | CXXTokenTypeMultipleAnds + )) + { + t->bFollowedBySpace = t->pNext && + cxxTokenTypeIsOneOf( + t->pNext, + CXXTokenTypeParenthesisChain | CXXTokenTypeIdentifier | + CXXTokenTypeKeyword | CXXTokenTypeStar | + CXXTokenTypeAnd | CXXTokenTypeMultipleAnds + ); + } else if(cxxTokenTypeIs(t,CXXTokenTypeStar)) + { + t->bFollowedBySpace = t->pNext && + (!cxxTokenTypeIsOneOf( + t->pNext, + CXXTokenTypeStar | CXXTokenTypeComma | + CXXTokenTypeClosingParenthesis + )); + } else { + t->bFollowedBySpace = false; + } + + if(t == pTo) + break; + + t = t->pNext; + } + + // Finally the chain has no space at end + pTo->bFollowedBySpace = false; +} diff --git a/ctags/parsers/cxx/cxx_token_chain.h b/ctags/parsers/cxx/cxx_token_chain.h new file mode 100644 index 0000000000..899e30c75e --- /dev/null +++ b/ctags/parsers/cxx/cxx_token_chain.h @@ -0,0 +1,288 @@ +#ifndef ctags_cxx_token_chain_h_ +#define ctags_cxx_token_chain_h_ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" + +#include "cxx_token.h" +#include "cxx_keyword.h" + +struct _CXXTokenChain +{ + CXXToken * pHead; + CXXToken * pTail; + int iCount; +}; + +// The struct is typedef'd in cxx_token.h +// typedef struct _CXXTokenChain CXXTokenChain; + +CXXTokenChain * cxxTokenChainCreate(void); +void cxxTokenChainDestroy(CXXTokenChain * tc); + +// Note: you don't need to call this after cxxTokenChainCreate(). +void cxxTokenChainInit(CXXTokenChain * tc); + +void cxxTokenChainClear(CXXTokenChain * tc); + +// Find a specified token and return its index +int cxxTokenChainFindToken(CXXTokenChain * tc,CXXToken * t); + +// Find the first token with one of the specified types +CXXToken * cxxTokenChainFirstTokenOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes + ); + +// Find the first token with one of the specified types that comes +// after the specified token +CXXToken * cxxTokenChainNextTokenOfType( + CXXToken * t, + unsigned int uTokenTypes + ); + +// Find the first token with one of the specified types that comes +// before the specified token +CXXToken * cxxTokenChainPreviousTokenOfType( + CXXToken * t, + unsigned int uTokenTypes + ); + +// Find the first token with that is not one of the specified types +// that comes before the specified token +CXXToken * cxxTokenChainPreviousTokenNotOfType( + CXXToken * t, + unsigned int uTokenTypes + ); + +// Find the last token with one of the specified types +CXXToken * cxxTokenChainLastTokenOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes + ); + +// Find the last token with one of the specified types. Look also +// in nested () chains (only (), not [], {}...) +CXXToken * cxxTokenChainLastPossiblyNestedTokenOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes, + CXXTokenChain ** ppParentChain + ); + +// Find the first token with one of the specified types. Look also +// in nested () chains (only (), not [], {}...) 
+CXXToken * cxxTokenChainFirstPossiblyNestedTokenOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes, + CXXTokenChain ** ppParentChain + ); + +// Find the first token with type that is not one of the specified types +CXXToken * cxxTokenChainFirstTokenNotOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes + ); + + +// Find the first token with type that is not accepted by PREDICATOR. +// PREDICATOR returns true when a token is acceptable. +CXXToken * cxxTokenChainNextTokenNotOfGeneric( + CXXToken * t, + bool (* predicator) (CXXToken *, void *), + void *data + ); + +// Find the first token with type that is not one of the specified types +// that comes after the specified token +CXXToken * cxxTokenChainNextTokenNotOfType( + CXXToken * t, + unsigned int uTokenTypes + ); + +// Find the last token with type that is not one of the specified types +CXXToken * cxxTokenChainLastTokenNotOfType( + CXXTokenChain * tc, + unsigned int uTokenTypes + ); + +// Specialized function to skip from a < to the matching > (used for +// templates). Nested <> pairs are skipped properly. +// Parenthesis chains are assumed to be condensed. +// Note that the function stops at the ending > and not past it. +CXXToken * cxxTokenChainSkipToEndOfTemplateAngleBracket( + CXXToken * t + ); + +// Specialized function to skip back from a > to the matching < (used for +// templates). Nested <> pairs are skipped properly. +// Parenthesis chains are assumed to be condensed. +// Note that the function stops at the initial < and not past it. +CXXToken * cxxTokenChainSkipBackToStartOfTemplateAngleBracket( + CXXToken * t + ); + +#define cxxTokenChainFirst(tc) (tc ? tc->pHead : NULL) +#define cxxTokenChainLast(tc) (tc ? 
tc->pTail : NULL) + +CXXToken * cxxTokenChainAt(CXXTokenChain * tc,int index); + +CXXToken * cxxTokenChainTakeFirst(CXXTokenChain * tc); +CXXToken * cxxTokenChainTakeLast(CXXTokenChain * tc); +#if 0 +CXXToken * cxxTokenChainTakeAt(CXXTokenChain * tc,int index); +#endif +void cxxTokenChainTake(CXXTokenChain * tc,CXXToken * t); +bool cxxTokenChainTakeRecursive(CXXTokenChain * tc,CXXToken * t); + +// Destroy the last token +#define cxxTokenChainDestroyLast(tc) \ + cxxTokenDestroy(cxxTokenChainTakeLast(tc)) + +// Destroy the first token +#define cxxTokenChainDestroyFirst(tc) \ + cxxTokenDestroy(cxxTokenChainTakeFirst(tc)) + +void cxxTokenChainDestroyRange(CXXTokenChain * pChain,CXXToken * from,CXXToken * to); + +void cxxTokenChainAppend(CXXTokenChain * tc,CXXToken * t); +void cxxTokenChainPrepend(CXXTokenChain * tc,CXXToken * t); +void cxxTokenChainInsertAfter(CXXTokenChain * tc,CXXToken * before,CXXToken * t); + +#if 0 +// currently unused +void cxxTokenChainMoveEntries( + CXXTokenChain * src, + CXXTokenChain * dest + ); + +void cxxTokenChainMoveEntryRange( + CXXTokenChain * src, + CXXToken * start, + CXXToken * end, + CXXTokenChain * dest + ); +#endif + +enum CXXTokenChainJoinFlags +{ + // Do not add trailing spaces for entries that are followed by space + CXXTokenChainJoinNoTrailingSpaces = 1 +}; + +void cxxTokenChainJoinInString( + CXXTokenChain * tc, + vString * s, + const char * szSeparator, + unsigned int uFlags + ); +vString * cxxTokenChainJoin( + CXXTokenChain * tc, + const char * szSeparator, + unsigned int uFlags + ); + +void cxxTokenChainJoinRangeInString( + CXXToken * from, + CXXToken * to, + vString * s, + const char * szSeparator, + unsigned int uFlags + ); +vString * cxxTokenChainJoinRange( + CXXToken * from, + CXXToken * to, + const char * szSeparator, + unsigned int uFlags + ); + +// Treat the token chain tc as a comma separated sequence +// of items (something, blah foo, 1 2 3 4 5, ...) 
+// Create a token chain that contains tokens corresponding +// to each item (i.e, "something", "blah foo", "1 2 3 4 5"). +// Please note that the returned chain may be empty! +CXXTokenChain * cxxTokenChainSplitOnComma(CXXTokenChain * tc); + + +enum CXXTokenChainCondenseFlags +{ + // Do not add trailing spaces for entries that are followed by space + CXXTokenChainCondenseNoTrailingSpaces = 1 +}; + +CXXToken * cxxTokenChainCondenseIntoToken(CXXTokenChain * tc,unsigned int uFlags); +void cxxTokenChainCondense(CXXTokenChain * tc,unsigned int uFlags); + + +enum CXXTokenChainExtractRangeFlags +{ + CXXTokenChainExtractRangeNoTrailingSpaces = 1 +}; + +CXXToken * cxxTokenChainExtractRange( + CXXToken * from, + CXXToken * to, + unsigned int uFlags + ); + +CXXToken * cxxTokenChainExtractRangeFilterTypeName( + CXXToken * from, + CXXToken * to + ); + +CXXToken * cxxTokenChainExtractIndexRange( + CXXTokenChain * tc, + int iFirstIndex, + int iLastIndex, + unsigned int uFlags + ); + +CXXToken * cxxTokenChainPreviousKeyword( + CXXToken * from, + CXXKeyword eKeyword + ); + +CXXToken * cxxTokenChainNextKeyword( + CXXToken * from, + CXXKeyword eKeyword + ); + +CXXToken * cxxTokenChainNextIdentifier( + CXXToken * from, + const char * szIdentifier + ); + +int cxxTokenChainFirstKeywordIndex( + CXXTokenChain * tc, + CXXKeyword eKeyword + ); + +#if 0 +// This is working code but it's unused and coveralls complains.. sigh. +// Remove the #if above if needed. +CXXToken * cxxTokenChainFirstKeyword( + CXXTokenChain * tc, + CXXKeyword eKeyword + ); +#endif + +// Assuming that pChain contains a type name, attempt to normalize the +// spacing within the whole chain. 
+// +// Please note that this will work also for entire function signatures +// (since type names can contain function pointers which have signatures) +void cxxTokenChainNormalizeTypeNameSpacing( + CXXTokenChain * pChain + ); +void cxxTokenChainNormalizeTypeNameSpacingInRange( + CXXToken * pFrom, + CXXToken * pTo + ); + +#endif //!ctags_cxx_token_chain_h_ From 5b9036a00b6a546b797164da16c1aee4f536b43d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Sun, 28 Nov 2021 13:32:49 +0100 Subject: [PATCH 4/9] Enable the new cxx parser There are several things needed for this: 1. The new preprocessor has to be defined as a separate parser. 2. Tags from the new c/c++ parsers and the preprocessor parser have to be mapped to Geany types. We still need to keep the old mappings because some parsers like Ferite or GLSL still use the old C parser. 3. Anonymous tags have a different name so we have to reflect this in tm_tag_is_anon(). --- src/tagmanager/tm_parser.c | 62 +++++++++++++++++++++++++++---------- src/tagmanager/tm_parser.h | 1 + src/tagmanager/tm_parsers.h | 9 +++--- src/tagmanager/tm_tag.c | 2 +- 4 files changed, 53 insertions(+), 21 deletions(-) diff --git a/src/tagmanager/tm_parser.c b/src/tagmanager/tm_parser.c index 86fcf03423..0fecd525aa 100644 --- a/src/tagmanager/tm_parser.c +++ b/src/tagmanager/tm_parser.c @@ -44,25 +44,49 @@ typedef struct static GHashTable *subparser_map = NULL; +#define COMMON_C \ + {'d', tm_tag_macro_t}, \ + {'e', tm_tag_enumerator_t}, \ + {'f', tm_tag_function_t}, \ + {'g', tm_tag_enum_t}, \ + {'m', tm_tag_member_t}, \ + {'p', tm_tag_prototype_t}, \ + {'s', tm_tag_struct_t}, \ + {'t', tm_tag_typedef_t}, \ + {'u', tm_tag_union_t}, \ + {'v', tm_tag_variable_t}, \ + {'x', tm_tag_externvar_t}, -static TMParserMapEntry map_C[] = { +/* Old C parser, also used by GLSL and Ferite */ +static TMParserMapEntry map_C_old_parser[] = { + COMMON_C {'c', tm_tag_class_t}, - {'d', tm_tag_macro_t}, - {'e', tm_tag_enumerator_t}, - {'f', 
tm_tag_function_t}, - {'g', tm_tag_enum_t}, - {'m', tm_tag_member_t}, {'n', tm_tag_namespace_t}, - {'p', tm_tag_prototype_t}, - {'s', tm_tag_struct_t}, - {'t', tm_tag_typedef_t}, - {'u', tm_tag_union_t}, - {'v', tm_tag_variable_t}, - {'x', tm_tag_externvar_t}, }; -/* C++, same as C */ -#define map_CPP map_C +# define COMMON_C_NEW_PARSER \ + {'h', tm_tag_undef_t}, \ + {'l', tm_tag_undef_t}, \ + {'z', tm_tag_undef_t}, \ + {'L', tm_tag_undef_t}, \ + {'D', tm_tag_undef_t}, + +static TMParserMapEntry map_C[] = { + COMMON_C + COMMON_C_NEW_PARSER +}; + +static TMParserMapEntry map_CPP[] = { + COMMON_C + COMMON_C_NEW_PARSER + + {'c', tm_tag_class_t}, + {'n', tm_tag_namespace_t}, + {'A', tm_tag_undef_t}, + {'N', tm_tag_undef_t}, + {'U', tm_tag_undef_t}, + {'Z', tm_tag_undef_t}, +}; static TMParserMapEntry map_JAVA[] = { {'c', tm_tag_class_t}, @@ -359,7 +383,7 @@ static TMParserMapEntry map_F77[] = { #define map_FORTRAN map_F77 -#define map_FERITE map_C +#define map_FERITE map_C_old_parser /* different parser than in universal-ctags */ static TMParserMapEntry map_MATLAB[] = { @@ -367,7 +391,7 @@ static TMParserMapEntry map_MATLAB[] = { {'s', tm_tag_struct_t}, }; -#define map_GLSL map_C +#define map_GLSL map_C_old_parser /* not in universal-ctags */ static TMParserMapEntry map_VALA[] = { @@ -541,6 +565,11 @@ static TMParserMapEntry map_JULIA[] = { {'x', tm_tag_externvar_t}, }; +static TMParserMapEntry map_CPREPROCESSOR[] = { + {'d', tm_tag_undef_t}, + {'h', tm_tag_undef_t}, + {'D', tm_tag_undef_t}, +}; typedef struct { @@ -605,6 +634,7 @@ static TMParserMap parser_map[] = { MAP_ENTRY(ZEPHIR), MAP_ENTRY(POWERSHELL), MAP_ENTRY(JULIA), + MAP_ENTRY(CPREPROCESSOR), }; /* make sure the parser map is consistent and complete */ G_STATIC_ASSERT(G_N_ELEMENTS(parser_map) == TM_PARSER_COUNT); diff --git a/src/tagmanager/tm_parser.h b/src/tagmanager/tm_parser.h index 6e3adb41b6..e59e141cf1 100644 --- a/src/tagmanager/tm_parser.h +++ b/src/tagmanager/tm_parser.h @@ -111,6 +111,7 @@ enum 
TM_PARSER_POWERSHELL, TM_PARSER_JULIA, TM_PARSER_BIBTEX, + TM_PARSER_CPREPROCESSOR, TM_PARSER_COUNT }; diff --git a/src/tagmanager/tm_parsers.h b/src/tagmanager/tm_parsers.h index 1bbe203f33..54bdcf89f3 100644 --- a/src/tagmanager/tm_parsers.h +++ b/src/tagmanager/tm_parsers.h @@ -14,8 +14,8 @@ /* Keep in sync with tm_parser.h */ #define EXTERNAL_PARSER_LIST \ - CParserOld, \ - CppParserOld, \ + CParser, \ + CppParser, \ JavaParser, \ MakefileParser, \ PascalParser, \ @@ -65,7 +65,8 @@ JsonParser, \ ZephirParser, \ PowerShellParser, \ - JuliaParser, \ - BibtexParser + JuliaParser, \ + BibtexParser, \ + CPreProParser #endif diff --git a/src/tagmanager/tm_tag.c b/src/tagmanager/tm_tag.c index b16f3bcf94..21c4d3ece2 100644 --- a/src/tagmanager/tm_tag.c +++ b/src/tagmanager/tm_tag.c @@ -671,7 +671,7 @@ gboolean tm_tag_is_anon(const TMTag *tag) char dummy; if (tag->lang == TM_PARSER_C || tag->lang == TM_PARSER_CPP) - return sscanf(tag->name, "anon_%*[a-z]_%u%c", &i, &dummy) == 1; + return sscanf(tag->name, "__anon%u%c", &i, &dummy) == 1; else if (tag->lang == TM_PARSER_FORTRAN || tag->lang == TM_PARSER_F77) return sscanf(tag->name, "Structure#%u%c", &i, &dummy) == 1 || sscanf(tag->name, "Interface#%u%c", &i, &dummy) == 1 || From 1f03474d216ca9898cb98246e50156d1fef532e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Sun, 28 Nov 2021 13:52:41 +0100 Subject: [PATCH 5/9] Update C/C++ unit tests The changes are mostly these: 1. Spaces in function argument list (int var1, int var2, ...) - before (int var1,int var2,...) - now 2. Anonymous tags anon_struct_1 anon_union_2 anon_typedef_3 vs __anon1 __anon2 __anon3 3. 
Improved parsing of the new parser --- tests/ctags/backslashes.c.tags | 10 +++--- tests/ctags/bit_field.c.tags | 46 ++++++++++++------------ tests/ctags/bug1201689.c.tags | 2 +- tests/ctags/bug1466117.c.tags | 8 ++--- tests/ctags/bug1491666.c.tags | 8 ++--- tests/ctags/bug1563476.cpp.tags | 1 - tests/ctags/bug1575055.cpp.tags | 2 +- tests/ctags/bug1585745.cpp.tags | 8 ++--- tests/ctags/bug1764143.h.tags | 4 +-- tests/ctags/bug1770479.cpp.tags | 2 +- tests/ctags/bug1773926.cpp.tags | 2 +- tests/ctags/bug1799340.cpp.tags | 6 ++-- tests/ctags/bug1907083.cpp.tags | 8 ++--- tests/ctags/bug1924919.cpp.tags | 4 +-- tests/ctags/bug507864.c.tags | 6 ++-- tests/ctags/bug556645.c.tags | 3 +- tests/ctags/bug556646.c.tags | 42 +++++++++++----------- tests/ctags/bug639639.h.tags | 6 ++-- tests/ctags/bug639644.hpp.tags | 4 +-- tests/ctags/c-digraphs.c.tags | 6 ++-- tests/ctags/c-trigraphs.c.tags | 6 ++-- tests/ctags/cpp_destructor.cpp.tags | 6 ++-- tests/ctags/cxx11-final.cpp.tags | 6 ++-- tests/ctags/cxx11-noexcept.cpp.tags | 4 +-- tests/ctags/cxx11-override.cpp.tags | 6 ++-- tests/ctags/cxx11enum.cpp.tags | 10 +++--- tests/ctags/cxx14-combined.cpp.tags | 6 ++-- tests/ctags/extern_variable.h.tags | 2 -- tests/ctags/func_typedef.h.tags | 2 +- tests/ctags/local.c.tags | 2 +- tests/ctags/macros.c.tags | 4 +-- tests/ctags/namespace.cpp.tags | 4 +-- tests/ctags/process_order.c.tags | 12 +++---- tests/ctags/prototype.h.tags | 2 +- tests/ctags/signature.cpp.tags | 9 ++--- tests/ctags/static_array.c.tags | 2 +- tests/ctags/var-and-return-type.cpp.tags | 6 ++-- 37 files changed, 132 insertions(+), 135 deletions(-) diff --git a/tests/ctags/backslashes.c.tags b/tests/ctags/backslashes.c.tags index affce869c4..22c0a7d9ff 100644 --- a/tests/ctags/backslashes.c.tags +++ b/tests/ctags/backslashes.c.tags @@ -3,10 +3,10 @@ MACRO1 MACRO2Ì131072Í(x)Ö0 MACRO3Ì131072Í(y)Ö0 TÌ4096Ö0Ïint -func1Ì1024Í(int var1, int var2, ...)Ö0Ïint -func2Ì1024Í(int var1, int var2, ...)Ö0Ïint -func3Ì1024Í(T var1, T 
var2, ...)Ö0Ïint -func4Ì1024Í(T var1, T var2, ...)Ö0Ïint -func5Ì1024Í(int var1, int var2, ...)Ö0Ïint +func1Ì1024Í(int var1,int var2,...)Ö0Ïint +func2Ì1024Í(int var1,int var2,...)Ö0Ïint +func3Ì1024Í(T var1,T var2,...)Ö0Ïint +func4Ì1024Í(T var1,T var2,...)Ö0Ïint +func5Ì1024Í(int var1,int var2,...)Ö0Ïint func6Ì1024Í(void)Ö0Ïint func7Ì1024Í(int a)Ö0Ïint diff --git a/tests/ctags/bit_field.c.tags b/tests/ctags/bit_field.c.tags index f002d6aea0..a2062d24be 100644 --- a/tests/ctags/bit_field.c.tags +++ b/tests/ctags/bit_field.c.tags @@ -1,26 +1,26 @@ # format=tagmanager -aÌ64Îbit_fieldsÖ0Ïunsigned int -anon_struct_0Ì2048Ö0 -anon_struct_1Ì2048Ö0 -anon_struct_2Ì2048Ö0 -bÌ64Îbit_fieldsÖ0Ïunsigned int -bad2Ì64Îanon_struct_1Ö0ÏBYTE +__anon1Ì2048Ö0 +__anon2Ì2048Ö0 +__anon3Ì2048Ö0 +aÌ64Îbit_fieldsÖ0Ïunsigned int:1 +bÌ64Îbit_fieldsÖ0Ïunsigned int:1 +bad2Ì64Î__anon2Ö0ÏBYTE:1 bit_fieldsÌ2048Ö0 -bitfield_flagsÌ4096Ö0Ïanon_struct_1 -cÌ64Îbit_fieldsÖ0Ïunsigned int -expÌ64Îanon_struct_0Ö0 -frac0Ì64Îanon_struct_0Ö0 -frac1Ì64Îanon_struct_0Ö0 -groupÌ64Îanon_struct_1Ö0ÏBYTE -lowerÌ64Îshortname_infoÖ0Ïunsigned char -mystructÌ4096Ö0Ïanon_struct_2 -personalÌ64Îanon_struct_1Ö0ÏBYTE -privateÌ64Îanon_struct_2Ö0ÏBYTE -publicÌ64Îanon_struct_1Ö0ÏBYTE -publicÌ64Îanon_struct_2Ö0ÏBYTE +bitfield_flagsÌ4096Ö0Ï__anon2 +cÌ64Îbit_fieldsÖ0Ïunsigned int:2 +expÌ64Î__anon1Ö0Ïunsigned +frac0Ì64Î__anon1Ö0Ïunsigned +frac1Ì64Î__anon1Ö0Ïunsigned +groupÌ64Î__anon2Ö0ÏBYTE:1 +lowerÌ64Îshortname_infoÖ0Ïunsigned char:1 +mystructÌ4096Ö0Ï__anon3 +personalÌ64Î__anon2Ö0ÏBYTE:1 +privateÌ64Î__anon3Ö0ÏBYTE +publicÌ64Î__anon2Ö0ÏBYTE:1 +publicÌ64Î__anon3Ö0ÏBYTE shortname_infoÌ2048Ö0 -signÌ64Îanon_struct_0Ö0 -thatÌ64Îanon_struct_2Ö0ÏBYTE -thisÌ64Îanon_struct_2Ö0ÏBYTE -upperÌ64Îshortname_infoÖ0Ïchar -validÌ64Îshortname_infoÖ0Ïchar +signÌ64Î__anon1Ö0Ïunsigned:1 +thatÌ64Î__anon3Ö0ÏBYTE +thisÌ64Î__anon3Ö0ÏBYTE +upperÌ64Îshortname_infoÖ0Ïunsigned char:1 +validÌ64Îshortname_infoÖ0Ïunsigned char:1 diff --git 
a/tests/ctags/bug1201689.c.tags b/tests/ctags/bug1201689.c.tags index 8c64bd66a9..9413edf027 100644 --- a/tests/ctags/bug1201689.c.tags +++ b/tests/ctags/bug1201689.c.tags @@ -1,2 +1,2 @@ # format=tagmanager -aÌ16384Ö0Ïchar +testÌ16Í(a,...)Ö0 diff --git a/tests/ctags/bug1466117.c.tags b/tests/ctags/bug1466117.c.tags index 1b124611e9..960bd3b7be 100644 --- a/tests/ctags/bug1466117.c.tags +++ b/tests/ctags/bug1466117.c.tags @@ -1,8 +1,8 @@ # format=tagmanager -aÌ64Îanon_struct_0Ö0Ïint +__anon1Ì2048Ö0 +aÌ64Î__anon1Ö0Ïint aÌ64ÎmystructÖ0Ïint -anon_struct_0Ì2048Ö0 -bÌ64Îanon_struct_0Ö0Ïint +bÌ64Î__anon1Ö0Ïint bÌ64ÎmystructÖ0Ïint mystructÌ2048Ö0 -mystructÌ4096Ö0Ïanon_struct_0 +mystructÌ4096Ö0Ï__anon1 diff --git a/tests/ctags/bug1491666.c.tags b/tests/ctags/bug1491666.c.tags index f259c95d42..697a27a9b1 100644 --- a/tests/ctags/bug1491666.c.tags +++ b/tests/ctags/bug1491666.c.tags @@ -1,6 +1,6 @@ # format=tagmanager -anon_struct_0Ì2048Ö0 +__anon1Ì2048Ö0 mainÌ16Í(void)Ö0Ïvoid -my_structÌ4096Ö0Ïanon_struct_0 -xÌ64Îanon_struct_0Ö0Ïint -yÌ64Îanon_struct_0Ö0Ïfloat +my_structÌ4096Ö0Ï__anon1 +xÌ64Î__anon1Ö0Ïint +yÌ64Î__anon1Ö0Ïfloat diff --git a/tests/ctags/bug1563476.cpp.tags b/tests/ctags/bug1563476.cpp.tags index cf6b91b304..cf7b55512c 100644 --- a/tests/ctags/bug1563476.cpp.tags +++ b/tests/ctags/bug1563476.cpp.tags @@ -1,5 +1,4 @@ # format=tagmanager IntroduceBitDefÌ2048Ö0 -IntroduceBitDefÌ32768Ö0 fÌ16Í()ÎIntroduceBitDefÖ0Ïint gÌ16Í()Ö0Ïint diff --git a/tests/ctags/bug1575055.cpp.tags b/tests/ctags/bug1575055.cpp.tags index 6fc9e49237..5b4bf14095 100644 --- a/tests/ctags/bug1575055.cpp.tags +++ b/tests/ctags/bug1575055.cpp.tags @@ -1,4 +1,4 @@ # format=tagmanager MyClassÌ1ÎTheNamespaceÖ0 TheNamespaceÌ256Ö0 -variableÌ64ÎTheNamespaceÖ0Ïint +variableÌ16384ÎTheNamespaceÖ0Ïint diff --git a/tests/ctags/bug1585745.cpp.tags b/tests/ctags/bug1585745.cpp.tags index db80c8b978..d7ccb059af 100644 --- a/tests/ctags/bug1585745.cpp.tags +++ b/tests/ctags/bug1585745.cpp.tags @@ -1,7 +1,7 
@@ # format=tagmanager Class5Ì1Ö0 -~Class1Ì16Í()ÎClass1Ö0ÏClass1 -~Class2Ì16Í()ÎClass2Ö0ÏClass2 -~Class3Ì16Í()ÎClass3Ö0ÏClass3 -~Class4Ì16Í()ÎClass4Ö0ÏClass4 +~Class1Ì16Í()ÎClass1Ö0 +~Class2Ì16Í()ÎClass2Ö0 +~Class3Ì16Í()ÎClass3Ö0 +~Class4Ì16Í()ÎClass4Ö0 ~Class5Ì16Í()ÎClass5Ö0 diff --git a/tests/ctags/bug1764143.h.tags b/tests/ctags/bug1764143.h.tags index 77e6753f55..c9e481e185 100644 --- a/tests/ctags/bug1764143.h.tags +++ b/tests/ctags/bug1764143.h.tags @@ -1,3 +1,3 @@ # format=tagmanager -arch_resetÌ16Í(char mode)Ö0Ïinline void -omap1_arch_resetÌ16Í(char mode)Ö0Ïinline void +arch_resetÌ16Í(char mode)Ö0Ïvoid +omap1_arch_resetÌ16Í(char mode)Ö0Ïvoid diff --git a/tests/ctags/bug1770479.cpp.tags b/tests/ctags/bug1770479.cpp.tags index d56650ec93..95da9c7f2d 100644 --- a/tests/ctags/bug1770479.cpp.tags +++ b/tests/ctags/bug1770479.cpp.tags @@ -1,3 +1,3 @@ # format=tagmanager fooÌ16Í(int i)Ö0Ïint -mainÌ16Í(int argc, char **argv)Ö0Ïint +mainÌ16Í(int argc,char ** argv)Ö0Ïint diff --git a/tests/ctags/bug1773926.cpp.tags b/tests/ctags/bug1773926.cpp.tags index 2a1d279a5b..efd1843262 100644 --- a/tests/ctags/bug1773926.cpp.tags +++ b/tests/ctags/bug1773926.cpp.tags @@ -2,4 +2,4 @@ ERROR_HAPPENEDÌ65536Ö0 NEXT_DEFINEÌ65536Ö0 OKÌ65536Ö0 -mainÌ16Í(int argc, char* argv[])Ö0Ïint +mainÌ16Í(int argc,char * argv[])Ö0Ïint diff --git a/tests/ctags/bug1799340.cpp.tags b/tests/ctags/bug1799340.cpp.tags index feb5230c6c..630cc2b2b4 100644 --- a/tests/ctags/bug1799340.cpp.tags +++ b/tests/ctags/bug1799340.cpp.tags @@ -1,4 +1,4 @@ # format=tagmanager -f1Ì16Í()Ö0Ïstd::string -f2Ì16Í()Ö0Ïconst std::string -f3Ì16Í()ÎstdÖ0Ïstd::string const +f1Ì16Í()Ö0Ïstd::string & +f2Ì16Í()Ö0Ïconst std::string & +f3Ì16Í()Ö0Ïstd::string const & diff --git a/tests/ctags/bug1907083.cpp.tags b/tests/ctags/bug1907083.cpp.tags index 9a40eb9a9d..01ac699240 100644 --- a/tests/ctags/bug1907083.cpp.tags +++ b/tests/ctags/bug1907083.cpp.tags @@ -1,5 +1,5 @@ # format=tagmanager -m1Ì16Í()ÎC::CÖ0ÏC::T * 
-m2Ì16Í()ÎC::CÖ0ÏC::T *const -m3Ì16Í()ÎC::CÖ0ÏC::T const * -m4Ì16Í()ÎC::CÖ0ÏC::T const *const +m1Ì16Í()ÎCÖ0ÏC::T * +m2Ì16Í()ÎCÖ0ÏC::T * const +m3Ì16Í()ÎCÖ0ÏC::T const * +m4Ì16Í()ÎCÖ0ÏC::T const * const diff --git a/tests/ctags/bug1924919.cpp.tags b/tests/ctags/bug1924919.cpp.tags index 455704b573..0bff14908e 100644 --- a/tests/ctags/bug1924919.cpp.tags +++ b/tests/ctags/bug1924919.cpp.tags @@ -1,4 +1,4 @@ # format=tagmanager -MajorVersionÌ64ÎmudÖ0Ïstd::string -MinorVersionÌ64ÎmudÖ0Ïstd::string +MajorVersionÌ16384ÎmudÖ0Ïstd::string +MinorVersionÌ16384ÎmudÖ0Ïstd::string mudÌ256Ö0 diff --git a/tests/ctags/bug507864.c.tags b/tests/ctags/bug507864.c.tags index 65041ccb1b..643a694e9f 100644 --- a/tests/ctags/bug507864.c.tags +++ b/tests/ctags/bug507864.c.tags @@ -1,5 +1,3 @@ # format=tagmanager -ENTSEQNOÌ16Í(seq)Ö0ÏFUNCSTS -MEMTXTÌ16Í(mail)Ö0Ï -MEMTXTÌ1024Í(form_msg)Ö0ÏFUNCSTS -MEMTXTÌ1024Í(text)Ö0Ï +func1Ì16Í(ENTSEQNO (seq))Ö0ÏFUNCSTS +func2Ì16Í(MEMTXT (form_msg),MEMTXT (text),MEMTXT (mail))Ö0ÏFUNCSTS diff --git a/tests/ctags/bug556645.c.tags b/tests/ctags/bug556645.c.tags index 313026dd97..88666be6f7 100644 --- a/tests/ctags/bug556645.c.tags +++ b/tests/ctags/bug556645.c.tags @@ -1,2 +1,3 @@ # format=tagmanager -A1Ì131072Í(_type, _length)Ö0 +A1Ì131072Í(_type,_length)Ö0 +QtypeÌ4096Ö0 diff --git a/tests/ctags/bug556646.c.tags b/tests/ctags/bug556646.c.tags index 852eab876b..18b18bd30a 100644 --- a/tests/ctags/bug556646.c.tags +++ b/tests/ctags/bug556646.c.tags @@ -1,22 +1,22 @@ # format=tagmanager -AÌ4Îanon_enum_0Ö0 -INDX_C1Ì4Îanon_enum_0Ö0 -INDX_C2Ì4Îanon_enum_0Ö0 -INDX_IM1Ì4Îanon_enum_0Ö0 -INDX_IM2Ì4Îanon_enum_0Ö0 -INDX_LÌ4Îanon_enum_0Ö0 -INDX_L2Ì4Îanon_enum_0Ö0 -INDX_MÌ4Îanon_enum_0Ö0 -INDX_NILÌ4Îanon_enum_0Ö0 -INDX_PÌ4Îanon_enum_0Ö0 -INDX_RÌ4Îanon_enum_0Ö0 -INDX_R2Ì4Îanon_enum_0Ö0 -INDX_SÌ4Îanon_enum_0Ö0 -INDX_S1Ì4Îanon_enum_0Ö0 -INDX_S2Ì4Îanon_enum_0Ö0 -INDX_S3Ì4Îanon_enum_0Ö0 -INDX_S4Ì4Îanon_enum_0Ö0 -INDX_TÌ4Îanon_enum_0Ö0 -INDX_T2Ì4Îanon_enum_0Ö0 -anon_enum_0Ì2Ö0 
-task_indx_typeÌ4096Ö0Ïanon_enum_0 +AÌ4Î__anon1Ö0 +INDX_C1Ì4Î__anon1Ö0 +INDX_C2Ì4Î__anon1Ö0 +INDX_IM1Ì4Î__anon1Ö0 +INDX_IM2Ì4Î__anon1Ö0 +INDX_LÌ4Î__anon1Ö0 +INDX_L2Ì4Î__anon1Ö0 +INDX_MÌ4Î__anon1Ö0 +INDX_NILÌ4Î__anon1Ö0 +INDX_PÌ4Î__anon1Ö0 +INDX_RÌ4Î__anon1Ö0 +INDX_R2Ì4Î__anon1Ö0 +INDX_SÌ4Î__anon1Ö0 +INDX_S1Ì4Î__anon1Ö0 +INDX_S2Ì4Î__anon1Ö0 +INDX_S3Ì4Î__anon1Ö0 +INDX_S4Ì4Î__anon1Ö0 +INDX_TÌ4Î__anon1Ö0 +INDX_T2Ì4Î__anon1Ö0 +__anon1Ì2Ö0 +task_indx_typeÌ4096Ö0Ï__anon1 diff --git a/tests/ctags/bug639639.h.tags b/tests/ctags/bug639639.h.tags index 1ddb73f268..1a0526521a 100644 --- a/tests/ctags/bug639639.h.tags +++ b/tests/ctags/bug639639.h.tags @@ -1,5 +1,5 @@ # format=tagmanager Namespace1Ì256Ö0 -anon2Ì4Îanon_enum_0Ö0 -anon_enum_0Ì2Ö0 -function2Ì1024Í(char* str)ÎNamespace1Ö0Ïint +__anon1Ì2Ö0 +anon2Ì4Î__anon1Ö0 +function2Ì1024Í(char * str)ÎNamespace1Ö0Ïint diff --git a/tests/ctags/bug639644.hpp.tags b/tests/ctags/bug639644.hpp.tags index abb8446a89..476a83e3dd 100644 --- a/tests/ctags/bug639644.hpp.tags +++ b/tests/ctags/bug639644.hpp.tags @@ -1,3 +1,3 @@ # format=tagmanager -anon_namespace_0Ì256Ö0 -fooÌ64Îanon_namespace_0Ö0Ïint +__anon1Ì256Ö0 +fooÌ16384Î__anon1Ö0Ïint diff --git a/tests/ctags/c-digraphs.c.tags b/tests/ctags/c-digraphs.c.tags index 7ab527c5ed..4233f3639d 100644 --- a/tests/ctags/c-digraphs.c.tags +++ b/tests/ctags/c-digraphs.c.tags @@ -1,12 +1,12 @@ # format=tagmanager AÌ65536Ö0 BÌ65536Ö0 -M3_INITÌ131072Í(a, b, c)Ö0 +M3_INITÌ131072Í(a,b,c)Ö0 STRINGIFYÌ131072Í(x)Ö0 STRINGIFY_INTERNÌ131072Í(x)Ö0 bufÌ64ÎstrÖ0Ïchar * lenÌ64ÎstrÖ0Ïunsigned int mainÌ16Í(void)Ö0Ïint -matrix3Ì4096Ö0Ïint -sizeÌ64ÎstrÖ0Ïint +matrix3Ì4096Ö0Ïint[3] +sizeÌ64ÎstrÖ0Ïunsigned int strÌ2048Ö0 diff --git a/tests/ctags/c-trigraphs.c.tags b/tests/ctags/c-trigraphs.c.tags index d5f0f703b6..69d54ce8d7 100644 --- a/tests/ctags/c-trigraphs.c.tags +++ b/tests/ctags/c-trigraphs.c.tags @@ -4,12 +4,12 @@ B DÌ65536Ö0 EÌ65536Ö0 FÌ65536Ö0 -M3_INITÌ131072Í(a, b, c)Ö0 +M3_INITÌ131072Í(a,b,c)Ö0 
STRINGIFYÌ131072Í(x)Ö0 STRINGIFY_INTERNÌ131072Í(x)Ö0 bufÌ64ÎstrÖ0Ïchar * lenÌ64ÎstrÖ0Ïunsigned int mainÌ16Í(void)Ö0Ïint -matrix3Ì4096Ö0Ïint -sizeÌ64ÎstrÖ0Ïint +matrix3Ì4096Ö0Ïint[3] +sizeÌ64ÎstrÖ0Ïunsigned int strÌ2048Ö0 diff --git a/tests/ctags/cpp_destructor.cpp.tags b/tests/ctags/cpp_destructor.cpp.tags index 29b3aba363..17574a8220 100644 --- a/tests/ctags/cpp_destructor.cpp.tags +++ b/tests/ctags/cpp_destructor.cpp.tags @@ -1,4 +1,4 @@ # format=tagmanager -~AÌ16Í()ÎAÖ0ÏA -~BÌ16Í()ÎBÖ0ÏB -~CÌ16Í()ÎCÖ0ÏC +~AÌ16Í()ÎAÖ0 +~BÌ16Í()ÎBÖ0 +~CÌ16Í()ÎCÖ0 diff --git a/tests/ctags/cxx11-final.cpp.tags b/tests/ctags/cxx11-final.cpp.tags index e82e4f1bcb..05e7f56a3e 100644 --- a/tests/ctags/cxx11-final.cpp.tags +++ b/tests/ctags/cxx11-final.cpp.tags @@ -2,8 +2,8 @@ BaseÌ1Ö0 DerivedÌ1Ö0 finalÌ16Í()ÎDerivedÖ0Ïvoid -finalÌ1024Í()ÎDerivedÖ0Ïvirtual void +finalÌ1024Í()ÎDerivedÖ0Ïvoid fooÌ16Í()ÎBaseÖ0Ïvoid fooÌ16Í()ÎDerivedÖ0Ïvoid -fooÌ1024Í()ÎBaseÖ0Ïvirtual void -fooÌ1024Í()ÎDerivedÖ0Ïvirtual void +fooÌ1024Í()ÎBaseÖ0Ïvoid +fooÌ1024Í()ÎDerivedÖ0Ïvoid diff --git a/tests/ctags/cxx11-noexcept.cpp.tags b/tests/ctags/cxx11-noexcept.cpp.tags index 1b32b9e814..567e580bfc 100644 --- a/tests/ctags/cxx11-noexcept.cpp.tags +++ b/tests/ctags/cxx11-noexcept.cpp.tags @@ -1,5 +1,5 @@ # format=tagmanager BaseÌ1Ö0 -barÌ1024Í()ÎBaseÖ0Ïvirtual void +barÌ1024Í() constÎBaseÖ0Ïvoid bazÌ16Í()ÎBaseÖ0Ïint -fooÌ1024Í()ÎBaseÖ0Ïvirtual void +fooÌ1024Í()ÎBaseÖ0Ïvoid diff --git a/tests/ctags/cxx11-override.cpp.tags b/tests/ctags/cxx11-override.cpp.tags index 1bdac7f6d8..e3b8c07b10 100644 --- a/tests/ctags/cxx11-override.cpp.tags +++ b/tests/ctags/cxx11-override.cpp.tags @@ -3,7 +3,7 @@ Base DerivedÌ1Ö0 fooÌ16Í()ÎBaseÖ0Ïvoid fooÌ16Í()ÎDerivedÖ0Ïvoid -fooÌ1024Í()ÎBaseÖ0Ïvirtual void -fooÌ1024Í()ÎDerivedÖ0Ïvirtual void +fooÌ1024Í()ÎBaseÖ0Ïvoid +fooÌ1024Í()ÎDerivedÖ0Ïvoid overrideÌ16Í()ÎDerivedÖ0Ïvoid -overrideÌ1024Í()ÎDerivedÖ0Ïvirtual void +overrideÌ1024Í()ÎDerivedÖ0Ïvoid diff --git 
a/tests/ctags/cxx11enum.cpp.tags b/tests/ctags/cxx11enum.cpp.tags index 6477d7b7a6..06d58525ce 100644 --- a/tests/ctags/cxx11enum.cpp.tags +++ b/tests/ctags/cxx11enum.cpp.tags @@ -1,19 +1,19 @@ # format=tagmanager -AÌ2Ö0 +AÌ2Ö0Ïint A_aÌ4ÎAÖ0 A_bÌ4ÎAÖ0 A_cÌ4ÎAÖ0 -BÌ2Ö0 +BÌ2Ö0Ïlong B_aÌ4ÎBÖ0 B_bÌ4ÎBÖ0 B_cÌ4ÎBÖ0 -CÌ2Ö0 +CÌ2Ö0Ïunsigned int C_aÌ4ÎCÖ0 C_bÌ4ÎCÖ0 C_cÌ4ÎCÖ0 -DÌ2ÎFooÖ0 +DÌ2ÎFooÖ0Ïint FooÌ1Ö0 aÌ4ÎFoo::DÖ0 bÌ4ÎFoo::DÖ0 cÌ4ÎFoo::DÖ0 -fooÌ1024Í(enum D a)ÎFooÖ0Ïvirtual void +fooÌ1024Í(enum D a)ÎFooÖ0Ïvoid diff --git a/tests/ctags/cxx14-combined.cpp.tags b/tests/ctags/cxx14-combined.cpp.tags index 9852e8571d..a110932d87 100644 --- a/tests/ctags/cxx14-combined.cpp.tags +++ b/tests/ctags/cxx14-combined.cpp.tags @@ -1,6 +1,6 @@ # format=tagmanager BaseÌ2048Ö0 FooÌ2048Ö0 -barÌ16Í()ÎFooÖ0Ïconstexpr -bazÌ1024Í()ÎBaseÖ0Ïvirtual void -bazÌ1024Í()ÎFooÖ0Ïvirtual void +barÌ16Í()ÎFooÖ0Ïauto +bazÌ1024Í() constÎBaseÖ0Ïvoid +bazÌ1024Í() constÎFooÖ0Ïvoid diff --git a/tests/ctags/extern_variable.h.tags b/tests/ctags/extern_variable.h.tags index 1636b6427f..8a236597be 100644 --- a/tests/ctags/extern_variable.h.tags +++ b/tests/ctags/extern_variable.h.tags @@ -1,5 +1,3 @@ # format=tagmanager -CÌ32768Ö0 -SÌ32768Ö0 aÌ32768Ö0Ïint bÌ32768Ö0ÏB diff --git a/tests/ctags/func_typedef.h.tags b/tests/ctags/func_typedef.h.tags index 29ab4917c4..8d59a48f2a 100644 --- a/tests/ctags/func_typedef.h.tags +++ b/tests/ctags/func_typedef.h.tags @@ -1,2 +1,2 @@ # format=tagmanager -SLIST_HEADÌ4096Ö0 +symlist_tÌ4096Ö0 diff --git a/tests/ctags/local.c.tags b/tests/ctags/local.c.tags index 05eb015b91..bd9faa665e 100644 --- a/tests/ctags/local.c.tags +++ b/tests/ctags/local.c.tags @@ -1,3 +1,3 @@ # format=tagmanager -isContextualKeywordÌ16Í(const tokenInfo *const token)Ö0Ïboolean +isContextualKeywordÌ16Í(const tokenInfo * const token)Ö0Ïboolean mainÌ16Í()Ö0 diff --git a/tests/ctags/macros.c.tags b/tests/ctags/macros.c.tags index af4b151847..6a7bc48552 100644 --- a/tests/ctags/macros.c.tags +++ 
b/tests/ctags/macros.c.tags @@ -1,6 +1,6 @@ # format=tagmanager FUNCTION_LIKEÌ131072Í(a,b)Ö0 +MACROÌ1024Í(foo)Ö0 VARIABLE_LIKEÌ65536Ö0 WeakSymbolÌ65536Ö0 -fooÌ16384Ö0ÏMACRO -prototypeÌ1024Í((int arg1, void *arg2))Ö0Ïvoid +prototypeÌ1024Í(int arg1,void * arg2)Ö0Ïvoid diff --git a/tests/ctags/namespace.cpp.tags b/tests/ctags/namespace.cpp.tags index 2697889856..7bee2d66a0 100644 --- a/tests/ctags/namespace.cpp.tags +++ b/tests/ctags/namespace.cpp.tags @@ -1,7 +1,7 @@ # format=tagmanager +__anon1Ì256Ö0 aÌ256Ö0 a_b_fÌ16Í()Îa::bÖ0Ïvoid a_fÌ16Í()ÎaÖ0Ïvoid -anon_fÌ16Í()Îanon_namespace_0Ö0Ïvoid -anon_namespace_0Ì256Ö0 +anon_fÌ16Í()Î__anon1Ö0Ïvoid bÌ256ÎaÖ0 diff --git a/tests/ctags/process_order.c.tags b/tests/ctags/process_order.c.tags index 91b317c16d..771640cb46 100644 --- a/tests/ctags/process_order.c.tags +++ b/tests/ctags/process_order.c.tags @@ -1,7 +1,7 @@ # format=tagmanager -I1_E1Ì4Îanon_enum_0Ö0 -I1_E2Ì4Îanon_enum_0Ö0 -I2_E1Ì4Îanon_enum_1Ö0 -I2_E2Ì4Îanon_enum_1Ö0 -anon_enum_0Ì2Ö0 -anon_enum_1Ì2Ö0 +I1_E1Ì4Î__anon1Ö0 +I1_E2Ì4Î__anon1Ö0 +I2_E1Ì4Î__anon2Ö0 +I2_E2Ì4Î__anon2Ö0 +__anon1Ì2Ö0 +__anon2Ì2Ö0 diff --git a/tests/ctags/prototype.h.tags b/tests/ctags/prototype.h.tags index a0387c6804..d6a14831d1 100644 --- a/tests/ctags/prototype.h.tags +++ b/tests/ctags/prototype.h.tags @@ -1,3 +1,3 @@ # format=tagmanager -prototype_aÌ1024Í(int a, char *b)Ö0Ïint +prototype_aÌ1024Í(int a,char * b)Ö0Ïint prototype_bÌ1024Í(void)Ö0Ïvoid diff --git a/tests/ctags/signature.cpp.tags b/tests/ctags/signature.cpp.tags index 0614ffa8fe..5ca708e202 100644 --- a/tests/ctags/signature.cpp.tags +++ b/tests/ctags/signature.cpp.tags @@ -1,5 +1,6 @@ # format=tagmanager -barÌ16Í(a, b)Ö0Ïint -barÌ16Í(char *c, double d[])ÎBARÖ0Ïchar * -fooÌ16Í(int a, char b)Ö0Ïvoid -foobarÌ1024Í((int a, char b))Ö0Ïvoid +bÌ16384Ö0Ïchar +barÌ16Í(char * c,double d[]) constÎBARÖ0Ïchar * +barÌ1024Í(a,b)Ö0Ïint +fooÌ16Í(int a,char b)Ö0Ïvoid +foobarÌ1024Í(int a,char b)Ö0Ïvoid diff --git a/tests/ctags/static_array.c.tags 
b/tests/ctags/static_array.c.tags index a6ebaa0ab5..36a7eb8908 100644 --- a/tests/ctags/static_array.c.tags +++ b/tests/ctags/static_array.c.tags @@ -1,2 +1,2 @@ # format=tagmanager -charset2uniÌ16384Ö0Ïwchar_t +charset2uniÌ16384Ö0Ïwchar_t[256] diff --git a/tests/ctags/var-and-return-type.cpp.tags b/tests/ctags/var-and-return-type.cpp.tags index 317e7682f2..7ca9ec68ee 100644 --- a/tests/ctags/var-and-return-type.cpp.tags +++ b/tests/ctags/var-and-return-type.cpp.tags @@ -1,18 +1,18 @@ # format=tagmanager func1Ì1024Í()Ö0Ïconst volatile unsigned int -func2Ì1024Í()Ö0Ïtype1 +func2Ì1024Í()Ö0Ïconst struct type1 func3Ì1024Í()Ö0Ïconst type1_t func4Ì1024Í()Ö0Ïtype1 func5Ì1024Í()Ö0Ïtype1_t func6Ì1024Í()Ö0Ïconst std::string func7Ì1024Í()Ö0Ïstd::string memb1Ì64Îtype1Ö0Ïunsigned int -nextÌ64Îtype1Ö0Ïtype1 +nextÌ64Îtype1Ö0Ïtype1 * type1Ì2048Ö0 type1_tÌ4096Ö0Ïtype1 type2_tÌ4096Ö0Ïunsigned long int var1Ì16384Ö0Ïconst volatile unsigned int -var2Ì16384Ö0Ïtype1 +var2Ì16384Ö0Ïconst struct type1 var3Ì16384Ö0Ïconst type1_t var4Ì16384Ö0Ïtype1 var5Ì16384Ö0Ïtype1_t From 9d6c7a6ab14fe7dd643c3694c2c65305d4139a2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Mon, 29 Nov 2021 10:22:10 +0100 Subject: [PATCH 6/9] Eliminate console warning for cxx 'using' tags Fix from https://github.com/universal-ctags/ctags/commit/fb305d8814c4dc53a94fcbc5f0c0d2d701f176fa --- ctags/parsers/cxx/cxx_tag.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ctags/parsers/cxx/cxx_tag.c b/ctags/parsers/cxx/cxx_tag.c index c579408f6f..40275da659 100644 --- a/ctags/parsers/cxx/cxx_tag.c +++ b/ctags/parsers/cxx/cxx_tag.c @@ -74,8 +74,7 @@ static kindDefinition g_aCXXCPPKinds [] = { { true, 'n', "namespace", "namespaces" }, { false, 'A', "alias", "namespace aliases" }, { false, 'N', "name", "names imported via using scope::symbol" }, - { false, 'U', "using", "using namespace statements", - .referenceOnly = true }, + { false, 'U', "using", "using namespace statements" }, { false, 
'Z', "tparam", "template parameters" }, }; From 3bf1c5f390ce66ad02b1a32da6ed50cc939235ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Tue, 30 Nov 2021 00:33:33 +0100 Subject: [PATCH 7/9] Update update-ctags.py to also update the cxx parser --- scripts/update-ctags.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/update-ctags.py b/scripts/update-ctags.py index d9f27dfdec..a575dc59c5 100755 --- a/scripts/update-ctags.py +++ b/scripts/update-ctags.py @@ -15,11 +15,20 @@ os.chdir(dstdir + '/parsers') parser_dst_files = glob.glob('*.c') + glob.glob('*.h') parser_dst_files = list(filter(lambda x: not x.startswith('geany_'), parser_dst_files)) +cxx_parser_dst_files = glob.glob('cxx/*.c') + glob.glob('cxx/*.h') +for f in cxx_parser_dst_files: + os.remove(f) + os.chdir(srcdir + '/parsers') print('Copying parsers... ({} files)'.format(len(parser_dst_files))) for f in parser_dst_files: shutil.copy(f, dstdir + '/parsers') +cxx_parser_src_files = glob.glob('cxx/*.c') + glob.glob('cxx/*.h') +print('Copying cxx parser files... ({} files)'.format(len(cxx_parser_src_files))) +for f in cxx_parser_src_files: + shutil.copy(f, dstdir + '/parsers/cxx') + print('Copying dsl files...') for f in ['dsl/es.c', 'dsl/es.h', 'dsl/optscript.c', 'dsl/optscript.h']: shutil.copy(srcdir + '/' + f, dstdir + '/' + f) From 8ba9b24d161f19c3aec57d9ada9d2426d9289e60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Sun, 5 Dec 2021 13:47:59 +0100 Subject: [PATCH 8/9] Pass our ignore.tags file with ignored symbols to cxx preprocessor The syntax is slightly different from the previous syntax and is described here: https://docs.ctags.io/en/latest/parser-cxx.html Basic usage should be the same, uctags just doesn't support Geany's wildcard ignores like G_GNUC_*. On the other hand the new parser is much more resilient to macros so there shouldn't be so much need for manual ignores. 
The original code is still kept for parsers from c.c that still use the old preprocessor. --- src/symbols.c | 14 +++++++++++++- src/tagmanager/tm_ctags.c | 22 ++++++++++++++++++++++ src/tagmanager/tm_ctags.h | 2 ++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/symbols.c b/src/symbols.c index b22d4f6bcb..311d668d71 100644 --- a/src/symbols.c +++ b/src/symbols.c @@ -51,6 +51,7 @@ #include "support.h" #include "tm_parser.h" #include "tm_tag.h" +#include "tm_ctags.h" #include "ui_utils.h" #include "utils.h" @@ -116,7 +117,7 @@ symbol_menu; static void load_user_tags(GeanyFiletypeID ft_id); -/* get the tags_ignore list, exported by tagmanager's geany.c */ +/* get the tags_ignore list, exported by geany_lcpp.c */ extern gchar **c_tags_ignore; /* ignore certain tokens when parsing C-like syntax. @@ -128,11 +129,22 @@ static void load_c_ignore_tags(void) if (g_file_get_contents(path, &content, NULL, NULL)) { + gchar **line; + /* historically we ignore the glib _DECLS for tag generation */ SETPTR(content, g_strconcat("G_BEGIN_DECLS G_END_DECLS\n", content, NULL)); g_strfreev(c_tags_ignore); + tm_ctags_clear_ignore_symbols(); + + /* for old c.c parser */ c_tags_ignore = g_strsplit_set(content, " \n\r", -1); + /* for new cxx parser */ + foreach_strv(line, c_tags_ignore) + { + tm_ctags_add_ignore_symbol(*line); + } + g_free(content); } g_free(path); diff --git a/src/tagmanager/tm_ctags.c b/src/tagmanager/tm_ctags.c index b2e2e0a1f4..7dafd70590 100644 --- a/src/tagmanager/tm_ctags.c +++ b/src/tagmanager/tm_ctags.c @@ -19,6 +19,7 @@ #include "trashbox_p.h" #include "writer_p.h" #include "xtag_p.h" +#include "param_p.h" #include @@ -221,6 +222,27 @@ void tm_ctags_init(void) } +void tm_ctags_add_ignore_symbol(const char *value) +{ + langType lang = getNamedLanguage ("CPreProcessor", 0); + gchar *val = g_strdup(value); + + /* make sure we don't enter empty string - passing NULL or "" clears + * the ignore list in ctags */ + val = g_strstrip(val); + if (*val) + 
applyParameter (lang, "ignore", val); + g_free(val); +} + + +void tm_ctags_clear_ignore_symbols(void) +{ + langType lang = getNamedLanguage ("CPreProcessor", 0); + applyParameter (lang, "ignore", NULL); +} + + void tm_ctags_parse(guchar *buffer, gsize buffer_size, const gchar *file_name, TMParserType language, TMSourceFile *source_file) { diff --git a/src/tagmanager/tm_ctags.h b/src/tagmanager/tm_ctags.h index 217baf36f0..8124bdb3df 100644 --- a/src/tagmanager/tm_ctags.h +++ b/src/tagmanager/tm_ctags.h @@ -18,6 +18,8 @@ G_BEGIN_DECLS #ifdef GEANY_PRIVATE void tm_ctags_init(void); +void tm_ctags_add_ignore_symbol(const char *value); +void tm_ctags_clear_ignore_symbols(void); void tm_ctags_parse(guchar *buffer, gsize buffer_size, const gchar *file_name, TMParserType language, TMSourceFile *source_file); const gchar *tm_ctags_get_lang_name(TMParserType lang); From 3b015051393a095ccd69bdd6e134a0d1d8182b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Sat, 11 Dec 2021 00:19:35 +0100 Subject: [PATCH 9/9] Update documentation regarding ignore.tags --- doc/geany.txt | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/doc/geany.txt b/doc/geany.txt index a156871ee3..65318046ca 100644 --- a/doc/geany.txt +++ b/doc/geany.txt @@ -1717,33 +1717,25 @@ Example:: G_GNUC_NULL_TERMINATED G_GNUC_PRINTF G_GNUC_WARN_UNUSED_RESULT + BAR -This will parse code like: +This will ignore the above macros and will correctly detect 'Foo' as a type +instead of 'BAR' in the following code: -``gchar **utils_strv_new(const gchar *first, ...) -G_GNUC_NULL_TERMINATED;`` +``struct Foo BAR { int i; };`` -More detailed information about ignore.tags usage from the Exuberant Ctags -manual page: +In addition, it is possible to specify macro definition similarly to the +gcc '-D' option: - Specifies a list of identifiers which are to be specially handled - while parsing C and C++ source files. 
This option is specifically - provided to handle special cases arising through the use of - pre-processor macros. When the identifiers listed are simple identifiers, - these identifiers will be ignored during parsing of the source files. - If an identifier is suffixed with a '+' character, ctags will also - ignore any parenthesis-enclosed argument list which may immediately - follow the identifier in the source files. - If two identifiers are separated with the '=' character, the first - identifiers is replaced by the second identifiers for parsing purposes. + <macro>=<definition> + Defines a C preprocessor <macro>. This emulates the behavior of + the corresponding gcc option. All types of macros are supported, + including the ones with parameters and variable arguments. + Stringification, token pasting and recursive macro expansion are + also supported. For even more detailed information please read the manual page of -Exuberant Ctags. - -Geany extends Ctags with a '*' character suffix - this means use -prefix matching, e.g. G_GNUC_* will match G_GNUC_NULL_TERMINATED, etc. -Note that prefix match items should be put after other items to ensure -that items like G_GNUC_PRINTF+ get parsed correctly. +Universal Ctags. Preferences