diff --git a/ctags/Makefile.am b/ctags/Makefile.am index 6ca10d3fa4..0749f432d3 100644 --- a/ctags/Makefile.am +++ b/ctags/Makefile.am @@ -82,16 +82,17 @@ parsers = \ parsers/php.c \ parsers/powershell.c \ parsers/geany_python.c \ - parsers/geany_r.c \ + parsers/r.c \ + parsers/r.h \ parsers/rst.c \ parsers/ruby.c \ parsers/rust.c \ - parsers/geany_sh.c \ + parsers/sh.c \ parsers/sql.c \ parsers/geany_tcl.c \ parsers/geany_tex.c \ parsers/txt2tags.c \ - parsers/geany_verilog.c \ + parsers/verilog.c \ parsers/geany_vhdl.c # skip cmd.c and mini-geany.c which define main() diff --git a/ctags/parsers/geany_r.c b/ctags/parsers/geany_r.c deleted file mode 100644 index 8da35e5079..0000000000 --- a/ctags/parsers/geany_r.c +++ /dev/null @@ -1,177 +0,0 @@ -/* -* Copyright (c) 2003-2004, Ascher Stefan -* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. -* -* This module contains functions for generating tags for R language files. -* R is a programming language for statistical computing. 
-* R is GPL Software, get it from http://www.r-project.org/ -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include -#include /* to define isalpha(), isalnum(), isspace() */ - -#include "debug.h" -#include "entry.h" -#include "parse.h" -#include "read.h" -#include "vstring.h" -#include "routines.h" - - -#define SKIPSPACE(ch) while (isspace((int)*ch)) \ - ch++ - -typedef enum { - K_FUNCTION, - K_LIBRARY, - K_SOURCE, - KIND_COUNT -} rKind; - -static kindDefinition RKinds [KIND_COUNT] = { - {true, 'f', "function", "functions"}, - {true, 'l', "library", "libraries"}, - {true, 's', "source", "sources"}, -}; - -static void makeRTag (const vString * const name, rKind kind) -{ - tagEntryInfo e; - initTagEntry(&e, vStringValue(name), kind); - - Assert (kind < KIND_COUNT); - - makeTagEntry (&e); -} - -static void createRTags (void) -{ - vString *vLine = vStringNew (); - vString *name = vStringNew (); - int ikind; - const unsigned char *line; - - while ((line = readLineFromInputFile ()) != NULL) - { - const unsigned char *cp = (const unsigned char *) line; - - vStringClear (name); - while ((*cp != '\0') && (*cp != '#')) - { - /* iterate to the end of line or to a comment */ - ikind = -1; - switch (*cp) { - case 'l': - case 's': - if (strncasecmp((const char*)cp, "library", (size_t)7) == 0) { - /* load a library: library(tools) */ - cp += 7; - SKIPSPACE(cp); - if (*cp == '(') - ikind = K_LIBRARY; - else - cp -= 7; - } else if (strncasecmp((const char*)cp, "source", (size_t)6) == 0) { - /* load a source file: source("myfile.r") */ - cp += 6; - SKIPSPACE(cp); - if (*cp == '(') - ikind = K_SOURCE; - else - cp -= 6; - } - if (ikind != -1) { - cp++; - - vStringClear(name); - while ((!isspace((int)*cp)) && *cp != '\0' && *cp != ')') { - vStringPut(name, (int)*cp); - cp++; - } - - /* if the string really exists, make a tag of it */ - if (vStringLength(name) > 0) - makeRTag(name, ikind); - - /* prepare for the next iteration */ - 
vStringClear(name); - } else { - vStringPut(name, (int)*cp); - cp++; - } - break; - case '<': - cp++; - if (*cp == '-') { - /* assignment: ident <- someval */ - cp++; - SKIPSPACE(cp); - - if (*cp == '\0') { - /* not in this line, read next */ - /* sometimes functions are declared this way: - ident <- - function(...) - { - ... - } - I don't know if there is a reason to write the function keyword - in a new line - */ - if ((line = readLineFromInputFile()) != NULL) { - cp = (const unsigned char*)line; - SKIPSPACE(cp); - } - } - - if (strncasecmp((const char*)cp, "function", (size_t)8) == 0) { - /* it's a function: ident <- function(args) */ - cp += 8; - /* if the string really exists, make a tag of it */ - if (vStringLength(name) > 0) - makeRTag(name, K_FUNCTION); - - /* prepare for the next iteration */ - vStringClear(name); - break; - } - } - /* fall through */ - case ' ': - case '\x009': - /* skip whitespace */ - cp++; - break; - default: - /* collect all characters that could be a part of an identifier */ - vStringPut(name, (int)*cp); - cp++; - break; - } - } - } - - vStringDelete (name); - vStringDelete (vLine); -} - -extern parserDefinition *RParser (void) -{ - /* *.r: R files - * *.s;*.q: S files - */ - static const char *const extensions [] = { "r", "s", "q", NULL }; - parserDefinition *const def = parserNew ("R"); - def->kindTable = RKinds; - def->kindCount = ARRAY_SIZE (RKinds); - def->extensions = extensions; - def->parser = createRTags; - return def; -} diff --git a/ctags/parsers/geany_sh.c b/ctags/parsers/geany_sh.c deleted file mode 100644 index 69a8d50cd9..0000000000 --- a/ctags/parsers/geany_sh.c +++ /dev/null @@ -1,110 +0,0 @@ -/* -* Copyright (c) 2000-2002, Darren Hiebert -* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. 
-* -* This module contains functions for generating tags for scripts for the -* Bourne shell (and its derivatives, the Korn and Z shells). -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include - -#include "parse.h" -#include "read.h" -#include "routines.h" -#include "vstring.h" -#include "xtag.h" - -/* -* DATA DEFINITIONS -*/ -typedef enum { - K_FUNCTION -} shKind; - -static kindDefinition ShKinds [] = { - { true, 'f', "function", "functions"} -}; - -/* -* FUNCTION DEFINITIONS -*/ - -/* Reject any tag "main" from a file named "configure". These appear in - * here-documents in GNU autoconf scripts and will add a haystack to the - * needle. - */ -static bool hackReject (const vString* const tagName) -{ - const char *const scriptName = baseFilename (getInputFileName ()); - bool result = (bool) (strcmp (scriptName, "configure") == 0 && - strcmp (vStringValue (tagName), "main") == 0); - return result; -} - -static void findShTags (void) -{ - vString *name = vStringNew (); - const unsigned char *line; - - while ((line = readLineFromInputFile ()) != NULL) - { - const unsigned char* cp = line; - bool functionFound = false; - - if (line [0] == '#') - continue; - - while (isspace (*cp)) - cp++; - if (strncmp ((const char*) cp, "function", (size_t) 8) == 0 && - isspace ((int) cp [8])) - { - functionFound = true; - cp += 8; - if (! isspace ((int) *cp)) - continue; - while (isspace ((int) *cp)) - ++cp; - } - if (! (isalnum ((int) *cp) || *cp == '_')) - continue; - while (isalnum ((int) *cp) || *cp == '_') - { - vStringPut (name, (int) *cp); - ++cp; - } - while (isspace ((int) *cp)) - ++cp; - if (*cp++ == '(') - { - while (isspace ((int) *cp)) - ++cp; - if (*cp == ')' && ! 
hackReject (name)) - functionFound = true; - } - if (functionFound) - makeSimpleTag (name, K_FUNCTION); - vStringClear (name); - } - vStringDelete (name); -} - -extern parserDefinition* ShParser (void) -{ - static const char *const extensions [] = { - "sh", "SH", "bsh", "bash", "ksh", "zsh", "ash", NULL - }; - parserDefinition* def = parserNew ("Sh"); - def->kindTable = ShKinds; - def->kindCount = ARRAY_SIZE (ShKinds); - def->extensions = extensions; - def->parser = findShTags; - return def; -} diff --git a/ctags/parsers/geany_verilog.c b/ctags/parsers/geany_verilog.c deleted file mode 100644 index 6bb71b5c10..0000000000 --- a/ctags/parsers/geany_verilog.c +++ /dev/null @@ -1,332 +0,0 @@ -/* -* Copyright (c) 2003, Darren Hiebert -* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. -* -* This module contains functions for generating tags for the Verilog HDL -* (Hardware Description Language). 
-* -* Language definition documents: -* http://www.eg.bucknell.edu/~cs320/verilog/verilog-manual.html -* http://www.sutherland-hdl.com/on-line_ref_guide/vlog_ref_top.html -* http://www.verilog.com/VerilogBNF.html -* http://eesun.free.fr/DOC/VERILOG/verilog_manual1.html -*/ - -/* - * INCLUDE FILES - */ -#include "general.h" /* must always come first */ - -#include -#include - -#include "debug.h" -#include "keyword.h" -#include "parse.h" -#include "read.h" -#include "vstring.h" -#include "geany_lcpp.h" -#include "routines.h" -#include "xtag.h" - -/* - * DATA DECLARATIONS - */ -typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; - -typedef enum { - K_UNDEFINED = -1, - K_CONSTANT, - K_EVENT, - K_FUNCTION, - K_MODULE, - K_NET, - K_PORT, - K_REGISTER, - K_TASK -} verilogKind; - -/* - * DATA DEFINITIONS - */ -static int Ungetc; -static int Lang_verilog; -static jmp_buf Exception; - -static kindDefinition VerilogKinds [] = { - { true, 'c', "constant", "constants (define, parameter, specparam)" }, - { true, 'e', "event", "events" }, - { true, 'f', "function", "functions" }, - { true, 'm', "module", "modules" }, - { true, 'n', "net", "net data types" }, - { true, 'p', "port", "ports" }, - { true, 'r', "register", "register data types" }, - { true, 't', "task", "tasks" } -}; - -static keywordTable VerilogKeywordTable [] = { - { "`define", K_CONSTANT }, - { "event", K_EVENT }, - { "function", K_FUNCTION }, - { "inout", K_PORT }, - { "input", K_PORT }, - { "integer", K_REGISTER }, - { "module", K_MODULE }, - { "output", K_PORT }, - { "parameter", K_CONSTANT }, - { "real", K_REGISTER }, - { "realtime", K_REGISTER }, - { "reg", K_REGISTER }, - { "specparam", K_CONSTANT }, - { "supply0", K_NET }, - { "supply1", K_NET }, - { "task", K_TASK }, - { "time", K_REGISTER }, - { "tri0", K_NET }, - { "tri1", K_NET }, - { "triand", K_NET }, - { "tri", K_NET }, - { "trior", K_NET }, - { "trireg", K_NET }, - { "wand", K_NET }, - { "wire", K_NET }, - { "wor", K_NET } -}; - -/* 
- * FUNCTION DEFINITIONS - */ - -static void initialize (const langType language) -{ - size_t i; - const size_t count = ARRAY_SIZE (VerilogKeywordTable); - Lang_verilog = language; - for (i = 0 ; i < count ; ++i) - { - const keywordTable* const p = &VerilogKeywordTable [i]; - addKeyword (p->name, language, (int) p->id); - } -} - -static void vUngetc (int c) -{ - Assert (Ungetc == '\0'); - Ungetc = c; -} - -static int vGetc (void) -{ - int c; - if (Ungetc == '\0') - c = getcFromInputFile (); - else - { - c = Ungetc; - Ungetc = '\0'; - } - if (c == '/') - { - int c2 = getcFromInputFile (); - if (c2 == EOF) - longjmp (Exception, (int) ExceptionEOF); - else if (c2 == '/') /* strip comment until end-of-line */ - { - do - c = getcFromInputFile (); - while (c != '\n' && c != EOF); - } - else if (c2 == '*') /* strip block comment */ - { - c = lcppSkipOverCComment(); - } - else - { - ungetcToInputFile (c2); - } - } - else if (c == '"') /* strip string contents */ - { - int c2; - do - c2 = getcFromInputFile (); - while (c2 != '"' && c2 != EOF); - c = '@'; - } - if (c == EOF) - longjmp (Exception, (int) ExceptionEOF); - return c; -} - -static bool isIdentifierCharacter (const int c) -{ - return (bool)(isalnum (c) || c == '_' || c == '`'); -} - -static int skipWhite (int c) -{ - while (isspace (c)) - c = vGetc (); - return c; -} - -static int skipPastMatch (const char *const pair) -{ - const int begin = pair [0], end = pair [1]; - int matchLevel = 1; - int c; - do - { - c = vGetc (); - if (c == begin) - ++matchLevel; - else if (c == end) - --matchLevel; - } - while (matchLevel > 0); - return vGetc (); -} - -static bool readIdentifier (vString *const name, int c) -{ - vStringClear (name); - if (isIdentifierCharacter (c)) - { - while (isIdentifierCharacter (c)) - { - vStringPut (name, c); - c = vGetc (); - } - vUngetc (c); - } - return (bool)(name->length > 0); -} - -static void tagNameList (const verilogKind kind, int c) -{ - vString *name = vStringNew (); - bool repeat; - 
Assert (isIdentifierCharacter (c)); - do - { - repeat = false; - if (isIdentifierCharacter (c)) - { - readIdentifier (name, c); - makeSimpleTag (name, kind); - } - else - break; - c = skipWhite (vGetc ()); - if (c == '[') - c = skipPastMatch ("[]"); - c = skipWhite (c); - if (c == '=') - { - c = skipWhite (vGetc ()); - if (c == '{') - skipPastMatch ("{}"); - else - { - do - c = vGetc (); - while (c != ',' && c != ';'); - } - } - if (c == ',') - { - c = skipWhite (vGetc ()); - repeat = true; - } - else - repeat = false; - } while (repeat); - vStringDelete (name); - vUngetc (c); -} - -static void findTag (vString *const name) -{ - const verilogKind kind = (verilogKind) lookupKeyword (vStringValue (name), Lang_verilog); - if (kind == K_CONSTANT && vStringChar (name, 0) == '`') - { - /* Bug #961001: Verilog compiler directives are line-based. */ - int c = skipWhite (vGetc ()); - readIdentifier (name, c); - makeSimpleTag (name, kind); - /* Skip the rest of the line. */ - do { - c = vGetc(); - } while (c != '\n'); - vUngetc (c); - } - else if (kind != K_UNDEFINED) - { - int c = skipWhite (vGetc ()); - - /* Many keywords can have bit width. 
- * reg [3:0] net_name; - * inout [(`DBUSWIDTH-1):0] databus; - */ - if (c == '(') - c = skipPastMatch ("()"); - c = skipWhite (c); - if (c == '[') - c = skipPastMatch ("[]"); - c = skipWhite (c); - if (c == '#') - { - c = vGetc (); - if (c == '(') - c = skipPastMatch ("()"); - } - c = skipWhite (c); - if (isIdentifierCharacter (c)) - tagNameList (kind, c); - } -} - -static void findVerilogTags (void) -{ - vString *const name = vStringNew (); - volatile bool newStatement = true; - volatile int c = '\0'; - exception_t exception = (exception_t) setjmp (Exception); - - if (exception == ExceptionNone) while (c != EOF) - { - c = vGetc (); - switch (c) - { - case ';': - case '\n': - newStatement = true; - break; - - case ' ': - case '\t': - break; - - default: - if (newStatement && readIdentifier (name, c)) - findTag (name); - newStatement = false; - break; - } - } - vStringDelete (name); -} - -extern parserDefinition* VerilogParser (void) -{ - static const char *const extensions [] = { "v", NULL }; - parserDefinition* def = parserNew ("Verilog"); - def->kindTable = VerilogKinds; - def->kindCount = ARRAY_SIZE (VerilogKinds); - def->extensions = extensions; - def->parser = findVerilogTags; - def->initialize = initialize; - return def; -} diff --git a/ctags/parsers/r.c b/ctags/parsers/r.c new file mode 100644 index 0000000000..e66b94faed --- /dev/null +++ b/ctags/parsers/r.c @@ -0,0 +1,1470 @@ +/* +* Copyright (c) 2003-2004, Ascher Stefan +* Copyright (c) 2020, Masatake YAMATO +* Copyright (c) 2020, Red Hat, Inc. +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for R language files. +* R is a programming language for statistical computing. 
+* R is GPL Software, get it from http://www.r-project.org/ +* +* The language references are available at +* https://cran.r-project.org/manuals.html, and +* https://cran.r-project.org/doc/manuals/r-release/R-lang.html +* +* The base library (including library and source functions) release is at +* https://stat.ethz.ch/R-manual/R-devel/library/base/html/00Index.html +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include "debug.h" +#include "entry.h" +#include "keyword.h" +#include "parse.h" +#include "read.h" +#include "selectors.h" +#include "tokeninfo.h" +#include "trace.h" +#include "vstring.h" +#include "subparser.h" +#include "r.h" + +#include +#include /* to define isalpha(), isalnum(), isspace() */ + + +/* +* MACROS +*/ +#ifdef DEBUG +#define R_TRACE_TOKEN_TEXT(TXT,T,Q) TRACE_PRINT("<%s> token: %s (%s), parent: %s", \ + (TXT), \ + tokenIsTypeVal(T, '\n')? "\\n": tokenString(T), \ + tokenTypeStr(T->type), \ + (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name) +#define R_TRACE_TOKEN(T,Q) TRACE_PRINT("token: %s (%s), parent: %s", \ + tokenIsTypeVal((T), '\n')? "\\n": tokenString(T), \ + tokenTypeStr((T)->type), \ + (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name) + +#define R_TRACE_ENTER() TRACE_ENTER_TEXT("token: %s (%s), parent: %s", \ + tokenIsTypeVal(token, '\n')? "\\n": tokenString(token), \ + tokenTypeStr(token->type), \ + parent == CORK_NIL? 
"": getEntryInCorkQueue(parent)->name) +#define R_TRACE_LEAVE() TRACE_LEAVE() +#else +#define R_TRACE_TOKEN_TEXT(TXT,T,Q) do {} while (0); +#define R_TRACE_TOKEN(T,Q) do {} while (0); +#define R_TRACE_ENTER() do {} while (0); +#define R_TRACE_LEAVE() do {} while (0); +#endif + + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_UNDEFINED = -1, + K_FUNCTION, + K_LIBRARY, + K_SOURCE, + K_GLOBALVAR, + K_FUNCVAR, + K_PARAM, + K_VECTOR, + K_LIST, + K_DATAFRAME, + K_NAMEATTR, + KIND_COUNT +} rKind; + +typedef enum { + R_LIBRARY_ATTACHED_BY_LIBRARY, + R_LIBRARY_ATTACHED_BY_REQUIRE, +} rLibraryRole; + +typedef enum { + R_SOURCE_LOADED_BY_SOURCE, +} rSourceRole; + +static roleDefinition RLibraryRoles [] = { + { true, "library", "library attached by library function" }, + { true, "require", "library attached by require function" }, +}; + +static roleDefinition RSourceRoles [] = { + { true, "source", "source loaded by source fucntion" }, +}; + +static kindDefinition RKinds[KIND_COUNT] = { + {true, 'f', "function", "functions"}, + {true, 'l', "library", "libraries", + .referenceOnly = true, ATTACH_ROLES (RLibraryRoles) }, + {true, 's', "source", "sources", + .referenceOnly = true, ATTACH_ROLES (RSourceRoles) }, + {true, 'g', "globalVar", "global variables having values other than function()"}, + {true, 'v', "functionVar", "function variables having values other than function()"}, + {false,'z', "parameter", "function parameters inside function definitions" }, + {true, 'c', "vector", "vectors explicitly created with `c()'" }, + {true, 'L', "list", "lists explicitly created with `list()'" }, + {true, 'd', "dataframe", "data frame explicitly created with `data.frame()'" }, + {true, 'n', "nameattr", "names attribtes in vectors, lists, or dataframes" }, +}; + +struct sKindExtraInfo { + const char *anon_prefix; + const char *ctor; +}; + +static struct sKindExtraInfo kindExtraInfo[KIND_COUNT] = { + [K_FUNCTION] = { + "anonFunc", + "function", + }, + [K_VECTOR] = { + "anonVec", + "c", + 
}, + [K_LIST] = { + "anonList", + "list", + }, + [K_DATAFRAME] = { + "anonDataFrame", + "data.frame", + }, +}; + +typedef enum { + F_ASSIGNMENT_OPERATOR, + F_CONSTRUCTOR, +} rField; + +static fieldDefinition RFields [] = { + { + .name = "assignmentop", + .description = "operator for assignment", + .enabled = false, + }, + { + .name = "constructor", + .description = "function used for making value assigned to the nameattr tag", + .enabled = true, + } +}; + +typedef int keywordId; /* to allow KEYWORD_NONE */ + +static const keywordTable RKeywordTable [] = { + { "c", KEYWORD_R_C }, + { "list", KEYWORD_R_LIST }, + { "data.frame",KEYWORD_R_DATAFRAME }, + { "function", KEYWORD_R_FUNCTION }, + { "if", KEYWORD_R_IF }, + { "else", KEYWORD_R_ELSE }, + { "for", KEYWORD_R_FOR }, + { "while", KEYWORD_R_WHILE }, + { "repeat", KEYWORD_R_REPEAT }, + { "in", KEYWORD_R_IN }, + { "next", KEYWORD_R_NEXT }, + { "break", KEYWORD_R_BREAK }, + { "TRUE", KEYWORD_R_TRUE, }, + { "FALSE", KEYWORD_R_FALSE, }, + { "NULL", KEYWORD_R_NULL, }, + { "Inf", KEYWORD_R_INF, }, + { "NaN", KEYWORD_R_NAN, }, + { "NA", KEYWORD_R_NA, }, + { "NA_integer_", KEYWORD_R_NA, }, + { "NA_real_", KEYWORD_R_NA, }, + { "NA_complex_", KEYWORD_R_NA, }, + { "NA_character_", KEYWORD_R_NA, }, + { "source", KEYWORD_R_SOURCE }, + { "library", KEYWORD_R_LIBRARY }, + { "require", KEYWORD_R_LIBRARY }, +}; + +#ifdef DEBUG +static const char *tokenTypeStr(enum RTokenType e); +#endif + +static struct tokenTypePair typePairs [] = { + { '{', '}' }, + { '[', ']' }, + { '(', ')' }, +}; + +typedef struct sRToken { + tokenInfo base; + int scopeIndex; + int parenDepth; + vString *signature; + int kindIndexForParams; /* Used only when gathering parameters */ +} rToken; + +#define R(TOKEN) ((rToken *)TOKEN) + +static int blackHoleIndex; + +static langType Lang_R; + +static void readToken (tokenInfo *const token, void *data); +static void clearToken (tokenInfo *token); +static struct tokenInfoClass rTokenInfoClass = { + .nPreAlloc = 4, + 
.typeForUndefined = TOKEN_R_UNDEFINED, + .keywordNone = KEYWORD_NONE, + .typeForKeyword = TOKEN_R_KEYWORD, + .typeForEOF = TOKEN_R_EOF, + .extraSpace = sizeof (rToken) - sizeof (tokenInfo), + .pairs = typePairs, + .pairCount = ARRAY_SIZE (typePairs), + .init = NULL, + .read = readToken, + .clear = clearToken, + .copy = NULL, +}; + + +/* + * FUNCTION PROTOTYPES + */ +static bool parseStatement (tokenInfo *const token, int parent, bool in_arglist, bool in_continuous_pair); +static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall); + +static int notifyReadRightSideSymbol (tokenInfo *const symbol, + const char *const assignmentOperator, + int parent, + tokenInfo *const token); +static int makeSimpleSubparserTag (int langType, tokenInfo *const token, int parent, + bool in_func, int kindInR, const char *assignmentOperator); +static bool askSubparserTagAcceptancy (tagEntryInfo *pe); +static bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e); +static int notifyReadFuncall (tokenInfo *const func, tokenInfo *const token, int parent); + +/* +* FUNCTION DEFINITIONS +*/ +static bool hasKindsOrCtors (tagEntryInfo * e, int kinds[], size_t count) +{ + if (e->langType == Lang_R) + { + for (size_t i = 0; i < count; i++) + { + if (e->kindIndex == kinds[i]) + return true; + } + } + else + { + bool function = false; + for (size_t i = 0; i < count; i++) + { + if (K_FUNCTION == kinds[i]) + { + function = true; + break; + } + } + if (function && askSubparserTagHasFunctionAlikeKind (e)) + return true; + } + + const char *tmp = getParserFieldValueForType (e, + RFields [F_CONSTRUCTOR].ftype); + if (tmp == NULL) + return false; + + for (size_t i = 0; i < count; i++) + { + const char * ctor = kindExtraInfo [kinds[i]].ctor; + if (ctor && strcmp (tmp, ctor) == 0) + return true; + } + + return false; +} + +static int searchScopeOtherThan (int scope, int kinds[], size_t count) +{ + do + { + tagEntryInfo * e = getEntryInCorkQueue (scope); + if (!e) + return 
CORK_NIL; + + if (!hasKindsOrCtors (e, kinds, count)) + return scope; + + scope = e->extensionFields.scopeIndex; + } + while (1); +} + +static int makeSimpleRTagR (tokenInfo *const token, int parent, int kind, + const char * assignmentOp) +{ + if (assignmentOp && (strlen (assignmentOp) == 3)) + { + /* <<- or ->> is used here. */ + if (anyKindsEntryInScopeRecursive (parent, tokenString (token), + (int[]){K_FUNCTION, + K_GLOBALVAR, + K_FUNCVAR, + K_PARAM}, 4) != CORK_NIL) + return CORK_NIL; + + parent = CORK_NIL; + } + + /* If the tag (T) to be created is defined in a scope and + the scope already has another tag having the same name + as T, T should not be created. */ + tagEntryInfo *pe = getEntryInCorkQueue (parent); + int cousin = CORK_NIL; + if (pe && ((pe->langType == Lang_R && pe->kindIndex == K_FUNCTION) + || (pe->langType != Lang_R && askSubparserTagHasFunctionAlikeKind (pe)))) + { + cousin = anyEntryInScope (parent, tokenString (token)); + if (kind == K_GLOBALVAR) + kind = K_FUNCVAR; + } + else if (pe && (kind == K_GLOBALVAR) + && hasKindsOrCtors (pe, (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3)) + { + parent = searchScopeOtherThan (pe->extensionFields.scopeIndex, + (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3); + if (parent == CORK_NIL) + cousin = anyKindEntryInScope (parent, tokenString (token), K_GLOBALVAR); + else + { + cousin = anyKindEntryInScope (parent, tokenString (token), K_FUNCVAR); + kind = K_FUNCVAR; + } + } + else if (pe) + { + /* The condition for tagging is a bit relaxed here. + Even if the same name tag is created in the scope, a name + is tagged if kinds are different. 
*/ + cousin = anyKindEntryInScope (parent, tokenString (token), kind); + } + if (cousin != CORK_NIL) + return CORK_NIL; + + int corkIndex = makeSimpleTag (token->string, kind); + tagEntryInfo *tag = getEntryInCorkQueue (corkIndex); + if (tag) + { + tag->extensionFields.scopeIndex = parent; + if (assignmentOp) + { + if (strlen (assignmentOp) > 0) + attachParserField (tag, true, + RFields [F_ASSIGNMENT_OPERATOR].ftype, + assignmentOp); + else + markTagExtraBit (tag, XTAG_ANONYMOUS); + } + registerEntry (corkIndex); + } + return corkIndex; +} + +static int makeSimpleRTag (tokenInfo *const token, int parent, bool in_func, int kind, + const char * assignmentOp) +{ + int r; + const char *ctor = kindExtraInfo [kind].ctor; + tagEntryInfo *pe = (parent == CORK_NIL)? NULL: getEntryInCorkQueue (parent); + + /* makeTagWithTranslation method for subparsers + called from makeSimpleSubparserTag expects + kind should be resolved. */ + if (pe && hasKindsOrCtors (pe, (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3)) + { + if (assignmentOp + && strcmp (assignmentOp, "=") == 0) + kind = K_NAMEATTR; + } + + bool foreign_tag = false; + if (pe == NULL || pe->langType == Lang_R || + !askSubparserTagAcceptancy (pe)) + r = makeSimpleRTagR (token, parent, kind, assignmentOp); + else + { + foreign_tag = true; + r = makeSimpleSubparserTag (pe->langType, token, parent, in_func, + kind, assignmentOp); + } + + if ((kind == K_NAMEATTR || foreign_tag) && ctor) + { + tagEntryInfo *e = getEntryInCorkQueue (r); + if (e) + attachParserField (e, true, + RFields [F_CONSTRUCTOR].ftype, + ctor); + } + + return r; +} + +static void clearToken (tokenInfo *token) +{ + R (token)->parenDepth = 0; + R (token)->scopeIndex = CORK_NIL; + R (token)->kindIndexForParams = KIND_GHOST_INDEX; + if (R (token)->signature) + { + vStringDelete (R (token)->signature); + R (token)->signature = NULL; + } +} + +static void readString (tokenInfo *const token, void *data) +{ + int c; + bool escaped = false; + + int c0 = 
tokenString(token)[0]; + + while (1) + { + c = getcFromInputFile (); + switch (c) + { + case EOF: + return; + case '\'': + case '"': + case '`': + tokenPutc (token, c); + if (!escaped && c == c0) + return; + escaped = false; + break; + case '\\': + tokenPutc (token, c); + escaped = !escaped; + break; + default: + tokenPutc (token, c); + escaped = false; + break; + } + } +} + +static void readNumber (tokenInfo *const token, void *data) +{ + int c; + + /* 10.3.1 Constants + * + * Valid numeric constants: 1 10 0.1 .2 1e-7 1.2e+7 + * Valid integer constants: 1L, 0x10L, 1000000L, 1e6L + * Valid numeric constants: 1.1L, 1e-3L, 0x1.1p-2 + * Valid complex constants: 2i 4.1i 1e-2i + */ + while ((c = getcFromInputFile ())) + { + if (isxdigit (c) || c == '.' || c == 'E' + || c == '+' || c == '-' + || c == 'L' || c == 'x' || c == 'p' + || c == 'i') + tokenPutc (token, c); + else + { + ungetcToInputFile (c); + break; + } + } +} + +static void readSymbol (tokenInfo *const token, void *data) +{ + int c; + while ((c = getcFromInputFile ())) + { + if (isalnum (c) || c == '.' 
|| c == '_') + tokenPutc (token, c); + else + { + ungetcToInputFile (c); + break; + } + } +} + +static keywordId resolveKeyword (vString *string) +{ + char *s = vStringValue (string); + static langType lang = LANG_AUTO; + + if (lang == LANG_AUTO) + lang = getInputLanguage (); + + return lookupCaseKeyword (s, lang); +} + +static bool signatureExpectingParameter (vString *signature) +{ + if (vStringLast (signature) == '(') + return true; + + for (size_t i = vStringLength (signature); i > 0; i--) + { + char c = vStringChar (signature, i - 1); + if (c == ' ') + continue; + else if (c == ',') + return true; + break; + } + return false; +} + +static void readToken (tokenInfo *const token, void *data) +{ + int c, c0; + + token->type = TOKEN_R_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + + do + c = getcFromInputFile (); + while (c == ' ' || c== '\t' || c == '\f'); + + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + + switch (c) + { + case EOF: + token->type = TOKEN_R_EOF; + break; + case '#': + while (1) + { + c = getcFromInputFile (); + if (c == EOF) + { + token->type = TOKEN_R_EOF; + break; + } + else if (c == '\n') + { + token->type = c; + tokenPutc (token, c); + break; + } + } + break; + case '\n': + case ';': + token->type = c; + tokenPutc (token, c); + break; + case '\'': + case '"': + case '`': + token->type = TOKEN_R_STRING; + tokenPutc (token, c); + readString (token, data); + break; + case '+': + case '/': + case '^': + case '~': + token->type = TOKEN_R_OPERATOR; + tokenPutc (token, c); + break; + case ':': + token->type = TOKEN_R_OPERATOR; + tokenPutc (token, c); + c = getcFromInputFile (); + if (c == ':') + { + tokenPutc (token, c); + token->type = TOKEN_R_SCOPE; + c = getcFromInputFile (); + if (c == ':') + tokenPutc (token, c); + else + ungetcToInputFile (c); + } + else + ungetcToInputFile (c); + break; + case '&': + case '|': + case '*': + token->type = TOKEN_R_OPERATOR; + 
tokenPutc (token, c); + c0 = getcFromInputFile (); + if (c == c0) + tokenPutc (token, c0); + else + ungetcToInputFile (c0); + break; + case '=': + token->type = TOKEN_R_OPERATOR; + tokenPutc (token, c); + c = getcFromInputFile (); + if (c == '=') + tokenPutc (token, c); + else + { + token->type = '='; + ungetcToInputFile (c); + } + break; + case '-': + token->type = TOKEN_R_OPERATOR; + tokenPutc (token, c); + c = getcFromInputFile (); + if (c == '>') + { + token->type = TOKEN_R_RASSIGN; + tokenPutc (token, c); + c = getcFromInputFile (); + if (c == '>') + tokenPutc (token, c); + else + ungetcToInputFile (c); + } + else + ungetcToInputFile (c); + break; + case '>': + token->type = TOKEN_R_OPERATOR; + tokenPutc (token, c); + c = getcFromInputFile (); + if (c == '=') + tokenPutc (token, c); + else + ungetcToInputFile (c); + break; + case '<': + token->type = TOKEN_R_OPERATOR; + tokenPutc (token, c); + c = getcFromInputFile (); + + /* <<- */ + if (c == '<') + { + tokenPutc (token, c); + c = getcFromInputFile (); + } + + if (c == '-') + { + token->type = TOKEN_R_LASSIGN; + tokenPutc (token, c); + } + else if (c == '=') + tokenPutc (token, c); + else + ungetcToInputFile (c); + break; + case '%': + token->type = TOKEN_R_OPERATOR; + tokenPutc (token, c); + do + { + c = getcFromInputFile (); + if (c == EOF) + break; + + tokenPutc (token, c); + if (c == '%') + break; + } + while (1); + break; + case '!': + token->type = TOKEN_R_OPERATOR; + tokenPutc (token, c); + c = getcFromInputFile (); + if (c == '=') + tokenPutc (token, c); + else + ungetcToInputFile (c); + break; + case '{': + case '}': + case '(': + case ')': + case '[': + case ']': + case ',': + case '$': + case '@': + token->type = c; + tokenPutc (token, c); + break; + case '.': + tokenPutc (token, c); + c = getcFromInputFile (); + if (isdigit(c)) + { + token->type = TOKEN_R_NUMBER; + tokenPutc (token, c); + readNumber(token, data); + } + else if (isalpha (c) || c == '_') + { + token->type = TOKEN_R_SYMBOL; + 
tokenPutc (token, c); + readSymbol (token, data); + + token->keyword = resolveKeyword (token->string); + if (token->keyword != KEYWORD_NONE) + token->type = TOKEN_R_KEYWORD; + } + else if (c == '.') + { + token->type = TOKEN_R_DOTS; + tokenPutc (token, c); + + c = getcFromInputFile (); + if (c == '.') + tokenPutc (token, c); + else if (isdigit(c)) + { + token->type = TOKEN_R_DOTS_N; + do + { + tokenPutc (token, c); + c = getcFromInputFile (); + } + while (isdigit(c)); + ungetcToInputFile (c); + } + else if (isalpha (c) || c == '_') + { + token->type = TOKEN_R_SYMBOL; + tokenPutc (token, c); + readSymbol (token, data); + + token->keyword = resolveKeyword (token->string); + if (token->keyword != KEYWORD_NONE) + token->type = TOKEN_R_KEYWORD; + } + else + { + token->type = TOKEN_R_UNDEFINED; + ungetcToInputFile (c); + } + } + break; + default: + tokenPutc (token, c); + if (isdigit (c)) + { + token->type = TOKEN_R_NUMBER; + readNumber(token, data); + } + else if (isalpha (c)) + { + token->type = TOKEN_R_SYMBOL; + readSymbol (token, data); + + token->keyword = resolveKeyword (token->string); + if (token->keyword != KEYWORD_NONE) + token->type = TOKEN_R_KEYWORD; + } + else + token->type = TOKEN_R_UNDEFINED; + break; + } + + /* Handle parameters in a signature */ + if (R(token)->signature && !tokenIsType(token, R_EOF) && !tokenIsTypeVal(token, '\n')) + { + vString *signature = R (token)->signature; + + if (tokenIsTypeVal (token, '(')) + R (token)->parenDepth++; + else if (tokenIsTypeVal (token, ')')) + R (token)->parenDepth--; + + if (R (token)->kindIndexForParams != KIND_GHOST_INDEX + && R (token)->parenDepth == 1 && tokenIsType (token, R_SYMBOL) + && signatureExpectingParameter (signature)) + makeSimpleRTag (token, R (token)->scopeIndex, false, + R (token)->kindIndexForParams, NULL); + + if (vStringLast (signature) != '(' && + !tokenIsTypeVal (token, ',') && + !tokenIsTypeVal (token, ')')) + vStringPut (signature, ' '); + vStringCat (signature, token->string); + } +} + 
+#define newRToken rNewToken	/* file-local short name for rNewToken() */
+extern tokenInfo *rNewToken (void)
+{
+	return newToken (&rTokenInfoClass);	/* token built from the R token class */
+}
+/*
+ * Read into TOKEN until a token that is not a '\n' token has been read.
+ */
+#define tokenReadNoNewline rTokenReadNoNewline
+extern void rTokenReadNoNewline (tokenInfo *const token)
+{
+	while (1)
+	{
+		tokenRead(token);
+		if (!tokenIsTypeVal (token, '\n'))
+			break;
+	}
+}
+/*
+ * Start signature collection on TOKEN: remember the SIGNATURE buffer, the
+ * kind index to use for parameters and the scope index (CORKINDEX).
+ * parenDepth starts at 1; parseRightSide() calls this only after it has
+ * already read the opening '('.
+ */
+static void setupCollectingSignature (tokenInfo *const token,
+									  vString *signature,
+									  int kindIndexForParams,
+									  int corkIndex)
+{
+	R (token)->signature = signature;
+	R (token)->kindIndexForParams = kindIndexForParams;
+	R (token)->scopeIndex = corkIndex;
+	R (token)->parenDepth = 1;
+}
+/*
+ * Exported variant of setupCollectingSignature(): it passes KIND_GHOST_INDEX
+ * as the parameter kind and CORK_NIL as the scope.
+ */
+extern void rSetupCollectingSignature (tokenInfo *const token,
+									   vString *signature)
+{
+	setupCollectingSignature (token, signature,
+							  KIND_GHOST_INDEX, CORK_NIL);
+}
+/*
+ * Reset the fields set by setupCollectingSignature().  The signature
+ * vString is only detached from the token here; it is not freed.
+ */
+static void teardownCollectingSignature (tokenInfo *const token)
+{
+	R (token)->parenDepth = 0;
+	R (token)->scopeIndex = CORK_NIL;
+	R (token)->kindIndexForParams = KIND_GHOST_INDEX;
+	R (token)->signature = NULL;
+}
+/*
+ * Exported wrapper around teardownCollectingSignature().
+ */
+extern void rTeardownCollectingSignature (tokenInfo *const token)
+{
+	teardownCollectingSignature (token);
+}
+/*
+ * Map the keyword held by TOKEN to a tag kind: R_FUNCTION -> K_FUNCTION,
+ * R_C -> K_VECTOR, R_LIST -> K_LIST, R_DATAFRAME -> K_DATAFRAME.
+ * Any other token yields K_GLOBALVAR.
+ */
+static int getKindForToken (tokenInfo *const token)
+{
+	if (tokenIsKeyword (token, R_FUNCTION))
+		return K_FUNCTION;
+	else if (tokenIsKeyword (token, R_C))
+		return K_VECTOR;
+	else if (tokenIsKeyword (token, R_LIST))
+		return K_LIST;
+	else if (tokenIsKeyword (token, R_DATAFRAME))
+		return K_DATAFRAME;
+	return K_GLOBALVAR;
+}
+/*
+ * Callback handed to foreachEntriesInScope() by parseRightSide().
+ * DATA points to a bool; it is set to true when ENTRY is not a
+ * placeholder, and false is returned in that case.  Otherwise true is
+ * returned.  (Presumably a false return stops the iteration; confirm
+ * against foreachEntriesInScope().)
+ */
+static bool findNonPlaceholder (int corkIndex, tagEntryInfo *entry, void *data)
+{
+	bool *any_non_placehoders = data;
+	if (!entry->placeholder)
+	{
+		*any_non_placehoders = true;
+		return false;
+	}
+	return true;
+}
+/*
+ * Parse the right side of an assignment and tag SYMBOL.
+ *
+ * On entry TOKEN holds the assignment operator; its text is copied and
+ * passed on to the tag makers.  PARENT is the cork index of the enclosing
+ * scope.  Subparsers are asked first; when none of them makes a tag,
+ * makeSimpleRTag() is used.  For a function the parameter list is
+ * collected as the tag's signature; for the vector, list and data frame
+ * kinds the following pair is parsed with the new tag as its scope.  After
+ * the rest of the statement is parsed, the end line and the signature are
+ * stored in the tag.
+ */
+static void parseRightSide (tokenInfo *const token, tokenInfo *const symbol, int parent)
+{
+	R_TRACE_ENTER();
+
+	char *const assignment_operator = eStrdup (tokenString (token));	/* freed before returning */
+	vString *signature = NULL;
+
+	tokenReadNoNewline (token);
+
+	int kind = getKindForToken (token);
+
+	/* Call sub parsers */
+	int corkIndex = notifyReadRightSideSymbol (symbol,
+											   assignment_operator,
+											   parent,
+											   token);
+	if (corkIndex == CORK_NIL)
+	{
+		/* No subparser handled the symbol */
+		corkIndex = makeSimpleRTag (symbol, parent, kind == K_FUNCTION,
+									kind,
+									assignment_operator);
+	}
+
+	if (kind == K_FUNCTION)
+	{
+		/* parse signature */
+		tokenReadNoNewline (token);
+		if (tokenIsTypeVal (token, '('))
+		{
+			if (corkIndex == CORK_NIL)
+				tokenSkipOverPair (token);	/* no tag to attach a signature to */
+			else
+			{
+				signature = vStringNewInit("(");
+				setupCollectingSignature (token, signature, K_PARAM, corkIndex);
+				tokenSkipOverPair (token);
+				teardownCollectingSignature (token);
+			}
+			tokenReadNoNewline (token);
+		}
+		/* Without a tag of its own, the body is parsed in the black-hole scope. */
+		parent = (corkIndex == CORK_NIL
+				  ? blackHoleIndex
+				  : corkIndex);
+	}
+	else if (kind == K_VECTOR || kind == K_LIST || kind == K_DATAFRAME)
+	{
+		tokenRead (token);
+		parsePair (token, corkIndex, NULL);
+		tokenRead (token);
+		parent = corkIndex;
+	}
+
+	R_TRACE_TOKEN_TEXT("body", token, parent);
+
+	parseStatement (token, parent, false, false);
+
+	tagEntryInfo *tag = getEntryInCorkQueue (corkIndex);
+	if (tag)
+	{
+		tag->extensionFields.endLine = token->lineNumber;
+		if (signature)
+		{
+			/* The tag takes over the string; clear the local so it is not deleted below. */
+			tag->extensionFields.signature = vStringDeleteUnwrap(signature);
+			signature = NULL;
+		}
+		/* If a vector has no named attribute and it has no lval,
+		 * we don't make a tag for the vector. */
+		if ((kind == K_VECTOR || kind == K_LIST || kind == K_DATAFRAME)
+			&& *assignment_operator == '\0')
+		{
+			bool any_non_placehoders = false;
+			foreachEntriesInScope (corkIndex, NULL,
+								   findNonPlaceholder, &any_non_placehoders);
+			if (!any_non_placehoders)
+				tag->placeholder = 1;
+		}
+	}
+
+	vStringDelete (signature); /* NULL is acceptable. */
+	eFree (assignment_operator);
+	R_TRACE_LEAVE();
+}
+
+/* Parse arguments for library and source.
*/ +static bool preParseExternalEntitiy (tokenInfo *const token, tokenInfo *const funcall) +{ + TRACE_ENTER(); + + bool r = true; + tokenInfo *prefetch_token = newRToken (); + + tokenReadNoNewline (prefetch_token); + if (tokenIsType (prefetch_token, R_SYMBOL) + || tokenIsType (prefetch_token, R_STRING)) + { + tokenInfo *const loaded_obj_token = newTokenByCopying (prefetch_token); + tokenReadNoNewline (prefetch_token); + if (tokenIsTypeVal (prefetch_token, ')') + || tokenIsTypeVal (prefetch_token, ',')) + { + if (tokenIsTypeVal (prefetch_token, ')')) + r = false; + + makeSimpleRefTag (loaded_obj_token->string, + (tokenIsKeyword (funcall, R_LIBRARY) + ? K_LIBRARY + : K_SOURCE), + (tokenIsKeyword (funcall, R_LIBRARY) + ? (strcmp (tokenString(funcall), "library") == 0 + ? R_LIBRARY_ATTACHED_BY_LIBRARY + : R_LIBRARY_ATTACHED_BY_REQUIRE) + : R_SOURCE_LOADED_BY_SOURCE)); + tokenDelete (loaded_obj_token); + } + else if (tokenIsEOF (prefetch_token)) + { + tokenCopy (token, prefetch_token); + tokenDelete (loaded_obj_token); + r = false; + } + else + { + tokenUnread (prefetch_token); + tokenUnread (loaded_obj_token); + tokenDelete (loaded_obj_token); + } + } + else if (tokenIsEOF (prefetch_token)) + { + tokenCopy (token, prefetch_token); + r = false; + } + else + tokenUnread (prefetch_token); + + tokenDelete (prefetch_token); + + TRACE_LEAVE_TEXT(r + ? "unread tokens and request parsing again to the upper context" + : "parse all arguments"); + return r; +} + +static bool preParseLoopCounter(tokenInfo *const token, int parent) +{ + bool r = true; + TRACE_ENTER(); + + tokenReadNoNewline (token); + if (tokenIsType (token, R_SYMBOL)) + makeSimpleRTag (token, parent, false, K_GLOBALVAR, NULL); + + if (tokenIsEOF (token) + || tokenIsTypeVal (token, ')')) + r = false; + + TRACE_LEAVE_TEXT(r + ? 
"unread tokens and request parsing again to the upper context" + : "parse all arguments"); + return r; +} + + +/* If funcall is non-NULL, this pair represents the argument list for the function + * call for FUNCALL. */ +static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall) +{ + R_TRACE_ENTER(); + + bool in_continuous_pair = tokenIsTypeVal (token, '(') + || tokenIsTypeVal (token, '['); + bool is_funcall = funcall && tokenIsTypeVal (token, '('); + bool done = false; + + if (is_funcall) + { + if (tokenIsKeyword (funcall, R_LIBRARY) || + tokenIsKeyword (funcall, R_SOURCE)) + done = !preParseExternalEntitiy (token, funcall); + else if (tokenIsKeyword (funcall, R_FOR)) + done = !preParseLoopCounter (token, parent); + else if (notifyReadFuncall (funcall, token, parent) != CORK_NIL) + done = true; + } + + if (done) + { + R_TRACE_LEAVE(); + return; + } + + do + { + tokenRead (token); + R_TRACE_TOKEN_TEXT("inside pair", token, parent); + parseStatement (token, parent, (funcall != NULL), in_continuous_pair); + } + while (! 
(tokenIsEOF (token) + || tokenIsTypeVal (token, ')') + || tokenIsTypeVal (token, '}') + || tokenIsTypeVal (token, ']'))); + R_TRACE_LEAVE(); +} + +static bool isAtConstructorInvocation (void) +{ + bool r = false; + + tokenInfo *const token = newRToken (); + tokenRead (token); + if (tokenIsTypeVal (token, '(')) + r = true; + tokenUnread (token); + tokenDelete (token); + return r; +} + +static bool parseStatement (tokenInfo *const token, int parent, + bool in_arglist, bool in_continuous_pair) +{ + R_TRACE_ENTER(); + int last_count = rTokenInfoClass.read_counter; + + do + { + if (tokenIsEOF (token)) + break; + else if (tokenIsTypeVal (token, ';')) + { + R_TRACE_TOKEN_TEXT ("break with ;", token, parent); + break; + } + else if (tokenIsTypeVal (token, '\n')) + { + R_TRACE_TOKEN_TEXT ("break with \\n", token, parent); + break; + } + else if ((tokenIsKeyword (token, R_FUNCTION) + || ((tokenIsKeyword (token, R_C) + || tokenIsKeyword (token, R_LIST) + || tokenIsKeyword (token, R_DATAFRAME)) + && isAtConstructorInvocation ()))) + { + /* This statement doesn't start with a symbol. + * This function is not assigned to any symbol. 
*/ + tokenInfo *const anonfunc = newTokenByCopying (token); + int kind = getKindForToken (token); + anonGenerate (anonfunc->string, + kindExtraInfo [kind].anon_prefix, kind); + tokenUnread (token); + vStringClear (token->string); + parseRightSide (token, anonfunc, parent); + tokenDelete (anonfunc); + } + else if (tokenIsType (token, R_SYMBOL) + || tokenIsType (token, R_STRING) + || tokenIsType (token, R_KEYWORD)) + { + tokenInfo *const symbol = newTokenByCopying (token); + + if (in_continuous_pair) + tokenReadNoNewline (token); + else + tokenRead (token); + + if (tokenIsType (token, R_LASSIGN)) + { + /* Assignment */ + parseRightSide (token, symbol, parent); + R_TRACE_TOKEN_TEXT ("break with right side", token, parent); + tokenDelete(symbol); + break; + } + else if (tokenIsTypeVal (token, '=')) + { + /* Assignment */ + if (in_arglist) + { + /* Ignore the left side symbol. */ + tokenRead (token); + R_TRACE_TOKEN_TEXT("(in arg list) after = body", token, parent); + } + else + { + parseRightSide (token, symbol, parent); + R_TRACE_TOKEN_TEXT ("break with right side", token, parent); + tokenDelete(symbol); + break; + } + } + else if (tokenIsTypeVal (token, '(')) + { + /* function call */ + parsePair (token, parent, symbol); + tokenRead (token); + R_TRACE_TOKEN_TEXT("after arglist", token, parent); + } + else if (tokenIsTypeVal (token, '$') + || tokenIsTypeVal (token, '@') + || tokenIsType (token, R_SCOPE)) + { + tokenReadNoNewline (token); /* Skip the next identifier */ + tokenRead (token); + R_TRACE_TOKEN_TEXT("after $", token, parent); + } + else + R_TRACE_TOKEN_TEXT("else after symbol", token, parent); + tokenDelete(symbol); + } + else if (tokenIsType (token, R_RASSIGN)) + { + char *const assignment_operator = eStrdup (tokenString (token)); + tokenReadNoNewline (token); + if (tokenIsType (token, R_SYMBOL) + || tokenIsType (token, R_STRING)) + { + makeSimpleRTag (token, parent, false, + K_GLOBALVAR, assignment_operator); + tokenRead (token); + } + eFree 
(assignment_operator); + R_TRACE_TOKEN_TEXT("after ->", token, parent); + } + else if (tokenIsType (token, R_OPERATOR)) + { + tokenReadNoNewline (token); + R_TRACE_TOKEN_TEXT("after operator", token, parent); + } + else if (tokenIsTypeVal (token, '(') + || tokenIsTypeVal (token, '{') + || tokenIsTypeVal (token, '[')) + { + parsePair (token, parent, NULL); + tokenRead (token); + R_TRACE_TOKEN_TEXT("after pair", token, parent); + } + else if (tokenIsTypeVal (token, ')') + || tokenIsTypeVal (token, '}') + || tokenIsTypeVal (token, ']')) + { + R_TRACE_TOKEN_TEXT ("break with close", token, parent); + break; + } + else if (tokenIsTypeVal (token, '$') + || tokenIsTypeVal (token, '@') + || tokenIsType (token, R_SCOPE)) + { + tokenReadNoNewline (token); /* Skip the next identifier */ + tokenRead (token); + R_TRACE_TOKEN_TEXT("after $", token, parent); + } + else + { + tokenRead (token); + R_TRACE_TOKEN_TEXT("else", token, parent); + } + } + while (!tokenIsEOF (token)); + + R_TRACE_LEAVE(); + + return (last_count != rTokenInfoClass.read_counter); +} + +extern bool rParseStatement (tokenInfo *const token, int parentIndex, bool in_arglist) +{ + pushLanguage (Lang_R); + bool r = parseStatement (token, parentIndex, in_arglist, true); + popLanguage (); + return r; +} + +static int notifyReadRightSideSymbol (tokenInfo *const symbol, + const char *const assignmentOperator, + int parent, + tokenInfo *const token) +{ + subparser *sub; + int q = CORK_NIL; + + foreachSubparser (sub, false) + { + rSubparser *rsub = (rSubparser *)sub; + if (rsub->readRightSideSymbol) + { + enterSubparser (sub); + q = rsub->readRightSideSymbol (rsub, symbol, assignmentOperator, parent, token); + leaveSubparser (); + if (q != CORK_NIL) + break; + } + } + + return q; +} + +static int makeSimpleSubparserTag (int langType, + tokenInfo *const token, int parent, + bool in_func, int kindInR, + const char *assignmentOperator) +{ + int q = CORK_NIL; + subparser *sub = getLanguageSubparser (langType, false); + if 
(sub)
+	{
+		rSubparser *rsub = (rSubparser *)sub;
+		if (rsub->makeTagWithTranslation)
+		{
+			enterSubparser (sub);
+			q = rsub->makeTagWithTranslation (rsub,
+											  token, parent,
+											  in_func, kindInR,
+											  assignmentOperator);
+			leaveSubparser ();
+		}
+	}
+	return q;
+}
+
+/*
+ * Ask the subparser found for PE's language whether the tag PE should be
+ * accepted.  Returns false when no subparser is found or when it provides
+ * no askTagAcceptancy callback.
+ */
+static bool askSubparserTagAcceptancy (tagEntryInfo *pe)
+{
+	bool q = false;
+	subparser *sub = getLanguageSubparser (pe->langType, false);
+	/* Same NULL guard as makeSimpleSubparserTag(): the lookup may find nothing. */
+	if (sub)
+	{
+		rSubparser *rsub = (rSubparser *)sub;
+		if (rsub->askTagAcceptancy)
+		{
+			enterSubparser (sub);
+			q = rsub->askTagAcceptancy (rsub, pe);
+			leaveSubparser ();
+		}
+	}
+	return q;
+}
+
+/*
+ * Ask the subparser found for E's language whether E has a function-like
+ * kind.  The lookup is done with Lang_R pushed as the current language.
+ * Returns false when the subparser has no hasFunctionAlikeKind callback.
+ */
+static bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e)
+{
+	bool q = false;
+	pushLanguage (Lang_R);
+	subparser *sub = getLanguageSubparser (e->langType, false);
+	Assert (sub);
+	popLanguage ();
+	/* Keep a run-time guard as well, in case Assert() is compiled out. */
+	if (sub == NULL)
+		return q;
+	rSubparser *rsub = (rSubparser *)sub;
+	if (rsub->hasFunctionAlikeKind)
+	{
+		enterSubparser (sub);
+		q = rsub->hasFunctionAlikeKind (rsub, e);
+		leaveSubparser ();
+	}
+	return q;
+}
+
+/*
+ * Offer the function call FUNC to each subparser in turn.  The first
+ * readFuncall callback that returns something other than CORK_NIL ends the
+ * search and its result is returned; otherwise CORK_NIL is returned.
+ */
+static int notifyReadFuncall (tokenInfo *const func,
+							  tokenInfo *const token,
+							  int parent)
+{
+	int q = CORK_NIL;
+	subparser *sub;
+	foreachSubparser (sub, false)
+	{
+		rSubparser *rsub = (rSubparser *)sub;
+		if (rsub->readFuncall)
+		{
+			enterSubparser (sub);
+			q = rsub->readFuncall (rsub, func, token, parent);
+			leaveSubparser ();
+			if (q != CORK_NIL)
+				break;
+		}
+	}
+	return q;
+}
+
+/*
+ * Parser entry point: install the black-hole placeholder scope, parse
+ * statements until EOF, then mark every entry in the black-hole scope as a
+ * placeholder.
+ */
+static void findRTags (void)
+{
+	tokenInfo *const token = newRToken ();
+
+	blackHoleIndex = makePlaceholder ("**BLACK-HOLE/DON'T TAG ME**");
+	registerEntry (blackHoleIndex);
+
+	TRACE_PRINT ("install blackhole: %d", blackHoleIndex);
+
+	do
+	{
+		tokenRead(token);
+		R_TRACE_TOKEN(token, CORK_NIL);
+		parseStatement (token, CORK_NIL, false, false);
+	}
+	while (!tokenIsEOF (token));
+
+	/* The index was passed without a conversion in the format; print it. */
+	TRACE_PRINT ("run blackhole: %d", blackHoleIndex);
+	markAllEntriesInScopeAsPlaceholder (blackHoleIndex);
+
+	tokenDelete (token);
+}
+
+/* Remember the language id assigned to the R parser. */
+static void initializeRParser (const langType language)
+{
+	Lang_R = language;
+}
+
+extern
parserDefinition *RParser (void)
+{
+	static const char *const extensions[] = { "r", "R", "s", "q", NULL };
+	parserDefinition *const def = parserNew ("R");
+	static selectLanguage selectors[] = { selectByArrowOfR,
+										  NULL };
+
+	def->extensions = extensions;
+	def->kindTable = RKinds;
+	def->kindCount = ARRAY_SIZE(RKinds);
+	def->fieldTable = RFields;
+	def->fieldCount = ARRAY_SIZE (RFields);
+	def->keywordTable = RKeywordTable;
+	def->keywordCount = ARRAY_SIZE(RKeywordTable);
+	def->useCork = CORK_QUEUE | CORK_SYMTAB;
+	def->parser = findRTags;
+	def->selectLanguage = selectors;
+	def->initialize = initializeRParser;
+
+	return def;
+}
+
+/*
+ * Return a new vString holding STR without its surrounding quotes.
+ *
+ * A leading ', " or ` is dropped, and the last character is dropped too
+ * when it is the same quote character.  A string that does not start with
+ * a quote is copied unchanged.  NULL is returned for an empty string and
+ * for a quoted string shorter than 3 bytes.  The result is allocated with
+ * vStringNewInit(); presumably the caller must vStringDelete() it.
+ */
+extern vString *rExtractNameFromString (vString* str)
+{
+	int offset = 0;
+
+	if (vStringLength (str) == 0)
+		return NULL;
+
+	char b = vStringChar (str, 0);
+	if (b == '\'' || b == '"' || b == '`')
+		offset = 1;
+
+	if (offset && vStringLength (str) < 3)
+		return NULL;
+
+	vString *n = vStringNewInit (vStringValue (str) + offset);
+	/* Chop the tail only when B really is an opening quote.  Without the
+	 * OFFSET test an unquoted name whose first and last characters match
+	 * (e.g. "aba") lost its last character. */
+	if (offset && vStringChar (n, vStringLength (n) - 1) == b)
+		vStringChop (n);
+
+	return n;
+}
+
+#ifdef DEBUG
+static const char *tokenTypeStr(enum RTokenType e)
+{ /* Generated by misc/enumstr.sh with cmdline:
+     parsers/r.c RTokenType tokenTypeStr TOKEN_R_ --use-lower-bits-as-is */
+	switch (e)
+	{
+		case TOKEN_R_EOF: return "EOF";
+		case TOKEN_R_UNDEFINED: return "UNDEFINED";
+		case TOKEN_R_KEYWORD: return "KEYWORD";
+		case TOKEN_R_NEWLINE: return "NEWLINE";
+		case TOKEN_R_NUMBER: return "NUMBER";
+		case TOKEN_R_SYMBOL: return "SYMBOL";
+		case TOKEN_R_STRING: return "STRING";
+		case TOKEN_R_OPERATOR: return "OPERATOR";
+		case TOKEN_R_DOTS: return "DOTS";
+		case TOKEN_R_DOTS_N: return "DOTS_N";
+		case TOKEN_R_LASSIGN: return "LASSIGN";
+		case TOKEN_R_RASSIGN: return "RASSIGN";
+		case TOKEN_R_SCOPE: return "SCOPE";
+		default: break;
+	}
+	static char buf[3];
+	if (isprint (e))
+	{
+		buf[0] = e;
+		buf[1] = '\0';
+	}
+	else if (e == '\n')
+	{
+		buf[0] = '\\';
+		buf[1] = 'n';
+		buf[2] = '\0';
+	}
+	else
+	{
+
buf[0] = '\0'; + } + return buf; +} +#endif diff --git a/ctags/parsers/r.h b/ctags/parsers/r.h new file mode 100644 index 0000000000..67b8f25a34 --- /dev/null +++ b/ctags/parsers/r.h @@ -0,0 +1,111 @@ +/* +* Copyright (c) 2003-2004, Ascher Stefan +* Copyright (c) 2020, Masatake YAMATO +* Copyright (c) 2020, Red Hat, Inc. +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +*/ + +#ifndef CTAGS_PARSER_R_H +#define CTAGS_PARSER_R_H + +/* +* INCLUDE FILES +*/ + +#include "general.h" /* must always come first */ + +#include "subparser.h" +#include "tokeninfo.h" +#include "entry.h" + + +/* +* DATA DECLARATIONS +*/ + +typedef struct sRSubparser rSubparser; + +enum RTokenType { + /* 0..255 are the byte's values */ + TOKEN_R_EOF = 256, + TOKEN_R_UNDEFINED, + TOKEN_R_KEYWORD, + TOKEN_R_NEWLINE, + TOKEN_R_NUMBER, /* 1, 1L */ + TOKEN_R_SYMBOL, /* [0-9a-zA-Z._] */ + TOKEN_R_STRING, + TOKEN_R_OPERATOR, /* - + ! ~ ? : * / ^ %...%, <, > == + * >=, <=, &, &&, |, || */ + TOKEN_R_DOTS, /* ... 
*/ + TOKEN_R_DOTS_N, /* ..1, ..2, etc */ + TOKEN_R_LASSIGN, /* <-, <<- */ + TOKEN_R_RASSIGN, /* ->, ->> */ + TOKEN_R_SCOPE, /* ::, ::: */ +}; + +enum eRKeywordId +{ + KEYWORD_R_C, + KEYWORD_R_DATAFRAME, + KEYWORD_R_FUNCTION, + KEYWORD_R_IF, + KEYWORD_R_ELSE, + KEYWORD_R_FOR, + KEYWORD_R_WHILE, + KEYWORD_R_REPEAT, + KEYWORD_R_IN, + KEYWORD_R_NEXT, + KEYWORD_R_BREAK, + KEYWORD_R_TRUE, + KEYWORD_R_FALSE, + KEYWORD_R_NULL, + KEYWORD_R_INF, + KEYWORD_R_LIST, + KEYWORD_R_NAN, + KEYWORD_R_NA, + KEYWORD_R_SOURCE, + KEYWORD_R_LIBRARY, +}; + +struct sRSubparser { + subparser subparser; + int (* readRightSideSymbol) (rSubparser *s, + tokenInfo *const symbol, + const char *const assignmentOperator, + int parent, + tokenInfo *const token); + int (* makeTagWithTranslation) (rSubparser *s, + tokenInfo *const token, + int parent, + bool in_func, + int kindInR, + const char *const assignmentOperator); + bool (* askTagAcceptancy) (rSubparser *s, tagEntryInfo *pe); + bool (* hasFunctionAlikeKind) (rSubparser *s, tagEntryInfo *pe); + int (* readFuncall) (rSubparser *s, + tokenInfo *const func, + tokenInfo *const token, + int parent); +}; + +extern void rSetupCollectingSignature (tokenInfo *const token, + vString *signature); +extern void rTeardownCollectingSignature (tokenInfo *const token); + +/* + * FUNCTION PROTOTYPES + */ + +extern tokenInfo *rNewToken (void); + +extern void rTokenReadNoNewline (tokenInfo *const token); + +/* This function returns true if a new token is read. + * EOF is exception. If EOF is read, this function returns FALSE. 
*/ +extern bool rParseStatement (tokenInfo *const token, int parentIndex, bool inArgList); + +extern vString *rExtractNameFromString (vString* str); + +#endif /* CTAGS_PARSER_TEX_H */ diff --git a/ctags/parsers/sh.c b/ctags/parsers/sh.c new file mode 100644 index 0000000000..b469bd2ac4 --- /dev/null +++ b/ctags/parsers/sh.c @@ -0,0 +1,490 @@ +/* +* Copyright (c) 2000-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for scripts for the +* Bourne shell (and its derivatives, the Korn and Z shells). +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "entry.h" +#include "kind.h" +#include "parse.h" +#include "read.h" +#include "promise.h" +#include "routines.h" +#include "vstring.h" +#include "xtag.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_NOTHING = -1, /* place holder. Never appears on tags file. */ + K_ALIAS, + K_FUNCTION, + K_SOURCE, + K_HEREDOCLABEL, +} shKind; + +typedef enum { + R_SCRIPT_LOADED, +} shScriptRole; + +static roleDefinition ShScriptRoles [] = { + { true, "loaded", "loaded" }, +}; + +typedef enum { + R_HEREDOC_ENDMARKER, +} shHeredocRole; + +static roleDefinition ShHeredocRoles [] = { + { true, "endmarker", "end marker" }, +}; + +static kindDefinition ShKinds [] = { + { true, 'a', "alias", "aliases"}, + { true, 'f', "function", "functions"}, + { true, 's', "script", "script files", + .referenceOnly = true, ATTACH_ROLES (ShScriptRoles) }, + { true, 'h', "heredoc", "label for here document", + .referenceOnly = false, ATTACH_ROLES (ShHeredocRoles) }, +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static bool isFileChar (int c) +{ + return (isalnum (c) + || c == '_' || c == '-' + || c == '/' || c == '.' 
+		|| c == '+' || c == '^'
+		|| c == '%' || c == '@'
+		|| c == '~');
+}
+
+/* Characters accepted in an alias name. */
+static bool isIdentChar (int c)
+{
+	return (isalnum (c) || c == '_' || c == '-');
+}
+
+/* bash allows all kinds of crazy stuff as the identifier after 'function' */
+static bool isBashFunctionChar (int c)
+{
+	return (c > 1 /* NUL and SOH are disallowed */ && c != 0x7f &&
+	        /* blanks are disallowed, but VT and FF (and CR to some extent, but
+	         * let's not fall into the pit of craziness) */
+	        c != ' ' && c != '\t' && c != '\n' && c != '\r' &&
+	        c != '"' && c != '\'' && c != '$' && c != '`' && c != '\\' &&
+	        c != '&' && c != ';' &&
+	        c != '(' && c != ')' &&
+	        c != '<' && c != '>');
+}
+
+/* CP points at an opening '"'.  Return a pointer to the first following '"'
+ * that is not directly preceded by a backslash, or to the terminating NUL. */
+static const unsigned char *skipDoubleString (const unsigned char *cp)
+{
+	const unsigned char* prev = cp;
+	cp++;
+	while ((*cp != '"' || *prev == '\\') && *cp != '\0')
+	{
+		prev = cp;
+		cp++;
+	}
+	return cp;
+}
+
+/* CP points at an opening '\''.  Return a pointer to the next '\'' or to the
+ * terminating NUL. */
+static const unsigned char *skipSingleString (const unsigned char *cp)
+{
+	cp++;
+	while (*cp != '\'' && *cp != '\0')
+		cp++;
+	return cp;
+}
+
+/* True when the base file name of CMD is "env". */
+static bool isEnvCommand (const vString *cmd)
+{
+	const char *lc = vStringValue(cmd);
+	const char * tmp = baseFilename (lc);
+
+	return (strcmp(tmp, "env") == 0);
+}
+
+/*
+ * Parse an output redirection (">file" or ">>file", with optional blanks
+ * before the operator and before the file name) starting at CP.
+ *
+ * On success the file name is stored in DESTFILE and the number of bytes
+ * consumed from CP is returned.  0 is returned, and DESTFILE is left
+ * untouched, when CP does not start with such a redirection.
+ */
+static int readDestfileName (const unsigned char *cp, vString *destfile)
+{
+	const unsigned char *origin = cp;
+
+	while (isspace ((int) *cp))
+		++cp;
+
+	/* >... */
+	if (*cp != '>')
+		return 0;
+	++cp;
+
+	/* >>... : the first '>' is consumed above, so this really looks at the
+	 * second character.  Testing the same character twice made the branch
+	 * always true and left ">>file" unrecognized by this function. */
+	if (*cp == '>')
+		++cp;
+
+	while (isspace ((int) *cp))
+		++cp;
+
+	if (!isFileChar ((int) *cp))
+		return 0;
+
+	vStringClear(destfile);
+	do {
+		vStringPut (destfile, (int) *cp);
+		++cp;
+	} while (isFileChar ((int) *cp));
+
+	if (vStringLength(destfile) > 0)
+		return cp - origin;
+
+	return 0;
+}
+
+/* State kept while looking for a here document on the current line. */
+struct hereDocParsingState {
+	vString *args[2];		/* first two words seen on the line */
+	vString *destfile;		/* redirection target, see readDestfileName() */
+	langType sublang;		/* LANG_IGNORE when no language was chosen */
+	unsigned long startLine;	/* set together with sublang */
+
+	int corkIndex;
+};
+
+/* Allocate the strings of HSTATE and reset corkIndex and sublang. */
+static void hdocStateInit (struct hereDocParsingState *hstate)
+{
+	hstate->args[0] = vStringNew ();
+	hstate->args[1] = vStringNew ();
+	hstate->destfile = vStringNew ();
+
+	hstate->corkIndex = CORK_NIL;
+	hstate->sublang = LANG_IGNORE;
+}
+
+/* Empty the strings of HSTATE; the other fields are left as they are. */
+static void hdocStateClear (struct hereDocParsingState *hstate)
+{
+	vStringClear (hstate->args[0]);
+	vStringClear (hstate->args[1]);
+	vStringClear (hstate->destfile);
+}
+
+/* Release the strings allocated by hdocStateInit(). */
+static void hdocStateFini (struct hereDocParsingState *hstate)
+{
+	vStringDelete (hstate->args[0]);
+	vStringDelete (hstate->args[1]);
+	vStringDelete (hstate->destfile);
+}
+
+/* Store NAME in the first of args[0], args[1] that is still empty. */
+static void hdocStateUpdateArgs (struct hereDocParsingState *hstate,
+								 vString *name)
+{
+	if (vStringIsEmpty(hstate->args[0]))
+		vStringCopy(hstate->args[0], name);
+	else if (vStringIsEmpty(hstate->args[1]))
+		vStringCopy(hstate->args[1], name);
+}
+
+/* When a language was chosen, make a promise from startLine to the current
+ * input line for it; in any case reset sublang to LANG_IGNORE. */
+static void hdocStateMakePromiseMaybe (struct hereDocParsingState *hstate)
+{
+	if (hstate->sublang != LANG_IGNORE)
+		makePromise (getLanguageName(hstate->sublang),
+					 hstate->startLine, 0,
+					 getInputLineNumber(), 0,
+					 0);
+	hstate->sublang = LANG_IGNORE;
+}
+
+/* Unless a language is already chosen, choose one from the destfile name;
+ * on success the line after the current one becomes startLine.  The
+ * destfile string is cleared afterwards. */
+static void hdocStateRecordStartlineFromDestfileMaybe (struct hereDocParsingState *hstate)
+{
+	const char *f = vStringValue(hstate->destfile);
+
+	if (hstate->sublang != LANG_IGNORE)
+		return;
+
+	hstate->sublang = getLanguageForFilename (f, 0);
+	if (hstate->sublang != LANG_IGNORE)
+		hstate->startLine = getInputLineNumber () + 1;
+	vStringClear (hstate->destfile);
+}
+
+static void hdocStateRecordStatelineMaybe (struct
hereDocParsingState *hstate) +{ + if (!vStringIsEmpty(hstate->args[0])) + { + const char *cmd; + + cmd = vStringValue(hstate->args[0]); + if (isEnvCommand (hstate->args[0])) + { + cmd = NULL; + if (!vStringIsEmpty(hstate->args[1])) + cmd = vStringValue(hstate->args[1]); + } + + if (cmd) + { + hstate->sublang = getLanguageForCommand (cmd, 0); + if (hstate->sublang != LANG_IGNORE) + hstate->startLine = getInputLineNumber () + 1; + } + } + + if (vStringLength(hstate->destfile) > 0) + hdocStateRecordStartlineFromDestfileMaybe (hstate); +} + +static int hdocStateReadDestfileName (struct hereDocParsingState *hstate, + const unsigned char* cp, + const vString *const hereDocDelimiter) +{ + int d = readDestfileName (cp, hstate->destfile); + + if (d > 0 && hereDocDelimiter) + hdocStateRecordStartlineFromDestfileMaybe (hstate); + + return d; +} + +static void hdocStateUpdateTag (struct hereDocParsingState *hstate, unsigned long endLine) +{ + tagEntryInfo *tag = getEntryInCorkQueue (hstate->corkIndex); + if (tag) + { + tag->extensionFields.endLine = endLine; + hstate->corkIndex = CORK_NIL; + } +} + +static void findShTags (void) +{ + vString *name = vStringNew (); + const unsigned char *line; + vString *hereDocDelimiter = NULL; + bool hereDocIndented = false; + bool (* check_char)(int); + + struct hereDocParsingState hstate; + hdocStateInit (&hstate); + + while ((line = readLineFromInputFile ()) != NULL) + { + const unsigned char* cp = line; + shKind found_kind = K_NOTHING; + + if (hereDocDelimiter) + { + if (hereDocIndented) + { + while (*cp == '\t') + cp++; + } + if ((strncmp ((const char *) cp, vStringValue (hereDocDelimiter), vStringLength (hereDocDelimiter)) == 0) + && ((*(cp + vStringLength (hereDocDelimiter)) == '\0') + || isspace (*(cp + vStringLength (hereDocDelimiter)) ))) + { + hdocStateUpdateTag (&hstate, getInputLineNumber ()); + hdocStateMakePromiseMaybe (&hstate); + + if (!vStringIsEmpty(hereDocDelimiter)) + makeSimpleRefTag(hereDocDelimiter, K_HEREDOCLABEL, 
R_HEREDOC_ENDMARKER); + vStringDelete (hereDocDelimiter); + hereDocDelimiter = NULL; + } + continue; + } + + hdocStateClear (&hstate); + while (*cp != '\0') + { + /* jump over whitespace */ + while (isspace ((int)*cp)) + cp++; + + /* jump over strings */ + if (*cp == '"') + cp = skipDoubleString (cp); + else if (*cp == '\'') + cp = skipSingleString (cp); + /* jump over comments */ + else if (*cp == '#') + break; + /* jump over here-documents */ + else if (cp[0] == '<' && cp[1] == '<') + { + const unsigned char *start, *end; + bool trimEscapeSequences = false; + bool quoted = false; + cp += 2; + /* an optional "-" strips leading tabulations from the heredoc lines */ + if (*cp != '-') + hereDocIndented = false; + else + { + hereDocIndented = true; + cp++; + } + while (isspace (*cp)) + cp++; + start = end = cp; + /* the delimiter can be surrounded by quotes */ + if (*cp == '"') + { + start++; + end = cp = skipDoubleString (cp); + /* we need not to worry about variable substitution, they + * don't happen in heredoc delimiter definition */ + trimEscapeSequences = true; + quoted = true; + } + else if (*cp == '\'') + { + start++; + end = cp = skipSingleString (cp); + quoted = true; + } + else + { + while (isIdentChar ((int) *cp)) + cp++; + end = cp; + } + if (end > start || quoted) + { + /* The input may be broken as a shell script but we need to avoid + memory leaking. 
*/ + if (hereDocDelimiter) + vStringClear(hereDocDelimiter); + else + hereDocDelimiter = vStringNew (); + for (; end > start; start++) + { + if (trimEscapeSequences && *start == '\\') + start++; + vStringPut (hereDocDelimiter, *start); + } + if (vStringLength(hereDocDelimiter) > 0) + hstate.corkIndex = makeSimpleTag(hereDocDelimiter, K_HEREDOCLABEL); + + hdocStateRecordStatelineMaybe(&hstate); + } + } + + check_char = isBashFunctionChar; + + if (strncmp ((const char*) cp, "function", (size_t) 8) == 0 && + isspace ((int) cp [8])) + { + found_kind = K_FUNCTION; + cp += 8; + } + else if (strncmp ((const char*) cp, "alias", (size_t) 5) == 0 && + isspace ((int) cp [5])) + { + check_char = isIdentChar; + found_kind = K_ALIAS; + cp += 5; + } + else if (cp [0] == '.' + && isspace((int) cp [1])) + { + found_kind = K_SOURCE; + ++cp; + check_char = isFileChar; + } + else if (strncmp ((const char*) cp, "source", (size_t) 6) == 0 + && isspace((int) cp [6])) + { + found_kind = K_SOURCE; + cp += 6; + check_char = isFileChar; + } + + if (found_kind != K_NOTHING) + while (isspace ((int) *cp)) + ++cp; + + // Get the name of the function, alias or file to be read by source + if (! 
check_char ((int) *cp)) + { + found_kind = K_NOTHING; + + int d = hdocStateReadDestfileName (&hstate, cp, + hereDocDelimiter); + if (d > 0) + cp += d; + else if (*cp != '\0') + ++cp; + continue; + } + while (check_char ((int) *cp)) + { + vStringPut (name, (int) *cp); + ++cp; + } + + while (isspace ((int) *cp)) + ++cp; + + if ((found_kind != K_SOURCE) + && *cp == '(') + { + ++cp; + while (isspace ((int) *cp)) + ++cp; + if (*cp == ')') + { + found_kind = K_FUNCTION; + ++cp; + } + } + + if (found_kind != K_NOTHING) + { + if (found_kind == K_SOURCE) + makeSimpleRefTag (name, K_SOURCE, R_SCRIPT_LOADED); + else + makeSimpleTag (name, found_kind); + found_kind = K_NOTHING; + } + else if (!hereDocDelimiter) + hdocStateUpdateArgs (&hstate, name); + vStringClear (name); + } + } + hdocStateFini (&hstate); + vStringDelete (name); + if (hereDocDelimiter) + vStringDelete (hereDocDelimiter); +} + +extern parserDefinition* ShParser (void) +{ + static const char *const extensions [] = { + "sh", "SH", "bsh", "bash", "ksh", "zsh", "ash", NULL + }; + static const char *const aliases [] = { + "sh", "bash", "ksh", "zsh", "ash", + /* major mode name in emacs */ + "shell-script", + NULL + }; + parserDefinition* def = parserNew ("Sh"); + def->kindTable = ShKinds; + def->kindCount = ARRAY_SIZE (ShKinds); + def->extensions = extensions; + def->aliases = aliases; + def->parser = findShTags; + def->useCork = CORK_QUEUE; + return def; +} diff --git a/ctags/parsers/verilog.c b/ctags/parsers/verilog.c new file mode 100644 index 0000000000..25a9ba8d58 --- /dev/null +++ b/ctags/parsers/verilog.c @@ -0,0 +1,2024 @@ +/* + * Copyright (c) 2003, Darren Hiebert + * Copyright (c) 2017, Vitor Antunes + * Copyright (c) 2020, Hiroo Hayashi + * + * This source code is released for free distribution under the terms of the + * GNU General Public License version 2 or (at your option) any later version. 
+ * + * This module contains functions for generating tags for the Verilog or + * SystemVerilog HDL (Hardware Description Language). + * + * References: + * IEEE Std 1800-2017, SystemVerilog Language Reference Manual + * https://ieeexplore.ieee.org/document/8299595 + * SystemVerilog IEEE Std 1800-2012 Grammar + * https://insights.sigasi.com/tech/systemverilog.ebnf/ + * Verilog Formal Syntax Specification + * http://www.verilog.com/VerilogBNF.html + */ + +/* + * INCLUDE FILES + */ +#include "general.h" /* must always come first */ + +#include + +#include "debug.h" +#include "entry.h" +#include "keyword.h" +#include "options.h" +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "xtag.h" +#include "ptrarray.h" + +/* + * MACROS + */ +#define NUMBER_LANGUAGES 2 /* Indicates number of defined indexes */ +#define IDX_SYSTEMVERILOG 0 +#define IDX_VERILOG 1 + +/* + * DATA DECLARATIONS + */ + +/* A callback function searching a symbol from the cork symbol table assumes + * these kind definitions are shared in Verilog and SystemVerilog parsers. + * If you separate the definitions for the parsers, you must revise the + * code related to the symbol table. 
*/ +typedef enum { + /* parser private items */ + K_IGNORE = -16, /* Verilog/SystemVerilog keywords to be ignored */ + K_DEFINE, + K_DIRECTIVE, + K_END, + K_END_DE, /* End of Design Elements */ + K_IDENTIFIER, + K_LOCALPARAM, + K_PARAMETER, + K_IMPORT, + K_WITH, + + K_UNDEFINED = KEYWORD_NONE, + /* the followings items are also used as indices for VerilogKinds[] and SystemVerilogKinds[] */ + K_CONSTANT= 0, + K_EVENT, + K_FUNCTION, + K_MODULE, + K_NET, + K_PORT, + K_REGISTER, + K_TASK, + K_BLOCK, + K_INSTANCE, + K_ASSERTION, + K_CLASS, + K_COVERGROUP, + K_ENUM, + K_INTERFACE, + K_MODPORT, + K_PACKAGE, + K_PROGRAM, + K_PROTOTYPE, + K_PROPERTY, + K_STRUCT, + K_TYPEDEF, + K_CHECKER, + K_CLOCKING, + K_SEQUENCE, + K_MEMBER, + K_IFCLASS, /* interface class */ + K_CONSTRAINT, + K_NETTYPE, +} verilogKind; + +typedef struct { + const char *keyword; + verilogKind kind; + short isValid [NUMBER_LANGUAGES]; +} keywordAssoc; + +typedef struct sTokenInfo { + verilogKind kind; + vString* name; /* the name of the token */ + unsigned long lineNumber; /* line number where token was found */ + MIOPos filePosition; /* file position where token was found */ + struct sTokenInfo* scope; /* context of keyword */ + int nestLevel; /* Current nest level */ + verilogKind lastKind; /* Kind of last found tag */ + vString* blockName; /* Current block name */ + vString* inheritance; /* Class inheritance */ + bool prototype; /* Is only a prototype */ + bool classScope; /* Context is local to the current sub-context */ + bool parameter; /* parameter which can be overridden */ + bool hasParamList; /* module definition has a parameter port list */ +} tokenInfo; + +typedef enum { + F_PARAMETER, +} verilogField; + +/* + * DATA DEFINITIONS + */ +static int Ungetc; +static int Lang_verilog; +static int Lang_systemverilog; + +static kindDefinition VerilogKinds [] = { + { true, 'c', "constant", "constants (define, parameter, specparam)" }, + { true, 'e', "event", "events" }, + { true, 'f', "function", 
"functions" }, + { true, 'm', "module", "modules" }, + { true, 'n', "net", "net data types" }, + { true, 'p', "port", "ports" }, + { true, 'r', "register", "variable data types" }, + { true, 't', "task", "tasks" }, + { true, 'b', "block", "blocks (begin, fork)" }, + { true, 'i', "instance", "instances of module" }, +}; + +static kindDefinition SystemVerilogKinds [] = { + { true, 'c', "constant", "constants (define, parameter, specparam, enum values)" }, + { true, 'e', "event", "events" }, + { true, 'f', "function", "functions" }, + { true, 'm', "module", "modules" }, + { true, 'n', "net", "net data types" }, + { true, 'p', "port", "ports" }, + { true, 'r', "register", "variable data types" }, + { true, 't', "task", "tasks" }, + { true, 'b', "block", "blocks (begin, fork)" }, + { true, 'i', "instance", "instances of module or interface" }, + { true, 'A', "assert", "assertions (assert, assume, cover, restrict)" }, + { true, 'C', "class", "classes" }, + { true, 'V', "covergroup","covergroups" }, + { true, 'E', "enum", "enumerators" }, + { true, 'I', "interface", "interfaces" }, + { true, 'M', "modport", "modports" }, + { true, 'K', "package", "packages" }, + { true, 'P', "program", "programs" }, + { false,'Q', "prototype", "prototypes (extern, pure)" }, + { true, 'R', "property", "properties" }, + { true, 'S', "struct", "structs and unions" }, + { true, 'T', "typedef", "type declarations" }, + { true, 'H', "checker", "checkers" }, + { true, 'L', "clocking", "clocking" }, + { true, 'q', "sequence", "sequences" }, + { true, 'w', "member", "struct and union members" }, + { true, 'l', "ifclass", "interface class" }, + { true, 'O', "constraint","constraints" }, + { true, 'N', "nettype", "nettype declarations" }, +}; + +static const keywordAssoc KeywordTable [] = { + /* SystemVerilog */ + /* | Verilog */ + /* keyword keyword ID | | */ + { "`define", K_DEFINE, { 1, 1 } }, + { "begin", K_BLOCK, { 1, 1 } }, + { "end", K_END, { 1, 1 } }, + { "endfunction", K_END_DE, { 1, 1 } }, 
+ { "endmodule", K_END_DE, { 1, 1 } }, + { "endtask", K_END_DE, { 1, 1 } }, + { "event", K_EVENT, { 1, 1 } }, + { "fork", K_BLOCK, { 1, 1 } }, + { "function", K_FUNCTION, { 1, 1 } }, + { "genvar", K_REGISTER, { 1, 1 } }, + { "inout", K_PORT, { 1, 1 } }, + { "input", K_PORT, { 1, 1 } }, + { "integer", K_REGISTER, { 1, 1 } }, + { "join", K_END, { 1, 1 } }, + { "localparam", K_LOCALPARAM, { 1, 1 } }, + { "module", K_MODULE, { 1, 1 } }, + { "output", K_PORT, { 1, 1 } }, + { "parameter", K_PARAMETER, { 1, 1 } }, + { "real", K_REGISTER, { 1, 1 } }, + { "realtime", K_REGISTER, { 1, 1 } }, + { "reg", K_REGISTER, { 1, 1 } }, + { "signed", K_IGNORE, { 1, 1 } }, + { "specparam", K_CONSTANT, { 1, 1 } }, + { "supply0", K_NET, { 1, 1 } }, + { "supply1", K_NET, { 1, 1 } }, + { "task", K_TASK, { 1, 1 } }, + { "time", K_REGISTER, { 1, 1 } }, + { "tri", K_NET, { 1, 1 } }, + { "triand", K_NET, { 1, 1 } }, + { "trior", K_NET, { 1, 1 } }, + { "trireg", K_NET, { 1, 1 } }, + { "tri0", K_NET, { 1, 1 } }, + { "tri1", K_NET, { 1, 1 } }, + { "uwire", K_NET, { 1, 1 } }, + { "wand", K_NET, { 1, 1 } }, + { "wire", K_NET, { 1, 1 } }, + { "wor", K_NET, { 1, 1 } }, + { "assert", K_ASSERTION, { 1, 0 } }, + { "assume", K_ASSERTION, { 1, 0 } }, + { "bit", K_REGISTER, { 1, 0 } }, + { "byte", K_REGISTER, { 1, 0 } }, + { "chandle", K_REGISTER, { 1, 0 } }, + { "checker", K_CHECKER, { 1, 0 } }, + { "class", K_CLASS, { 1, 0 } }, + { "constraint", K_CONSTRAINT, { 1, 0 } }, + { "cover", K_ASSERTION, { 1, 0 } }, + { "clocking", K_CLOCKING, { 1, 0 } }, + { "covergroup", K_COVERGROUP, { 1, 0 } }, + { "endchecker", K_END_DE, { 1, 0 } }, + { "endclass", K_END_DE, { 1, 0 } }, + { "endclocking", K_END_DE, { 1, 0 } }, + { "endgroup", K_END_DE, { 1, 0 } }, + { "endinterface", K_END_DE, { 1, 0 } }, + { "endpackage", K_END_DE, { 1, 0 } }, + { "endprogram", K_END_DE, { 1, 0 } }, + { "endproperty", K_END_DE, { 1, 0 } }, + { "endsequence", K_END_DE, { 1, 0 } }, + { "enum", K_ENUM, { 1, 0 } }, + { "extern", K_PROTOTYPE, { 
1, 0 } }, + { "import", K_IMPORT, { 1, 0 } }, + { "int", K_REGISTER, { 1, 0 } }, + { "interconnect", K_NET, { 1, 0 } }, + { "interface", K_INTERFACE, { 1, 0 } }, + { "join_any", K_END, { 1, 0 } }, + { "join_none", K_END, { 1, 0 } }, + { "logic", K_REGISTER, { 1, 0 } }, + { "longint", K_REGISTER, { 1, 0 } }, + { "modport", K_MODPORT, { 1, 0 } }, + { "package", K_PACKAGE, { 1, 0 } }, + { "program", K_PROGRAM, { 1, 0 } }, + { "property", K_PROPERTY, { 1, 0 } }, + { "pure", K_PROTOTYPE, { 1, 0 } }, + { "ref", K_PORT, { 1, 0 } }, + { "restrict", K_ASSERTION, { 1, 0 } }, + { "sequence", K_SEQUENCE, { 1, 0 } }, + { "shortint", K_REGISTER, { 1, 0 } }, + { "shortreal", K_REGISTER, { 1, 0 } }, + { "string", K_REGISTER, { 1, 0 } }, + { "struct", K_STRUCT, { 1, 0 } }, + { "type", K_REGISTER, { 1, 0 } }, + { "typedef", K_TYPEDEF, { 1, 0 } }, + { "union", K_STRUCT, { 1, 0 } }, + { "var", K_REGISTER, { 1, 0 } }, + { "void", K_REGISTER, { 1, 0 } }, + { "with", K_WITH, { 1, 0 } }, + { "nettype", K_NETTYPE, { 1, 0 } }, +// { "virtual", K_PROTOTYPE, { 1, 0 } }, // do not add for now +}; + +static tokenInfo *currentContext = NULL; +static ptrArray *tagContents; +static fieldDefinition *fieldTable = NULL; + +// IEEE Std 1364-2005 LRM, Appendix B "List of Keywords" +const static struct keywordGroup verilogKeywords = { + .value = K_IGNORE, + .addingUnlessExisting = true, + .keywords = { + "always", "and", "assign", "automatic", "begin", "buf", "bufif0", + "bufif1", "case", "casex", "casez", "cell", "cmos", "config", + "deassign", "default", "defparam", "design", "disable", "edge", + "else", "end", "endcase", "endconfig", "endfunction", "endgenerate", + "endmodule", "endprimitive", "endspecify", "endtable", "endtask", + "event", "for", "force", "forever", "fork", "function", "generate", + "genvar", "highz0", "highz1", "if", "ifnone", "incdir", "include", + "initial", "inout", "input", "instance", "integer", "join", "large", + "liblist", "library", "localparam", "macromodule", "medium", 
"module", + "nand", "negedge", "nmos", "nor", "noshowcancelled", "not", "notif0", + "notif1", "or", "output", "parameter", "pmos", "posedge", "primitive", + "pull0", "pull1", "pulldown", "pullup", "pulsestyle_onevent", + "pulsestyle_ondetect", "rcmos", "real", "realtime", "reg", "release", + "repeat", "rnmos", "rpmos", "rtran", "rtranif0", "rtranif1", + "scalared", "showcancelled", "signed", "small", "specify", + "specparam", "strong0", "strong1", "supply0", "supply1", "table", + "task", "time", "tran", "tranif0", "tranif1", "tri", "tri0", "tri1", + "triand", "trior", "trireg", "unsigned1", "use", "uwire", "vectored", + "wait", "wand", "weak0", "weak1", "while", "wire", "wor", "xnor", "xor", + NULL + }, +}; +// IEEE Std 1800-2017 LRM, Annex B "Keywords" +const static struct keywordGroup systemVerilogKeywords = { + .value = K_IGNORE, + .addingUnlessExisting = true, + .keywords = { + "accept_on", "alias", "always", "always_comb", "always_ff", + "always_latch", "and", "assert", "assign", "assume", "automatic", + "before", "begin", "bind", "bins", "binsof", "bit", "break", "buf", + "bufif0", "bufif1", "byte", "case", "casex", "casez", "cell", + "chandle", "checker", "class", "clocking", "cmos", "config", "const", + "constraint", "context", "continue", "cover", "covergroup", + "coverpoint", "cross", "deassign", "default", "defparam", "design", + "disable", "dist", "do", "edge", "else", "end", "endcase", + "endchecker", "endclass", "endclocking", "endconfig", "endfunction", + "endgenerate", "endgroup", "endinterface", "endmodule", "endpackage", + "endprimitive", "endprogram", "endproperty", "endspecify", + "endsequence", "endtable", "endtask", "enum", "event", "eventually", + "expect", "export", "extends", "extern", "final", "first_match", + "for", "force", "foreach", "forever", "fork", "forkjoin", "function", + "generate", "genvar", "global", "highz0", "highz1", "if", "iff", + "ifnone", "ignore_bins", "illegal_bins", "implements", "implies", + "import", "incdir", 
"include", "initial", "inout", "input", "inside", + "instance", "int", "integer", "interconnect", "interface", + "intersect", "join", "join_any", "join_none", "large", "let", + "liblist", "library", "local", "localparam", "logic", "longint", + "macromodule", "matches", "medium", "modport", "module", "nand", + "negedge", "nettype", "new", "nexttime", "nmos", "nor", + "noshowcancelled", "not", "notif0", "notif1", "null", "or", "output", + "package", "packed", "parameter", "pmos", "posedge", "primitive", + "priority", "program", "property", "protected", "pull0", "pull1", + "pulldown", "pullup", "pulsestyle_ondetect", "pulsestyle_onevent", + "pure", "rand", "randc", "randcase", "randsequence", "rcmos", "real", + "realtime", "ref", "reg", "reject_on", "release", "repeat", + "restrict", "return", "rnmos", "rpmos", "rtran", "rtranif0", + "rtranif1", "s_always", "s_eventually", "s_nexttime", "s_until", + "s_until_with", "scalared", "sequence", "shortint", "shortreal", + "showcancelled", "signed", "small", "soft", "solve", "specify", + "specparam", "static", "string", "strong", "strong0", "strong1", + "struct", "super", "supply0", "supply1", "sync_accept_on", + "sync_reject_on", "table", "tagged", "task", "this", "throughout", + "time", "timeprecision", "timeunit", "tran", "tranif0", "tranif1", + "tri", "tri0", "tri1", "triand", "trior", "trireg", "type", "typedef", + "union", "unique", "unique0", "unsigned", "until", "until_with", + "untyped", "use", "uwire", "var", "vectored", "virtual", "void", + "wait", "wait_order", "wand", "weak", "weak0", "weak1", "while", + "wildcard", "wire", "with", "within", "wor", "xnor", "xor", + NULL + }, +}; + +// IEEE Std 1364-2005 LRM, "19. 
Compiler directives" +const static struct keywordGroup verilogDirectives = { + .value = K_DIRECTIVE, + .addingUnlessExisting = true, + .keywords = { + "`begin_keywords", "`celldefine", "`default_nettype", "`define", + "`else", "`elsif", "`end_keywords", "`endcelldefine", "`endif", + "`ifdef", "`ifndef", "`include", "`line", "`nounconnected_drive", + "`pragma", "`resetall", "`timescale", "`unconnected_drive", "`undef", + NULL + }, +}; + +// IEEE Std 1800-2017 LRM, "22. Compiler directives" +const static struct keywordGroup systemVerilogDirectives = { + .value = K_DIRECTIVE, + .addingUnlessExisting = true, + .keywords = { + "`__LINE__", "`begin_keywords", "`celldefine", "`default_nettype", + "`define", "`else", "`elsif", "`end_keywords", "`endcelldefine", + "`endif", "`ifdef", "`ifndef", "`include", "`line", + "`nounconnected_drive", "`pragma", "`resetall", "`timescale", + "`unconnected_drive", "`undef", "`undefineall", + NULL + }, +}; + +// .enabled field cannot be shared by two languages +static fieldDefinition VerilogFields[] = { + { .name = "parameter", + .description = "parameter whose value can be overridden.", + .enabled = false, + .dataType = FIELDTYPE_BOOL }, +}; + +static fieldDefinition SystemVerilogFields[] = { + { .name = "parameter", + .description = "parameter whose value can be overridden.", + .enabled = false, + .dataType = FIELDTYPE_BOOL }, +}; + +/* + * PROTOTYPE DEFINITIONS + */ + +static bool isIdentifier (tokenInfo* token); +static int processDefine (tokenInfo *const token, int c); +static int processType (tokenInfo* token, int c, verilogKind* kind, bool* with); +static int pushEnumNames (tokenInfo* token, int c); +static int pushMembers (tokenInfo* token, int c); +static int readWordToken (tokenInfo *const token, int c); +static int readWordTokenNoSkip (tokenInfo *const token, int c); +static int skipBlockName (tokenInfo *const token, int c); +static int skipClockEvent (tokenInfo* token, int c); +static int skipDelay (tokenInfo* token, int c); 
+static int tagIdentifierList (tokenInfo *const token, int c, verilogKind kind, bool mayPortDecl); +static int tagNameList (tokenInfo* token, int c, verilogKind kind); + +/* + * FUNCTION DEFINITIONS + */ + +static short isContainer (verilogKind kind) +{ + switch (kind) + { + case K_MODULE: + case K_TASK: + case K_FUNCTION: + case K_BLOCK: + case K_CHECKER: + case K_CLASS: + case K_CLOCKING: + case K_COVERGROUP: + case K_IFCLASS: + case K_INTERFACE: + case K_PACKAGE: + case K_PROGRAM: + case K_PROPERTY: + case K_SEQUENCE: + case K_TYPEDEF: + case K_NETTYPE: + case K_ENUM: + case K_STRUCT: + return true; + default: + return false; + } +} + +static short isTempContext (tokenInfo const* token) +{ + switch (token->kind) + { + case K_TYPEDEF: + case K_NETTYPE: + case K_ENUM: + case K_STRUCT: + return true; + default: + return false; + } +} + +static void clearToken (tokenInfo *token) +{ + token->kind = K_UNDEFINED; // to be set by updateKind() + vStringClear (token->name); + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + token->scope = NULL; + token->nestLevel = 0; + token->lastKind = K_UNDEFINED; + vStringClear (token->blockName); + vStringClear (token->inheritance); + token->prototype = false; + token->classScope = false; + token->parameter = false; + token->hasParamList = false; +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + token->name = vStringNew (); + token->blockName = vStringNew (); + token->inheritance = vStringNew (); + clearToken (token); + return token; +} + +static tokenInfo *dupToken (tokenInfo *token) +{ + tokenInfo *dup = newToken (); + tokenInfo tmp = *dup; // save vStrings, name, blockName, and inheritance + *dup = *token; + // revert vStrings allocated for dup + dup->name = tmp.name; + dup->blockName = tmp.blockName; + dup->inheritance = tmp.inheritance; + // copy contents of vStrings + vStringCopy (dup->name, token->name); + vStringCopy (dup->blockName, 
token->blockName); + vStringCopy (dup->inheritance, token->inheritance); + return dup; +} + +static void deleteToken (tokenInfo * const token) +{ + if (token != NULL) + { + vStringDelete (token->name); + vStringDelete (token->blockName); + vStringDelete (token->inheritance); + eFree (token); + } +} + +static tokenInfo *pushToken (tokenInfo * const token, tokenInfo * const tokenPush) +{ + tokenPush->scope = token; + return tokenPush; +} + +static tokenInfo *popToken (tokenInfo * const token) +{ + tokenInfo *localToken; + if (token != NULL) + { + localToken = token->scope; + deleteToken (token); + return localToken; + } + return NULL; +} + +static void pruneTokens (tokenInfo * token) +{ + while ((token = popToken (token))) + ; +} + +static void swapToken (tokenInfo *t0, tokenInfo *t1) +{ + tokenInfo tmp = *t0; + *t0 = *t1; + *t1 = tmp; +} + +static const char *getNameForKind (const verilogKind kind) +{ + if (isInputLanguage (Lang_systemverilog)) + return (SystemVerilogKinds[kind]).name; + else /* isInputLanguage (Lang_verilog) */ + return (VerilogKinds[kind]).name; +} + +static char kindEnabled (const verilogKind kind) +{ + if (isInputLanguage (Lang_systemverilog)) + return SystemVerilogKinds[kind].enabled; + else /* isInputLanguage (Lang_verilog) */ + return VerilogKinds[kind].enabled; +} + +static void buildKeywordHash (const langType language, unsigned int idx) +{ + size_t i; + const size_t count = ARRAY_SIZE (KeywordTable); + for (i = 0 ; i < count ; ++i) + { + const keywordAssoc *p = &KeywordTable [i]; + if (p->isValid [idx]) + addKeyword (p->keyword, language, (int) p->kind); + } +} + +static void initializeVerilog (const langType language) +{ + Lang_verilog = language; + buildKeywordHash (language, IDX_VERILOG); + addKeywordGroup (&verilogKeywords, language); + addKeywordGroup (&verilogDirectives, language); + if (tagContents == NULL) + tagContents = ptrArrayNew ((ptrArrayDeleteFunc)deleteToken); + +} + +static void initializeSystemVerilog (const langType 
language) +{ + Lang_systemverilog = language; + buildKeywordHash (language, IDX_SYSTEMVERILOG); + addKeywordGroup (&systemVerilogKeywords, language); + addKeywordGroup (&systemVerilogDirectives, language); + if (tagContents == NULL) + tagContents = ptrArrayNew ((ptrArrayDeleteFunc)deleteToken); +} + +static void vUngetc (int c) +{ + Assert (Ungetc == '\0'); + Ungetc = c; +} + +/* Mostly copied from cppSkipOverCComment() in cpreprocessor.c. + * + * cppSkipOverCComment() uses the internal ungetc buffer of + * CPreProcessor. On the other hand, the Verilog parser uses + * getcFromInputFile() directly. getcFromInputFile() uses just + * another internal ungetc buffer. Using them mixed way will + * cause a trouble. */ +static int verilogSkipOverCComment (void) +{ + int c = getcFromInputFile (); + + while (c != EOF) + { + if (c != '*') + c = getcFromInputFile (); + else + { + const int next = getcFromInputFile (); + + if (next != '/') + c = next; + else + { + c = SPACE; /* replace comment with space */ + break; + } + } + } + return c; +} + +static int _vGetc (bool inSkipPastMatch) +{ + int c; + if (Ungetc == '\0') + c = getcFromInputFile (); + else + { + c = Ungetc; + Ungetc = '\0'; + } + if (c == '/') + { + int c2 = getcFromInputFile (); + if (c2 == EOF) + return EOF; + else if (c2 == '/') /* strip comment until end-of-line */ + { + do + c = getcFromInputFile (); + while (c != '\n' && c != EOF); + } + else if (c2 == '*') /* strip block comment */ + c = verilogSkipOverCComment (); + else + ungetcToInputFile (c2); + } + // replace a string with "@" only in skipPastMatch() + // because the string may contain parens, etc. + else if (inSkipPastMatch && c == '"') + { + int c2; + do + c2 = getcFromInputFile (); + while (c2 != '"' && c2 != EOF); + c = '@'; + } + return c; +} + +static int vGetc (void) +{ + return _vGetc (false); +} + +// Is the first charactor in an identifier? 
[a-zA-Z_`] +static bool isWordToken (const int c) +{ + return (isalpha (c) || c == '_' || c == '`'); +} + +// Is a charactor in an identifier? [a-zA-Z0-9_`$] +static bool isIdentifierCharacter (const int c) +{ + return (isalnum (c) || c == '_' || c == '`' || c == '$'); +} + +static int skipWhite (int c) +{ + while (isspace (c)) + c = vGetc (); + return c; +} + +static int skipPastMatch (const char *const pair) +{ + const int begin = pair [0], end = pair [1]; + int matchLevel = 1; + int c; + do + { + c = _vGetc (true); + if (c == begin) + ++matchLevel; + else if (c == end) + --matchLevel; + } + while (matchLevel > 0 && c != EOF); + return skipWhite (vGetc ()); +} + +static int skipDimension (int c) +{ + while (c == '[' && c != EOF) + c = skipPastMatch ("[]"); + return c; +} + +static int skipToSemiColon (int c) +{ + while (c != ';' && c != EOF) + c = vGetc (); + return c; // ';' or EOF +} + +static int skipString (int c) +{ + if (c == '"') + { + do + c = vGetc (); + while (c != '"' && c != EOF); + } + c = skipWhite (vGetc ()); + return c; +} + +static int skipExpression (int c) +{ + while (c != ',' && c != ';' && c != ')' && c != '}' && c != ']' && c != EOF) + { + if (c == '(') + c = skipPastMatch ("()"); + else if (c == '{') + c = skipPastMatch ("{}"); + else if (c == '[') + c = skipPastMatch ("[]"); + else if (c == '"') + c = skipString (c); + else + c = skipWhite (vGetc ()); + } + return c; +} + +// Skip to newline. The newline preceded by a backslash ( \ ) is ignored. 
+// Should be used after readWordTokenNoSkip() for compiler directives +static int skipToNewLine (int c) +{ + bool escape = false; + for ( ; (c != '\n' || escape) && c != EOF; c = vGetc ()) + escape = (c == '\\'); + + return c; // '\n' or EOF +} + +static int skipMacro (int c, tokenInfo *token) +{ + tokenInfo *localToken = newToken (); // don't update token outside + while (c == '`') // to support back-to-back compiler directives + { + c = readWordTokenNoSkip (localToken, c); + /* Skip compiler directive other than `define */ + if (localToken->kind == K_DIRECTIVE) + { + c = skipToNewLine (c); + c = skipWhite (c); + } + /* Skip `define */ + else if (localToken->kind == K_DEFINE) + { + c = skipWhite (c); + c = processDefine (localToken, c); + } + /* return macro expansion */ + else + { + swapToken (token, localToken); + c = skipWhite (c); + if (c == '(') + c = skipPastMatch ("()"); + break; + } + } + deleteToken (localToken); + return c; +} + +static void _updateKind (tokenInfo *const token) +{ + verilogKind kind = (verilogKind) lookupKeyword (vStringValue (token->name), getInputLanguage () ); + token->kind = ((kind == K_UNDEFINED) && isIdentifier (token)) ? K_IDENTIFIER : kind; +} + +/* read an identifier, keyword, number, compiler directive, or macro identifier */ +static int _readWordToken (tokenInfo *const token, int c, bool skip) +{ + Assert (isWordToken (c)); + + clearToken (token); + do + { + vStringPut (token->name, c); + c = vGetc (); + } while (isIdentifierCharacter (c)); + _updateKind (token); + + if (skip) + return skipWhite (c); + else + return c; +} + +// read a word token starting with "c". +// returns the first charactor of the next token. +static int readWordToken (tokenInfo *const token, int c) +{ + return _readWordToken (token, c, true); +} + +// read a word token starting with "c". +// returns the next charactor of the token read. +// for compiler directives. Since they are line-based, skipWhite() cannot be used. 
+static int readWordTokenNoSkip (tokenInfo *const token, int c) +{ + return _readWordToken (token, c, false); +} + +/* check if an identifier: + * simple_identifier ::= [ a-zA-Z_ ] { [ a-zA-Z0-9_$ ] } */ +static bool isIdentifier (tokenInfo* token) +{ + if (token->kind == K_UNDEFINED) + { + for (int i = 0; i < vStringLength (token->name); i++) + { + int c = vStringChar (token->name, i); + if (i == 0) + { + if (c == '`' || !isWordToken (c)) + return false; + } + else + { + if (!isIdentifierCharacter (c)) + return false; + } + } + return true; + } + else + return false; +} + +static void createContext (verilogKind kind, vString* const name) +{ + tokenInfo *const scope = newToken (); + vStringCopy (scope->name, name); + scope->kind = kind; + + if (scope) + { + vString *contextName = vStringNew (); + + /* Determine full context name */ + if (currentContext->kind != K_UNDEFINED) + { + vStringCopy (contextName, currentContext->name); + vStringPut (contextName, '.'); + } + vStringCat (contextName, scope->name); + /* Create context */ + currentContext = pushToken (currentContext, scope); + vStringCopy (currentContext->name, contextName); + vStringDelete (contextName); + verbose ("Created new context %s (kind %d)\n", vStringValue (currentContext->name), currentContext->kind); + } +} + +static void dropContext () +{ + verbose ("Dropping context %s\n", vStringValue (currentContext->name)); + currentContext = popToken (currentContext); +} + +/* Drop context, but only if an end token is found */ +static int dropEndContext (tokenInfo *const token, int c) +{ + verbose ("current context %s; context kind %0d; nest level %0d\n", vStringValue (currentContext->name), currentContext->kind, currentContext->nestLevel); + if ((currentContext->kind == K_COVERGROUP && strcmp (vStringValue (token->name), "endgroup") == 0) + || (currentContext->kind == K_IFCLASS && strcmp (vStringValue (token->name), "endclass") == 0)) + { + dropContext (); + c = skipBlockName (token ,c); + } + else if 
(currentContext->kind != K_UNDEFINED) + { + vString *endTokenName = vStringNewInit ("end"); + vStringCatS (endTokenName, getNameForKind (currentContext->kind)); + if (strcmp (vStringValue (token->name), vStringValue (endTokenName)) == 0) + { + dropContext (); + c = skipBlockName (token ,c); + if (currentContext->classScope) + { + verbose ("Dropping local context %s\n", vStringValue (currentContext->name)); + currentContext = popToken (currentContext); + } + } + vStringDelete (endTokenName); + } + else + verbose ("Unexpected current context %s\n", vStringValue (currentContext->name)); + return c; +} + + +static void createTag (tokenInfo *const token, verilogKind kind) +{ + tagEntryInfo tag; + + if (kind == K_LOCALPARAM) + kind = K_CONSTANT; + else if (kind == K_PARAMETER) + { + kind = K_CONSTANT; + // See LRM 2017 6.20.1 Parameter declaration syntax + if (currentContext->kind != K_CLASS && currentContext->kind != K_PACKAGE && !currentContext->hasParamList) + token->parameter = true; + } + Assert (kind >= 0 && kind != K_UNDEFINED && kind != K_IDENTIFIER); + Assert (vStringLength (token->name) > 0); + + /* check if a container before kind is modified by prototype */ + /* BTW should we create a context for a prototype? */ + bool container = isContainer (kind); + + /* Determine if kind is prototype */ + if (currentContext->prototype) + kind = K_PROTOTYPE; + + /* Do nothing if tag kind is disabled */ + if (! 
kindEnabled (kind)) + { + verbose ("kind disabled\n"); + return; + } + + /* Create tag */ + initTagEntry (&tag, vStringValue (token->name), kind); + tag.lineNumber = token->lineNumber; + tag.filePosition = token->filePosition; + + verbose ("Adding tag %s (kind %d)", vStringValue (token->name), kind); + if (currentContext->kind != K_UNDEFINED) + { + verbose (" to context %s\n", vStringValue (currentContext->name)); + currentContext->lastKind = kind; + tag.extensionFields.scopeKindIndex = currentContext->kind; + tag.extensionFields.scopeName = vStringValue (currentContext->name); + } + verbose ("\n"); + if (vStringLength (token->inheritance) > 0) + { + tag.extensionFields.inheritance = vStringValue (token->inheritance); + verbose ("Class %s extends %s\n", vStringValue (token->name), tag.extensionFields.inheritance); + } + + if (token->parameter) + attachParserField (&tag, false, fieldTable [F_PARAMETER].ftype, ""); + + makeTagEntry (&tag); + + if (isXtagEnabled (XTAG_QUALIFIED_TAGS) && currentContext->kind != K_UNDEFINED) + { + vString *const scopedName = vStringNew (); + + vStringCopy (scopedName, currentContext->name); + vStringPut (scopedName, '.'); + vStringCat (scopedName, token->name); + tag.name = vStringValue (scopedName); + + markTagExtraBit (&tag, XTAG_QUALIFIED_TAGS); + makeTagEntry (&tag); + + vStringDelete (scopedName); + } + + /* Push token as context if it is a container */ + if (container) + { + createContext (kind, token->name); + + /* Put found contents in context */ + verbose ("Putting tagContents: %d element(s)\n", + ptrArrayCount (tagContents)); + for (unsigned int i = 0; i < ptrArrayCount (tagContents); i++) + { + tokenInfo *content = ptrArrayItem (tagContents, i); + createTag (content, content->kind); + } + + /* Drop temporary contexts */ + if (isTempContext (currentContext)) + dropContext (); + } + + /* Clear no longer required inheritance information */ + vStringClear (token->inheritance); +} + +static int skipBlockName (tokenInfo *const 
token, int c) +{ + if (c == ':') + { + c = skipWhite (vGetc ()); + if (isWordToken (c)) + c = readWordToken (token, c); + } + return c; +} + +// begin, fork +static int processBlock (tokenInfo *const token, int c) +{ + if (c == ':') // tag an optional block identifier + { + c = skipWhite (vGetc ()); + if (isWordToken (c)) + { + c = readWordToken (token, c); + verbose ("Found block: %s\n", vStringValue (token->name)); + createTag (token, K_BLOCK); + verbose ("Current context %s\n", vStringValue (currentContext->name)); + } + } + currentContext->nestLevel++; // increment after creating a context + return c; +} + +// end, join, join_any, join_none +static int processEnd (tokenInfo *const token, int c) +{ + if (currentContext->nestLevel > 0) // for sanity check + currentContext->nestLevel--; + if (currentContext->kind == K_BLOCK && currentContext->nestLevel == 0) + dropContext (); + + c = skipBlockName (token, c); + return c; +} + +static int processPortList (tokenInfo *token, int c, bool mayPortDecl) +{ + if (c == '(') + { + c = skipWhite (vGetc ()); // skip '(' + c = tagIdentifierList (token, c, K_PORT, mayPortDecl); + if (c == ')') // sanity check + c = skipWhite (vGetc ()); + else + verbose ("Unexpected input: %c\n", c); + } + return c; +} + +static int skipParameterAssignment (int c) +{ + if (c == '#') + { + c = skipWhite (vGetc ()); + if (c == '(') + c = skipPastMatch ("()"); + } + return c; +} + +// Functions are treated differently because they may also include the type of the return value. +// Tasks are treated in the same way, although not having a return value. +// +// function [ lifetime ] function_data_type_or_implicit [ interface_identifier . | class_scope ] function_identifier [ ( [ tf_port_list ] ) ] ; +// task [ lifetime ] task_body_declaration [ interface_identifier . 
| class_scope ] task_identifier [ ( [ tf_port_list ] ) ] ; +static int processFunction (tokenInfo *const token, int c) +{ + verilogKind kind = token->kind; // K_FUNCTION or K_TASK + + /* Search for function name + * Last identifier found before a '(' or a ';' is the function name */ + while (c != '(' && c != ';' && c != EOF) + { + if (isWordToken (c)) + c = readWordToken (token, c); + else + c = skipWhite (vGetc ()); + /* skip parameter assignment of a class type + * ex. function uvm_port_base #(IF) get_if(int index=0); */ + c = skipParameterAssignment (c); + + /* Identify class type prefixes and create respective context*/ + if (isInputLanguage (Lang_systemverilog) && c == ':') + { + c = vGetc (); + if (c == ':') + { + verbose ("Found function declaration with class type %s\n", vStringValue (token->name)); + createContext (K_CLASS, token->name); + currentContext->classScope = true; + } + else + vUngetc (c); + } + } + verbose ("Found function: %s\n", vStringValue (token->name)); + createTag (token, kind); + + /* Get port list from function */ + c = skipWhite (c); + c = processPortList (token, c, false); + return c; +} + +// ( enum | union ) [ enum_base_type ] { < enum_name_declaration > } { [ ... ] } +static int processEnum (tokenInfo *const token, int c) +{ + tokenInfo* enumToken = dupToken (token); // save enum token + + /* skip enum_base_type */ + while (isWordToken (c)) + c = readWordToken (token, c); + c = skipDimension (c); + + /* Search enum elements */ + c = pushEnumNames (token, c); + + /* Skip bus width definition */ + c = skipDimension (c); + + /* Following identifiers are tag names */ + verbose ("Find enum tags. Token %s kind %d\n", vStringValue (enumToken->name), enumToken->kind); + c = tagNameList (enumToken, c, enumToken->kind); + deleteToken (enumToken); + + // Clean up the tag content list at the end of the declaration to support multiple variables + // enum { ... 
} foo, bar; + ptrArrayClear (tagContents); + return c; +} + +// [ struct | union [ tagged ] ] [ packed [ signed | unsigned ] ] { struct_union_member { struct_union_member } } { [ ... ] } +static int processStruct (tokenInfo *const token, int c) +{ + verilogKind kind = token->kind; // K_STRUCT, K_TYPEDEF, or K_NETTYPE + + /* Skip packed, signed, and unsigned */ + while (isWordToken (c)) + c = readWordToken (token, c); + + /* create a list of members */ + c = pushMembers (token, c); + + /* Skip packed_dimension */ + c = skipDimension (c); + + /* Following identifiers are tag names */ + verbose ("Find struct|union tags. Token %s kind %d\n", vStringValue (token->name), token->kind); + c = tagNameList (token, c, kind); + ptrArrayClear (tagContents); + return c; +} + +// data_declaration ::= +// [ const ] [ var ] [ static | automatic ] data_type_or_implicit list_of_variable_decl_assignments ; +// | typedef data_type type_identifier { [ ... ] } ; +// | typedef interface_instance_identifier [ ... ] . type_identifier type_identifier ; // interface based typedef +// | typedef [ enum | struct | union | class | interface class ] type_identifier ; +// | import < package_import_item > ; +// | nettype data_type net_type_identifier [ with [ class_type :: | package_identifier :: | $unit :: ] tf_identifier ] ; +// | nettype [ class_type :: | package_identifier :: | $unit :: ] net_type_identifier net_type_identifier ; +static int processTypedef (tokenInfo *const token, int c) +{ + verilogKind kindSave = token->kind; // K_TYPEDEF or K_NETTYPE + verilogKind kind = K_UNDEFINED; + bool not_used; + if (isWordToken (c)) + { + c = readWordToken (token, c); + kind = token->kind; + } + // forward typedef (LRM 6.18) is tagged as prototype + // (I don't know why...) 
+	switch (kind)
+	{
+		case K_CLASS:
+		case K_INTERFACE:
+			currentContext->prototype = true;
+			break;
+		case K_ENUM:
+		case K_STRUCT:
+			if (isWordToken (c))
+			{
+				c = readWordToken (token, c);
+				if (token->kind == K_IDENTIFIER && c == ';')
+					currentContext->prototype = true;
+			}
+			break;
+		case K_IDENTIFIER:
+			// interface based typedef
+			c = skipDimension (c);
+			if (c == '.')
+			{
+				c = skipWhite (vGetc ());
+				if (isWordToken (c))
+					c = readWordToken (token, c);
+			}
+			if (c == ';')
+				currentContext->prototype = true;
+			break;
+		default:
+			; // do nothing
+	}
+	c = processType (token, c, &kind, &not_used);
+
+	createTag (token, kindSave);
+
+	ptrArrayClear (tagContents);
+	return c;
+}
+
+// Collect the names declared in a parameter port list: # ( ... )
+//
+// c is returned untouched unless it is '#'; if the character after '#' is not
+// '(' that character is returned instead.  Inside the parentheses every
+// identifier that is directly followed by ',', ')' or '=' is duplicated,
+// marked K_CONSTANT, flagged with the current parameter/localparam state and
+// appended to tagContents.  Returns the first non-white character after ')'.
+static int processParameterList (tokenInfo *token, int c)
+{
+	bool parameter = true;	// default "parameter"
+
+	if (c != '#')
+		return c;
+	c = skipWhite (vGetc ());
+
+	if (c != '(')
+		return c;
+	c = skipWhite (vGetc ());
+
+	while (c != ')' && c != EOF)
+	{
+		if (isWordToken (c))
+		{
+			c = readWordToken (token, c);
+			verbose ("Found parameter %s\n", vStringValue (token->name));
+			if (token->kind == K_IDENTIFIER)
+			{
+				if (c == ',' || c == ')' || c == '=')	// ignore user defined type
+				{
+					tokenInfo *param = dupToken (token);
+					param->kind = K_CONSTANT;
+					param->parameter = parameter;
+					ptrArrayAdd (tagContents, param);
+					if (c == '=')
+						c = skipExpression (vGetc ());
+					else if (c == ',')
+						c = skipWhite (vGetc ());
+					else	// ')'
+						break;
+				}
+			}
+			else if (token->kind == K_PARAMETER)
+				parameter = true;	// following names are "parameter"
+			else if (token->kind == K_LOCALPARAM)
+				parameter = false;	// following names are "localparam"
+		}
+		else
+			c = skipWhite (vGetc ());
+		// unpacked array is not allowed for a parameter
+	}
+	c = skipWhite (vGetc ());	// skip ')'
+	return c;
+}
+
+// [ virtual ] class [ static | automatic ] class_identifier [ parameter_port_list ]
+// [ extends class_type [ ( list_of_arguments ) ] ] [ implements < interface_class_type > ] ;
+// interface class class_identifier [ parameter_port_list ] [ extends < interface_class_type > ] ;
+//
+// Tag a class (or interface class) with the given kind.  Words of kind
+// K_IGNORE in front of the name are skipped; if the first other word is not
+// an identifier nothing is tagged.  The word after "extends", when present,
+// is stored as the inheritance of the class tag.
+static int processClass (tokenInfo *const token, int c, verilogKind kind)
+{
+	tokenInfo *classToken;
+
+	/* Get identifiers */
+	while (isWordToken (c))
+	{
+		c = readWordToken (token, c);
+		// skip static or automatic
+		if (token->kind != K_IGNORE)
+			break;
+	}
+
+	if (token->kind != K_IDENTIFIER)
+	{
+		verbose ("Unexpected input: class name is expected.\n");
+		return c;
+	}
+
+	/* save token */
+	classToken = dupToken (token);
+
+	/* Find class parameters list */
+	c = processParameterList (token, c);
+
+	/* Search for inheritance information */
+	if (isWordToken (c))
+	{
+		c = readWordToken (token, c);
+		if (strcmp (vStringValue (token->name), "extends") == 0)
+		{
+			// Record a base class only when a word really follows "extends";
+			// otherwise token still holds the keyword itself and must not
+			// be reported as the inherited class.
+			if (isWordToken (c))
+			{
+				c = readWordToken (token, c);
+				vStringCopy (classToken->inheritance, token->name);
+				verbose ("Inheritance %s\n", vStringValue (classToken->inheritance));
+			}
+		}
+	}
+	// process implements: FIXME
+
+	createTag (classToken, kind);
+	deleteToken (classToken);
+	ptrArrayClear (tagContents);
+	return c;
+}
+
+// constraint_declaration ::= [ static ] constraint constraint_identifier '{' { constraint_block_item } '}'
+// constraint_prototype ::= [ extern | pure ] [ static ] constraint constraint_identifier ;
+//
+// Tag the word following "constraint": K_CONSTRAINT when a '{' block follows
+// (the block is skipped), K_PROTOTYPE otherwise.
+static int processConstraint (tokenInfo *const token, int c)
+{
+	verilogKind kind;
+	if (isWordToken (c))
+		c = readWordToken (token, c);
+	if (c == '{')
+	{
+		c = skipPastMatch ("{}");
+		kind = K_CONSTRAINT;
+	}
+	else
+		kind = K_PROTOTYPE;
+	createTag (token, kind);
+	return c;
+}
+
+static int processDefine (tokenInfo *const token, int c)
+{
+	/* Bug #961001: Verilog compiler directives are line-based.
*/
+	if (isWordToken (c))
+	{
+		c = readWordTokenNoSkip (token, c);
+		createTag (token, K_CONSTANT);
+	}
+	c = skipToNewLine (c);
+	c = skipWhite (c);
+	return c;
+}
+
+// immediate_assertion_statement ::=
+//   ( assert | assume | cover ) [ #0 | final ] '(' expression ')' block
+// concurrent_assertion_statement ::=
+//   ( assert | assume ) property ( property_spec ) action_block
+//   | expect ( property_spec ) action_block  # ignored : processed the same way as "if"
+//   | cover property ( property_spec ) statement_or_null
+//   | cover sequence ( [clocking_event ] [ disable iff ( expression_or_dist ) ] sequence_expr ) statement_or_null
+//   | restrict property ( property_spec ) ;
+//
+// When a block name is pending in the current context it is used as the name
+// of a K_ASSERTION tag and then cleared.  Afterwards one optional word, an
+// optional delay and an optional parenthesized expression are skipped.
+static int processAssertion (tokenInfo *const token, int c)
+{
+	const bool labelled = vStringLength (currentContext->blockName) > 0;
+
+	if (labelled)
+	{
+		// the pending block label becomes the name of the assertion
+		vStringCopy (token->name, currentContext->blockName);
+		vStringClear (currentContext->blockName);	// a label must not be reused
+		createTag (token, K_ASSERTION);
+	}
+
+	// skip final | property | sequence
+	if (isWordToken (c))
+	{
+		c = readWordToken (token, c);
+	}
+
+	// skip #0
+	c = skipDelay (token, c);
+
+	// skip ( ... )
+	return (c == '(') ? skipPastMatch ("()") : c;
+}
+
+// non-ANSI type
+// ( module | interface | program ) [ static | automatic ] identifier { package_import_declaration } [ parameter_port_list ] ( port { , port } ) ;
+// ANSI type
+// ( module | interface | program ) [ static | automatic ] identifier { package_import_declaration } [ parameter_port_list ] [ ( [ < { (* ...
*) } ansi_port_declaration > ] ) ] ;
+//
+// interface class class_identifier [ parameter_port_list ] [ extends < interface_class_type > ] ;
+//
+// Tag a module, interface or program with the kind of the keyword token that
+// was current on entry.  "interface class" is forwarded to processClass() as
+// K_IFCLASS.  Import declarations after the name are skipped, names found in
+// a parameter port list are tagged as K_CONSTANT, and the port list is then
+// handed to processPortList().
+static int processDesignElementL (tokenInfo *const token, int c)
+{
+	verilogKind kind = token->kind;	// kind of the keyword, saved before token is reused
+
+	while (isWordToken (c))
+	{
+		c = readWordToken (token, c);
+		// interface class
+		if (token->kind == K_CLASS)
+			return processClass (token, c, K_IFCLASS);
+		// skip static or automatic
+		else if (token->kind != K_IGNORE)
+			break;
+	}
+	if (token->kind == K_IDENTIFIER)
+		createTag (token, kind);	// identifier
+
+	// skip package_import_declaration
+	while (isWordToken (c))
+	{
+		c = readWordToken (token, c);
+		if (token->kind == K_IMPORT)
+		{
+			c = skipToSemiColon (c);
+			c = skipWhite (vGetc ());	// skip semicolon
+		}
+		else
+		{
+			verbose ("Unexpected input\n");
+			return c;
+		}
+	}
+	if (c == '#')	// parameter_port_list
+	{
+		c = processParameterList (token, c);
+
+		/* Put found parameters in context */
+		// NOTE(review): %u assumes ptrArrayCount() returns unsigned int, as the
+		// unsigned loop index below suggests; confirm against ptrarray.h.
+		verbose ("Putting parameters: %u element(s)\n",
+				ptrArrayCount (tagContents));
+		for (unsigned int i = 0; i < ptrArrayCount (tagContents); i++)
+		{
+			tokenInfo *content = ptrArrayItem (tagContents, i);
+			createTag (content, K_CONSTANT);
+		}
+		ptrArrayClear (tagContents);
+		// disable parameter property on parameter declaration statement
+		currentContext->hasParamList = true;
+	}
+	// Process ANSI/non-ANSI port list in main loop
+	c = processPortList (token, c, true);
+	return c;
+}
+
+// ( checker | property | sequence ) identifier [ ( [ port_list ] ) ] ;
+// covergroup identifier [ ( [ port_list ] ) ] [ coverage_event ] ;
+//   coverage_event ::= clocking_event | with function sample ( ... ) | @@( ...
)
+// package identifier ;
+// modport < identifier ( < ports_declaration > ) > ;
+// [ default | global ] clocking [ identifier ] ( @ identifier | @ ( event_expression ) )
+//
+// Tag the word that follows the keyword with the keyword's kind.  Nothing is
+// tagged when no word follows.  A following '(' is skipped for a modport and
+// handed to processPortList() otherwise; a following '@' event is skipped.
+static int processDesignElementS (tokenInfo *const token, int c)
+{
+	const verilogKind elementKind = token->kind;	// keyword kind, read before token is reused
+
+	if (!isWordToken (c))
+		return c;
+
+	c = readWordToken (token, c);
+	createTag (token, elementKind);	// identifier
+
+	/* Get port list if required */
+	if (c == '(')	// port_list
+	{
+		// the port list of a modport is ignored
+		c = (elementKind == K_MODPORT)
+			? skipPastMatch ("()")
+			: processPortList (token, c, false);
+	}
+
+	// skip clocking_event for clocking block or coverage_event for covergroup
+	// "with function sample ()" is processed in the main loop
+	if (c == '@')
+	{
+		c = skipClockEvent (token, c);
+	}
+	return c;
+}
+
+// Skip a delay introduced by '#': "#( ... )", a "##" cycle delay (skipped up
+// to the semicolon), or a time literal.  c is returned untouched when it is
+// not '#'.
+static int skipDelay (tokenInfo* token, int c)
+{
+	if (c != '#')
+		return c;
+
+	c = skipWhite (vGetc ());
+	if (c == '(')
+		return skipPastMatch ("()");
+	if (c == '#')
+	{
+		// a dirty hack for "x ##delay1 y[*min:max];"
+		return skipToSemiColon (vGetc ());
+	}
+
+	// time literals
+	while (isIdentifierCharacter (c) || c == '.')
+	{
+		c = vGetc ();
+	}
+	return skipWhite (c);
+}
+
+static int skipClockEvent (tokenInfo* token, int c)
+{
+	if (c == '@')
+	{
+		c = skipWhite (vGetc ());
+		// for @@ ( ...
) : coverage_event
+		if (c == '@')
+			c = skipWhite (vGetc ());
+		if (c == '(')
+			c = skipPastMatch ("()");
+		else if (isWordToken (c))
+			c = readWordToken (token, c);
+	}
+	return c;
+}
+
+// Create a list of enum elements.
+// When c is '{', every element name up to the matching '}' is duplicated as
+// a K_CONSTANT token and appended to tagContents; ranges and value
+// assignments are skipped.  Returns early on a non-word character.
+static int pushEnumNames (tokenInfo* token, int c)
+{
+	if (c == '{')
+	{
+		c = skipWhite (vGetc ());
+		while (c != '}' && c != EOF)
+		{
+			if (!isWordToken (c))
+			{
+				verbose ("Unexpected input: %c\n", c);
+				return c;
+			}
+			c = readWordToken (token, c);
+
+			token->kind = K_CONSTANT;
+			ptrArrayAdd (tagContents, dupToken (token));
+			verbose ("Pushed enum element \"%s\"\n", vStringValue (token->name));
+
+			/* Skip element ranges */
+			/* TODO Implement element ranges */
+			c = skipDimension (c);
+
+			/* Skip value assignments */
+			if (c == '=')
+				c = skipExpression (vGetc ());
+
+			/* Skip comma */
+			if (c == ',')
+				c = skipWhite (vGetc ());
+		}
+		c = skipWhite (vGetc ());	// skip '}'
+	}
+	return c;
+}
+
+// create a list of struct/union members
+// When c is '{', each member declaration up to the matching '}' is parsed
+// with processType() and every declared name is duplicated as a K_MEMBER
+// token and appended to tagContents.
+static int pushMembers (tokenInfo* token, int c)
+{
+	if (c == '{')
+	{
+		c = skipWhite (vGetc ());
+		while (c != '}' && c != EOF)
+		{
+			verilogKind kind = K_UNDEFINED;	// set kind of context for processType()
+			bool not_used;	// receives the "with" flag, which is ignored here
+			if (!isWordToken (c))
+			{
+				verbose ("Unexpected input: %c\n", c);
+				return c;
+			}
+			c = readWordToken (token, c);
+
+			c = processType (token, c, &kind, &not_used);
+			while (true)
+			{
+				token->kind = K_MEMBER;
+				ptrArrayAdd (tagContents, dupToken (token));
+				verbose ("Pushed struct/union member \"%s\"\n", vStringValue (token->name));
+
+				/* Skip unpacked dimensions */
+				c = skipDimension (c);
+
+				/* Skip value assignments */
+				if (c == '=')
+					c = skipExpression (vGetc ());
+
+				if (c != ',' || c == EOF)
+					break;	// should be ';'
+
+				c = skipWhite (vGetc ());	// skip ','
+				if (isWordToken (c))
+					c = readWordToken (token, c);
+				else
+				{
+					verbose ("Unexpected input.\n");
+					break;
+				}
+			}
+			/* Skip semicolon */
+			if (c == ';')
+				c = skipWhite (vGetc ());
+			/* End of one member declaration */
+		}
+		c = skipWhite (vGetc ());	//
skip '}'
+	}
+	return c;
+}
+
+// Skip the words, dimensions, delays and brace blocks that make up a type.
+// input
+//   kind: kind of context
+// output
+//   kind: kind of type
+//   token: identifier token (unless K_IDENTIFIER nor K_UNDEFINED)
+//   with: set to true when the scan stopped on the "with" keyword
+static int processType (tokenInfo* token, int c, verilogKind* kind, bool* with)
+{
+	verilogKind actualKind = K_UNDEFINED;
+
+	*with = false;
+	do
+	{
+		c = skipDimension (c);
+		c = skipDelay (token, c);	// class parameter #(...)
+		if (c == '{')	// skip enum, struct, or union member
+		{
+			if (*kind == K_ENUM)
+				c = pushEnumNames (token, c);
+			else if (*kind == K_STRUCT)
+				c = pushMembers (token, c);
+			else	// for a nested structure
+				c = skipPastMatch ("{}");
+		}
+		c = skipDimension (c);
+		c = skipMacro (c, token);
+
+		// break on ',', ';', ')', '}', or other unexpected characters
+		if (!isWordToken (c))
+			break;
+
+		// keep a copy so the previous word can be restored on "with"
+		tokenInfo *tokenSaved = dupToken (token);
+		c = readWordToken (token, c);
+		// break on "with"
+		if (token->kind == K_WITH)
+		{
+			swapToken (token, tokenSaved);
+			deleteToken (tokenSaved);
+			*with = true;	// inform to caller
+			break;
+		}
+		deleteToken (tokenSaved);
+
+		// fix kind of user defined type
+		if (*kind == K_IDENTIFIER)
+		{
+			const verilogKind wordKind = token->kind;
+
+			if (wordKind == K_NET || wordKind == K_REGISTER || wordKind == K_PORT)
+			{
+				actualKind = wordKind;
+			}
+			else if (wordKind == K_IDENTIFIER)
+			{	// identifier of a user defined type
+				*kind = K_REGISTER;	// FIXME: consider kind of the user defined type
+				break;
+			}
+			else
+			{
+				verbose ("Unexpected input\n");	// FIXME: x dist {}, with
+				break;
+			}
+		}
+	} while (c != '`' && c != EOF);	// break on compiler directive
+
+	// skip unpacked dimension (or packed dimension after type-words)
+	c = skipDimension (skipWhite (c));
+
+	if (*kind == K_UNDEFINED && *kind != K_PORT)
+		*kind = actualKind;
+	return c;
+}
+
+// class_type ::=
+//   ps_class_identifier [ # ( ... ) ] { :: class_identifier [ # ( ... ) ] }
+// "interface_identifier ."
is also handled
+//
+// Consumes "# ( ... )", ":: identifier" and ". identifier" suffixes for as
+// long as c is '#', ':' or '.'.  A single ':' is pushed back and ':' is
+// returned; "##" is skipped up to the semicolon.
+static int skipClassType (tokenInfo* token, int c)
+{
+	while (c == '#' || c == ':' || c == '.')
+	{
+		const int lead = c;	// the character that selected this round
+
+		c = skipWhite (vGetc ());
+		if (lead == '#')
+		{
+			// a dirty hack for "x ##delay1 y[*min:max];"
+			if (c == '#')
+				return skipToSemiColon (vGetc ());
+			c = skipPastMatch ("()");
+			continue;
+		}
+		if (lead == ':')
+		{
+			if (c != ':')
+			{
+				verbose ("Unexpected input.\n");
+				vUngetc (c);
+				return ':';
+			}
+			c = skipWhite (vGetc ());
+		}
+		// after "::" or "." (interface_identifier .) read the next word, if any
+		if (isWordToken (c))
+			c = readWordToken (token, c);
+	}
+	return c;
+}
+
+// Tag a list of identifiers
+// data_type :: =
+//   ...
+//   | virtual [ interface ] identifier [ # ( [ ... ] ) ] [ . identifier ]
+//   | [ class_type :: | identifier :: | $unit :: ] identifier { [ ... ] }
+//   | [ identifier :: | $unit :: ] identifier [ # ( ... ) ] { :: identifier [ # ( ... ) ] }
+//   | ...
+//
+// mayPortDecl: may be a ANSI port declaration. true for module, interface, or program.
+static int tagIdentifierList (tokenInfo *const token, int c, verilogKind kind, bool mayPortDecl)
+{
+	bool first_port = true;
+	bool enableTag = true;
+	verilogKind localKind;
+	bool not_used;
+
+	while (c != ')' && c != EOF)	// skip empty port, "()"
+	{
+		// skip attribute_instance: (* ... *)
+		if (c == '(')
+			c = skipPastMatch ("()");
+
+		// skip port direction, "virtual", or "interface"
+		while (isWordToken (c))
+		{
+			c = readWordToken (token, c);
+			if (token->kind == K_PORT || token->kind == K_IGNORE || token->kind == K_INTERFACE)
+				mayPortDecl = false;	// now never be a non-ANSI port
+			else
+				break;
+		}
+		if (token->kind == K_IDENTIFIER)
+			c = skipClassType (token, c);
+		c = skipMacro (c, token);	// `ifdef, `else, `endif, etc.
(between identifiers)
+
+		if (isWordToken (c))
+		{
+			c = readWordToken (token, c);
+			if (token->kind == K_IDENTIFIER)
+			{
+				mayPortDecl = false;
+				c = skipClassType (token, c);
+			}
+		}
+		// avoid tagging enum and struct items:
+		// K_PORT as context kind keeps processType() from collecting members
+		localKind = token->kind == K_ENUM || token->kind == K_STRUCT ? K_PORT : token->kind;
+		c = processType (token, c, &localKind, &not_used);
+
+		// LRM 23.2.2.3 Rules for determining port kind, data type, and direction
+		// If the direction, port kind, and data type are all omitted for
+		// the first port in the port list, ... non-ANSI style, ...
+		if (mayPortDecl && first_port)
+		{
+			first_port = false;
+			if (localKind == K_IDENTIFIER)
+				enableTag = false;	// don't tag for non-ANSI port
+		}
+		if (enableTag && token->kind == K_IDENTIFIER)
+			createTag (token, kind);
+
+		if (c == '=')
+			c = skipExpression (vGetc ());
+
+		c = skipMacro (c, token);	// `ifdef, `else, `endif, etc. (before comma)
+		if (c != ',' || c == EOF)
+			break;
+		c = skipWhite (vGetc ());	// skip ','
+		c = skipMacro (c, token);	// `ifdef, `else, `endif, etc.
(after comma)
+	}
+	return c;
+}
+
+// Tag the names of a declaration list with the given kind.
+// processType() may replace both token and kind for each name.  A name that
+// is followed by an optional dimension and a parenthesized list which ends
+// at ';' or ',' is tagged as K_INSTANCE instead.
+static int tagNameList (tokenInfo* token, int c, verilogKind kind)
+{
+	c = skipClassType (token, c);
+	if (c == ':' || c == ';')	// ## (cycle delay) or unexpected input
+	{
+		return c;
+	}
+
+	// skip drive|charge strength or type_reference, dimensions, and delay for net
+	if (c == '(')
+	{
+		c = skipPastMatch ("()");
+	}
+	c = skipDimension (c);
+	if (c == '.')
+	{
+		return c;	// foo[...].bar = ..;
+	}
+	c = skipDelay (token, c);
+
+	while (c != EOF)
+	{
+		bool sawWith = false;
+		c = processType (token, c, &kind, &sawWith);	// update token and kind
+
+		if (c == '=' || c == ',' || c == ';' || c == ')' || c == '`' || sawWith)
+		{
+			// ignore an empty token or procedural assignment: foo = bar;
+			if (kind != K_UNDEFINED && kind != K_IDENTIFIER && token->kind != K_UNDEFINED)
+				createTag (token, kind);
+			if (c == '=')
+				c = skipExpression (c);
+		}
+		else if (c == '(' || c == '[')	// should be instance
+		{
+			c = skipDimension (c);	// name_of_instance {unpacked_dimension}
+			c = skipPastMatch ("()");	// list_of_port_connections
+
+			// without the next "if" clause, an instance named `add_t would be
+			// reported for the following example
+			//   var `add_t(foo) = '0;
+			if (c == ';' || c == ',')
+			{
+				verbose ("find instance: %s with kind %s\n", vStringValue (token->name), getNameForKind (K_INSTANCE));
+				createTag (token, K_INSTANCE);
+			}
+		}
+		c = skipMacro (c, token);	// `ifdef, `else, `endif, etc. (before comma)
+		if (c != ',')	// also leaves the loop on EOF
+			break;
+		c = skipWhite (vGetc ());	// skip ','
+		c = skipMacro (c, token);	// `ifdef, `else, `endif, etc.
(after comma)
+	}
+	return c;
+}
+
+// Dispatch on the kind of the word token just read and run the matching
+// handler.  c is the character following the token; the character following
+// whatever the handler consumed is returned.
+static int findTag (tokenInfo *const token, int c)
+{
+	verbose ("Checking token %s of kind %d\n", vStringValue (token->name), token->kind);
+
+	switch (token->kind)
+	{
+		case K_CONSTANT:
+		case K_EVENT:
+		case K_LOCALPARAM:
+		case K_NET:
+		case K_PARAMETER:
+		case K_PORT:
+		case K_REGISTER:
+		{
+			const bool clockingItem =
+				token->kind == K_PORT && currentContext->kind == K_CLOCKING;
+
+			if (clockingItem)
+				c = skipToSemiColon (c);	// clocking items are not port definitions
+			else
+				c = tagNameList (token, c, token->kind);
+			break;
+		}
+		case K_IDENTIFIER:
+			if (c == '[')	// for a case label foo[x]:
+				c = skipPastMatch ("[]");
+
+			if (c != ':')	// a ':' marks a label, which is left to the caller
+			{
+				if (c == ',' || c == '{')	// "foo, ..." or "coverpoint foo { ... }"
+					c = skipWhite (vGetc ());
+				else if (c == '(')	// task, function, or method call
+					c = skipPastMatch ("()");
+				else if (c == '=')	// assignment
+					c = skipExpression (skipWhite (vGetc ()));
+				else	/* user defined type */
+					c = tagNameList (token, c, token->kind);
+			}
+			break;
+		case K_CLASS:
+			c = processClass (token, c, K_CLASS);
+			break;
+		case K_TYPEDEF:
+		case K_NETTYPE:
+			c = processTypedef (token, c);
+			break;
+		case K_ENUM:
+			c = processEnum (token, c);
+			break;
+		case K_STRUCT:
+			c = processStruct (token, c);
+			break;
+		case K_PROTOTYPE:
+		case K_IMPORT:
+		case K_WITH:
+			currentContext->prototype = true;
+			break;
+
+		case K_INTERFACE:
+		case K_MODULE:
+		case K_PROGRAM:
+			c = processDesignElementL (token, c);
+			break;
+		case K_CHECKER:
+		case K_CLOCKING:
+		case K_COVERGROUP:
+		case K_MODPORT:
+		case K_PACKAGE:
+		case K_PROPERTY:
+		case K_SEQUENCE:
+			c = processDesignElementS (token, c);
+			break;
+		case K_END_DE:
+			c = dropEndContext (token, c);
+			break;
+		case K_BLOCK:
+			c = processBlock (token, c);
+			break;
+		case K_END:
+			c = processEnd (token, c);
+			break;
+		case K_FUNCTION:
+		case K_TASK:
+			c = processFunction (token, c);
+			break;
+		case K_ASSERTION:
+			c = processAssertion (token, c);
+			break;
+		case K_CONSTRAINT:
+			c =
processConstraint (token, c);
+			break;
+
+		case K_DEFINE:
+			c = processDefine (token, c);
+			break;
+
+		case K_IGNORE:
+			break;
+		default:
+			verbose ("Unexpected kind->token %d\n", token->kind);
+	}
+	return c;
+}
+
+// Parser entry point shared by Verilog and SystemVerilog.
+// Reads the input character by character: punctuation is handled here,
+// words are classified by readWordTokenNoSkip() and passed to findTag().
+static void findVerilogTags (void)
+{
+	tokenInfo *const token = newToken ();
+	int c = skipWhite (vGetc ());
+	currentContext = newToken ();
+	fieldTable = isInputLanguage (Lang_verilog) ? VerilogFields : SystemVerilogFields;
+	ptrArrayClear (tagContents);
+
+	while (c != EOF)
+	{
+		switch (c)
+		{
+			case ':':
+				/* Remember the last word as the current block name whenever
+				 * a ':' is found; tag kinds that need a label read it later */
+				vStringCopy (currentContext->blockName, token->name);
+				c = skipWhite (vGetc ());
+				break;
+			case ';':
+				/* A prototype has no end statement, so its context is
+				 * dropped here */
+				if (currentContext->scope && currentContext->scope->prototype)
+				{
+					dropContext ();
+				}
+
+				/* Prototypes end at the end of statement */
+				currentContext->prototype = false;
+				c = skipWhite (vGetc ());
+				break;
+			case '(':	// ignore locally declared variables in a for-loop (LRM 12.7.1)
+				c = skipPastMatch ("()");
+				break;
+			case '{':
+				c = skipPastMatch ("{}");
+				break;
+			case '#':
+				c = skipDelay (token, c);
+				break;
+			case '@':
+				c = skipClockEvent (token, c);
+				break;
+			case '"':
+				c = skipString (c);
+				break;
+			default:
+				if (isWordToken (c))
+				{
+					c = readWordTokenNoSkip (token, c);
+					if (token->kind == K_DIRECTIVE)
+					{
+						// Skip compiler directives which are line-based.
+						c = skipToNewLine (c);
+						c = skipWhite (c);
+					}
+					else if (token->kind != K_UNDEFINED)
+						c = findTag (token, skipWhite (c));
+				}
+				else
+					c = skipWhite (vGetc ());
+		}
+	}
+	deleteToken (token);
+	pruneTokens (currentContext);
+	currentContext = NULL;
+}
+
+// Build the parser definition for Verilog: files with the "v" extension,
+// using the Verilog kind and field tables.
+extern parserDefinition* VerilogParser (void)
+{
+	static const char *const extensions [] = { "v", NULL };
+	parserDefinition* const parser = parserNew ("Verilog");
+
+	parser->extensions = extensions;
+	parser->kindTable = VerilogKinds;
+	parser->kindCount = ARRAY_SIZE (VerilogKinds);
+	parser->fieldTable = VerilogFields;
+	parser->fieldCount = ARRAY_SIZE (VerilogFields);
+	parser->initialize = initializeVerilog;
+	parser->parser = findVerilogTags;
+	return parser;
+}
+
+// Build the parser definition for SystemVerilog: files with the "sv", "svh"
+// or "svi" extension, using the SystemVerilog kind and field tables and the
+// same tag finder as Verilog.
+extern parserDefinition* SystemVerilogParser (void)
+{
+	static const char *const extensions [] = { "sv", "svh", "svi", NULL };
+	parserDefinition* const parser = parserNew ("SystemVerilog");
+
+	parser->extensions = extensions;
+	parser->kindTable = SystemVerilogKinds;
+	parser->kindCount = ARRAY_SIZE (SystemVerilogKinds);
+	parser->fieldTable = SystemVerilogFields;
+	parser->fieldCount = ARRAY_SIZE (SystemVerilogFields);
+	parser->initialize = initializeSystemVerilog;
+	parser->parser = findVerilogTags;
+	return parser;
+}
diff --git a/src/tagmanager/tm_parser.c b/src/tagmanager/tm_parser.c index 2a317ceab3..0c201a0d46 100644 --- a/src/tagmanager/tm_parser.c +++ b/src/tagmanager/tm_parser.c @@ -253,7 +253,10 @@ static TMParserMapEntry map_TCL[] = { }; static TMParserMapEntry map_SH[] = { + {'a', tm_tag_undef_t}, {'f', tm_tag_function_t}, + {'s', tm_tag_undef_t}, + {'h', tm_tag_undef_t}, }; static TMParserMapEntry map_D[] = { @@ -476,12 +479,21 @@ static TMParserMapEntry map_VERILOG[] = { {'p', tm_tag_variable_t}, {'r', tm_tag_variable_t}, {'t', tm_tag_function_t}, + {'b', tm_tag_undef_t}, + {'i', tm_tag_undef_t}, }; static TMParserMapEntry map_R[] = { {'f', tm_tag_function_t}, {'l', tm_tag_other_t}, {'s', tm_tag_other_t}, + {'g', tm_tag_undef_t}, + {'v', tm_tag_undef_t}, + {'z', tm_tag_undef_t}, + {'c',
tm_tag_undef_t}, + {'L', tm_tag_undef_t}, + {'d', tm_tag_undef_t}, + {'n', tm_tag_undef_t}, }; static TMParserMapEntry map_COBOL[] = { @@ -1102,11 +1114,13 @@ gboolean tm_parser_has_full_scope(TMParserType lang) case TM_PARSER_PHP: case TM_PARSER_POWERSHELL: case TM_PARSER_PYTHON: + case TM_PARSER_R: case TM_PARSER_RUBY: case TM_PARSER_RUST: case TM_PARSER_SQL: case TM_PARSER_TXT2TAGS: case TM_PARSER_VALA: + case TM_PARSER_VERILOG: case TM_PARSER_ZEPHIR: return TRUE; diff --git a/tests/ctags/bug1111214-j-chan.v.tags b/tests/ctags/bug1111214-j-chan.v.tags index 322eecf04b..09c5c5edcc 100644 --- a/tests/ctags/bug1111214-j-chan.v.tags +++ b/tests/ctags/bug1111214-j-chan.v.tags @@ -1,4 +1,4 @@ # format=tagmanager -insigÌ16384Ö0 -outsigÌ16384Ö0 +insigÌ16384ÎtopÖ0 +outsigÌ16384ÎtopÖ0 topÌ1Ö0 diff --git a/tests/ctags/traffic_signal.v.tags b/tests/ctags/traffic_signal.v.tags index 73d667a017..18a0b1e80e 100644 --- a/tests/ctags/traffic_signal.v.tags +++ b/tests/ctags/traffic_signal.v.tags @@ -1,14 +1,14 @@ # format=tagmanager -amberÌ16384Ö0 -amber_ticsÌ16384Ö0 -clockÌ16384Ö0 -colorÌ16384Ö0 -greenÌ16384Ö0 -green_ticsÌ16384Ö0 -lightÌ16Ö0 -offÌ16384Ö0 -onÌ16384Ö0 -redÌ16384Ö0 -red_ticsÌ16384Ö0 -ticsÌ16384Ö0 +amberÌ16384ÎtrafficÖ0 +amber_ticsÌ16384ÎtrafficÖ0 +clockÌ16384ÎtrafficÖ0 +colorÌ16384Îtraffic.lightÖ0 +greenÌ16384ÎtrafficÖ0 +green_ticsÌ16384ÎtrafficÖ0 +lightÌ16ÎtrafficÖ0 +offÌ16384ÎtrafficÖ0 +onÌ16384ÎtrafficÖ0 +redÌ16384ÎtrafficÖ0 +red_ticsÌ16384ÎtrafficÖ0 +ticsÌ16384Îtraffic.lightÖ0 trafficÌ1Ö0