From 328bc8bbede99e2b659733fbe3640089f1bc355b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Mon, 8 Nov 2021 20:47:05 +0100 Subject: [PATCH 01/22] Remove the geany_ prefix from selected parsers and use the corresponding uctags version --- ctags/Makefile.am | 32 +- ctags/parsers/basic.c | 206 +++ ctags/parsers/{geany_diff.c => diff.c} | 93 +- ctags/parsers/geany_basic.c | 252 ---- ctags/parsers/geany_go.c | 831 ---------- ctags/parsers/geany_lua.c | 120 -- ctags/parsers/geany_nsis.c | 142 -- ctags/parsers/geany_perl.c | 380 ----- ctags/parsers/geany_ruby.c | 564 ------- ctags/parsers/go.c | 1416 ++++++++++++++++++ ctags/parsers/{geany_html.c => html.c} | 168 ++- ctags/parsers/{geany_jscript.c => jscript.c} | 462 ++++-- ctags/parsers/lua.c | 257 ++++ ctags/parsers/{geany_make.c => make.c} | 171 ++- ctags/parsers/make.h | 34 + ctags/parsers/nsis.c | 394 +++++ ctags/parsers/{geany_objc.c => objc.c} | 333 +++- ctags/parsers/{geany_pascal.c => pascal.c} | 109 +- ctags/parsers/perl.c | 738 +++++++++ ctags/parsers/perl.h | 44 + ctags/parsers/{geany_php.c => php.c} | 806 +++++++--- ctags/parsers/ruby.c | 1108 ++++++++++++++ ctags/parsers/{geany_rust.c => rust.c} | 95 +- ctags/parsers/{geany_sql.c => sql.c} | 1338 ++++++++++++----- 24 files changed, 6918 insertions(+), 3175 deletions(-) create mode 100644 ctags/parsers/basic.c rename ctags/parsers/{geany_diff.c => diff.c} (53%) delete mode 100644 ctags/parsers/geany_basic.c delete mode 100644 ctags/parsers/geany_go.c delete mode 100644 ctags/parsers/geany_lua.c delete mode 100644 ctags/parsers/geany_nsis.c delete mode 100644 ctags/parsers/geany_perl.c delete mode 100644 ctags/parsers/geany_ruby.c create mode 100644 ctags/parsers/go.c rename ctags/parsers/{geany_html.c => html.c} (72%) rename ctags/parsers/{geany_jscript.c => jscript.c} (84%) create mode 100644 ctags/parsers/lua.c rename ctags/parsers/{geany_make.c => make.c} (55%) create mode 100644 ctags/parsers/make.h create mode 100644 
ctags/parsers/nsis.c rename ctags/parsers/{geany_objc.c => objc.c} (74%) rename ctags/parsers/{geany_pascal.c => pascal.c} (73%) create mode 100644 ctags/parsers/perl.c create mode 100644 ctags/parsers/perl.h rename ctags/parsers/{geany_php.c => php.c} (62%) create mode 100644 ctags/parsers/ruby.c rename ctags/parsers/{geany_rust.c => rust.c} (88%) rename ctags/parsers/{geany_sql.c => sql.c} (64%) diff --git a/ctags/Makefile.am b/ctags/Makefile.am index c0cd8eaeb8..0e22cbca85 100644 --- a/ctags/Makefile.am +++ b/ctags/Makefile.am @@ -15,43 +15,45 @@ parsers = \ parsers/geany_abc.c \ parsers/geany_asciidoc.c \ parsers/geany_asm.c \ - parsers/geany_basic.c \ + parsers/basic.c \ parsers/geany_bibtex.c \ parsers/geany_c.c \ parsers/geany_cobol.c \ parsers/geany_iniconf.c \ parsers/geany_css.c \ - parsers/geany_diff.c \ + parsers/diff.c \ parsers/geany_docbook.c \ parsers/geany_erlang.c \ parsers/geany_flex.c \ parsers/geany_fortran.c \ - parsers/geany_go.c \ + parsers/go.c \ parsers/geany_haskell.c \ parsers/geany_haxe.c \ - parsers/geany_html.c \ - parsers/geany_jscript.c \ + parsers/html.c \ + parsers/jscript.c \ parsers/geany_json.c \ parsers/geany_julia.c \ parsers/geany_lcpp.c \ parsers/geany_lcpp.h \ - parsers/geany_lua.c \ - parsers/geany_make.c \ + parsers/lua.c \ + parsers/make.c \ + parsers/make.h \ parsers/geany_markdown.c \ parsers/geany_matlab.c \ - parsers/geany_nsis.c \ - parsers/geany_objc.c \ - parsers/geany_pascal.c \ - parsers/geany_perl.c \ - parsers/geany_php.c \ + parsers/nsis.c \ + parsers/objc.c \ + parsers/pascal.c \ + parsers/perl.c \ + parsers/perl.h \ + parsers/php.c \ parsers/geany_powershell.c \ parsers/geany_python.c \ parsers/geany_r.c \ parsers/geany_rst.c \ - parsers/geany_ruby.c \ - parsers/geany_rust.c \ + parsers/ruby.c \ + parsers/rust.c \ parsers/geany_sh.c \ - parsers/geany_sql.c \ + parsers/sql.c \ parsers/geany_tcl.c \ parsers/geany_tex.c \ parsers/geany_txt2tags.c \ diff --git a/ctags/parsers/basic.c b/ctags/parsers/basic.c 
new file mode 100644 index 0000000000..4a6cffa687 --- /dev/null +++ b/ctags/parsers/basic.c @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2000-2006, Darren Hiebert, Elias Pschernig + * + * This source code is released for free distribution under the terms of the + * GNU General Public License version 2 or (at your option) any later version. + * + * This module contains functions for generating tags for BlitzBasic + * (BlitzMax), PureBasic and FreeBasic language files. For now, this is kept + * quite simple - but feel free to ask for more things added any time - + * patches are of course most welcome. + */ + +/* + * INCLUDE FILES + */ +#include "general.h" /* must always come first */ + +#include + +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* + * DATA DEFINITIONS + */ +typedef enum { + K_CONST, + K_FUNCTION, + K_LABEL, + K_TYPE, + K_VARIABLE, + K_ENUM +} BasicKind; + +typedef struct { + char const *token; + BasicKind kind; + int skip; +} KeyWord; + +static kindDefinition BasicKinds[] = { + {true, 'c', "constant", "constants"}, + {true, 'f', "function", "functions"}, + {true, 'l', "label", "labels"}, + {true, 't', "type", "types"}, + {true, 'v', "variable", "variables"}, + {true, 'g', "enum", "enumerations"} +}; + +static KeyWord blitzbasic_keywords[] = { + {"const", K_CONST, 0}, + {"global", K_VARIABLE, 0}, + {"dim", K_VARIABLE, 0}, + {"function", K_FUNCTION, 0}, + {"type", K_TYPE, 0}, + {NULL, 0, 0} +}; + +static KeyWord purebasic_keywords[] = { + {"newlist", K_VARIABLE, 0}, + {"global", K_VARIABLE, 0}, + {"dim", K_VARIABLE, 0}, + {"procedure", K_FUNCTION, 0}, + {"interface", K_TYPE, 0}, + {"structure", K_TYPE, 0}, + {NULL, 0, 0} +}; + +static KeyWord freebasic_keywords[] = { + {"const", K_CONST, 0}, + {"dim as", K_VARIABLE, 1}, + {"dim", K_VARIABLE, 0}, + {"common", K_VARIABLE, 0}, + {"function", K_FUNCTION, 0}, + {"sub", K_FUNCTION, 0}, + {"private sub", K_FUNCTION, 0}, + {"public sub", K_FUNCTION, 0}, + {"private function", 
K_FUNCTION, 0}, + {"public function", K_FUNCTION, 0}, + {"type", K_TYPE, 0}, + {"enum", K_ENUM, 0}, + {NULL, 0, 0} +}; + +/* + * FUNCTION DEFINITIONS + */ + +/* Match the name of a tag (function, variable, type, ...) starting at pos. */ +static char const *extract_name (char const *pos, vString * name) +{ + while (isspace (*pos)) + pos++; + vStringClear (name); + for (; *pos && !isspace (*pos) && *pos != '(' && *pos != ','; pos++) + vStringPut (name, *pos); + return pos; +} + +/* Match a keyword starting at p (case insensitive). */ +static int match_keyword (const char *p, KeyWord const *kw) +{ + vString *name; + size_t i; + int j; + for (i = 0; i < strlen (kw->token); i++) + { + if (tolower (p[i]) != kw->token[i]) + return 0; + } + name = vStringNew (); + p += i; + for (j = 0; j < 1 + kw->skip; j++) + { + p = extract_name (p, name); + } + makeSimpleTag (name, kw->kind); + vStringDelete (name); + return 1; +} + +/* Match a "label:" style label. */ +static void match_colon_label (char const *p) +{ + char const *end = p + strlen (p) - 1; + while (isspace (*end)) + end--; + if (*end == ':') + { + vString *name = vStringNew (); + vStringNCatS (name, p, end - p); + makeSimpleTag (name, K_LABEL); + vStringDelete (name); + } +} + +/* Match a ".label" style label. */ +static void match_dot_label (char const *p) +{ + if (*p == '.') + { + vString *name = vStringNew (); + extract_name (p + 1, name); + makeSimpleTag (name, K_LABEL); + vStringDelete (name); + } +} + +static void findBasicTags (void) +{ + const char *line; + const char *extension = fileExtension (getInputFileName ()); + KeyWord *keywords; + + if (strcmp (extension, "bb") == 0) + keywords = blitzbasic_keywords; + else if (strcmp (extension, "pb") == 0) + keywords = purebasic_keywords; + else + keywords = freebasic_keywords; + + while ((line = (const char *) readLineFromInputFile ()) != NULL) + { + const char *p = line; + KeyWord const *kw; + + while (isspace (*p)) + p++; + + /* Empty line? 
*/ + if (!*p) + continue; + + /* REM comment? */ + if (strncasecmp (p, "REM", 3) == 0 && + (isspace (*(p + 3)) || *(p + 3) == '\0')) + continue; + + /* Single-quote comment? */ + if (*p == '\'') + continue; + + /* In Basic, keywords always are at the start of the line. */ + for (kw = keywords; kw->token; kw++) + if (match_keyword (p, kw)) break; + + /* Is it a label? */ + if (strcmp (extension, "bb") == 0) + match_dot_label (p); + else + match_colon_label (p); + } +} + +parserDefinition *BasicParser (void) +{ + static char const *extensions[] = { "bas", "bi", "bm", "bb", "pb", NULL }; + parserDefinition *def = parserNew ("Basic"); + def->kindTable = BasicKinds; + def->kindCount = ARRAY_SIZE (BasicKinds); + def->extensions = extensions; + def->parser = findBasicTags; + return def; +} diff --git a/ctags/parsers/geany_diff.c b/ctags/parsers/diff.c similarity index 53% rename from ctags/parsers/geany_diff.c rename to ctags/parsers/diff.c index 372fb10135..27e79982d1 100644 --- a/ctags/parsers/geany_diff.c +++ b/ctags/parsers/diff.c @@ -16,6 +16,7 @@ #include #include +#include "entry.h" #include "parse.h" #include "routines.h" #include "read.h" @@ -25,11 +26,17 @@ * DATA DEFINITIONS */ typedef enum { - K_FUNCTION + K_MODIFIED_FILE, + K_NEW_FILE, + K_DELETED_FILE, + K_HUNK, } diffKind; static kindDefinition DiffKinds [] = { - { true, 'f', "function", "functions"} + { true, 'm', "modifiedFile", "modified files"}, + { true, 'n', "newFile", "newly created files"}, + { true, 'd', "deletedFile", "deleted files"}, + { true, 'h', "hunk", "hunks"}, }; enum { @@ -42,6 +49,11 @@ static const char *DiffDelims[2] = { "+++ " }; +static const char *HunkDelim[2] = { + "@@ ", + " @@", +}; + /* * FUNCTION DEFINITIONS */ @@ -76,11 +88,58 @@ static const unsigned char *stripAbsolute (const unsigned char *filename) return tmp; } +static int parseHunk (const unsigned char* cp, vString *hunk, int scope_index) +{ + /* + example input: @@ -0,0 +1,134 @@ + expected output: -0,0 +1,134 + */ + + 
const char *next_delim; + const char *start, *end; + const char *c; + int i = CORK_NIL; + + cp += 3; + start = (const char*)cp; + + if (*start != '-') + return i; + + next_delim = strstr ((const char*)cp, HunkDelim[1]); + if ((next_delim == NULL) + || (! (start < next_delim ))) + return i; + end = next_delim; + if (! ( '0' <= *( end - 1 ) && *( end - 1 ) <= '9')) + return i; + for (c = start; c < end; c++) + if (*c == '\t') + return i; + vStringNCopyS (hunk, start, end - start); + i = makeSimpleTag (hunk, K_HUNK); + tagEntryInfo *e = getEntryInCorkQueue (i); + if (e && scope_index > CORK_NIL) + e->extensionFields.scopeIndex = scope_index; + return i; +} + +static void markTheLastTagAsDeletedFile (int scope_index) +{ + tagEntryInfo *e = getEntryInCorkQueue (scope_index); + + if (e) + e->kindIndex = K_DELETED_FILE; +} + static void findDiffTags (void) { vString *filename = vStringNew (); + vString *hunk = vStringNew (); const unsigned char *line, *tmp; int delim = DIFF_DELIM_MINUS; + diffKind kind; + int scope_index = CORK_NIL; while ((line = readLineFromInputFile ()) != NULL) { @@ -88,6 +147,7 @@ static void findDiffTags (void) if (strncmp ((const char*) cp, DiffDelims[delim], 4u) == 0) { + scope_index = CORK_NIL; cp += 4; if (isspace ((int) *cp)) continue; /* when original filename is /dev/null use the new one instead */ @@ -109,26 +169,45 @@ static void findDiffTags (void) tmp++; } - makeSimpleTag (filename, K_FUNCTION); + if (delim == DIFF_DELIM_PLUS) + kind = K_NEW_FILE; + else + kind = K_MODIFIED_FILE; + scope_index = makeSimpleTag (filename, kind); vStringClear (filename); } /* restore default delim */ delim = DIFF_DELIM_MINUS; } + else if ((scope_index > CORK_NIL) + && (strncmp ((const char*) cp, DiffDelims[1], 4u) == 0)) + { + cp += 4; + if (isspace ((int) *cp)) continue; + /* when modified filename is /dev/null, the original name is deleted. 
*/ + if (strncmp ((const char*) cp, "/dev/null", 9u) == 0 && + (cp[9] == 0 || isspace (cp[9]))) + markTheLastTagAsDeletedFile (scope_index); + } + else if (strncmp ((const char*) cp, HunkDelim[0], 3u) == 0) + { + if (parseHunk (cp, hunk, scope_index) != CORK_NIL) + vStringClear (hunk); + } } + vStringDelete (hunk); vStringDelete (filename); } extern parserDefinition* DiffParser (void) { - static const char *const patterns [] = { "*.diff", "*.patch", NULL }; - static const char *const extensions [] = { "diff", NULL }; + static const char *const extensions [] = { "diff", "patch", NULL }; parserDefinition* const def = parserNew ("Diff"); - def->kindTable = DiffKinds; + def->kindTable = DiffKinds; def->kindCount = ARRAY_SIZE (DiffKinds); - def->patterns = patterns; def->extensions = extensions; def->parser = findDiffTags; + def->useCork = CORK_QUEUE; return def; } diff --git a/ctags/parsers/geany_basic.c b/ctags/parsers/geany_basic.c deleted file mode 100644 index b2036a29eb..0000000000 --- a/ctags/parsers/geany_basic.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright (c) 2000-2006, Darren Hiebert, Elias Pschernig - * - * This source code is released for free distribution under the terms of the - * GNU General Public License version 2 or (at your option) any later version. - * - * This module contains functions for generating tags for BlitzBasic - * (BlitzMax), PureBasic and FreeBasic language files. For now, this is kept - * quite simple - but feel free to ask for more things added any time - - * patches are of course most welcome. 
- */ - -/* - * INCLUDE FILES - */ -#include "general.h" /* must always come first */ - -#include - -#include "parse.h" -#include "read.h" -#include "routines.h" -#include "vstring.h" - -/* - * DATA DEFINITIONS - */ -typedef enum { - K_CONST, - K_FUNCTION, - K_LABEL, - K_TYPE, - K_VARIABLE, - K_ENUM -} BasicKind; - -typedef struct { - char const *token; - BasicKind kind; -} KeyWord; - -static kindDefinition BasicKinds[] = { - {true, 'c', "constant", "constants"}, - {true, 'f', "function", "functions"}, - {true, 'l', "label", "labels"}, - {true, 't', "type", "types"}, - {true, 'v', "variable", "variables"}, - {true, 'g', "enum", "enumerations"} -}; - -static KeyWord freebasic_keywords[] = { - {"dim", K_VARIABLE}, /* must always be the first */ - {"common", K_VARIABLE}, /* must always be the second */ - {"const", K_CONST}, /* must always be the third */ - {"function", K_FUNCTION}, - {"sub", K_FUNCTION}, - {"property", K_FUNCTION}, - {"constructor", K_FUNCTION}, - {"destructor", K_FUNCTION}, - {"private sub", K_FUNCTION}, - {"public sub", K_FUNCTION}, - {"private function", K_FUNCTION}, - {"public function", K_FUNCTION}, - {"type", K_TYPE}, - {"enum", K_ENUM}, - {NULL, 0} -}; - -/* - * FUNCTION DEFINITIONS - */ - -/* Match the name of a dim or const starting at pos. 
*/ -static int extract_dim (char const *pos, vString * name, BasicKind kind) -{ - const char *old_pos = pos; - while (isspace (*pos)) - pos++; - - /* create tags only if there is some space between the keyword and the identifier */ - if (old_pos == pos) - return 0; - - vStringClear (name); - - if (strncasecmp (pos, "shared", 6) == 0) - pos += 6; /* skip keyword "shared" */ - - while (isspace (*pos)) - pos++; - - /* capture "dim as String str" */ - if (strncasecmp (pos, "as", 2) == 0) - { - pos += 2; /* skip keyword "as" */ - - while (isspace (*pos)) - pos++; - while (!isspace (*pos)) /* skip next part which is a type */ - pos++; - while (isspace (*pos)) - pos++; - /* now we are at the name */ - } - /* capture "dim as foo ptr bar" */ - if (strncasecmp (pos, "ptr", 3) == 0 && isspace(*(pos+4))) - { - pos += 3; /* skip keyword "ptr" */ - while (isspace (*pos)) - pos++; - } - /* capture "dim as string * 4096 chunk" */ - if (strncmp (pos, "*", 1) == 0) - { - pos += 1; /* skip "*" */ - while (isspace (*pos) || isdigit(*pos) || ispunct(*pos)) - pos++; - } - - for (; *pos && !isspace (*pos) && *pos != '(' && *pos != ',' && *pos != '='; pos++) - vStringPut (name, *pos); - makeSimpleTag (name, kind); - - /* if the line contains a ',', we have multiple declarations */ - while (*pos && strchr (pos, ',')) - { - /* skip all we don't need(e.g. "..., new_array(5), " we skip "(5)") */ - while (*pos != ',' && *pos != '\'') - pos++; - - if (*pos == '\'') - return 0; /* break if we are in a comment */ - - while (isspace (*pos) || *pos == ',') - pos++; - - if (*pos == '\'') - return 0; /* break if we are in a comment */ - - vStringClear (name); - for (; *pos && !isspace (*pos) && *pos != '(' && *pos != ',' && *pos != '='; pos++) - vStringPut (name, *pos); - makeSimpleTag (name, kind); - } - - vStringDelete (name); - return 1; -} - -/* Match the name of a tag (function, variable, type, ...) starting at pos. 
*/ -static char const *extract_name (char const *pos, vString * name) -{ - while (isspace (*pos)) - pos++; - vStringClear (name); - for (; *pos && !isspace (*pos) && *pos != '(' && *pos != ',' && *pos != '='; pos++) - vStringPut (name, *pos); - return pos; -} - -/* Match a keyword starting at p (case insensitive). */ -static int match_keyword (const char *p, KeyWord const *kw) -{ - vString *name; - size_t i; - int j; - const char *old_p; - for (i = 0; i < strlen (kw->token); i++) - { - if (tolower (p[i]) != kw->token[i]) - return 0; - } - name = vStringNew (); - p += i; - if (kw == &freebasic_keywords[0] || - kw == &freebasic_keywords[1] || - kw == &freebasic_keywords[2]) - return extract_dim (p, name, kw->kind); /* extract_dim adds the found tag(s) */ - - old_p = p; - while (isspace (*p)) - p++; - - /* create tags only if there is some space between the keyword and the identifier */ - if (old_p == p) - { - vStringDelete (name); - return 0; - } - - for (j = 0; j < 1; j++) - { - p = extract_name (p, name); - } - makeSimpleTag (name, kw->kind); - vStringDelete (name); - return 1; -} - -/* Match a "label:" style label. */ -static void match_colon_label (char const *p) -{ - char const *end = p + strlen (p) - 1; - while (isspace (*end)) - end--; - if (*end == ':') - { - vString *name = vStringNew (); - vStringNCatS (name, p, end - p); - makeSimpleTag (name, K_LABEL); - vStringDelete (name); - } -} - -static void findBasicTags (void) -{ - const char *line; - KeyWord *keywords; - - keywords = freebasic_keywords; - - while ((line = (const char *) readLineFromInputFile ()) != NULL) - { - const char *p = line; - KeyWord const *kw; - - while (isspace (*p)) - p++; - - /* Empty line or comment? */ - if (!*p || *p == '\'') - continue; - - /* In Basic, keywords always are at the start of the line. */ - for (kw = keywords; kw->token; kw++) - if (match_keyword (p, kw)) break; - - /* Is it a label? 
*/ - match_colon_label (p); - } -} - -parserDefinition *BasicParser (void) -{ - static char const *extensions[] = { "bas", "bi", "bb", "pb", NULL }; - parserDefinition *def = parserNew ("FreeBasic"); - def->kindTable = BasicKinds; - def->kindCount = ARRAY_SIZE (BasicKinds); - def->extensions = extensions; - def->parser = findBasicTags; - return def; -} diff --git a/ctags/parsers/geany_go.c b/ctags/parsers/geany_go.c deleted file mode 100644 index fc8921a768..0000000000 --- a/ctags/parsers/geany_go.c +++ /dev/null @@ -1,831 +0,0 @@ -/* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. -* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include "debug.h" -#include "entry.h" -#include "keyword.h" -#include "read.h" -#include "parse.h" -#include "routines.h" -#include "vstring.h" -#include "options.h" -#include "xtag.h" - -/* - * MACROS - */ -#define MAX_SIGNATURE_LENGTH 512 -#define isType(token,t) (bool) ((token)->type == (t)) -#define isKeyword(token,k) (bool) ((token)->keyword == (k)) - -/* - * DATA DECLARATIONS - */ - -enum eKeywordId { - KEYWORD_package, - KEYWORD_import, - KEYWORD_const, - KEYWORD_type, - KEYWORD_var, - KEYWORD_func, - KEYWORD_struct, - KEYWORD_interface, - KEYWORD_map, - KEYWORD_chan -}; -typedef int keywordId; /* to allow KEYWORD_NONE */ - -typedef enum eTokenType { - TOKEN_NONE = -1, - // Token not important for top-level Go parsing - TOKEN_OTHER, - TOKEN_KEYWORD, - TOKEN_IDENTIFIER, - TOKEN_STRING, - TOKEN_OPEN_PAREN, - TOKEN_CLOSE_PAREN, - TOKEN_OPEN_CURLY, - TOKEN_CLOSE_CURLY, - TOKEN_OPEN_SQUARE, - TOKEN_CLOSE_SQUARE, - TOKEN_SEMICOLON, - TOKEN_STAR, - TOKEN_LEFT_ARROW, - TOKEN_DOT, - TOKEN_COMMA, - TOKEN_EOF -} tokenType; - -typedef struct sTokenInfo { - tokenType type; - keywordId keyword; - vString *string; /* the name of the token */ - unsigned long lineNumber; /* line number of tag */ - MIOPos 
filePosition; /* file position of line containing name */ -} tokenInfo; - -/* -* DATA DEFINITIONS -*/ - -static int Lang_go; -static vString *scope; -static vString *signature = NULL; - -typedef enum { - GOTAG_UNDEFINED = -1, - GOTAG_PACKAGE, - GOTAG_FUNCTION, - GOTAG_CONST, - GOTAG_TYPE, - GOTAG_VAR, - GOTAG_STRUCT, - GOTAG_INTERFACE, - GOTAG_MEMBER -} goKind; - -static kindDefinition GoKinds[] = { - {true, 'p', "package", "packages"}, - {true, 'f', "func", "functions"}, - {true, 'c', "const", "constants"}, - {true, 't', "type", "types"}, - {true, 'v', "var", "variables"}, - {true, 's', "struct", "structs"}, - {true, 'i', "interface", "interfaces"}, - {true, 'm', "member", "struct members"} -}; - -static const keywordTable GoKeywordTable[] = { - {"package", KEYWORD_package}, - {"import", KEYWORD_import}, - {"const", KEYWORD_const}, - {"type", KEYWORD_type}, - {"var", KEYWORD_var}, - {"func", KEYWORD_func}, - {"struct", KEYWORD_struct}, - {"interface", KEYWORD_interface}, - {"map", KEYWORD_map}, - {"chan", KEYWORD_chan} -}; - -/* -* FUNCTION DEFINITIONS -*/ - -// XXX UTF-8 -static bool isStartIdentChar (const int c) -{ - return (bool) - (isalpha (c) || c == '_' || c > 128); -} - -static bool isIdentChar (const int c) -{ - return (bool) - (isStartIdentChar (c) || isdigit (c)); -} - -static void initialize (const langType language) -{ - Lang_go = language; -} - -static tokenInfo *newToken (void) -{ - tokenInfo *const token = xMalloc (1, tokenInfo); - token->type = TOKEN_NONE; - token->keyword = KEYWORD_NONE; - token->string = vStringNew (); - token->lineNumber = getInputLineNumber (); - token->filePosition = getInputFilePosition (); - return token; -} - -static tokenInfo *copyToken (tokenInfo *other) -{ - tokenInfo *const token = xMalloc (1, tokenInfo); - token->type = other->type; - token->keyword = other->keyword; - token->string = vStringNewCopy (other->string); - token->lineNumber = other->lineNumber; - token->filePosition = other->filePosition; - return token; 
-} - -static void deleteToken (tokenInfo * const token) -{ - if (token != NULL) - { - vStringDelete (token->string); - eFree (token); - } -} - -/* - * Parsing functions - */ - -static void parseString (vString *const string, const int delimiter) -{ - bool end = false; - while (!end) - { - int c = getcFromInputFile (); - if (c == EOF) - end = true; - else if (c == '\\' && delimiter != '`') - { - c = getcFromInputFile (); - if (c != '\'' && c != '\"') - vStringPut (string, '\\'); - vStringPut (string, c); - } - else if (c == delimiter) - end = true; - else - vStringPut (string, c); - } -} - -static void parseIdentifier (vString *const string, const int firstChar) -{ - int c = firstChar; - do - { - vStringPut (string, c); - c = getcFromInputFile (); - } while (isIdentChar (c)); - ungetcToInputFile (c); /* always unget, LF might add a semicolon */ -} - -static void readToken (tokenInfo *const token) -{ - int c; - static tokenType lastTokenType = TOKEN_NONE; - bool firstWhitespace = true; - bool whitespace; - - token->type = TOKEN_NONE; - token->keyword = KEYWORD_NONE; - vStringClear (token->string); - -getNextChar: - do - { - c = getcFromInputFile (); - token->lineNumber = getInputLineNumber (); - token->filePosition = getInputFilePosition (); - if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER || - lastTokenType == TOKEN_STRING || - lastTokenType == TOKEN_OTHER || - lastTokenType == TOKEN_CLOSE_PAREN || - lastTokenType == TOKEN_CLOSE_CURLY || - lastTokenType == TOKEN_CLOSE_SQUARE)) - { - c = ';'; // semicolon injection - } - whitespace = c == '\t' || c == ' ' || c == '\r' || c == '\n'; - if (signature && whitespace && firstWhitespace && vStringLength (signature) < MAX_SIGNATURE_LENGTH) - { - firstWhitespace = false; - vStringPut(signature, ' '); - } - } - while (whitespace); - - switch (c) - { - case EOF: - token->type = TOKEN_EOF; - break; - - case ';': - token->type = TOKEN_SEMICOLON; - break; - - case '/': - { - bool hasNewline = false; - int d = 
getcFromInputFile (); - switch (d) - { - case '/': - skipToCharacterInInputFile ('\n'); - /* Line comments start with the - * character sequence // and - * continue through the next - * newline. A line comment acts - * like a newline. */ - ungetcToInputFile ('\n'); - goto getNextChar; - case '*': - do - { - do - { - d = getcFromInputFile (); - if (d == '\n') - { - hasNewline = true; - } - } while (d != EOF && d != '*'); - - c = getcFromInputFile (); - if (c == '/') - break; - else - ungetcToInputFile (c); - } while (c != EOF && c != '\0'); - - ungetcToInputFile (hasNewline ? '\n' : ' '); - goto getNextChar; - default: - token->type = TOKEN_OTHER; - ungetcToInputFile (d); - break; - } - } - break; - - case '"': - case '\'': - case '`': - token->type = TOKEN_STRING; - parseString (token->string, c); - token->lineNumber = getInputLineNumber (); - token->filePosition = getInputFilePosition (); - break; - - case '<': - { - int d = getcFromInputFile (); - if (d == '-') - token->type = TOKEN_LEFT_ARROW; - else - { - ungetcToInputFile (d); - token->type = TOKEN_OTHER; - } - } - break; - - case '(': - token->type = TOKEN_OPEN_PAREN; - break; - - case ')': - token->type = TOKEN_CLOSE_PAREN; - break; - - case '{': - token->type = TOKEN_OPEN_CURLY; - break; - - case '}': - token->type = TOKEN_CLOSE_CURLY; - break; - - case '[': - token->type = TOKEN_OPEN_SQUARE; - break; - - case ']': - token->type = TOKEN_CLOSE_SQUARE; - break; - - case '*': - token->type = TOKEN_STAR; - break; - - case '.': - token->type = TOKEN_DOT; - break; - - case ',': - token->type = TOKEN_COMMA; - break; - - default: - if (isStartIdentChar (c)) - { - parseIdentifier (token->string, c); - token->lineNumber = getInputLineNumber (); - token->filePosition = getInputFilePosition (); - token->keyword = lookupKeyword (vStringValue (token->string), Lang_go); - if (isKeyword (token, KEYWORD_NONE)) - token->type = TOKEN_IDENTIFIER; - else - token->type = TOKEN_KEYWORD; - } - else - token->type = TOKEN_OTHER; - 
break; - } - - if (signature && vStringLength (signature) < MAX_SIGNATURE_LENGTH) - { - if (token->type == TOKEN_LEFT_ARROW) - vStringCatS(signature, "<-"); - else if (token->type == TOKEN_STRING) - { - // only struct member annotations can appear in function prototypes - // so only `` type strings are possible - vStringPut(signature, '`'); - vStringCat(signature, token->string); - vStringPut(signature, '`'); - } - else if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_KEYWORD) - vStringCat(signature, token->string); - else if (c != EOF) - vStringPut(signature, c); - } - - lastTokenType = token->type; -} - -static bool skipToMatchedNoRead (tokenInfo *const token) -{ - int nest_level = 0; - tokenType open_token = token->type; - tokenType close_token; - - switch (open_token) - { - case TOKEN_OPEN_PAREN: - close_token = TOKEN_CLOSE_PAREN; - break; - case TOKEN_OPEN_CURLY: - close_token = TOKEN_CLOSE_CURLY; - break; - case TOKEN_OPEN_SQUARE: - close_token = TOKEN_CLOSE_SQUARE; - break; - default: - return false; - } - - /* - * This routine will skip to a matching closing token. - * It will also handle nested tokens. - */ - nest_level++; - while (nest_level > 0 && !isType (token, TOKEN_EOF)) - { - readToken (token); - if (isType (token, open_token)) - nest_level++; - else if (isType (token, close_token)) - nest_level--; - } - - return true; -} - -static void skipToMatched (tokenInfo *const token) -{ - if (skipToMatchedNoRead (token)) - readToken (token); -} - -static bool skipType (tokenInfo *const token) -{ - // Type = TypeName | TypeLit | "(" Type ")" . - // Skips also function multiple return values "(" Type {"," Type} ")" - if (isType (token, TOKEN_OPEN_PAREN)) - { - skipToMatched (token); - return true; - } - - // TypeName = QualifiedIdent. - // QualifiedIdent = [ PackageName "." ] identifier . - // PackageName = identifier . 
- if (isType (token, TOKEN_IDENTIFIER)) - { - readToken (token); - if (isType (token, TOKEN_DOT)) - { - readToken (token); - if (isType (token, TOKEN_IDENTIFIER)) - readToken (token); - } - return true; - } - - // StructType = "struct" "{" { FieldDecl ";" } "}" - // InterfaceType = "interface" "{" { MethodSpec ";" } "}" . - if (isKeyword (token, KEYWORD_struct) || isKeyword (token, KEYWORD_interface)) - { - readToken (token); - // skip over "{}" - skipToMatched (token); - return true; - } - - // ArrayType = "[" ArrayLength "]" ElementType . - // SliceType = "[" "]" ElementType . - // ElementType = Type . - if (isType (token, TOKEN_OPEN_SQUARE)) - { - skipToMatched (token); - return skipType (token); - } - - // PointerType = "*" BaseType . - // BaseType = Type . - // ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType . - if (isType (token, TOKEN_STAR) || isKeyword (token, KEYWORD_chan) || isType (token, TOKEN_LEFT_ARROW)) - { - readToken (token); - return skipType (token); - } - - // MapType = "map" "[" KeyType "]" ElementType . - // KeyType = Type . - if (isKeyword (token, KEYWORD_map)) - { - readToken (token); - // skip over "[]" - skipToMatched (token); - return skipType (token); - } - - // FunctionType = "func" Signature . - // Signature = Parameters [ Result ] . - // Result = Parameters | Type . - // Parameters = "(" [ ParameterList [ "," ] ] ")" . - if (isKeyword (token, KEYWORD_func)) - { - readToken (token); - // Parameters, skip over "()" - skipToMatched (token); - // Result is parameters or type or nothing. skipType treats anything - // surrounded by parentheses as a type, and does nothing if what - // follows is not a type. 
- return skipType (token); - } - - return false; -} - -static void makeTag (tokenInfo *const token, const goKind kind, - tokenInfo *const parent_token, const goKind parent_kind, - const char *argList, const char *varType) -{ - const char *const name = vStringValue (token->string); - - tagEntryInfo e; - initTagEntry (&e, name, kind); - - if (!GoKinds [kind].enabled) - return; - - e.lineNumber = token->lineNumber; - e.filePosition = token->filePosition; - if (argList) - e.extensionFields.signature = argList; - if (varType) - e.extensionFields.typeRef[1] = varType; - - if (parent_kind != GOTAG_UNDEFINED && parent_token != NULL) - { - e.extensionFields.scopeKindIndex = parent_kind; - e.extensionFields.scopeName = vStringValue (parent_token->string); - } - makeTagEntry (&e); - - if (scope && isXtagEnabled(XTAG_QUALIFIED_TAGS)) - { - vString *qualifiedName = vStringNew (); - vStringCopy (qualifiedName, scope); - vStringCatS (qualifiedName, "."); - vStringCat (qualifiedName, token->string); - e.name = vStringValue (qualifiedName); - makeTagEntry (&e); - vStringDelete (qualifiedName); - } -} - -static void parsePackage (tokenInfo *const token) -{ - readToken (token); - if (isType (token, TOKEN_IDENTIFIER)) - { - makeTag (token, GOTAG_PACKAGE, NULL, GOTAG_UNDEFINED, NULL, NULL); - if (!scope && isXtagEnabled(XTAG_QUALIFIED_TAGS)) - { - scope = vStringNew (); - vStringCopy (scope, token->string); - } - } -} - -static void parseFunctionOrMethod (tokenInfo *const token) -{ - // FunctionDecl = "func" identifier Signature [ Body ] . - // Body = Block. - // - // MethodDecl = "func" Receiver MethodName Signature [ Body ] . - // Receiver = "(" [ identifier ] [ "*" ] BaseTypeName ")" . - // BaseTypeName = identifier . - - // Skip over receiver. 
- readToken (token); - if (isType (token, TOKEN_OPEN_PAREN)) - skipToMatched (token); - - if (isType (token, TOKEN_IDENTIFIER)) - { - vString *argList; - tokenInfo *functionToken = copyToken (token); - - // Start recording signature - signature = vStringNew (); - - // Skip over parameters. - readToken (token); - skipToMatchedNoRead (token); - - vStringStripLeading (signature); - vStringStripTrailing (signature); - argList = signature; - signature = vStringNew (); - - readToken (token); - - // Skip over result. - skipType (token); - - // Remove the extra { we have just read - vStringStripTrailing (signature); - vStringChop (signature); - - vStringStripLeading (signature); - vStringStripTrailing (signature); - makeTag (functionToken, GOTAG_FUNCTION, NULL, GOTAG_UNDEFINED, argList->buffer, signature->buffer); - deleteToken (functionToken); - vStringDelete(signature); - vStringDelete(argList); - - // Stop recording signature - signature = NULL; - - // Skip over function body. - if (isType (token, TOKEN_OPEN_CURLY)) - skipToMatched (token); - } -} - -static void parseStructMembers (tokenInfo *const token, tokenInfo *const parent_token) -{ - // StructType = "struct" "{" { FieldDecl ";" } "}" . - // FieldDecl = (IdentifierList Type | AnonymousField) [ Tag ] . - // AnonymousField = [ "*" ] TypeName . - // Tag = string_lit . 
- - readToken (token); - if (!isType (token, TOKEN_OPEN_CURLY)) - return; - - readToken (token); - while (!isType (token, TOKEN_EOF) && !isType (token, TOKEN_CLOSE_CURLY)) - { - tokenInfo *memberCandidate = NULL; - bool first = true; - - while (!isType (token, TOKEN_EOF)) - { - if (isType (token, TOKEN_IDENTIFIER)) - { - if (first) - { - // could be anonymous field like in 'struct {int}' - we don't know yet - memberCandidate = copyToken (token); - first = false; - } - else - { - if (memberCandidate) - { - // if we are here, there was a comma and memberCandidate isn't an anonymous field - makeTag (memberCandidate, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL); - deleteToken (memberCandidate); - memberCandidate = NULL; - } - makeTag (token, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL); - } - readToken (token); - } - if (!isType (token, TOKEN_COMMA)) - break; - readToken (token); - } - - // in the case of an anonymous field, we already read part of the - // type into memberCandidate and skipType() should return false so no tag should - // be generated in this case. - if (skipType (token) && memberCandidate) - makeTag (memberCandidate, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL); - - if (memberCandidate) - deleteToken (memberCandidate); - - while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_CURLY) - && !isType (token, TOKEN_EOF)) - { - readToken (token); - skipToMatched (token); - } - - if (!isType (token, TOKEN_CLOSE_CURLY)) - { - // we are at TOKEN_SEMICOLON - readToken (token); - } - } -} - -static void parseConstTypeVar (tokenInfo *const token, goKind kind) -{ - // ConstDecl = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) . - // ConstSpec = IdentifierList [ [ Type ] "=" ExpressionList ] . - // IdentifierList = identifier { "," identifier } . - // ExpressionList = Expression { "," Expression } . - // TypeDecl = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) . - // TypeSpec = identifier Type . 
- // VarDecl = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) . - // VarSpec = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) . - bool usesParens = false; - - readToken (token); - - if (isType (token, TOKEN_OPEN_PAREN)) - { - usesParens = true; - readToken (token); - } - - do - { - tokenInfo *typeToken = NULL; - - while (!isType (token, TOKEN_EOF)) - { - if (isType (token, TOKEN_IDENTIFIER)) - { - if (kind == GOTAG_TYPE) - { - typeToken = copyToken (token); - readToken (token); - if (isKeyword (token, KEYWORD_struct)) - makeTag (typeToken, GOTAG_STRUCT, NULL, GOTAG_UNDEFINED, NULL, NULL); - else if (isKeyword (token, KEYWORD_interface)) - makeTag (typeToken, GOTAG_INTERFACE, NULL, GOTAG_UNDEFINED, NULL, NULL); - else - makeTag (typeToken, kind, NULL, GOTAG_UNDEFINED, NULL, NULL); - break; - } - else - makeTag (token, kind, NULL, GOTAG_UNDEFINED, NULL, NULL); - readToken (token); - } - if (!isType (token, TOKEN_COMMA)) - break; - readToken (token); - } - - if (typeToken) - { - if (isKeyword (token, KEYWORD_struct)) - parseStructMembers (token, typeToken); - else - skipType (token); - deleteToken (typeToken); - } - else - skipType (token); - - while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_PAREN) - && !isType (token, TOKEN_EOF)) - { - readToken (token); - skipToMatched (token); - } - - if (usesParens && !isType (token, TOKEN_CLOSE_PAREN)) - { - // we are at TOKEN_SEMICOLON - readToken (token); - } - } - while (!isType (token, TOKEN_EOF) && - usesParens && !isType (token, TOKEN_CLOSE_PAREN)); -} - -static void parseGoFile (tokenInfo *const token) -{ - do - { - readToken (token); - - if (isType (token, TOKEN_KEYWORD)) - { - switch (token->keyword) - { - case KEYWORD_package: - parsePackage (token); - break; - case KEYWORD_func: - parseFunctionOrMethod (token); - break; - case KEYWORD_const: - parseConstTypeVar (token, GOTAG_CONST); - break; - case KEYWORD_type: - parseConstTypeVar (token, GOTAG_TYPE); - break; - case 
KEYWORD_var: - parseConstTypeVar (token, GOTAG_VAR); - break; - default: - break; - } - } - else if (isType (token, TOKEN_OPEN_PAREN) || isType (token, TOKEN_OPEN_CURLY) || - isType (token, TOKEN_OPEN_SQUARE)) - { - skipToMatched (token); - } - } while (token->type != TOKEN_EOF); -} - -static void findGoTags (void) -{ - tokenInfo *const token = newToken (); - - parseGoFile (token); - - deleteToken (token); - vStringDelete (scope); - scope = NULL; -} - -extern parserDefinition *GoParser (void) -{ - static const char *const extensions[] = { "go", NULL }; - parserDefinition *def = parserNew ("Go"); - def->kindTable = GoKinds; - def->kindCount = ARRAY_SIZE (GoKinds); - def->extensions = extensions; - def->parser = findGoTags; - def->initialize = initialize; - def->keywordTable = GoKeywordTable; - def->keywordCount = ARRAY_SIZE (GoKeywordTable); - return def; -} diff --git a/ctags/parsers/geany_lua.c b/ctags/parsers/geany_lua.c deleted file mode 100644 index 608b71dfa0..0000000000 --- a/ctags/parsers/geany_lua.c +++ /dev/null @@ -1,120 +0,0 @@ -/* -* Copyright (c) 2000-2001, Max Ischenko . -* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. -* -* This module contains functions for generating tags for Lua language. -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include - -#include "parse.h" -#include "read.h" -#include "routines.h" -#include "vstring.h" - -/* -* DATA DEFINITIONS -*/ -typedef enum { - K_FUNCTION -} luaKind; - -static kindDefinition LuaKinds [] = { - { true, 'f', "function", "functions" } -}; - -/* -* FUNCTION DEFINITIONS -*/ - -/* - * Helper function. - * Returns 1 if line looks like a line of Lua code. - * - * TODO: Recognize UNIX bang notation. - * (Lua treat first line as a comment if it starts with #!) 
- * - */ -static bool is_a_code_line (const unsigned char *line) -{ - bool result; - const unsigned char *p = line; - while (isspace ((int) *p)) - p++; - if (p [0] == '\0') - result = false; - else if (p [0] == '-' && p [1] == '-') - result = false; - else - result = true; - return result; -} - -static void extract_name (const char *begin, const char *end, vString *name) -{ - if (begin != NULL && end != NULL && begin < end) - { - const char *cp; - - while (isspace ((int) *begin)) - begin++; - while (isspace ((int) *end)) - end--; - if (begin < end) - { - for (cp = begin ; cp != end; cp++) - vStringPut (name, (int) *cp); - - makeSimpleTag (name, K_FUNCTION); - vStringClear (name); - } - } -} - -static void findLuaTags (void) -{ - vString *name = vStringNew (); - const unsigned char *line; - - while ((line = readLineFromInputFile ()) != NULL) - { - const char *p, *q; - - if (! is_a_code_line (line)) - continue; - - p = (const char*) strstr ((const char*) line, "function"); - if (p == NULL) - continue; - - q = strchr ((const char*) line, '='); - - if (q == NULL) { - p = p + 9; /* skip the `function' word */ - q = strchr ((const char*) p, '('); - extract_name (p, q, name); - } else if (*(q+1) != '=') { /* ignore `if type(v) == "function" then ...' 
*/ - p = (const char*) &line[0]; - extract_name (p, q, name); - } - } - vStringDelete (name); -} - -extern parserDefinition* LuaParser (void) -{ - static const char* const extensions [] = { "lua", NULL }; - parserDefinition* def = parserNew ("Lua"); - def->kindTable = LuaKinds; - def->kindCount = ARRAY_SIZE (LuaKinds); - def->extensions = extensions; - def->parser = findLuaTags; - return def; -} diff --git a/ctags/parsers/geany_nsis.c b/ctags/parsers/geany_nsis.c deleted file mode 100644 index a06a81508b..0000000000 --- a/ctags/parsers/geany_nsis.c +++ /dev/null @@ -1,142 +0,0 @@ -/* -* Copyright (c) 2000-2002, Darren Hiebert -* Copyright (c) 2009-2011, Enrico Tröger -* -* This source code is released for free distribution under the terms of the -* GNU General Public License. -* -* This module contains functions for generating tags for NSIS scripts (based on sh.c). -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include - -#include "parse.h" -#include "read.h" -#include "vstring.h" -#include "routines.h" - -/* -* DATA DEFINITIONS -*/ -typedef enum { - K_SECTION, - K_FUNCTION, - K_VARIABLE -} NsisKind; - -static kindDefinition NsisKinds [] = { - { true, 'n', "namespace", "sections"}, - { true, 'f', "function", "functions"}, - { true, 'v', "variable", "variables"} -}; - -/* -* FUNCTION DEFINITIONS -*/ - -static void findNsisTags (void) -{ - vString *name = vStringNew (); - const unsigned char *line; - - while ((line = readLineFromInputFile ()) != NULL) - { - const unsigned char* cp = line; - - while (isspace (*cp)) - cp++; - - if (*cp == '#' || *cp == ';') - continue; - - /* functions */ - if (strncasecmp ((const char*) cp, "function", (size_t) 8) == 0 && - isspace ((int) cp [8])) - { - cp += 8; - /* skip all whitespace */ - while (isspace ((int) *cp)) - ++cp; - while (isalnum ((int) *cp) || *cp == '_' || *cp == '-' || *cp == '.' 
|| *cp == '!') - { - vStringPut (name, (int) *cp); - ++cp; - } - makeSimpleTag (name, K_FUNCTION); - vStringClear (name); - } - /* variables */ - else if (strncasecmp ((const char*) cp, "var", (size_t) 3) == 0 && - isspace ((int) cp [3])) - { - cp += 3; - /* skip all whitespace */ - while (isspace ((int) *cp)) - ++cp; - /* skip any flags */ - while (*cp == '/') - { - ++cp; - while (! isspace ((int) *cp)) - ++cp; - while (isspace ((int) *cp)) - ++cp; - } - while (isalnum ((int) *cp) || *cp == '_') - { - vStringPut (name, (int) *cp); - ++cp; - } - makeSimpleTag (name, K_VARIABLE); - vStringClear (name); - } - /* sections */ - else if (strncasecmp ((const char*) cp, "section", (size_t) 7) == 0 && - isspace ((int) cp [7])) - { - bool in_quotes = false; - cp += 7; - /* skip all whitespace */ - while (isspace ((int) *cp)) - ++cp; - while (isalnum ((int) *cp) || isspace ((int) *cp) || - *cp == '_' || *cp == '-' || *cp == '.' || *cp == '!' || *cp == '"') - { - if (*cp == '"') - { - if (in_quotes) - break; - else - { - in_quotes = true; - ++cp; - continue; - } - } - vStringPut (name, (int) *cp); - ++cp; - } - makeSimpleTag (name, K_SECTION); - vStringClear (name); - } - } - vStringDelete (name); -} - -extern parserDefinition* NsisParser (void) -{ - static const char *const extensions [] = { - "nsi", "nsh", NULL - }; - parserDefinition* def = parserNew ("NSIS"); - def->kindTable = NsisKinds; - def->kindCount = ARRAY_SIZE (NsisKinds); - def->extensions = extensions; - def->parser = findNsisTags; - return def; -} diff --git a/ctags/parsers/geany_perl.c b/ctags/parsers/geany_perl.c deleted file mode 100644 index 5e6c8e42aa..0000000000 --- a/ctags/parsers/geany_perl.c +++ /dev/null @@ -1,380 +0,0 @@ -/* -* Copyright (c) 2000-2003, Darren Hiebert -* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. 
-* -* This module contains functions for generating tags for PERL language -* files. -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ -#include "debug.h" - -#include - -#include "entry.h" -#include "promise.h" -#include "options.h" -#include "read.h" -#include "routines.h" -#include "vstring.h" -#include "xtag.h" - -#define TRACE_PERL_C 0 -#define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf - -/* -* DATA DEFINITIONS -*/ -typedef enum { - K_NONE = -1, - K_CONSTANT, - K_FORMAT, - K_LABEL, - K_PACKAGE, - K_SUBROUTINE, - K_SUBROUTINE_DECLARATION -} perlKind; - -static kindDefinition PerlKinds [] = { - { true, 'c', "constant", "constants" }, - { true, 'f', "format", "formats" }, - { true, 'l', "label", "labels" }, - { true, 'p', "package", "packages" }, - { true, 's', "subroutine", "subroutines" }, - { false, 'd', "subroutineDeclaration", "subroutine declarations" }, -}; - -/* -* FUNCTION DEFINITIONS -*/ - -static bool isIdentifier1 (int c) -{ - return (bool) (isalpha (c) || c == '_'); -} - -static bool isIdentifier (int c) -{ - return (bool) (isalnum (c) || c == '_'); -} - -static bool isPodWord (const char *word) -{ - bool result = false; - if (isalpha (*word)) - { - const char *const pods [] = { - "head1", "head2", "head3", "head4", "over", "item", "back", - "pod", "begin", "end", "for" - }; - const size_t count = ARRAY_SIZE (pods); - const char *white = strpbrk (word, " \t"); - const size_t len = (white!=NULL) ? (size_t)(white-word) : strlen (word); - char *const id = (char*) eMalloc (len + 1); - size_t i; - strncpy (id, word, len); - id [len] = '\0'; - for (i = 0 ; i < count && ! result ; ++i) - { - if (strcmp (id, pods [i]) == 0) - result = true; - } - eFree (id); - } - return result; -} - -/* - * Perl subroutine declaration may look like one of the following: - * - * sub abc; - * sub abc :attr; - * sub abc (proto); - * sub abc (proto) :attr; - * - * Note that there may be more than one attribute. 
Attributes may - * have things in parentheses (they look like arguments). Anything - * inside of those parentheses goes. Prototypes may contain semi-colons. - * The matching end when we encounter (outside of any parentheses) either - * a semi-colon (that'd be a declaration) or an left curly brace - * (definition). - * - * This is pretty complicated parsing (plus we all know that only perl can - * parse Perl), so we are only promising best effort here. - * - * If we can't determine what this is (due to a file ending, for example), - * we will return false. - */ -static bool isSubroutineDeclaration (const unsigned char *cp) -{ - bool attr = false; - int nparens = 0; - - do { - for ( ; *cp; ++cp) { -SUB_DECL_SWITCH: - switch (*cp) { - case ':': - if (nparens) - break; - else if (true == attr) - return false; /* Invalid attribute name */ - else - attr = true; - break; - case '(': - ++nparens; - break; - case ')': - --nparens; - break; - case ' ': - case '\t': - break; - case ';': - if (!nparens) - return true; - /* fall through */ - case '{': - if (!nparens) - return false; - /* fall through */ - default: - if (attr) { - if (isIdentifier1(*cp)) { - cp++; - while (isIdentifier (*cp)) - cp++; - attr = false; - goto SUB_DECL_SWITCH; /* Instead of --cp; */ - } else { - return false; - } - } else if (nparens) { - break; - } else { - return false; - } - } - } - } while (NULL != (cp = readLineFromInputFile ())); - - return false; -} - -/* Algorithm adapted from from GNU etags. 
- * Perl support by Bart Robinson - * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/ - */ -static void findPerlTags (void) -{ - vString *name = vStringNew (); - vString *package = NULL; - bool skipPodDoc = false; - const unsigned char *line; - - while ((line = readLineFromInputFile ()) != NULL) - { - bool spaceRequired = false; - bool qualified = false; - const unsigned char *cp = line; - perlKind kind = K_NONE; - tagEntryInfo e; - - if (skipPodDoc) - { - if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0) - skipPodDoc = false; - continue; - } - else if (line [0] == '=') - { - skipPodDoc = isPodWord ((const char*)line + 1); - continue; - } - else if (strcmp ((const char*) line, "__DATA__") == 0) - break; - else if (strcmp ((const char*) line, "__END__") == 0) - break; - else if (line [0] == '#') - continue; - - while (isspace (*cp)) - cp++; - - if (strncmp((const char*) cp, "sub", (size_t) 3) == 0) - { - TRACE("this looks like a sub\n"); - cp += 3; - kind = K_SUBROUTINE; - spaceRequired = true; - qualified = true; - } - else if (strncmp((const char*) cp, "use", (size_t) 3) == 0) - { - cp += 3; - if (!isspace(*cp)) - continue; - while (*cp && isspace (*cp)) - ++cp; - if (strncmp((const char*) cp, "constant", (size_t) 8) != 0) - continue; - cp += 8; - kind = K_CONSTANT; - spaceRequired = true; - qualified = true; - } - else if (strncmp((const char*) cp, "package", (size_t) 7) == 0) - { - /* This will point to space after 'package' so that a tag - can be made */ - const unsigned char *space = cp += 7; - - if (package == NULL) - package = vStringNew (); - else - vStringClear (package); - while (isspace (*cp)) - cp++; - while ((int) *cp != ';' && !isspace ((int) *cp)) - { - vStringPut (package, (int) *cp); - cp++; - } - vStringCatS (package, "::"); - - cp = space; /* Rewind */ - kind = K_PACKAGE; - spaceRequired = true; - qualified = true; - } - else if (strncmp((const char*) cp, "format", (size_t) 6) == 0) - { - cp += 6; - kind = K_FORMAT; - 
spaceRequired = true; - qualified = true; - } - else - { - if (isIdentifier1 (*cp)) - { - const unsigned char *p = cp; - while (isIdentifier (*p)) - ++p; - while (isspace (*p)) - ++p; - if ((int) *p == ':' && (int) *(p + 1) != ':') - kind = K_LABEL; - } - } - if (kind != K_NONE) - { - TRACE("cp0: %s\n", (const char *) cp); - if (spaceRequired && *cp && !isspace (*cp)) - continue; - - TRACE("cp1: %s\n", (const char *) cp); - while (isspace (*cp)) - cp++; - - while (!*cp || '#' == *cp) { /* Gobble up empty lines - and comments */ - cp = readLineFromInputFile (); - if (!cp) - goto END_MAIN_WHILE; - while (isspace (*cp)) - cp++; - } - - while (isIdentifier (*cp) || (K_PACKAGE == kind && ':' == *cp)) - { - vStringPut (name, (int) *cp); - cp++; - } - - if (K_FORMAT == kind && - vStringLength (name) == 0 && /* cp did not advance */ - '=' == *cp) - { - /* format's name is optional. If it's omitted, 'STDOUT' - is assumed. */ - vStringCatS (name, "STDOUT"); - } - - TRACE("name: %s\n", name->buffer); - - if (0 == vStringLength(name)) { - vStringClear(name); - continue; - } - - if (K_SUBROUTINE == kind) - { - /* - * isSubroutineDeclaration() may consume several lines. So - * we record line positions. 
- */ - initTagEntry(&e, vStringValue(name), kind); - - if (true == isSubroutineDeclaration(cp)) { - if (true == PerlKinds[K_SUBROUTINE_DECLARATION].enabled) { - kind = K_SUBROUTINE_DECLARATION; - e.kindIndex = kind; - } else { - vStringClear (name); - continue; - } - } - - makeTagEntry(&e); - - if (isXtagEnabled(XTAG_QUALIFIED_TAGS) && qualified && - package != NULL && vStringLength (package) > 0) - { - vString *const qualifiedName = vStringNew (); - vStringCopy (qualifiedName, package); - vStringCat (qualifiedName, name); - e.name = vStringValue(qualifiedName); - makeTagEntry(&e); - vStringDelete (qualifiedName); - } - } else if (vStringLength (name) > 0) - { - makeSimpleTag (name, kind); - if (isXtagEnabled(XTAG_QUALIFIED_TAGS) && qualified && - K_PACKAGE != kind && - package != NULL && vStringLength (package) > 0) - { - vString *const qualifiedName = vStringNew (); - vStringCopy (qualifiedName, package); - vStringCat (qualifiedName, name); - makeSimpleTag (qualifiedName, kind); - vStringDelete (qualifiedName); - } - } - vStringClear (name); - } - } - -END_MAIN_WHILE: - vStringDelete (name); - if (package != NULL) - vStringDelete (package); -} - -extern parserDefinition* PerlParser (void) -{ - static const char *const extensions [] = { "pl", "pm", "plx", "perl", NULL }; - parserDefinition* def = parserNew ("Perl"); - def->kindTable = PerlKinds; - def->kindCount = ARRAY_SIZE (PerlKinds); - def->extensions = extensions; - def->parser = findPerlTags; - return def; -} diff --git a/ctags/parsers/geany_ruby.c b/ctags/parsers/geany_ruby.c deleted file mode 100644 index 4f4fb8a98a..0000000000 --- a/ctags/parsers/geany_ruby.c +++ /dev/null @@ -1,564 +0,0 @@ -/* -* Copyright (c) 2000-2001, Thaddeus Covert -* Copyright (c) 2002 Matthias Veit -* Copyright (c) 2004 Elliott Hughes -* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. 
-* -* This module contains functions for generating tags for Ruby language -* files. -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include - -#include "debug.h" -#include "entry.h" -#include "parse.h" -#include "nestlevel.h" -#include "read.h" -#include "routines.h" -#include "vstring.h" - -/* -* DATA DECLARATIONS -*/ -typedef enum { - K_UNDEFINED = -1, K_CLASS, K_METHOD, K_MODULE, K_SINGLETON, -} rubyKind; - -/* -* DATA DEFINITIONS -*/ -static kindDefinition RubyKinds [] = { - { true, 'c', "class", "classes" }, - { true, 'f', "method", "methods" }, - { true, 'm', "module", "modules" }, - { true, 'S', "singletonMethod", "singleton methods" }, -#if 0 - /* Following two kinds are reserved. */ - { true, 'd', "describe", "describes and contexts for Rspec" }, - { true, 'C', "constant", "constants" }, -#endif -}; - -static NestingLevels* nesting = NULL; - -#define SCOPE_SEPARATOR '.' - -/* -* FUNCTION DEFINITIONS -*/ - -static void enterUnnamedScope (void); - -/* -* Returns a string describing the scope in 'nls'. -* We record the current scope as a list of entered scopes. -* Scopes corresponding to 'if' statements and the like are -* represented by empty strings. Scopes corresponding to -* modules and classes are represented by the name of the -* module or class. -*/ -static vString* nestingLevelsToScope (const NestingLevels* nls) -{ - int i; - unsigned int chunks_output = 0; - vString* result = vStringNew (); - for (i = 0; i < nls->n; ++i) - { - NestingLevel *nl = nestingLevelsGetNthFromRoot (nls, i); - tagEntryInfo *e = getEntryOfNestingLevel (nl); - if (e && strlen (e->name) > 0 && (!e->placeholder)) - { - if (chunks_output++ > 0) - vStringPut (result, SCOPE_SEPARATOR); - vStringCatS (result, e->name); - } - } - return result; -} - -/* -* Attempts to advance 's' past 'literal'. -* Returns true if it did, false (and leaves 's' where -* it was) otherwise. 
-*/ -static bool canMatch (const unsigned char** s, const char* literal, - bool (*end_check) (int)) -{ - const int literal_length = strlen (literal); - const int s_length = strlen ((const char *)*s); - - if (s_length < literal_length) - return false; - - const unsigned char next_char = *(*s + literal_length); - if (strncmp ((const char*) *s, literal, literal_length) != 0) - { - return false; - } - /* Additionally check that we're at the end of a token. */ - if (! end_check (next_char)) - { - return false; - } - *s += literal_length; - return true; -} - -static bool isIdentChar (int c) -{ - return (isalnum (c) || c == '_'); -} - -static bool notIdentChar (int c) -{ - return ! isIdentChar (c); -} - -static bool notOperatorChar (int c) -{ - return ! (c == '[' || c == ']' || - c == '=' || c == '!' || c == '~' || - c == '+' || c == '-' || - c == '@' || c == '*' || c == '/' || c == '%' || - c == '<' || c == '>' || - c == '&' || c == '^' || c == '|'); -} - -static bool isWhitespace (int c) -{ - return c == 0 || isspace (c); -} - -static bool canMatchKeyword (const unsigned char** s, const char* literal) -{ - return canMatch (s, literal, notIdentChar); -} - -/* -* Attempts to advance 'cp' past a Ruby operator method name. Returns -* true if successful (and copies the name into 'name'), false otherwise. -*/ -static bool parseRubyOperator (vString* name, const unsigned char** cp) -{ - static const char* RUBY_OPERATORS[] = { - "[]", "[]=", - "**", - "!", "~", "+@", "-@", - "*", "/", "%", - "+", "-", - ">>", "<<", - "&", - "^", "|", - "<=", "<", ">", ">=", - "<=>", "==", "===", "!=", "=~", "!~", - "`", - NULL - }; - int i; - for (i = 0; RUBY_OPERATORS[i] != NULL; ++i) - { - if (canMatch (cp, RUBY_OPERATORS[i], notOperatorChar)) - { - vStringCatS (name, RUBY_OPERATORS[i]); - return true; - } - } - return false; -} - -/* -* Emits a tag for the given 'name' of kind 'kind' at the current nesting. 
-*/ -static void emitRubyTag (vString* name, rubyKind kind) -{ - tagEntryInfo tag; - vString* scope; - tagEntryInfo *parent; - rubyKind parent_kind = K_UNDEFINED; - NestingLevel *lvl; - const char *unqualified_name; - const char *qualified_name; - int r; - - if (!RubyKinds[kind].enabled) { - return; - } - - scope = nestingLevelsToScope (nesting); - lvl = nestingLevelsGetCurrent (nesting); - parent = getEntryOfNestingLevel (lvl); - if (parent) - parent_kind = parent->kindIndex; - - qualified_name = vStringValue (name); - unqualified_name = strrchr (qualified_name, SCOPE_SEPARATOR); - if (unqualified_name && unqualified_name[1]) - { - if (unqualified_name > qualified_name) - { - if (vStringLength (scope) > 0) - vStringPut (scope, SCOPE_SEPARATOR); - vStringNCatS (scope, qualified_name, - unqualified_name - qualified_name); - /* assume module parent type for a lack of a better option */ - parent_kind = K_MODULE; - } - unqualified_name++; - } - else - unqualified_name = qualified_name; - - initTagEntry (&tag, unqualified_name, kind); - if (vStringLength (scope) > 0) { - Assert (0 <= parent_kind && - (size_t) parent_kind < (ARRAY_SIZE (RubyKinds))); - - tag.extensionFields.scopeKindIndex = parent_kind; - tag.extensionFields.scopeName = vStringValue (scope); - } - r = makeTagEntry (&tag); - - nestingLevelsPush (nesting, r); - - vStringClear (name); - vStringDelete (scope); -} - -/* Tests whether 'ch' is a character in 'list'. */ -static bool charIsIn (char ch, const char* list) -{ - return (strchr (list, ch) != NULL); -} - -/* Advances 'cp' over leading whitespace. */ -static void skipWhitespace (const unsigned char** cp) -{ - while (isspace (**cp)) - { - ++*cp; - } -} - -/* -* Copies the characters forming an identifier from *cp into -* name, leaving *cp pointing to the character after the identifier. -*/ -static rubyKind parseIdentifier ( - const unsigned char** cp, vString* name, rubyKind kind) -{ - /* Method names are slightly different to class and variable names. 
- * A method name may optionally end with a question mark, exclamation - * point or equals sign. These are all part of the name. - * A method name may also contain a period if it's a singleton method. - */ - bool had_sep = false; - const char* also_ok; - if (kind == K_METHOD) - { - also_ok = ".?!="; - } - else if (kind == K_SINGLETON) - { - also_ok = "?!="; - } - else - { - also_ok = ""; - } - - skipWhitespace (cp); - - /* Check for an anonymous (singleton) class such as "class << HTTP". */ - if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<') - { - return K_UNDEFINED; - } - - /* Check for operators such as "def []=(key, val)". */ - if (kind == K_METHOD || kind == K_SINGLETON) - { - if (parseRubyOperator (name, cp)) - { - return kind; - } - } - - /* Copy the identifier into 'name'. */ - while (**cp != 0 && (**cp == ':' || isIdentChar (**cp) || charIsIn (**cp, also_ok))) - { - char last_char = **cp; - - if (last_char == ':') - had_sep = true; - else - { - if (had_sep) - { - vStringPut (name, SCOPE_SEPARATOR); - had_sep = false; - } - vStringPut (name, last_char); - } - ++*cp; - - if (kind == K_METHOD) - { - /* Recognize singleton methods. */ - if (last_char == '.') - { - vStringClear (name); - return parseIdentifier (cp, name, K_SINGLETON); - } - } - - if (kind == K_METHOD || kind == K_SINGLETON) - { - /* Recognize characters which mark the end of a method name. */ - if (charIsIn (last_char, "?!=")) - { - break; - } - } - } - return kind; -} - -static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind) -{ - if (isspace (**cp)) - { - vString *name = vStringNew (); - rubyKind actual_kind = parseIdentifier (cp, name, expected_kind); - - if (actual_kind == K_UNDEFINED || vStringLength (name) == 0) - { - /* - * What kind of tags should we create for code like this? 
- * - * %w(self.clfloor clfloor).each do |name| - * module_eval <<-"end;" - * def #{name}(x, y=1) - * q, r = x.divmod(y) - * q = q.to_i - * return q, r - * end - * end; - * end - * - * Or this? - * - * class << HTTP - * - * For now, we don't create any. - */ - enterUnnamedScope (); - } - else - { - emitRubyTag (name, actual_kind); - } - vStringDelete (name); - } -} - -static void enterUnnamedScope (void) -{ - int r = CORK_NIL; - NestingLevel *parent = nestingLevelsGetCurrent (nesting); - tagEntryInfo *e_parent = getEntryOfNestingLevel (parent); - - if (e_parent) - { - tagEntryInfo e; - initTagEntry (&e, "", e_parent->kindIndex); - e.placeholder = 1; - r = makeTagEntry (&e); - } - nestingLevelsPush (nesting, r); -} - -static void findRubyTags (void) -{ - const unsigned char *line; - bool inMultiLineComment = false; - - nesting = nestingLevelsNew (0); - - /* FIXME: this whole scheme is wrong, because Ruby isn't line-based. - * You could perfectly well write: - * - * def - * method - * puts("hello") - * end - * - * if you wished, and this function would fail to recognize anything. 
- */ - while ((line = readLineFromInputFile ()) != NULL) - { - const unsigned char *cp = line; - /* if we expect a separator after a while, for, or until statement - * separators are "do", ";" or newline */ - bool expect_separator = false; - - if (canMatch (&cp, "=begin", isWhitespace)) - { - inMultiLineComment = true; - continue; - } - if (canMatch (&cp, "=end", isWhitespace)) - { - inMultiLineComment = false; - continue; - } - if (inMultiLineComment) - continue; - - skipWhitespace (&cp); - - /* Avoid mistakenly starting a scope for modifiers such as - * - * return if - * - * FIXME: this is fooled by code such as - * - * result = if - * - * else - * - * end - * - * FIXME: we're also fooled if someone does something heinous such as - * - * puts("hello") \ - * unless - */ - if (canMatchKeyword (&cp, "for") || - canMatchKeyword (&cp, "until") || - canMatchKeyword (&cp, "while")) - { - expect_separator = true; - enterUnnamedScope (); - } - else if (canMatchKeyword (&cp, "case") || - canMatchKeyword (&cp, "if") || - canMatchKeyword (&cp, "unless")) - { - enterUnnamedScope (); - } - - /* - * "module M", "class C" and "def m" should only be at the beginning - * of a line. - */ - if (canMatchKeyword (&cp, "module")) - { - readAndEmitTag (&cp, K_MODULE); - } - else if (canMatchKeyword (&cp, "class")) - { - readAndEmitTag (&cp, K_CLASS); - } - else if (canMatchKeyword (&cp, "def")) - { - rubyKind kind = K_METHOD; - NestingLevel *nl = nestingLevelsGetCurrent (nesting); - tagEntryInfo *e = getEntryOfNestingLevel (nl); - - /* if the def is inside an unnamed scope at the class level, assume - * it's from a singleton from a construct like this: - * - * class C - * class << self - * def singleton - * ... - * end - * end - * end - */ - if (e && e->kindIndex == K_CLASS && strlen (e->name) == 0) - kind = K_SINGLETON; - readAndEmitTag (&cp, kind); - } - while (*cp != '\0') - { - /* FIXME: we don't cope with here documents, - * or regular expression literals, or ... you get the idea. 
- * Hopefully, the restriction above that insists on seeing - * definitions at the starts of lines should keep us out of - * mischief. - */ - if (inMultiLineComment || isspace (*cp)) - { - ++cp; - } - else if (*cp == '#') - { - /* FIXME: this is wrong, but there *probably* won't be a - * definition after an interpolated string (where # doesn't - * mean 'comment'). - */ - break; - } - else if (canMatchKeyword (&cp, "begin")) - { - enterUnnamedScope (); - } - else if (canMatchKeyword (&cp, "do")) - { - if (! expect_separator) - enterUnnamedScope (); - else - expect_separator = false; - } - else if (canMatchKeyword (&cp, "end") && nesting->n > 0) - { - /* Leave the most recent scope. */ - nestingLevelsPop (nesting); - } - else if (*cp == '"') - { - /* Skip string literals. - * FIXME: should cope with escapes and interpolation. - */ - do { - ++cp; - } while (*cp != 0 && *cp != '"'); - if (*cp == '"') - cp++; /* skip the last found '"' */ - } - else if (*cp == ';') - { - ++cp; - expect_separator = false; - } - else if (*cp != '\0') - { - do - ++cp; - while (isIdentChar (*cp)); - } - } - } - nestingLevelsFree (nesting); -} - -extern parserDefinition* RubyParser (void) -{ - static const char *const extensions [] = { "rb", "ruby", NULL }; - parserDefinition* def = parserNew ("Ruby"); - def->kindTable = RubyKinds; - def->kindCount = ARRAY_SIZE (RubyKinds); - def->extensions = extensions; - def->parser = findRubyTags; - def->useCork = CORK_QUEUE; - return def; -} diff --git a/ctags/parsers/go.c b/ctags/parsers/go.c new file mode 100644 index 0000000000..d642d24b44 --- /dev/null +++ b/ctags/parsers/go.c @@ -0,0 +1,1416 @@ +/* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+*
+*   Reference:
+*       https://golang.org/ref/spec
+*/
+
+
+/*
+ *	 INCLUDE FILES
+ */
+#include "general.h"        /* must always come first */
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "read.h"
+#include "numarray.h"
+#include "objpool.h"
+#include "parse.h"
+#include "routines.h"
+#include "vstring.h"
+#include "xtag.h"
+#include "field.h"
+#include "htable.h"
+
+#include <string.h>
+
+/*
+ *	 MACROS
+ */
+#define MAX_COLLECTOR_LENGTH 512
+#define isType(token,t) (bool) ((token)->type == (t))
+#define isKeyword(token,k) (bool) ((token)->keyword == (k))
+/* Every non-ASCII byte (>= 0x80) is accepted as part of an identifier so
+ * that multi-byte UTF-8 sequences, whose continuation bytes start at 0x80,
+ * are not split in the middle. */
+#define isStartIdentChar(c) (isalpha (c) ||  (c) == '_' || (c) >= 128) /* XXX UTF-8 */
+#define isIdentChar(c) (isStartIdentChar (c) || isdigit (c))
+#define newToken() (objPoolGet (TokenPool))
+#define deleteToken(t) (objPoolPut (TokenPool, (t)))
+
+/*
+ *	 DATA DECLARATIONS
+ */
+
+enum eKeywordId {
+	KEYWORD_package,
+	KEYWORD_import,
+	KEYWORD_const,
+	KEYWORD_type,
+	KEYWORD_var,
+	KEYWORD_func,
+	KEYWORD_struct,
+	KEYWORD_interface,
+	KEYWORD_map,
+	KEYWORD_chan
+};
+typedef int keywordId; /* to allow KEYWORD_NONE */
+
+typedef enum eTokenType {
+	TOKEN_NONE = -1,
+	// Token not important for top-level Go parsing
+	TOKEN_OTHER,
+	TOKEN_KEYWORD,
+	TOKEN_IDENTIFIER,
+	TOKEN_STRING,
+	TOKEN_OPEN_PAREN,
+	TOKEN_CLOSE_PAREN,
+	TOKEN_OPEN_CURLY,
+	TOKEN_CLOSE_CURLY,
+	TOKEN_OPEN_SQUARE,
+	TOKEN_CLOSE_SQUARE,
+	TOKEN_SEMICOLON,
+	TOKEN_STAR,
+	TOKEN_LEFT_ARROW,
+	TOKEN_DOT,
+	TOKEN_COMMA,
+	TOKEN_EQUAL,
+	TOKEN_3DOTS,
+	TOKEN_EOF
+} tokenType;
+
+typedef struct sTokenInfo {
+	tokenType type;
+	keywordId keyword;
+	vString *string;		/* the name of the token */
+	unsigned long lineNumber;	/* line number of tag */
+	MIOPos filePosition;		/* file position of line containing name */
+	int c;						/* Used in AppendTokenToVString */
+} tokenInfo;
+
+/* Accumulates the text of the tokens read so far (signatures, type names).
+ * last_len is the length of str before the most recent append, which lets
+ * collectorTruncate() drop the last appended item again. */
+typedef struct sCollector {
+	vString *str;
+	size_t last_len;
+} collector;
+
+/*
+*   DATA DEFINITIONS
+*/
+
+static int Lang_go;
+static objPool *TokenPool = NULL;
+
+/* Indices into GoKinds[]; the order of both must be kept in sync. */
+typedef enum {
+	GOTAG_UNDEFINED = -1,
+	GOTAG_PACKAGE,
+	GOTAG_FUNCTION,
+	GOTAG_CONST,
+	GOTAG_TYPE,
+	GOTAG_VAR,
+	GOTAG_STRUCT,
+	GOTAG_INTERFACE,
+	GOTAG_MEMBER,
+	GOTAG_ANONMEMBER,
+	GOTAG_METHODSPEC,
+	GOTAG_UNKNOWN,
+	GOTAG_PACKAGE_NAME,
+	GOTAG_TALIAS,
+	GOTAG_RECEIVER,
+} goKind;
+
+typedef enum {
+	R_GOTAG_PACKAGE_IMPORTED,
+} GoPackageRole;
+
+static roleDefinition GoPackageRoles [] = {
+	{ true, "imported", "imported package" },
+};
+
+typedef enum {
+	R_GOTAG_UNKNOWN_RECEIVER,
+} GoUnknownRole;
+
+static roleDefinition GoUnknownRoles [] = {
+	{ true, "receiverType", "receiver type" },
+};
+
+static kindDefinition GoKinds[] = {
+	{true, 'p', "package", "packages",
+	  .referenceOnly = false, ATTACH_ROLES (GoPackageRoles)},
+	{true, 'f', "func", "functions"},
+	{true, 'c', "const", "constants"},
+	{true, 't', "type", "types"},
+	{true, 'v', "var", "variables"},
+	{true, 's', "struct", "structs"},
+	{true, 'i', "interface", "interfaces"},
+	{true, 'm', "member", "struct members"},
+	{true, 'M', "anonMember", "struct anonymous members"},
+	{true, 'n', "methodSpec", "interface method specification"},
+	{true, 'u', "unknown", "unknown",
+	  .referenceOnly = true, ATTACH_ROLES (GoUnknownRoles)},
+	{true, 'P', "packageName", "name for specifying imported package"},
+	{true, 'a', "talias", "type aliases"},
+	{false,'R', "receiver", "receivers"},
+};
+
+static const keywordTable GoKeywordTable[] = {
+	{"package", KEYWORD_package},
+	{"import", KEYWORD_import},
+	{"const", KEYWORD_const},
+	{"type", KEYWORD_type},
+	{"var", KEYWORD_var},
+	{"func", KEYWORD_func},
+	{"struct", KEYWORD_struct},
+	{"interface", KEYWORD_interface},
+	{"map", KEYWORD_map},
+	{"chan", KEYWORD_chan}
+};
+
+/* Indices into GoFields[]; the order of both must be kept in sync. */
+typedef enum {
+	F_PACKAGE,
+	F_PACKAGE_NAME,
+	F_HOW_IMPORTED,
+} goField;
+
+static fieldDefinition GoFields [] = {
+	{
+		.name = "package",
+		.description = "the real package specified by the package name",
+		.enabled = true,
+	},
+	{
+		.name = "packageName",
+		.description = "the name for referring the package",
+		.enabled = true,
+	},
+	{
+		.name = "howImported",
+		.description = "how the package is imported (\"inline\" for `.' or \"init\" for `_')",
+		.enabled = false,
+	},
+};
+
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/* Token pool callback (registered in initialize()): allocates a tokenInfo
+ * together with its string buffer. The remaining fields are not set here. */
+static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
+{
+	tokenInfo *const token = xMalloc (1, tokenInfo);
+	token->string = vStringNew ();
+	return token;
+}
+
+/* Token pool callback (registered in initialize()): resets type, keyword
+ * and string, and stamps the token with the current input position. */
+static void clearPoolToken (void *data)
+{
+	tokenInfo *token = data;
+
+	token->type = TOKEN_NONE;
+	token->keyword = KEYWORD_NONE;
+	token->lineNumber   = getInputLineNumber ();
+	token->filePosition = getInputFilePosition ();
+	vStringClear (token->string);
+}
+
+/* Copies type, keyword, string and position from other to dest.
+ * The raw character field `c' is not copied. */
+static void copyToken (tokenInfo *const dest, const tokenInfo *const other)
+{
+	dest->type = other->type;
+	dest->keyword = other->keyword;
+	vStringCopy(dest->string, other->string);
+	dest->lineNumber = other->lineNumber;
+	dest->filePosition = other->filePosition;
+}
+
+/* Token pool callback (registered in initialize()): releases a token and
+ * the string it owns. */
+static void deletePoolToken (void* data)
+{
+	tokenInfo * const token = data;
+
+	vStringDelete (token->string);
+	eFree (token);
+}
+
+/* Parser initialization hook: remembers the language id used for keyword
+ * lookup and creates the token pool. */
+static void initialize (const langType language)
+{
+	Lang_go = language;
+	TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
+}
+
+/* Parser finalization hook: destroys the token pool, but only if
+ * initialize() has been run. */
+static void finalize (const langType language, bool initialized)
+{
+	if (!initialized)
+		return;
+
+	objPoolDelete (TokenPool);
+}
+
+/*
+ *   Parsing functions
+ */
+
+/* Reads the rest of a string literal into `string', stopping at the closing
+ * `delimiter' or at EOF. The delimiters themselves are not stored.
+ * Unless the delimiter is a backquote, a backslash escapes the next
+ * character; the backslash is kept in the output except in front of a
+ * single or double quote. */
+static void parseString (vString *const string, const int delimiter)
+{
+	bool end = false;
+	while (!end)
+	{
+		int c = getcFromInputFile ();
+		if (c == EOF)
+			end = true;
+		else if (c == '\\' && delimiter != '`')
+		{
+			c = getcFromInputFile ();
+			if (c == EOF)
+			{
+				/* Input ended right after the backslash: stop here
+				 * instead of storing EOF as if it were a character. */
+				end = true;
+			}
+			else
+			{
+				if (c != '\'' && c != '\"')
+					vStringPut (string, '\\');
+				vStringPut (string, c);
+			}
+		}
+		else if (c == delimiter)
+			end = true;
+		else
+			vStringPut (string, c);
+	}
+}
+
+/* Stores firstChar and all following identifier characters in `string'.
+ * The first character that does not belong to the identifier is pushed
+ * back to the input. */
+static void parseIdentifier (vString *const string, const int firstChar)
+{
+	int c = firstChar;
+	do
+	{
+		vStringPut (string, c);
+		c = getcFromInputFile ();
+	} while (isIdentChar (c));
+	ungetcToInputFile (c);		/* always unget, LF might add a semicolon */
+}
+
+/* Returns true if nothing has been collected yet. */
+static bool collectorIsEmpty(collector *collector)
+{
+	return !vStringLength(collector->str);
+}
+
+/* Appends one character to the collector while normalizing spacing:
+ * a space directly after "..." or after '(' is dropped, and a space that
+ * precedes ')' is removed before the ')' is appended. */
+static void collectorPut (collector *collector, char c)
+{
+	if ((vStringLength(collector->str) > 2)
+		&& strcmp (vStringValue (collector->str) + (vStringLength(collector->str) - 3),
+				  "...") == 0
+		&& c == ' ')
+		return;
+	else if (vStringLength(collector->str) > 0)
+	{
+		if (vStringLast(collector->str) == '(' && c == ' ')
+			return;
+		else if (vStringLast(collector->str) == ' ' && c == ')')
+			vStringChop(collector->str);
+	}
+
+	collector->last_len = vStringLength (collector->str);
+	vStringPut (collector->str, c);
+}
+
+/* Appends a C string to the collector, remembering the previous length. */
+static void collectorCatS (collector *collector, const char *cstr)
+{
+	collector->last_len = vStringLength (collector->str);
+	vStringCatS (collector->str, cstr);
+}
+
+/* Appends a vString to the collector, remembering the previous length. */
+static void collectorCat (collector *collector, vString *str)
+{
+	collector->last_len = vStringLength (collector->str);
+	vStringCat (collector->str, str);
+}
+
+/* Appends the textual form of `token' to the collector. Tokens that carry
+ * no string of their own are represented by their raw character token->c;
+ * nothing is appended when that character is EOF. */
+static void collectorAppendToken (collector *collector, const tokenInfo *const token)
+{
+	if (token->type == TOKEN_LEFT_ARROW)
+		collectorCatS (collector, "<-");
+	else if (token->type == TOKEN_STRING)
+	{
+		// only struct member annotations can appear in function prototypes
+		// so only `` type strings are possible
+		collector->last_len = vStringLength (collector->str);
+		vStringPut(collector->str, '`');
+		vStringCat(collector->str, token->string);
+		vStringPut(collector->str, '`');
+	}
+	else if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_KEYWORD)
+		collectorCat (collector, token->string);
+	else if (token->type == TOKEN_3DOTS)
+	{
+		if ((vStringLength (collector->str) > 0)
+			&& vStringLast(collector->str) != ' ')
+			collectorPut (collector, ' ');
+		collectorCatS (collector, "...");
+	}
+	else if (token->c != EOF)
+		collectorPut (collector, token->c);
+}
+
+/* Finishes a collected string: when dropLast is set, the most recently
+ * appended item is cut off again (the string is truncated to last_len);
+ * afterwards the string is passed through vStringStripLeading() and
+ * vStringStripTrailing(). */
+static void collectorTruncate (collector *collector, bool dropLast)
+{
+	if (dropLast)
+		vStringTruncate (collector->str, collector->last_len);
+
+	vStringStripLeading (collector->str);
+	vStringStripTrailing (collector->str);
+}
+
+/* Reads the next token from the input into `token'.
+ *
+ * A newline that follows an identifier, a string, an "other" token or a
+ * closing bracket is turned into a semicolon token. Comments are skipped:
+ * a line comment acts like a newline, a block comment like a newline if
+ * it contains one and like a space otherwise.
+ *
+ * If `collector' is not NULL and holds fewer than MAX_COLLECTOR_LENGTH
+ * characters, one space for the skipped whitespace and the text of the
+ * token are appended to it.
+ *
+ * The type of the previous token is kept in a function-local static. */
+static void readTokenFull (tokenInfo *const token, collector *collector)
+{
+	int c;
+	static tokenType lastTokenType = TOKEN_NONE;
+	bool firstWhitespace = true;
+	bool whitespace;
+
+	token->c = EOF;
+	token->type = TOKEN_NONE;
+	token->keyword = KEYWORD_NONE;
+	vStringClear (token->string);
+
+getNextChar:
+	do
+	{
+		c = getcFromInputFile ();
+		token->lineNumber = getInputLineNumber ();
+		token->filePosition = getInputFilePosition ();
+		if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER ||
+						  lastTokenType == TOKEN_STRING ||
+						  lastTokenType == TOKEN_OTHER ||
+						  lastTokenType == TOKEN_CLOSE_PAREN ||
+						  lastTokenType == TOKEN_CLOSE_CURLY ||
+						  lastTokenType == TOKEN_CLOSE_SQUARE))
+		{
+			c = ';';  // semicolon injection
+		}
+		whitespace = c == '\t'  ||  c == ' ' ||  c == '\r' || c == '\n';
+		if (collector && whitespace && firstWhitespace && vStringLength (collector->str) < MAX_COLLECTOR_LENGTH)
+		{
+			firstWhitespace = false;
+			collectorPut (collector, ' ');
+		}
+	}
+	while (whitespace);
+
+	switch (c)
+	{
+		case EOF:
+			token->type = TOKEN_EOF;
+			break;
+
+		case ';':
+			token->type = TOKEN_SEMICOLON;
+			break;
+
+		case '/':
+			{
+				bool hasNewline = false;
+				int d = getcFromInputFile ();
+				switch (d)
+				{
+					case '/':
+						skipToCharacterInInputFile ('\n');
+						/* Line comments start with the
+						 * character sequence // and
+						 * continue through the next
+						 * newline. A line comment acts
+						 * like a newline.  */
+						ungetcToInputFile ('\n');
+						goto getNextChar;
+					case '*':
+						do
+						{
+							do
+							{
+								d = getcFromInputFile ();
+								if (d == '\n')
+								{
+									hasNewline = true;
+								}
+							} while (d != EOF && d != '*');
+
+							c = getcFromInputFile ();
+							if (c == '/')
+								break;
+							else
+								ungetcToInputFile (c);
+						} while (c != EOF && c != '\0');
+
+						ungetcToInputFile (hasNewline ? '\n' : ' ');
+						goto getNextChar;
+					default:
+						token->type = TOKEN_OTHER;
+						ungetcToInputFile (d);
+						break;
+				}
+			}
+			break;
+
+		case '"':
+		case '\'':
+		case '`':
+			token->type = TOKEN_STRING;
+			parseString (token->string, c);
+			token->lineNumber = getInputLineNumber ();
+			token->filePosition = getInputFilePosition ();
+			break;
+
+		case '<':
+			{
+				int d = getcFromInputFile ();
+				if (d == '-')
+					token->type = TOKEN_LEFT_ARROW;
+				else
+				{
+					ungetcToInputFile (d);
+					token->type = TOKEN_OTHER;
+				}
+			}
+			break;
+
+		case '(':
+			token->type = TOKEN_OPEN_PAREN;
+			break;
+
+		case ')':
+			token->type = TOKEN_CLOSE_PAREN;
+			break;
+
+		case '{':
+			token->type = TOKEN_OPEN_CURLY;
+			break;
+
+		case '}':
+			token->type = TOKEN_CLOSE_CURLY;
+			break;
+
+		case '[':
+			token->type = TOKEN_OPEN_SQUARE;
+			break;
+
+		case ']':
+			token->type = TOKEN_CLOSE_SQUARE;
+			break;
+
+		case '*':
+			token->type = TOKEN_STAR;
+			break;
+
+		case '.':
+			{
+				/* "..." is a token of its own; anything shorter is
+				 * pushed back and a single dot is reported. */
+				int d, e;
+				d = getcFromInputFile ();
+				if (d == '.')
+				{
+					e = getcFromInputFile ();
+					if (e == '.')
+					{
+						token->type = TOKEN_3DOTS;
+						break;
+					}
+					else
+					{
+						ungetcToInputFile (e);
+						ungetcToInputFile (d);
+					}
+				}
+				else
+					ungetcToInputFile (d);
+			}
+			token->type = TOKEN_DOT;
+			break;
+
+		case ',':
+			token->type = TOKEN_COMMA;
+			break;
+
+		case '=':
+			token->type = TOKEN_EQUAL;
+			break;
+
+		default:
+			if (isStartIdentChar (c))
+			{
+				parseIdentifier (token->string, c);
+				token->lineNumber = getInputLineNumber ();
+				token->filePosition = getInputFilePosition ();
+				token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
+				if (isKeyword (token, KEYWORD_NONE))
+					token->type = TOKEN_IDENTIFIER;
+				else
+					token->type = TOKEN_KEYWORD;
+			}
+			else
+				token->type = TOKEN_OTHER;
+			break;
+	}
+
+	token->c = c;
+
+	if (collector && vStringLength (collector->str) < MAX_COLLECTOR_LENGTH)
+		collectorAppendToken (collector, token);
+
+	lastTokenType = token->type;
+}
+
+/* Reads the next token without collecting its text. */
+static void readToken (tokenInfo *const token)
+{
+	readTokenFull (token, NULL);
+}
+
+/* If `token' is an opening bracket, reads tokens until the matching
+ * closing bracket (nested brackets of the same sort are counted) or EOF
+ * and returns true; `token' is then the closing bracket or EOF.
+ * Returns false without reading anything for any other token. */
+static bool skipToMatchedNoRead (tokenInfo *const token, collector *collector)
+{
+	int nest_level = 0;
+	tokenType open_token = token->type;
+	tokenType close_token;
+
+	switch (open_token)
+	{
+		case TOKEN_OPEN_PAREN:
+			close_token = TOKEN_CLOSE_PAREN;
+			break;
+		case TOKEN_OPEN_CURLY:
+			close_token = TOKEN_CLOSE_CURLY;
+			break;
+		case TOKEN_OPEN_SQUARE:
+			close_token = TOKEN_CLOSE_SQUARE;
+			break;
+		default:
+			return false;
+	}
+
+	/*
+	 * This routine will skip to a matching closing token.
+	 * It will also handle nested tokens.
+	 */
+	nest_level++;
+	while (nest_level > 0 && !isType (token, TOKEN_EOF))
+	{
+		readTokenFull (token, collector);
+		if (isType (token, open_token))
+			nest_level++;
+		else if (isType (token, close_token))
+			nest_level--;
+	}
+
+	return true;
+}
+
+/* Like skipToMatchedNoRead(), but additionally reads the token that
+ * follows the closing bracket. */
+static void skipToMatched (tokenInfo *const token, collector *collector)
+{
+	if (skipToMatchedNoRead (token, collector))
+		readTokenFull (token, collector);
+}
+
+/* Skips over a type starting at `token', feeding the tokens it reads to
+ * `collector' (which may be NULL). Returns true if a type was recognized,
+ * in which case `token' is the first token after it; returns false and
+ * reads nothing otherwise. */
+static bool skipType (tokenInfo *const token, collector *collector)
+{
+	// Type      = TypeName | TypeLit | "(" Type ")" .
+	// Skips also function multiple return values "(" Type {"," Type} ")"
+	if (isType (token, TOKEN_OPEN_PAREN))
+	{
+		skipToMatched (token, collector);
+		return true;
+	}
+
+	// TypeName  = QualifiedIdent.
+	// QualifiedIdent = [ PackageName "." ] identifier .
+	// PackageName    = identifier .
+	if (isType (token, TOKEN_IDENTIFIER))
+	{
+		readTokenFull (token, collector);
+		if (isType (token, TOKEN_DOT))
+		{
+			readTokenFull (token, collector);
+			if (isType (token, TOKEN_IDENTIFIER))
+				readTokenFull (token, collector);
+		}
+		return true;
+	}
+
+	// StructType     = "struct" "{" { FieldDecl ";" } "}"
+	// InterfaceType      = "interface" "{" { MethodSpec ";" } "}" .
+	if (isKeyword (token, KEYWORD_struct) || isKeyword (token, KEYWORD_interface))
+	{
+		readTokenFull (token, collector);
+		// skip over "{}"
+		skipToMatched (token, collector);
+		return true;
+	}
+
+	// ArrayType   = "[" ArrayLength "]" ElementType .
+	// SliceType = "[" "]" ElementType .
+	// ElementType = Type .
+	if (isType (token, TOKEN_OPEN_SQUARE))
+	{
+		skipToMatched (token, collector);
+		return skipType (token, collector);
+	}
+
+	// PointerType = "*" BaseType .
+	// BaseType = Type .
+	// ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType .
+	if (isType (token, TOKEN_STAR) || isKeyword (token, KEYWORD_chan) || isType (token, TOKEN_LEFT_ARROW))
+	{
+		readTokenFull (token, collector);
+		return skipType (token, collector);
+	}
+
+	// MapType     = "map" "[" KeyType "]" ElementType .
+	// KeyType     = Type .
+	if (isKeyword (token, KEYWORD_map))
+	{
+		readTokenFull (token, collector);
+		// skip over "[]"
+		skipToMatched (token, collector);
+		return skipType (token, collector);
+	}
+
+	// FunctionType   = "func" Signature .
+	// Signature      = Parameters [ Result ] .
+	// Result         = Parameters | Type .
+	// Parameters     = "(" [ ParameterList [ "," ] ] ")" .
+	if (isKeyword (token, KEYWORD_func))
+	{
+		readTokenFull (token, collector);
+		// Parameters, skip over "()"
+		skipToMatched (token, collector);
+		// Result is parameters or type or nothing.  skipType treats anything
+		// surrounded by parentheses as a type, and does nothing if what
+		// follows is not a type.
+		return skipType (token, collector);
+	}
+
+	return false;
+}
+
+/* Emits a tag named after `token' with the given kind and role.
+ * `scope' is the cork index of the enclosing tag (or CORK_NIL); argList
+ * and typeref, when not NULL, become the signature and the "typename"
+ * type reference of the tag. Returns the value of makeTagEntry(), or
+ * CORK_NIL for a variable named `_'. */
+static int makeTagFull (tokenInfo *const token, const goKind kind,
+						const int scope, const char *argList, const char *typeref,
+						const int role)
+{
+	const char *const name = vStringValue (token->string);
+
+	tagEntryInfo e;
+
+	/* Don't record `_' placeholder variable  */
+	if (kind == GOTAG_VAR && name[0] == '_' && name[1] == '\0')
+		return CORK_NIL;
+
+	initRefTagEntry (&e, name, kind, role);
+
+	e.lineNumber = token->lineNumber;
+	e.filePosition = token->filePosition;
+	if (argList)
+		e.extensionFields.signature = argList;
+	if (typeref)
+	{
+		/* Follows Cxx parser convention */
+		e.extensionFields.typeRef [0] = "typename";
+		e.extensionFields.typeRef [1] = typeref;
+	}
+
+	e.extensionFields.scopeIndex = scope;
+	return makeTagEntry (&e);
+}
+
+/* Emits a definition tag; see makeTagFull(). */
+static int makeTag (tokenInfo *const token, const goKind kind,
+					const int scope, const char *argList, const char *typeref)
+{
+	return makeTagFull (token, kind, scope, argList, typeref,
+						ROLE_DEFINITION_INDEX);
+}
+
+/* Emits a scope-less reference tag with the given role; see makeTagFull(). */
+static int makeRefTag (tokenInfo *const token, const goKind kind,
+					   const int role)
+{
+	return makeTagFull (token, kind, CORK_NIL, NULL, NULL, role);
+}
+
+/* Handles "package <name>": tags the identifier following the keyword and
+ * returns the cork index of that tag, or CORK_NIL if no identifier follows. */
+static int parsePackage (tokenInfo *const token)
+{
+	readToken (token);
+	if (isType (token, TOKEN_IDENTIFIER))
+	{
+		return makeTag (token, GOTAG_PACKAGE, CORK_NIL, NULL, NULL);
+	}
+	else
+		return CORK_NIL;
+}
+
+/* Parses a method receiver; `token' is the opening parenthesis on entry
+ * and the token after the closing parenthesis on return.
+ *
+ * The first identifier found is tagged as receiver and its cork index is
+ * stored in *corkIndex (CORK_NIL if there is none). The last identifier
+ * before the closing parenthesis is returned as the receiver type in a
+ * newly obtained token, which the caller has to release with
+ * deleteToken(). NULL is returned if no identifier was found or the
+ * input ended before the closing parenthesis. */
+static tokenInfo * parseReceiver (tokenInfo *const token, int *corkIndex)
+{
+	tokenInfo *receiver_type_token = NULL;
+	int nest_level = 1;
+
+	*corkIndex = CORK_NIL;
+
+	/* Looking for an identifier before ')'. */
+	while (nest_level > 0 && !isType (token, TOKEN_EOF))
+	{
+		if (isType (token, TOKEN_IDENTIFIER))
+		{
+			if (*corkIndex == CORK_NIL)
+				*corkIndex = makeTag (token, GOTAG_RECEIVER, CORK_NIL, NULL, NULL);
+			if (!receiver_type_token)
+				receiver_type_token = newToken ();
+			copyToken (receiver_type_token, token);
+		}
+
+		readToken (token);
+		if (isType (token, TOKEN_OPEN_PAREN))
+			nest_level++;
+		else if (isType (token, TOKEN_CLOSE_PAREN))
+			nest_level--;
+	}
+
+	if (nest_level > 0 && receiver_type_token)
+	{
+		deleteToken (receiver_type_token);
+		receiver_type_token = NULL;
+	}
+
+	if (receiver_type_token)
+	{
+		tagEntryInfo *e = getEntryInCorkQueue (*corkIndex);
+		if (e)
+		{
+			e->extensionFields.typeRef [0] = eStrdup ("typename");
+			e->extensionFields.typeRef [1] = vStringStrdup (receiver_type_token->string);
+		}
+	}
+	readToken (token);
+	return receiver_type_token;
+}
+
+/* Handles a "func" declaration; `token' is the "func" keyword on entry.
+ *
+ * The function is tagged with its parameter list as signature and, if a
+ * result is declared, with the result as type reference. A method is
+ * scoped to its receiver type; if that type has no tag in `scope', a
+ * reference tag of the "unknown" kind is created for it. The function
+ * body is skipped and the end line of the tag is recorded. */
+static void parseFunctionOrMethod (tokenInfo *const token, const int scope)
+{
+	int receiver_cork = CORK_NIL;
+	tokenInfo *receiver_type_token = NULL;
+
+	// FunctionDecl = "func" identifier Signature [ Body ] .
+	// Body         = Block.
+	//
+	// MethodDecl   = "func" Receiver MethodName Signature [ Body ] .
+	// Receiver     = "(" [ identifier ] [ "*" ] BaseTypeName ")" .
+	// BaseTypeName = identifier .
+
+	// Pick up receiver type.
+	readToken (token);
+	if (isType (token, TOKEN_OPEN_PAREN))
+		receiver_type_token = parseReceiver (token, &receiver_cork);
+
+	if (isType (token, TOKEN_IDENTIFIER))
+	{
+		int cork;
+		tagEntryInfo *e = NULL;
+		tokenInfo *functionToken = newToken ();
+		int func_scope;
+
+		copyToken (functionToken, token);
+
+		// Start recording signature
+		vString *buffer = vStringNew ();
+		collector collector = { .str = buffer, .last_len = 0, };
+
+		// Skip over parameters.
+		readTokenFull (token, &collector);
+		skipToMatchedNoRead (token, &collector);
+
+		collectorTruncate (&collector, false);
+		if (receiver_type_token)
+		{
+			func_scope = anyEntryInScope (scope, vStringValue (receiver_type_token->string));
+			if (func_scope == CORK_NIL)
+				func_scope = makeTagFull(receiver_type_token, GOTAG_UNKNOWN,
+										 scope, NULL, NULL,
+										 R_GOTAG_UNKNOWN_RECEIVER);
+		}
+		else
+			func_scope = scope;
+
+		cork = makeTag (functionToken, GOTAG_FUNCTION,
+						func_scope, vStringValue (buffer), NULL);
+		if ((e = getEntryInCorkQueue (cork)))
+		{
+			/* The receiver tag was created before the function tag
+			 * existed; scope it to the function now. */
+			tagEntryInfo *receiver = getEntryInCorkQueue (receiver_cork);
+			if (receiver)
+				receiver->extensionFields.scopeIndex = cork;
+		}
+
+		deleteToken (functionToken);
+
+		/* Reuse the buffer for collecting the result type. */
+		vStringClear (collector.str);
+		collector.last_len = 0;
+
+		readTokenFull (token, &collector);
+
+		// Skip over result.
+		skipType (token, &collector);
+
+		// Neither "{" nor " {".
+		if (!(isType (token, TOKEN_OPEN_CURLY) && collector.last_len < 2))
+		{
+			collectorTruncate(&collector, isType (token, TOKEN_OPEN_CURLY));
+			if (e)
+			{
+				e->extensionFields.typeRef [0] = eStrdup ("typename");
+				e->extensionFields.typeRef [1] = vStringDeleteUnwrap (buffer);
+				buffer = NULL;
+			}
+		}
+
+		if (buffer)
+			vStringDelete (buffer);
+
+		// Skip over function body.
+		if (isType (token, TOKEN_OPEN_CURLY))
+		{
+			skipToMatched (token, NULL);
+			if (e)
+				e->extensionFields.endLine = getInputLineNumber ();
+		}
+	}
+
+	if (receiver_type_token)
+		deleteToken(receiver_type_token);
+}
+
+/* Sets the type reference of every tag listed in `corks' to `type'.
+ * If a tag named `type' is found in `scope', the name of its kind is used
+ * as the first part of the type reference, otherwise "typename". */
+static void attachTypeRefField (int scope, intArray *corks, const char *const type)
+{
+	int type_cork = anyEntryInScope (scope, type);
+	tagEntryInfo *type_e = getEntryInCorkQueue (type_cork);
+
+	for (unsigned int i = 0; i < intArrayCount (corks); i++)
+	{
+		int cork = intArrayItem (corks, i);
+		tagEntryInfo *e = getEntryInCorkQueue (cork);
+		if (!e)
+			continue;
+		e->extensionFields.typeRef [0] = eStrdup (type_e
+												  ?GoKinds[type_e->kindIndex].name
+												  :"typename");
+		e->extensionFields.typeRef [1] = eStrdup (type);
+	}
+}
+
+/* Parses the body of an interface type; `token' is the "interface"
+ * keyword on entry and `scope' the cork index of the interface tag.
+ * Method specifications are tagged; embedded interface names are joined
+ * with ',' and stored in the inheritance field of the interface tag. */
+static void parseInterfaceMethods (tokenInfo *const token, const int scope)
+{
+	// InterfaceType      = "interface" "{" { MethodSpec ";" } "}" .
+	// MethodSpec         = MethodName Signature | InterfaceTypeName .
+	// MethodName         = identifier .
+	// InterfaceTypeName  = TypeName .
+
+	vString *inheritsBuf = vStringNew ();
+	collector inherits = { .str = inheritsBuf, .last_len = 0, };
+
+	readToken (token);
+	if (!isType (token, TOKEN_OPEN_CURLY))
+	{
+		/* Not an interface body: release the buffer before bailing out. */
+		vStringDelete (inheritsBuf);
+		return;
+	}
+
+	readToken (token);
+	while (!isType (token, TOKEN_EOF) && !isType (token, TOKEN_CLOSE_CURLY))
+	{
+		if (isType (token, TOKEN_IDENTIFIER))
+		{
+			tokenInfo * headToken = newToken();
+			copyToken (headToken, token);
+
+			readToken (token);
+			if(isType (token, TOKEN_DOT))
+			{
+				/* Embedded interface given as package.Name */
+				if (!collectorIsEmpty(&inherits))
+					collectorPut (&inherits, ',');
+				collectorAppendToken (&inherits, headToken);
+				readTokenFull (token, NULL);
+				if (isType (token, TOKEN_IDENTIFIER))
+				{
+					collectorPut (&inherits, '.');
+					collectorAppendToken (&inherits, token);
+					readToken (token);
+				}
+				/* If the token is not an identifier, the input
+				   may be wrong. */
+			}
+			else if (isType (token, TOKEN_SEMICOLON))
+			{
+				/* Embedded interface given by a plain name */
+				if (!collectorIsEmpty(&inherits))
+					collectorPut (&inherits, ',');
+				collectorAppendToken (&inherits, headToken);
+				readToken (token);
+			}
+			else if (isType (token, TOKEN_OPEN_PAREN))
+			{
+				// => Signature
+				// Signature      = Parameters [ Result ] .
+				vString *pbuf = vStringNew ();
+				collector pcol = { .str = pbuf, .last_len = 0, };
+				vString *rbuf = NULL;
+				collector rcol = { .str = NULL, .last_len = 0, };
+
+				// Parameters
+				collectorPut (&pcol, '(');
+				skipToMatched (token, &pcol);
+				collectorTruncate(&pcol, true);
+
+				if (!isType (token, TOKEN_SEMICOLON))
+				{
+					rbuf = vStringNew ();
+					rcol.str = rbuf;
+
+					collectorAppendToken (&rcol, token);
+					skipType (token, &rcol);
+					collectorTruncate(&rcol, true);
+				}
+
+				makeTag (headToken, GOTAG_METHODSPEC, scope,
+						 vStringValue (pbuf),
+						 rbuf? vStringValue(rbuf): NULL);
+
+				if (rbuf)
+					vStringDelete (rbuf);
+				vStringDelete (pbuf);
+			}
+			deleteToken (headToken);
+		}
+		else
+			readToken (token);
+	}
+
+	if (!collectorIsEmpty(&inherits))
+	{
+		tagEntryInfo *e = getEntryInCorkQueue (scope);
+		if (e)
+		{
+			e->extensionFields.inheritance = vStringDeleteUnwrap (inheritsBuf);
+			inheritsBuf = NULL;
+		}
+	}
+	vStringDelete (inheritsBuf);
+}
+
+/* Parses the body of a struct type; `token' is the "struct" keyword on
+ * entry and `scope' the cork index of the struct tag. Named fields are
+ * tagged as members with their type as type reference, embedded types as
+ * anonymous members. */
+static void parseStructMembers (tokenInfo *const token, const int scope)
+{
+	// StructType     = "struct" "{" { FieldDecl ";" } "}" .
+	// FieldDecl      = (IdentifierList Type | AnonymousField) [ Tag ] .
+	// AnonymousField = [ "*" ] TypeName .
+	// Tag            = string_lit .
+
+	readToken (token);
+	if (!isType (token, TOKEN_OPEN_CURLY))
+		return;
+
+	vString *typeForAnonMember = vStringNew ();
+	intArray *corkForFields = intArrayNew ();
+
+	readToken (token);
+	while (!isType (token, TOKEN_EOF) && !isType (token, TOKEN_CLOSE_CURLY))
+	{
+		tokenInfo *memberCandidate = NULL;
+		bool first = true;
+
+		while (!isType (token, TOKEN_EOF))
+		{
+			if (isType (token, TOKEN_IDENTIFIER))
+			{
+				if (first)
+				{
+					// could be anonymous field like in 'struct {int}' - we don't know yet
+					memberCandidate = newToken ();
+					copyToken (memberCandidate, token);
+					first = false;
+				}
+				else
+				{
+					int cork;
+					if (memberCandidate)
+					{
+						// if we are here, there was a comma and memberCandidate isn't an anonymous field
+						cork = makeTag (memberCandidate, GOTAG_MEMBER, scope, NULL, NULL);
+						deleteToken (memberCandidate);
+						memberCandidate = NULL;
+						intArrayAdd (corkForFields, cork);
+					}
+					cork = makeTag (token, GOTAG_MEMBER, scope, NULL, NULL);
+					intArrayAdd (corkForFields, cork);
+				}
+				readToken (token);
+			}
+			if (!isType (token, TOKEN_COMMA))
+				break;
+			readToken (token);
+		}
+
+		if (first && isType (token, TOKEN_STAR))
+		{
+			vStringPut (typeForAnonMember, '*');
+			readToken (token);
+		}
+		else if (memberCandidate &&
+				 (isType (token, TOKEN_DOT) ||
+				  isType (token, TOKEN_STRING) ||
+				  isType (token, TOKEN_SEMICOLON)))
+			// memberCandidate is part of anonymous type
+			vStringCat (typeForAnonMember, memberCandidate->string);
+
+		// the above two cases that set typeForAnonMember guarantee
+		// this is an anonymous member
+		if (vStringLength (typeForAnonMember) > 0)
+		{
+			tokenInfo *anonMember = NULL;
+
+			if (memberCandidate)
+			{
+				anonMember = newToken ();
+				copyToken (anonMember, memberCandidate);
+			}
+
+			// TypeName of AnonymousField has a dot like package"."type.
+			// Pick up the last package component, and store it to
+			// memberCandidate.
+			while (isType (token, TOKEN_IDENTIFIER) ||
+				   isType (token, TOKEN_DOT))
+			{
+				if (isType (token, TOKEN_IDENTIFIER))
+				{
+					if (!anonMember)
+						anonMember = newToken ();
+					copyToken (anonMember, token);
+					vStringCat (typeForAnonMember, anonMember->string);
+				}
+				else if (isType (token, TOKEN_DOT))
+					vStringPut (typeForAnonMember, '.');
+				readToken (token);
+			}
+
+			// optional tag
+			if (isType (token, TOKEN_STRING))
+				readToken (token);
+
+			if (anonMember)
+			{
+				makeTag (anonMember, GOTAG_ANONMEMBER, scope, NULL,
+						 vStringValue (typeForAnonMember));
+				deleteToken (anonMember);
+			}
+		}
+		else
+		{
+			vString *typeForMember = vStringNew ();
+			collector collector = { .str = typeForMember, .last_len = 0, };
+
+			collectorAppendToken (&collector, token);
+			skipType (token, &collector);
+			collectorTruncate (&collector, true);
+
+			if (memberCandidate)
+				makeTag (memberCandidate, GOTAG_MEMBER, scope, NULL,
+						 vStringValue (typeForMember));
+
+			attachTypeRefField (scope, corkForFields, vStringValue (typeForMember));
+			intArrayClear (corkForFields);
+			vStringDelete (typeForMember);
+		}
+
+		if (memberCandidate)
+			deleteToken (memberCandidate);
+
+		vStringClear (typeForAnonMember);
+
+		while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_CURLY)
+				&& !isType (token, TOKEN_EOF))
+		{
+			readToken (token);
+			skipToMatched (token, NULL);
+		}
+
+		if (!isType (token, TOKEN_CLOSE_CURLY))
+		{
+			// we are at TOKEN_SEMICOLON
+			readToken (token);
+		}
+	}
+
+	intArrayDelete (corkForFields);
+	vStringDelete (typeForAnonMember);
+}
+
+/* Handles "const", "type" and "var" declarations, with or without a
+ * parenthesized group; `token' is the keyword on entry and `kind' is
+ * GOTAG_CONST, GOTAG_TYPE or GOTAG_VAR accordingly.
+ *
+ * Constants and variables are tagged with their declared type as type
+ * reference. A type spec is tagged as struct, interface, type alias (when
+ * written with "=") or plain type; struct and interface bodies are parsed
+ * for members. The alias decision is made for each spec separately, so an
+ * alias inside a group does not affect the specs that follow it. */
+static void parseConstTypeVar (tokenInfo *const token, goKind kind, const int scope)
+{
+	// ConstDecl      = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) .
+	// ConstSpec      = IdentifierList [ [ Type ] "=" ExpressionList ] .
+	// IdentifierList = identifier { "," identifier } .
+	// ExpressionList = Expression { "," Expression } .
+	// TypeDecl     = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) .
+	// TypeSpec     = identifier [ "=" ] Type .
+	// VarDecl     = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) .
+	// VarSpec     = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) .
+	bool usesParens = false;
+	intArray *corks
+		= (kind == GOTAG_VAR || kind == GOTAG_CONST)? intArrayNew (): NULL;
+
+	readToken (token);
+
+	if (isType (token, TOKEN_OPEN_PAREN))
+	{
+		usesParens = true;
+		readToken (token);
+	}
+
+	do
+	{
+		tokenInfo *typeToken = NULL;
+		int member_scope = scope;
+
+		while (!isType (token, TOKEN_EOF))
+		{
+			if (isType (token, TOKEN_IDENTIFIER))
+			{
+				if (kind == GOTAG_TYPE)
+				{
+					/* Kind of this very spec; must not leak into the
+					 * next spec of a parenthesized group. */
+					goKind spec_kind = GOTAG_TYPE;
+
+					typeToken = newToken ();
+					copyToken (typeToken, token);
+					readToken (token);
+					if (isType (token, TOKEN_EQUAL))
+					{
+						spec_kind = GOTAG_TALIAS;
+						readToken (token);
+					}
+
+					if (isKeyword (token, KEYWORD_struct))
+						member_scope = makeTag (typeToken, GOTAG_STRUCT,
+												scope, NULL, NULL);
+					else if (isKeyword (token, KEYWORD_interface))
+						member_scope = makeTag (typeToken, GOTAG_INTERFACE,
+												scope, NULL, NULL);
+					else
+						member_scope = makeTag (typeToken, spec_kind,
+												scope, NULL, NULL);
+
+					if (member_scope != CORK_NIL)
+						registerEntry (member_scope);
+					break;
+				}
+				else
+				{
+					int c = makeTag (token, kind, scope, NULL, NULL);
+					if (c != CORK_NIL && corks)
+						intArrayAdd (corks, c);
+				}
+				readToken (token);
+			}
+			if (!isType (token, TOKEN_COMMA))
+				break;
+			readToken (token);
+		}
+
+		if (typeToken)
+		{
+			if (isKeyword (token, KEYWORD_struct))
+				parseStructMembers (token, member_scope);
+			else if (isKeyword (token, KEYWORD_interface))
+				parseInterfaceMethods (token, member_scope);
+			else
+			{
+				/* Filling "typeref:" field of typeToken. */
+				vString *buffer = vStringNew ();
+				collector collector = { .str = buffer, .last_len = 0, };
+
+				collectorAppendToken (&collector, token);
+				skipType (token, &collector);
+				collectorTruncate (&collector, true);
+
+				if ((member_scope != CORK_NIL) && !vStringIsEmpty (buffer))
+				{
+					tagEntryInfo *e = getEntryInCorkQueue (member_scope);
+					if (e)
+					{
+						e->extensionFields.typeRef [0] = eStrdup ("typename");
+						e->extensionFields.typeRef [1] = vStringDeleteUnwrap (buffer);
+						buffer = NULL;
+					}
+				}
+
+				/* Still ours unless it was handed over to the tag. */
+				if (buffer)
+					vStringDelete (buffer);
+			}
+			deleteToken (typeToken);
+		}
+		else if (corks)
+		{
+			vString *buffer = vStringNew ();
+			collector collector = { .str = buffer, .last_len = 0, };
+
+			collectorAppendToken (&collector, token);
+			skipType (token, &collector);
+			collectorTruncate (&collector, true);
+
+			if (!vStringIsEmpty (buffer))
+				attachTypeRefField (scope, corks, vStringValue (buffer));
+			vStringDelete (buffer);
+			intArrayClear (corks);
+		}
+		else
+			skipType (token, NULL);
+
+		while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_PAREN)
+				&& !isType (token, TOKEN_EOF))
+		{
+			readToken (token);
+			skipToMatched (token, NULL);
+		}
+
+		if (member_scope != scope && member_scope != CORK_NIL)
+		{
+			tagEntryInfo *e = getEntryInCorkQueue (member_scope);
+			if (e)
+				e->extensionFields.endLine = getInputLineNumber ();
+		}
+
+		if (usesParens && !isType (token, TOKEN_CLOSE_PAREN))
+		{
+			// we are at TOKEN_SEMICOLON
+			readToken (token);
+		}
+	}
+	while (!isType (token, TOKEN_EOF) &&
+			usesParens && !isType (token, TOKEN_CLOSE_PAREN));
+
+	intArrayDelete (corks);
+}
+
+/* Parses one import spec starting at `token'. The import path is tagged
+ * as a package reference with the "imported" role; an explicit package
+ * name gets a tag of its own, and both tags are linked through the
+ * "package"/"packageName" fields. For `.' and `_' imports the
+ * "howImported" field is attached instead. */
+static void parseImportSpec (tokenInfo *const token)
+{
+	// ImportSpec = [ "." | PackageName ] ImportPath .
+	// ImportPath = string_lit .
+
+	int packageName_cork = CORK_NIL;
+	char *how_imported = NULL;
+	if (isType (token, TOKEN_IDENTIFIER))
+	{
+		if (strcmp(vStringValue (token->string), "_") == 0)
+			how_imported = "init";
+		else
+		{
+			packageName_cork = makeTag (token, GOTAG_PACKAGE_NAME,
+										CORK_NIL, NULL, NULL);
+		}
+		readToken (token);
+	}
+	else if (isType (token, TOKEN_DOT))
+	{
+		how_imported = "inline";
+		readToken (token);
+	}
+
+	if (isType (token, TOKEN_STRING))
+	{
+		int package_cork =
+			makeRefTag (token, GOTAG_PACKAGE, R_GOTAG_PACKAGE_IMPORTED);
+
+		if (package_cork != CORK_NIL && how_imported)
+			attachParserFieldToCorkEntry (package_cork,
+										  GoFields [F_HOW_IMPORTED].ftype,
+										  how_imported);
+
+		if (packageName_cork != CORK_NIL)
+		{
+			attachParserFieldToCorkEntry (packageName_cork,
+										  GoFields [F_PACKAGE].ftype,
+										  vStringValue (token->string));
+			if (package_cork != CORK_NIL)
+			{
+				tagEntryInfo *e = getEntryInCorkQueue (packageName_cork);
+				if (e)
+					attachParserFieldToCorkEntry (package_cork,
+												  GoFields [F_PACKAGE_NAME].ftype,
+												  e->name);
+			}
+		}
+	}
+}
+
+/* Handles an "import" declaration, either a single spec or a
+ * parenthesized group; `token' is the "import" keyword on entry. */
+static void parseImport (tokenInfo *const token)
+{
+	// ImportDecl = "import" ( ImportSpec | "(" { ImportSpec ";" } ")" ) .
+
+	readToken (token);
+	if (isType (token, TOKEN_EOF))
+		return;
+
+	if (isType (token, TOKEN_OPEN_PAREN))
+	{
+		/* parseImportSpec() ignores tokens that cannot start a spec,
+		 * so it can simply be tried on every token of the group. */
+		do
+		{
+			parseImportSpec (token);
+			readToken (token);
+		} while (!isType (token, TOKEN_EOF) &&
+				 !isType (token, TOKEN_CLOSE_PAREN));
+	}
+	else
+	{
+		parseImportSpec (token);
+		return;
+	}
+}
+
+/* Top-level loop: dispatches on the declaration keywords and skips any
+ * other bracketed construct. The tag of the package clause becomes the
+ * scope of the declarations that follow it. */
+static void parseGoFile (tokenInfo *const token)
+{
+	int scope = CORK_NIL;
+	do
+	{
+		readToken (token);
+
+		if (isType (token, TOKEN_KEYWORD))
+		{
+			switch (token->keyword)
+			{
+				case KEYWORD_package:
+					scope = parsePackage (token);
+					break;
+				case KEYWORD_func:
+					parseFunctionOrMethod (token, scope);
+					break;
+				case KEYWORD_const:
+					parseConstTypeVar (token, GOTAG_CONST, scope);
+					break;
+				case KEYWORD_type:
+					parseConstTypeVar (token, GOTAG_TYPE, scope);
+					break;
+				case KEYWORD_var:
+					parseConstTypeVar (token, GOTAG_VAR, scope);
+					break;
+				case KEYWORD_import:
+					parseImport (token);
+					break;
+				default:
+					break;
+			}
+		}
+		else if (isType (token, TOKEN_OPEN_PAREN) || isType (token, TOKEN_OPEN_CURLY) ||
+			isType (token, TOKEN_OPEN_SQUARE))
+		{
+			skipToMatched (token, NULL);
+		}
+	} while (token->type != TOKEN_EOF);
+}
+
+/* Parser entry point: parses the current input file using one token
+ * taken from the pool. */
+static void findGoTags (void)
+{
+	tokenInfo *const token = newToken ();
+
+	parseGoFile (token);
+
+	deleteToken (token);
+}
+
+/* Builds the parser definition for Go (files with the "go" extension). */
+extern parserDefinition *GoParser (void)
+{
+	static const char *const extensions[] = { "go", NULL };
+	parserDefinition *def = parserNew ("Go");
+	def->kindTable = GoKinds;
+	def->kindCount = ARRAY_SIZE (GoKinds);
+	def->extensions = extensions;
+	def->parser = findGoTags;
+	def->initialize = initialize;
+	def->finalize = finalize;
+	def->keywordTable = GoKeywordTable;
+	def->keywordCount = ARRAY_SIZE (GoKeywordTable);
+	def->fieldTable = GoFields;
+	def->fieldCount = ARRAY_SIZE (GoFields);
+	def->useCork = CORK_QUEUE | CORK_SYMTAB;
+	def->requestAutomaticFQTag = true;
+	return def;
+}
diff --git a/ctags/parsers/geany_html.c b/ctags/parsers/html.c
similarity index 72%
rename from ctags/parsers/geany_html.c
rename
to ctags/parsers/html.c index 8d2a4abfef..56b3b3f332 100644 --- a/ctags/parsers/geany_html.c +++ b/ctags/parsers/html.c @@ -28,17 +28,52 @@ typedef enum { K_ANCHOR, + K_CLASS, K_HEADING1, K_HEADING2, - K_HEADING3 + K_HEADING3, + K_STYELSHEET, + K_ID, + K_SCRIPT, } htmlKind; +typedef enum { + CLASS_KIND_ATTRIBUTE_ROLE, +} ClassRole; + +typedef enum { + SCRIPT_KIND_EXTERNAL_FILE_ROLE, +} ScriptRole; + +typedef enum { + STYLESHEET_KIND_EXTERNAL_FILE_ROLE, +} StylesheetRole; + +static roleDefinition ClassRoles [] = { + { true, "attribute", "assigned as attributes" }, +}; + +static roleDefinition ScriptRoles [] = { + { true, "extFile", "referenced as external files" }, +}; + +static roleDefinition StylesheetRoles [] = { + { true, "extFile", "referenced as external files" }, +}; + static kindDefinition HtmlKinds [] = { { true, 'a', "anchor", "named anchors" }, + { true, 'c', "class", "classes", + .referenceOnly = true, ATTACH_ROLES (ClassRoles)}, { true, 'h', "heading1", "H1 headings" }, { true, 'i', "heading2", "H2 headings" }, - { true, 'j', "heading3", "H3 headings" } + { true, 'j', "heading3", "H3 headings" }, + { true, 'C', "stylesheet", "stylesheets", + .referenceOnly = true, ATTACH_ROLES (StylesheetRoles)}, + { true, 'I', "id", "identifiers" }, + { true, 'J', "script", "scripts", + .referenceOnly = true, ATTACH_ROLES (ScriptRoles)}, }; typedef enum { @@ -54,17 +89,22 @@ typedef enum { KEYWORD_area, KEYWORD_base, KEYWORD_br, + KEYWORD_class, KEYWORD_col, KEYWORD_command, KEYWORD_embed, KEYWORD_hr, + KEYWORD_href, + KEYWORD_id, KEYWORD_img, KEYWORD_input, KEYWORD_keygen, KEYWORD_link, KEYWORD_meta, KEYWORD_param, + KEYWORD_rel, KEYWORD_source, + KEYWORD_src, KEYWORD_track, KEYWORD_wbr } keywordId; @@ -82,17 +122,22 @@ static const keywordTable HtmlKeywordTable[] = { {"area", KEYWORD_area}, {"base", KEYWORD_base}, {"br", KEYWORD_br}, + {"class", KEYWORD_class}, {"col", KEYWORD_col}, {"command", KEYWORD_command}, {"embed", KEYWORD_embed}, {"hr", KEYWORD_hr}, + 
{"href", KEYWORD_href}, + {"id", KEYWORD_id}, {"img", KEYWORD_img}, {"input", KEYWORD_input}, {"keygen", KEYWORD_keygen}, {"link", KEYWORD_link}, {"meta", KEYWORD_meta}, {"param", KEYWORD_param}, + {"rel", KEYWORD_rel}, {"source", KEYWORD_source}, + {"src", KEYWORD_src}, {"track", KEYWORD_track}, {"wbr", KEYWORD_wbr}, }; @@ -141,6 +186,7 @@ static int Lang_html; static void readTag (tokenInfo *token, vString *text, int depth); #ifdef DEBUG +#if 0 static void dumpToken (tokenInfo *token, const char *context, const char* extra_context) { fprintf (stderr, "[%7s] %-20s@%s.%s\n", @@ -148,6 +194,7 @@ static void dumpToken (tokenInfo *token, const char *context, const char* extra_ context, extra_context? extra_context: "_"); } #endif +#endif static void readTokenText (tokenInfo *const token, bool collectText) { @@ -345,8 +392,8 @@ static bool skipScriptContent (tokenInfo *token, long *line, long *lineOffset) bool found_start = false; bool found_script = false; - long line_tmp[2]; - long lineOffset_tmp[2]; + long line_tmp[2] = {0}; + long lineOffset_tmp[2] = {0}; tokenType type; @@ -380,6 +427,30 @@ static bool skipScriptContent (tokenInfo *token, long *line, long *lineOffset) return found_script; } +static void makeClassRefTags (const char *classes) +{ + vString *klass = vStringNew (); + + do + { + if (*classes && !isspace (*classes)) + vStringPut (klass, *classes); + else if (!vStringIsEmpty (klass)) + { + makeSimpleRefTag (klass, K_CLASS, + CLASS_KIND_ATTRIBUTE_ROLE); + vStringClear (klass); + } + + if (!*classes) + break; + + classes++; + } while (1); + + vStringDelete (klass); +} + static void readTag (tokenInfo *token, vString *text, int depth) { bool textCreated = false; @@ -390,6 +461,8 @@ static void readTag (tokenInfo *token, vString *text, int depth) keywordId startTag; bool isHeading; bool isVoid; + vString *stylesheet = NULL; + bool stylesheet_expectation = false; startTag = lookupKeyword (vStringValue (token->string), Lang_html); isHeading = (startTag == 
KEYWORD_h1 || startTag == KEYWORD_h2 || startTag == KEYWORD_h3); @@ -402,26 +475,101 @@ static void readTag (tokenInfo *token, vString *text, int depth) do { + keywordId attribute = KEYWORD_NONE; + readToken (token, true); - if (startTag == KEYWORD_a && token->type == TOKEN_NAME) - { - keywordId attribute = lookupKeyword (vStringValue (token->string), Lang_html); + if (token->type == TOKEN_NAME) + attribute = lookupKeyword (vStringValue (token->string), Lang_html); - if (attribute == KEYWORD_name) + if (attribute == KEYWORD_class) + { + readToken (token, true); + if (token->type == TOKEN_EQUAL) + { + readToken (token, true); + if (token->type == TOKEN_STRING) + makeClassRefTags (vStringValue (token->string)); + } + } + else if (attribute == KEYWORD_id) + { + readToken (token, true); + if (token->type == TOKEN_EQUAL) + { + readToken (token, true); + if (token->type == TOKEN_STRING) + makeSimpleTag (token->string, K_ID); + } + } + else if (startTag == KEYWORD_a && attribute == KEYWORD_name) + { + readToken (token, true); + if (token->type == TOKEN_EQUAL) + { + readToken (token, true); + if (token->type == TOKEN_STRING || token->type == TOKEN_NAME) + makeSimpleTag (token->string, K_ANCHOR); + } + } + else if (startTag == KEYWORD_script && attribute == KEYWORD_src) + { + readToken (token, true); + if (token->type == TOKEN_EQUAL) + { + readToken (token, true); + if (token->type == TOKEN_STRING) + makeSimpleRefTag (token->string, K_SCRIPT, + SCRIPT_KIND_EXTERNAL_FILE_ROLE); + } + } + else if (startTag == KEYWORD_link) + { + if (attribute == KEYWORD_rel) + { + readToken (token, true); + if (token->type == TOKEN_EQUAL) + { + readToken (token, true); + if (token->type == TOKEN_STRING && + /* strcmp is not enough: + * e.g. 
*/ + vStringLength(token->string) >= 10 && + strstr (vStringValue (token->string), "stylesheet")) + stylesheet_expectation = true; + } + } + else if (attribute == KEYWORD_href) { readToken (token, true); if (token->type == TOKEN_EQUAL) { readToken (token, true); - if (token->type == TOKEN_STRING || token->type == TOKEN_NAME) - makeSimpleTag (token->string, K_ANCHOR); + if (token->type == TOKEN_STRING) + { + if (stylesheet == NULL) + stylesheet = vStringNewCopy (token->string); + else + vStringCopy (stylesheet, token->string); + } } } + if (stylesheet_expectation && stylesheet && !vStringIsEmpty (stylesheet)) + { + makeSimpleRefTag (stylesheet, K_STYELSHEET, + STYLESHEET_KIND_EXTERNAL_FILE_ROLE); + stylesheet_expectation = false; + if (stylesheet) + vStringClear (stylesheet); + } } } while (token->type != TOKEN_TAG_END && token->type != TOKEN_TAG_END2 && token->type != TOKEN_EOF); + vStringDelete (stylesheet); + stylesheet = NULL; + if (!isVoid && token->type == TOKEN_TAG_END && depth < MAX_DEPTH) { long startSourceLineNumber = getSourceLineNumber (); diff --git a/ctags/parsers/geany_jscript.c b/ctags/parsers/jscript.c similarity index 84% rename from ctags/parsers/geany_jscript.c rename to ctags/parsers/jscript.c index 458b5cb14e..df572df1cf 100644 --- a/ctags/parsers/geany_jscript.c +++ b/ctags/parsers/jscript.c @@ -60,52 +60,6 @@ #define newToken() (objPoolGet (TokenPool)) #define deleteToken(t) (objPoolPut (TokenPool, (t))) -/* - * Debugging - * - * Uncomment this to enable extensive debugging to stderr in jscript code. - * Please note that TRACING_ENABLED should be #defined in main/trace.h - * for this to work. - * - */ -//#define JSCRIPT_DEBUGGING_ENABLED 1 - -#if defined(DO_TRACING) && defined(JSCRIPT_DEBUGGING_ENABLED) - #define JSCRIPT_DO_DEBUGGING -#endif - -#ifdef JSCRIPT_DO_DEBUGGING - -#define JSCRIPT_DEBUG_ENTER() TRACE_ENTER() -#define JSCRIPT_DEBUG_LEAVE() TRACE_LEAVE() - -#define JSCRIPT_DEBUG_ENTER_TEXT(_szFormat,...) 
\ - TRACE_ENTER_TEXT(_szFormat,## __VA_ARGS__) - -#define JSCRIPT_DEBUG_LEAVE_TEXT(_szFormat,...) \ - TRACE_LEAVE_TEXT(_szFormat,## __VA_ARGS__) - -#define JSCRIPT_DEBUG_PRINT(_szFormat,...) \ - TRACE_PRINT(_szFormat,## __VA_ARGS__) - -#define JSCRIPT_DEBUG_ASSERT(_condition,_szFormat,...) \ - TRACE_ASSERT(_condition,_szFormat,## __VA_ARGS__) - -#else //!JSCRIPT_DO_DEBUGGING - -#define JSCRIPT_DEBUG_ENTER() do { } while(0) -#define JSCRIPT_DEBUG_LEAVE() do { } while(0) - -#define JSCRIPT_DEBUG_ENTER_TEXT(_szFormat,...) do { } while(0) -#define JSCRIPT_DEBUG_LEAVE_TEXT(_szFormat,...) do { } while(0) - -#define JSCRIPT_DEBUG_PRINT(_szFormat,...) do { } while(0) - -#define JSCRIPT_DEBUG_ASSERT(_condition,_szFormat,...) do { } while(0) - -#endif //!JSCRIPT_DO_DEBUGGING - - /* * DATA DECLARATIONS */ @@ -145,6 +99,8 @@ enum eKeywordId { KEYWORD_default, KEYWORD_export, KEYWORD_async, + KEYWORD_get, + KEYWORD_set, }; typedef int keywordId; /* to allow KEYWORD_NONE */ @@ -170,6 +126,9 @@ typedef enum eTokenType { TOKEN_REGEXP, TOKEN_POSTFIX_OPERATOR, TOKEN_STAR, + /* To handle Babel's decorators. + * Used only in readTokenFull or lower functions. 
*/ + TOKEN_ATMARK, TOKEN_BINARY_OPERATOR } tokenType; @@ -181,7 +140,7 @@ typedef struct sTokenInfo { unsigned long lineNumber; MIOPos filePosition; int nestLevel; - bool ignoreTag; + bool dynamicProp; } tokenInfo; /* @@ -207,6 +166,9 @@ typedef enum { JSTAG_CONSTANT, JSTAG_VARIABLE, JSTAG_GENERATOR, + JSTAG_GETTER, + JSTAG_SETTER, + JSTAG_FIELD, JSTAG_COUNT } jsKind; @@ -217,7 +179,10 @@ static kindDefinition JsKinds [] = { { true, 'p', "property", "properties" }, { true, 'C', "constant", "constants" }, { true, 'v', "variable", "global variables" }, - { true, 'g', "generator", "generators" } + { true, 'g', "generator", "generators" }, + { true, 'G', "getter", "getters" }, + { true, 'S', "setter", "setters" }, + { true, 'M', "field", "fields" }, }; static const keywordTable JsKeywordTable [] = { @@ -248,6 +213,8 @@ static const keywordTable JsKeywordTable [] = { { "default", KEYWORD_default }, { "export", KEYWORD_export }, { "async", KEYWORD_async }, + { "get", KEYWORD_get }, + { "set", KEYWORD_set }, }; /* @@ -256,11 +223,18 @@ static const keywordTable JsKeywordTable [] = { /* Recursive functions */ static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr); +static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr); static void parseFunction (tokenInfo *const token); static bool parseBlock (tokenInfo *const token, const vString *const parentScope); static bool parseLine (tokenInfo *const token, bool is_inside_class); static void parseUI5 (tokenInfo *const token); +#ifdef DO_TRACING +//static void dumpToken (const tokenInfo *const token); +static const char *tokenTypeName(enum eTokenType e); +//static const char *keywordName(enum eKeywordId e); +#endif + static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED) { tokenInfo *token = xMalloc (1, tokenInfo); @@ -278,7 +252,7 @@ static void clearPoolToken (void *data) token->type = TOKEN_UNDEFINED; token->keyword = KEYWORD_NONE; 
token->nestLevel = 0; - token->ignoreTag = false; + token->dynamicProp = false; token->lineNumber = getInputLineNumber (); token->filePosition = getInputFilePosition (); vStringClear (token->string); @@ -300,6 +274,7 @@ static void copyToken (tokenInfo *const dest, const tokenInfo *const src, dest->filePosition = src->filePosition; dest->type = src->type; dest->keyword = src->keyword; + dest->dynamicProp = src->dynamicProp; vStringCopy(dest->string, src->string); if (include_non_read_info) { @@ -308,6 +283,13 @@ static void copyToken (tokenInfo *const dest, const tokenInfo *const src, } } +static void injectDynamicName (tokenInfo *const token, vString *newName) +{ + token->dynamicProp = true; + vStringDelete (token->string); + token->string = newName; +} + /* * Tag generation functions */ @@ -316,14 +298,14 @@ static void makeJsTagCommon (const tokenInfo *const token, const jsKind kind, vString *const signature, vString *const inheritance, bool anonymous) { - if (JsKinds [kind].enabled && ! token->ignoreTag ) + if (JsKinds [kind].enabled ) { const char *name = vStringValue (token->string); vString *fullscope = vStringNewCopy (token->scope); const char *p; tagEntryInfo e; - if (kind != JSTAG_PROPERTY && (p = strrchr (name, '.')) != NULL ) + if (!token->dynamicProp && kind != JSTAG_PROPERTY && (p = strrchr (name, '.')) != NULL ) { if (vStringLength (fullscope) > 0) vStringPut (fullscope, '.'); @@ -333,7 +315,7 @@ static void makeJsTagCommon (const tokenInfo *const token, const jsKind kind, initTagEntry (&e, name, kind); - JSCRIPT_DEBUG_PRINT("Emitting tag for symbol '%s' of kind %02x with scope '%s'",name,kind,vStringValue(fullscope)); + TRACE_PRINT("Emitting tag for symbol '%s' of kind %02x with scope '%s'",name,kind,vStringValue(fullscope)); e.lineNumber = token->lineNumber; e.filePosition = token->filePosition; @@ -364,9 +346,9 @@ static void makeJsTagCommon (const tokenInfo *const token, const jsKind kind, * characters. 
*/ for (i = 0; i < signature->length; i++) { - unsigned char c = (unsigned char) signature->buffer[i]; + unsigned char c = (unsigned char) vStringChar (signature, i); if (c < 0x20 /* below space */ || c == 0x7F /* DEL */) - signature->buffer[i] = ' '; + vStringChar (signature, i) = ' '; } e.extensionFields.signature = vStringValue(signature); } @@ -391,11 +373,10 @@ static void makeJsTag (const tokenInfo *const token, const jsKind kind, static void makeClassTagCommon (tokenInfo *const token, vString *const signature, vString *const inheritance, bool anonymous) { - vString * fulltag; - if ( ! token->ignoreTag ) + { - fulltag = vStringNew (); + vString * fulltag = vStringNew (); if (vStringLength (token->scope) > 0) { vStringCopy(fulltag, token->scope); @@ -425,11 +406,8 @@ static void makeClassTag (tokenInfo *const token, vString *const signature, static void makeFunctionTagCommon (tokenInfo *const token, vString *const signature, bool generator, bool anonymous) { - vString * fulltag; - - if ( ! token->ignoreTag ) { - fulltag = vStringNew (); + vString * fulltag = vStringNew (); if (vStringLength (token->scope) > 0) { vStringCopy(fulltag, token->scope); @@ -482,10 +460,10 @@ static int handleUnicodeCodePoint (uint32_t point) /* 4 bytes should be enough for any encoding (it's how much UTF-32 * would need). */ /* FIXME: actually iconv has a tendency to output a BOM for Unicode - * encodings where it matters when the endianess is not specified in + * encodings where it matters when the endianness is not specified in * the target encoding name. E.g., if the target encoding is "UTF-32" * or "UTF-16" it will output 2 code points, the BOM (U+FEFF) and the - * one we expect. This does not happen if the endianess is specified + * one we expect. This does not happen if the endianness is specified * explicitly, e.g. with "UTF-32LE", or "UTF-16BE". 
* However, it's not very relevant for the moment as nothing in CTags * cope well (if at all) with non-ASCII-compatible encodings like @@ -828,7 +806,7 @@ static void parseTemplateString (vString *const string) while (c != EOF); } -static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr) +static void readTokenFullRaw (tokenInfo *const token, bool include_newlines, vString *const repr) { int c; int i; @@ -965,18 +943,10 @@ static void readTokenFull (tokenInfo *const token, bool include_newlines, vStrin else { if (repr) /* remove the / we added */ - repr->buffer[--repr->length] = 0; + vStringChop(repr); if (d == '*') { - do - { - skipToCharacterInInputFile ('*'); - c = getcFromInputFile (); - if (c == '/') - break; - else - ungetcToInputFile (c); - } while (c != EOF && c != '\0'); + skipToCharacterInInputFile2('*', '/'); goto getNextChar; } else if (d == '/') /* is this the start of a comment? */ @@ -1007,6 +977,10 @@ static void readTokenFull (tokenInfo *const token, bool include_newlines, vStrin } break; + case '@': + token->type = TOKEN_ATMARK; + break; + case '\\': c = readUnicodeEscapeSequence (c); /* fallthrough */ @@ -1083,12 +1057,71 @@ static void readTokenFull (tokenInfo *const token, bool include_newlines, vStrin LastTokenType = token->type; } +/* See https://babeljs.io/blog/2018/09/17/decorators */ +static void skipBabelDecorator (tokenInfo *token, bool include_newlines, vString *const repr) +{ + readTokenFullRaw (token, include_newlines, repr); + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* @(complex ? dec1 : dec2) */ + skipArgumentList (token, include_newlines, repr); + TRACE_PRINT ("found @(...) style decorator"); + } + else if (isType (token, TOKEN_IDENTIFIER)) + { + /* @namespace.foo (...) 
*/ + bool found_period = false; + while (1) + { + readTokenFullRaw (token, include_newlines, repr); + if (isType (token, TOKEN_IDENTIFIER)) + { + if (!found_period) + { + TRACE_PRINT("found @namespace.bar style decorator"); + break; + } + found_period = false; + } + else if (isType (token, TOKEN_PERIOD)) + found_period = true; + else if (isType (token, TOKEN_OPEN_PAREN)) + { + skipArgumentList (token, include_newlines, repr); + TRACE_PRINT("found @foo(...) style decorator"); + break; + } + else + { + TRACE_PRINT("found @foo style decorator"); + break; + } + } + } + else + /* Unexpected token after @ */ + TRACE_PRINT("found unexpected token during skipping a decorator"); +} + +static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr) +{ + readTokenFullRaw (token, include_newlines, repr); + + while (1) + { + if (!isType (token, TOKEN_ATMARK)) + break; + skipBabelDecorator (token, include_newlines, repr); + /* @decorator0 @decorator1 ... There can be more than one decorator. 
*/ + } +} + #ifdef JSCRIPT_DO_DEBUGGING /* trace readTokenFull() */ static void readTokenFullDebug (tokenInfo *const token, bool include_newlines, vString *const repr) { readTokenFull (token, include_newlines, repr); - JSCRIPT_DEBUG_PRINT("token '%s' of type %02x with scope '%s'",vStringValue(token->string),token->type, vStringValue(token->scope)); + TRACE_PRINT("token '%s' of type %02x with scope '%s'",vStringValue(token->string),token->type, vStringValue(token->scope)); } # define readTokenFull readTokenFullDebug #endif @@ -1150,6 +1183,19 @@ static void skipArrayList (tokenInfo *const token, bool include_newlines) } } +static void skipQualifiedIdentifier (tokenInfo *const token) +{ + /* Skip foo.bar.baz */ + while (isType (token, TOKEN_IDENTIFIER)) + { + readToken (token); + if (isType (token, TOKEN_PERIOD)) + readToken (token); + else + break; + } +} + static void addContext (tokenInfo* const parent, const tokenInfo* const child) { if (vStringLength (parent->string) > 0) @@ -1388,6 +1434,8 @@ static bool parseIf (tokenInfo *const token) static void parseFunction (tokenInfo *const token) { + TRACE_ENTER(); + tokenInfo *const name = newToken (); vString *const signature = vStringNew (); bool is_class = false; @@ -1445,13 +1493,15 @@ static void parseFunction (tokenInfo *const token) cleanUp: vStringDelete (signature); deleteToken (name); + + TRACE_LEAVE(); } /* Parses a block surrounded by curly braces. 
* @p parentScope is the scope name for this block, or NULL for unnamed scopes */ static bool parseBlock (tokenInfo *const token, const vString *const parentScope) { - JSCRIPT_DEBUG_ENTER(); + TRACE_ENTER(); bool is_class = false; bool read_next_token = true; @@ -1540,7 +1590,7 @@ static bool parseBlock (tokenInfo *const token, const vString *const parentScope if (parentScope) token->nestLevel--; - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE(); return is_class; } @@ -1548,7 +1598,10 @@ static bool parseBlock (tokenInfo *const token, const vString *const parentScope static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, const bool is_es6_class) { - JSCRIPT_DEBUG_ENTER(); + TRACE_ENTER_TEXT("token is '%s' of type %s in classToken '%s' of type %s (es6: %s)", + vStringValue(token->string), tokenTypeName (token->type), + vStringValue(class->string), tokenTypeName (class->type), + is_es6_class? "yes": "no"); tokenInfo *const name = newToken (); bool has_methods = false; @@ -1563,18 +1616,36 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, * validMethod : function(a,b) {} * 'validMethod2' : function(a,b) {} * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false} + * get prop() {} + * set prop(val) {} * * ES6 methods: * property(...) {} * *generator() {} - * FIXME: what to do with computed names? + * + * ES6 computed name: * [property]() {} + * get [property]() {} + * set [property]() {} * *[generator]() {} + * + * tc39/proposal-class-fields + * field0 = function(a,b) {} + * field1 = 1 + * The parser extracts field0 as a method because the left value + * is a function (kind propagation), and field1 as a field. 
*/ + bool dont_read = false; do { - readToken (token); + bool is_setter = false; + bool is_getter = false; + + if (!dont_read) + readToken (token); + dont_read = false; + if (isType (token, TOKEN_CLOSE_CURLY)) { goto cleanUp; @@ -1582,12 +1653,28 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, if (isKeyword (token, KEYWORD_async)) readToken (token); + else if (isType(token, TOKEN_KEYWORD) && isKeyword (token, KEYWORD_get)) + { + is_getter = true; + readToken (token); + } + else if (isType(token, TOKEN_KEYWORD) && isKeyword (token, KEYWORD_set)) + { + is_setter = true; + readToken (token); + } if (! isType (token, TOKEN_KEYWORD) && ! isType (token, TOKEN_SEMICOLON)) { bool is_generator = false; bool is_shorthand = false; /* ES6 shorthand syntax */ + bool is_computed_name = false; /* ES6 computed property name */ + bool is_dynamic_prop = false; + vString *dprop = NULL; /* is_computed_name is true but + * the name is not represented in + * a string literal. The expressions + * go this string. */ if (isType (token, TOKEN_STAR)) /* shorthand generator */ { @@ -1595,11 +1682,47 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, readToken (token); } + if (isType (token, TOKEN_OPEN_SQUARE)) + { + is_computed_name = true; + dprop = vStringNewInit ("["); + readTokenFull (token, false, dprop); + } + copyToken(name, token, true); + if (is_computed_name && ! isType (token, TOKEN_STRING)) + is_dynamic_prop = true; + + readTokenFull (token, false, dprop); + + if (is_computed_name) + { + int depth = 1; + do + { + if (isType (token, TOKEN_CLOSE_SQUARE)) + depth--; + else + { + is_dynamic_prop = true; + if (isType (token, TOKEN_OPEN_SQUARE)) + depth++; + } + readTokenFull (token, false, (is_dynamic_prop && depth != 0)? dprop: NULL); + } while (! 
isType (token, TOKEN_EOF) && depth > 0); + } + + if (is_dynamic_prop) + { + injectDynamicName (name, dprop); + dprop = NULL; + } + else + vStringDelete (dprop); - readToken (token); is_shorthand = isType (token, TOKEN_OPEN_PAREN); - if ( isType (token, TOKEN_COLON) || is_shorthand ) + bool can_be_field = isType (token, TOKEN_EQUAL_SIGN); + if ( isType (token, TOKEN_COLON) || can_be_field || is_shorthand ) { if (! is_shorthand) { @@ -1609,7 +1732,7 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, } if ( is_shorthand || isKeyword (token, KEYWORD_function) ) { - JSCRIPT_DEBUG_PRINT("Seems to be a function or shorthand"); + TRACE_PRINT("Seems to be a function or shorthand"); vString *const signature = vStringNew (); if (! is_shorthand) @@ -1630,7 +1753,16 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, if (isType (token, TOKEN_OPEN_CURLY)) { has_methods = true; - makeJsTag (name, is_generator ? JSTAG_GENERATOR : JSTAG_METHOD, signature, NULL); + + int kind = JSTAG_METHOD; + if (is_generator) + kind = JSTAG_GENERATOR; + else if (is_getter) + kind = JSTAG_GETTER; + else if (is_setter) + kind = JSTAG_SETTER; + + makeJsTag (name, kind, signature, NULL); parseBlock (token, name->string); /* @@ -1680,12 +1812,23 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, else makeJsTag (name, JSTAG_PROPERTY, NULL, NULL); } + else if (can_be_field) + { + makeJsTag (name, JSTAG_FIELD, NULL, NULL); + parseLine (token, true); + } + } + else + { + makeJsTag (name, JSTAG_FIELD, NULL, NULL); + if (!isType (token, TOKEN_SEMICOLON)) + dont_read = true; } } } while ( isType(token, TOKEN_COMMA) || ( is_es6_class && ! 
isType(token, TOKEN_EOF) ) ); - JSCRIPT_DEBUG_PRINT("Finished parsing methods"); + TRACE_PRINT("Finished parsing methods"); findCmdTerm (token, false, false); @@ -1694,14 +1837,14 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, vStringDelete (saveScope); deleteToken (name); - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE_TEXT("found method(s): %s", has_methods? "yes": "no"); return has_methods; } static bool parseES6Class (tokenInfo *const token, const tokenInfo *targetName) { - JSCRIPT_DEBUG_ENTER(); + TRACE_ENTER(); tokenInfo * className = newToken (); vString *inheritance = NULL; @@ -1745,7 +1888,7 @@ static bool parseES6Class (tokenInfo *const token, const tokenInfo *targetName) vStringStripLeading (inheritance); } - JSCRIPT_DEBUG_PRINT("Emitting tag for class '%s'", vStringValue(targetName->string)); + TRACE_PRINT("Emitting tag for class '%s'", vStringValue(targetName->string)); makeJsTagCommon (targetName, JSTAG_CLASS, NULL, inheritance, (is_anonymous && (targetName == className))); @@ -1769,13 +1912,13 @@ static bool parseES6Class (tokenInfo *const token, const tokenInfo *targetName) deleteToken (className); - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE(); return true; } static bool parseStatement (tokenInfo *const token, bool is_inside_class) { - JSCRIPT_DEBUG_ENTER(); + TRACE_ENTER_TEXT("is_inside_class: %s", is_inside_class? 
"yes": "no"); tokenInfo *const name = newToken (); tokenInfo *const secondary_name = newToken (); @@ -1827,7 +1970,7 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) isKeyword(token, KEYWORD_let) || isKeyword(token, KEYWORD_const) ) { - JSCRIPT_DEBUG_PRINT("var/let/const case"); + TRACE_PRINT("var/let/const case"); is_const = isKeyword(token, KEYWORD_const); /* * Only create variables for global scope @@ -1842,7 +1985,7 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) nextVar: if ( isKeyword(token, KEYWORD_this) ) { - JSCRIPT_DEBUG_PRINT("found 'this' keyword"); + TRACE_PRINT("found 'this' keyword"); readToken(token); if (isType (token, TOKEN_PERIOD)) @@ -1852,7 +1995,8 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) } copyToken(name, token, true); - JSCRIPT_DEBUG_PRINT("name becomes '%s'",vStringValue(name->string)); + TRACE_PRINT("name becomes '%s' of type %s", + vStringValue(token->string), tokenTypeName (token->type)); while (! isType (token, TOKEN_CLOSE_CURLY) && ! isType (token, TOKEN_SEMICOLON) && @@ -1990,7 +2134,6 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) * Find to the end of the statement */ findCmdTerm (token, false, false); - token->ignoreTag = false; is_terminated = true; goto cleanUp; } @@ -2133,7 +2276,7 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) || isType (name, TOKEN_KEYWORD) ) ) { /* Unexpected input. Try to reset the parsing. */ - JSCRIPT_DEBUG_PRINT("Unexpected input, trying to reset"); + TRACE_PRINT("Unexpected input, trying to reset"); vStringDelete (signature); goto cleanUp; } @@ -2166,9 +2309,12 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) * Or checks if this is a hash variable. 
* var z = {}; */ + bool anonClass = vStringIsEmpty (name->string); + if (anonClass) + anonGenerate (name->string, "AnonymousClass", JSTAG_CLASS); has_methods = parseMethods(token, name, false); if (has_methods) - makeJsTag (name, JSTAG_CLASS, NULL, NULL); + makeJsTagCommon (name, JSTAG_CLASS, NULL, NULL, anonClass); else { /* @@ -2229,7 +2375,11 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) if ( isKeyword (token, KEYWORD_capital_object) ) is_class = true; - readToken (token); + if (is_var) + skipQualifiedIdentifier (token); + else + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) skipArgumentList(token, true, NULL); @@ -2241,17 +2391,16 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) { makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL); } + else if ( is_class ) + { + makeClassTag (name, NULL, NULL); + } else { - if ( is_class ) - { - makeClassTag (name, NULL, NULL); - } else { - /* FIXME: we cannot really get a meaningful - * signature from a `new Function()` call, - * so for now just don't set any */ - makeFunctionTag (name, NULL, false); - } + /* FIXME: we cannot really get a meaningful + * signature from a `new Function()` call, + * so for now just don't set any */ + makeFunctionTag (name, NULL, false); } } } @@ -2346,7 +2495,7 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) deleteToken (method_body_token); vStringDelete(saveScope); - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE(); return is_terminated; } @@ -2403,7 +2552,8 @@ static void parseUI5 (tokenInfo *const token) static bool parseLine (tokenInfo *const token, bool is_inside_class) { - JSCRIPT_DEBUG_ENTER_TEXT("token is '%s' of type %02x",vStringValue(token->string),token->type); + TRACE_ENTER_TEXT("token is '%s' of type %s", + vStringValue(token->string), tokenTypeName (token->type)); bool is_terminated = true; /* @@ -2462,14 +2612,14 @@ static bool parseLine (tokenInfo *const token, bool 
is_inside_class) is_terminated = parseStatement (token, is_inside_class); } - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE(); return is_terminated; } static void parseJsFile (tokenInfo *const token) { - JSCRIPT_DEBUG_ENTER(); + TRACE_ENTER(); do { @@ -2484,9 +2634,91 @@ static void parseJsFile (tokenInfo *const token) parseLine (token, false); } while (! isType (token, TOKEN_EOF)); - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE(); } +#ifdef DO_TRACING +#if 0 +static void dumpToken (const tokenInfo *const token) +{ + fprintf(stderr, "Token <%p>: %s: %s\n", + token, + tokenTypeName (token->type), + (token->type == TOKEN_KEYWORD ? keywordName (token->keyword): + token->type == TOKEN_IDENTIFIER? vStringValue (token->string): + "")); +} +#endif + +static const char *tokenTypeName(enum eTokenType e) +{ /* Generated by misc/enumstr.sh with cmdline "parsers/jscript.c" "eTokenType" "tokenTypeName" */ + switch (e) + { + case TOKEN_BINARY_OPERATOR: return "TOKEN_BINARY_OPERATOR"; + case TOKEN_CHARACTER: return "TOKEN_CHARACTER"; + case TOKEN_CLOSE_CURLY: return "TOKEN_CLOSE_CURLY"; + case TOKEN_CLOSE_PAREN: return "TOKEN_CLOSE_PAREN"; + case TOKEN_CLOSE_SQUARE: return "TOKEN_CLOSE_SQUARE"; + case TOKEN_COLON: return "TOKEN_COLON"; + case TOKEN_COMMA: return "TOKEN_COMMA"; + case TOKEN_EOF: return "TOKEN_EOF"; + case TOKEN_EQUAL_SIGN: return "TOKEN_EQUAL_SIGN"; + case TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER"; + case TOKEN_KEYWORD: return "TOKEN_KEYWORD"; + case TOKEN_OPEN_CURLY: return "TOKEN_OPEN_CURLY"; + case TOKEN_OPEN_PAREN: return "TOKEN_OPEN_PAREN"; + case TOKEN_OPEN_SQUARE: return "TOKEN_OPEN_SQUARE"; + case TOKEN_PERIOD: return "TOKEN_PERIOD"; + case TOKEN_POSTFIX_OPERATOR: return "TOKEN_POSTFIX_OPERATOR"; + case TOKEN_REGEXP: return "TOKEN_REGEXP"; + case TOKEN_SEMICOLON: return "TOKEN_SEMICOLON"; + case TOKEN_STAR: return "TOKEN_STAR"; + case TOKEN_STRING: return "TOKEN_STRING"; + case TOKEN_TEMPLATE_STRING: return "TOKEN_TEMPLATE_STRING"; + case TOKEN_UNDEFINED: return 
"TOKEN_UNDEFINED"; + default: return "UNKNOWN"; + } +} + +#if 0 +static const char *keywordName(enum eKeywordId e) +{ /* Generated by misc/enumstr.sh with cmdline "parsers/jscript.c" "eKeywordId" "keywordName" */ + switch (e) + { + case KEYWORD_async: return "KEYWORD_async"; + case KEYWORD_capital_function: return "KEYWORD_capital_function"; + case KEYWORD_capital_object: return "KEYWORD_capital_object"; + case KEYWORD_catch: return "KEYWORD_catch"; + case KEYWORD_class: return "KEYWORD_class"; + case KEYWORD_const: return "KEYWORD_const"; + case KEYWORD_default: return "KEYWORD_default"; + case KEYWORD_do: return "KEYWORD_do"; + case KEYWORD_else: return "KEYWORD_else"; + case KEYWORD_export: return "KEYWORD_export"; + case KEYWORD_extends: return "KEYWORD_extends"; + case KEYWORD_finally: return "KEYWORD_finally"; + case KEYWORD_for: return "KEYWORD_for"; + case KEYWORD_function: return "KEYWORD_function"; + case KEYWORD_get: return "KEYWORD_get"; + case KEYWORD_if: return "KEYWORD_if"; + case KEYWORD_let: return "KEYWORD_let"; + case KEYWORD_new: return "KEYWORD_new"; + case KEYWORD_prototype: return "KEYWORD_prototype"; + case KEYWORD_return: return "KEYWORD_return"; + case KEYWORD_sap: return "KEYWORD_sap"; + case KEYWORD_set: return "KEYWORD_set"; + case KEYWORD_static: return "KEYWORD_static"; + case KEYWORD_switch: return "KEYWORD_switch"; + case KEYWORD_this: return "KEYWORD_this"; + case KEYWORD_try: return "KEYWORD_try"; + case KEYWORD_var: return "KEYWORD_var"; + case KEYWORD_while: return "KEYWORD_while"; + default: return "UNKNOWN"; + } +} +#endif +#endif + static void initialize (const langType language) { Assert (ARRAY_SIZE (JsKinds) == JSTAG_COUNT); @@ -2537,9 +2769,13 @@ extern parserDefinition* JavaScriptParser (void) { // .jsx files are JSX: https://facebook.github.io/jsx/ // which have JS function definitions, so we just use the JS parser - static const char *const extensions [] = { "js", "jsx", NULL }; + static const char *const extensions [] 
= { "js", "jsx", "mjs", NULL }; static const char *const aliases [] = { "js", "node", "nodejs", - "seed", "gjs", NULL }; + "seed", "gjs", + /* Used in PostgreSQL + * https://github.com/plv8/plv8 */ + "v8", + NULL }; parserDefinition *const def = parserNew ("JavaScript"); def->extensions = extensions; def->aliases = aliases; diff --git a/ctags/parsers/lua.c b/ctags/parsers/lua.c new file mode 100644 index 0000000000..2d7efed950 --- /dev/null +++ b/ctags/parsers/lua.c @@ -0,0 +1,257 @@ +/* +* Copyright (c) 2000-2001, Max Ischenko . +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for Lua language. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "debug.h" +#include "entry.h" +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_FUNCTION, + K_UNKNOWN, +} luaKind; + +typedef enum { + LUA_UNKNOWN_REFERENCED, +} luaUnknownRole; + +static roleDefinition LuaUnknownRoles [] = { + { false, "referenced", "referenced somehow" }, +}; + +static kindDefinition LuaKinds [] = { + { true, 'f', "function", "functions" }, + + /* `unknown' is a kind just for making FQ tag for functions. */ + { false, 'X', "unknown", "unknown language object", + .referenceOnly = true, ATTACH_ROLES(LuaUnknownRoles) }, +}; + +/* +* FUNCTION DEFINITIONS +*/ + +/* + * Helper function. + * Returns 1 if line looks like a line of Lua code. + * + * TODO: Recognize UNIX bang notation. + * (Lua treat first line as a comment if it starts with #!) 
+ * + */ +static bool is_a_code_line (const unsigned char *line) +{ + bool result; + const unsigned char *p = line; + while (isspace ((int) *p)) + p++; + if (p [0] == '\0') + result = false; + else if (p [0] == '-' && p [1] == '-') + result = false; + else + result = true; + return result; +} + +static bool isLuaIdentifier (char c) +{ + return (bool) !(isspace(c) || c == '(' || c == ')' || c == '=' || c == '.' || c == ':'); +} + +static void set_scope (int child, int parent) +{ + if (parent == CORK_NIL || child == CORK_NIL) + return; + + tagEntryInfo *e = getEntryInCorkQueue (child); + if (!e) + return; + + e->extensionFields.scopeIndex = parent; +} + +static void extract_next_token (const char *begin, const char *end_sentinel, vString *name) +{ + if (begin == NULL || end_sentinel == NULL) + return; + + Assert (begin <= end_sentinel); + + /* Both on '(' */ + if (begin == end_sentinel) + return; + + /* Trim prefixed white spaces */ + while (isspace ((int) *begin)) + begin++; + + /* Both on '(' */ + if (begin == end_sentinel) + return; + + const char *end = end_sentinel - 1; + + /* Trim suffixed white spaces */ + while (isspace ((int) *end)) + end--; + + Assert (begin <= end); + + int lastCorkIndx = CORK_NIL; + for (const char *c = begin; c <= end; ++c) + { + if (*c == '.' || *c == ':') + { + int r = makeSimpleRefTag(name, + K_UNKNOWN, LUA_UNKNOWN_REFERENCED); + set_scope(r, lastCorkIndx); + lastCorkIndx = r; + + /* Do not include module names in function name */ + vStringClear (name); + } + else if (isLuaIdentifier (*c)) + vStringPut (name, (int) *c); + else + { + /* An unexpected character is found + * between "function" and "(" */ + vStringClear (name); + return; + } + } + + int d = makeSimpleTag (name, K_FUNCTION); + set_scope(d, lastCorkIndx); + vStringClear (name); +} + +static void extract_prev_token (const char *end, const char *begin_sentinel, vString *name) +{ + const char *begin; + + if (end == NULL || begin_sentinel == NULL) + return; + + if (! 
(begin_sentinel <= end)) + return; + + while (isspace ((int) *end)) + { + end--; + if (! (begin_sentinel <= end)) + return; + } + + begin = end; + while (begin_sentinel <= begin && isLuaIdentifier (*begin)) + begin--; + + int targetCorkIndex = CORK_NIL; + if (end - begin) + { + vStringNCatS (name, begin + 1, end - begin); + targetCorkIndex = makeSimpleTag (name, K_FUNCTION); + vStringClear (name); + } + + if (targetCorkIndex == CORK_NIL || begin_sentinel == begin) + return; + + /* Fill the scope field of the function. */ + end = begin; + while (begin_sentinel <= (begin + 1)) + { + bool on_boundary = false; + if (begin < begin_sentinel || !isLuaIdentifier (*begin)) + { + if (end - begin) + { + vStringNCatS (name, begin + 1, end - begin); + int r = makeSimpleRefTag (name, + K_UNKNOWN, LUA_UNKNOWN_REFERENCED); + set_scope (targetCorkIndex, r); + targetCorkIndex = r; + vStringClear (name); + } + if (begin_sentinel <= begin && ! (*begin == ':' || *begin == '.')) + break; + on_boundary = true; + } + begin--; + + if(on_boundary) + end = begin; + } +} + +static void findLuaTags (void) +{ + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = readLineFromInputFile ()) != NULL) + { + const char *p, *q; + + if (! is_a_code_line (line)) + continue; + + p = (const char*) strstr ((const char*) line, "function"); + if (p == NULL) + continue; + + q = strchr ((const char*) line, '='); + + if (q == NULL) { + p = p + 8; /* skip the `function' word */ + + /* We expect [ \t(] */ + if (! (*p == '(' || isspace ((int)*p))) + continue; + q = strchr ((const char*) p, '('); + if (q) + extract_next_token (p, q, name); + } else if ( + (*(q+1) != '=') /* ignore `if type(v) == "function" then ...' 
*/ + && (q < p) /* ignore "function" ~= */ + ) { + p = (const char*) &line[0]; + if (p < q) + extract_prev_token (q - 1, p, name); + } + } + vStringDelete (name); +} + +extern parserDefinition* LuaParser (void) +{ + static const char* const extensions [] = { "lua", NULL }; + parserDefinition* def = parserNew ("Lua"); + def->kindTable = LuaKinds; + def->kindCount = ARRAY_SIZE (LuaKinds); + def->extensions = extensions; + def->parser = findLuaTags; + def->useCork = CORK_QUEUE; + def->requestAutomaticFQTag = true; + return def; +} diff --git a/ctags/parsers/geany_make.c b/ctags/parsers/make.c similarity index 55% rename from ctags/parsers/geany_make.c rename to ctags/parsers/make.c index d49c42108d..8f3180495f 100644 --- a/ctags/parsers/geany_make.c +++ b/ctags/parsers/make.c @@ -12,11 +12,12 @@ */ #include "general.h" /* must always come first */ -#include #include #include -#include "options.h" +#include "make.h" + +#include "kind.h" #include "parse.h" #include "read.h" #include "routines.h" @@ -24,18 +25,32 @@ #include "vstring.h" #include "xtag.h" + /* * DATA DEFINITIONS */ typedef enum { - K_MACRO, K_TARGET -} shKind; + K_MACRO, K_TARGET, K_INCLUDE, +} makeKind; + +typedef enum { + R_INCLUDE_GENERIC, + R_INCLUDE_OPTIONAL, +} makeMakefileRole; + +static roleDefinition MakeMakefileRoles [] = { + { true, "included", "included" }, + { true, "optional", "optionally included"}, +}; static kindDefinition MakeKinds [] = { { true, 'm', "macro", "macros"}, - { true, 't', "target", "targets"} + { true, 't', "target", "targets"}, + { true, 'I', "makefile", "makefiles", + .referenceOnly = true, ATTACH_ROLES(MakeMakefileRoles)}, }; + /* * FUNCTION DEFINITIONS */ @@ -91,6 +106,23 @@ static bool isSpecialTarget (vString *const name) return true; } +static void makeSimpleMakeTag (vString *const name, makeKind kind) +{ + if (!isLanguageEnabled (getInputLanguage ())) + return; + + makeSimpleTag (name, kind); +} + +static void makeSimpleMakeRefTag (const vString* const name, const 
int kind, + int roleIndex) +{ + if (!isLanguageEnabled (getInputLanguage ())) + return; + + makeSimpleRefTag (name, kind, roleIndex); +} + static void newTarget (vString *const name) { /* Ignore GNU Make's "special targets". */ @@ -98,12 +130,63 @@ static void newTarget (vString *const name) { return; } - makeSimpleTag (name, K_TARGET); + makeSimpleMakeTag (name, K_TARGET); +} + +static void newMacro (vString *const name, bool with_define_directive, bool appending) +{ + subparser *s; + + if (!appending) + makeSimpleMakeTag (name, K_MACRO); + + foreachSubparser(s, false) + { + makeSubparser *m = (makeSubparser *)s; + enterSubparser(s); + if (m->newMacroNotify) + m->newMacroNotify (m, vStringValue(name), with_define_directive, appending); + leaveSubparser(); + } +} + +static void valueFound (vString *const name) +{ + subparser *s; + foreachSubparser(s, false) + { + makeSubparser *m = (makeSubparser *)s; + enterSubparser(s); + if (m->valueNotify) + m->valueNotify (m, vStringValue (name)); + leaveSubparser(); + } } -static void newMacro (vString *const name) +static void directiveFound (vString *const name) { - makeSimpleTag (name, K_MACRO); + subparser *s; + foreachSubparser (s, false) + { + makeSubparser *m = (makeSubparser *)s; + enterSubparser(s); + if (m->directiveNotify) + m->directiveNotify (m, vStringValue (name)); + leaveSubparser(); + } +} + +static void newInclude (vString *const name, bool optional) +{ + makeSimpleMakeRefTag (name, K_INCLUDE, + optional? 
R_INCLUDE_OPTIONAL: R_INCLUDE_GENERIC); +} + +static bool isAcceptableAsInclude (vString *const name) +{ + if (strcmp (vStringValue (name), "$") == 0) + return false; + return true; } static void readIdentifier (const int first, vString *const id) @@ -128,9 +211,16 @@ static void findMakeTags (void) stringList *identifiers = stringListNew (); bool newline = true; bool in_define = false; + bool in_value = false; bool in_rule = false; bool variable_possible = true; + bool appending = false; int c; + subparser *sub; + + sub = getSubparserRunningBaseparser(); + if (sub) + chooseExclusiveSubparser (sub, NULL); while ((c = nextChar ()) != EOF) { @@ -146,6 +236,9 @@ static void findMakeTags (void) else if (c != '\n') in_rule = false; } + else if (in_value) + in_value = false; + stringListClear (identifiers); variable_possible = (bool)(!in_rule); newline = false; @@ -162,7 +255,14 @@ static void findMakeTags (void) ungetcToInputFile (c); variable_possible = (c == '='); } - else if (variable_possible && c == ':' && + else if (variable_possible && c == '+') + { + c = nextChar (); + ungetcToInputFile (c); + variable_possible = (c == '='); + appending = true; + } + else if ((! in_value) && variable_possible && c == ':' && stringListCount (identifiers) > 0) { c = nextChar (); @@ -179,9 +279,11 @@ static void findMakeTags (void) else if (variable_possible && c == '=' && stringListCount (identifiers) == 1) { - newMacro (stringListItem (identifiers, 0)); - skipLine (); + newMacro (stringListItem (identifiers, 0), false, appending); + + in_value = true; in_rule = false; + appending = false; } else if (variable_possible && isIdentifier (c)) { @@ -189,6 +291,9 @@ static void findMakeTags (void) readIdentifier (c, name); stringListAdd (identifiers, name); + if (in_value) + valueFound(name); + if (stringListCount (identifiers) == 1) { if (in_define && ! 
strcmp (vStringValue (name), "endef")) @@ -209,27 +314,67 @@ static void findMakeTags (void) if (c == '\n') ungetcToInputFile (c); vStringStripTrailing (name); - newMacro (name); + + newMacro (name, true, false); } else if (! strcmp (vStringValue (name), "export")) stringListClear (identifiers); + else if (! strcmp (vStringValue (name), "include") + || ! strcmp (vStringValue (name), "sinclude") + || ! strcmp (vStringValue (name), "-include")) + { + bool optional = (vStringValue (name)[0] == 'i')? false: true; + while (1) + { + c = skipToNonWhite (nextChar ()); + readIdentifier (c, name); + vStringStripTrailing (name); + if (isAcceptableAsInclude(name)) + newInclude (name, optional); + + /* non-space characters after readIdentifier() may + * be rejected by the function: + * e.g. + * include $* + * + * Here, remove such characters from input stream. + */ + do + c = nextChar (); + while (c != EOF && c != '\n' && (!isspace (c))); + if (c == '\n') + ungetcToInputFile (c); + + if (c == EOF || c == '\n') + break; + } + } + else + directiveFound (name); } } else variable_possible = false; } + stringListDelete (identifiers); } + extern parserDefinition* MakefileParser (void) { static const char *const patterns [] = { "[Mm]akefile", "GNUmakefile", NULL }; static const char *const extensions [] = { "mak", "mk", NULL }; + static const char *const aliases [] = { + /* the mode name in emacs */ + "makefile", + NULL }; parserDefinition* const def = parserNew ("Make"); - def->kindTable = MakeKinds; + def->kindTable = MakeKinds; def->kindCount = ARRAY_SIZE (MakeKinds); def->patterns = patterns; def->extensions = extensions; + def->aliases = aliases; def->parser = findMakeTags; return def; } diff --git a/ctags/parsers/make.h b/ctags/parsers/make.h new file mode 100644 index 0000000000..fc327ebf94 --- /dev/null +++ b/ctags/parsers/make.h @@ -0,0 +1,34 @@ +/* +* Copyright (c) 2016, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU 
General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for makefiles. +*/ + +#ifndef CTAGS_PARSER_MAKE_H +#define CTAGS_PARSER_MAKE_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include "subparser.h" +#include "vstring.h" + +typedef struct sMakeSubparser makeSubparser; + +struct sMakeSubparser { + subparser subparser; + + void (* valueNotify) (makeSubparser *s, char* name); + void (* directiveNotify) (makeSubparser *s, char* name); + void (* newMacroNotify) (makeSubparser *s, + char* name, + bool withDefineDirective, + bool appending); +}; + +#endif diff --git a/ctags/parsers/nsis.c b/ctags/parsers/nsis.c new file mode 100644 index 0000000000..8e64196b30 --- /dev/null +++ b/ctags/parsers/nsis.c @@ -0,0 +1,394 @@ +/* +* Copyright (c) 2000-2002, Darren Hiebert +* Copyright (c) 2009-2011, Enrico Tröger +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for NSIS scripts +* (https://en.wikipedia.org/wiki/Nullsoft_Scriptable_Install_System). +* +* Based on sh.c. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "entry.h" +#include "parse.h" +#include "read.h" +#include "vstring.h" +#include "routines.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_SECTION, + K_FUNCTION, + K_VARIABLE, + K_DEFINITION, + K_MACRO, + K_SECTION_GROUP, + K_MACRO_PARAM, + K_LANGSTR, + K_SCRIPT, +} NsisKind; + +typedef enum { + NSIS_SCRIPT_INCLUDED, +} nsisScriptRole; + +static roleDefinition NsisScriptRoles [] = { + { true, "included", "included with !include" }, +}; + +static kindDefinition NsisKinds [] = { + { true, 's', "section", "sections"}, + { true, 'f', "function", "functions"}, + { true, 'v', "variable", "variables"}, + { true, 'd', "definition", "definitions"}, + { true, 'm', "macro", "macros"}, + { true, 'S', "sectionGroup", "section groups"}, + { false, 'p', "macroparam", "macro parameters"}, + { true, 'l', "langstr", "language strings"}, + { true, 'i', "script", "NSIS scripts", + .referenceOnly = true, ATTACH_ROLES(NsisScriptRoles)}, +}; + +typedef enum { + F_LANGID, +} nsisField; + +static fieldDefinition NsisFields[] = { + { .name = "langid", + .description = "language identifier specified in (License)LangString commands", + .enabled = true }, +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static const unsigned char* skipWhitespace (const unsigned char* cp) +{ + while (isspace ((int) *cp)) + ++cp; + return cp; +} + +static const unsigned char* skipFlags (const unsigned char* cp) +{ + while (*cp == '/') + { + ++cp; + while (! isspace ((int) *cp)) + ++cp; + while (isspace ((int) *cp)) + ++cp; + } + return cp; +} + +static int makeSimpleTagWithScope(vString *name, int kindIndex, int parentCorkIndex) +{ + tagEntryInfo e; + initTagEntry (&e, vStringValue (name), kindIndex); + e.extensionFields.scopeIndex = parentCorkIndex; + return makeTagEntry (&e); +} + +#define lineStartingWith(CP,EXPECTED,EOL) \ + (strncasecmp ((const char*) CP, EXPECTED, strlen(EXPECTED)) == 0 \ + && (EOL ? 
(isspace ((int) CP [strlen(EXPECTED)]) || CP [strlen(EXPECTED)] == '\0') \ + : isspace ((int) CP [strlen(EXPECTED)]))) + +#define fillName(NAME,CP,CONDITION) \ + while (CONDITION) \ + { \ + vStringPut ((NAME), (int) *(CP)); \ + ++(CP); \ + } \ + do {} while (0) + +static const unsigned char* parseSection (const unsigned char* cp, vString *name, + int kindIndex, int scopeIndex, int *corkIndex) +{ + cp = skipWhitespace (cp); + cp = skipFlags (cp); + cp = skipWhitespace (cp); + + if (corkIndex) + *corkIndex = CORK_NIL; + + if (strpbrk((const char *)cp, "'`\"")) + { + const unsigned char terminator = *cp; + + cp++; + if (*cp == terminator) + { + /* An empty section. + * See https://nsis.sourceforge.io/Docs/Chapter4.html#sectionsettext + */ + anonGenerate (name, + (kindIndex == K_SECTION + ? "AnonymousSection" + : "AnonymousSectionGroup"), + kindIndex); + cp++; + } + else if (*cp == '\0') + return cp; + else + { + int in_escape = 0; + do + { + vStringPut (name, (int) *cp); + ++cp; + + if (*cp == '\0') + break; + + /* + * Ignore `"' in `$\"' as the terminator of quotation. + */ + if (*cp == '$' && in_escape == 0) + in_escape++; + else if (*cp == '\\' && in_escape == 1) + in_escape++; + else if (*cp == terminator && in_escape == 2) + /* + * This `"' is not a terminator of quotation; + * set in_escape to 3. + */ + in_escape++; + else + in_escape = 0; + + if ((in_escape != 3) && *cp == terminator) + { + ++cp; + break; + } + } + while (1); + } + } + else + { + while (isalnum ((int) *cp) + || *cp == '_' || *cp == '-' || *cp == '.' || *cp == '!' + || *cp == '$' || *cp == '{' || *cp == '}' || *cp == '(' || *cp == ')') + { + vStringPut (name, (int) *cp); + ++cp; + } + } + int r = makeSimpleTagWithScope (name, kindIndex, scopeIndex); + if (corkIndex) + *corkIndex = r; + if (vStringLength (name) > 0) + { + /* + * Try to capture section_index_output. 
+ */ + vStringClear (name); + cp = skipWhitespace (cp); + + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_')); + + if (vStringLength (name) > 0) + { + makeSimpleTag (name, K_DEFINITION); + vStringClear (name); + } + } + return cp; +} + +static const unsigned char* parseLangString (const unsigned char* cp, vString *name) +{ + cp = skipWhitespace (cp); + + /* `^' is not explained in the nsis reference manual. However, it is used + * in gvim. + * e.g. + * https://github.com/vim/vim/blob/3dabd718f4b2d8e09de9e2ec73832620b91c2f79/nsis/lang/english.nsi + */ + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_' || *cp == '^')); + + if (vStringLength (name) > 0) + { + int r = makeSimpleTag (name, K_LANGSTR); + if (r == CORK_NIL) + goto out; + vStringClear (name); + + cp = skipWhitespace (cp); + fillName (name, cp, ((*cp != '\0') && (!isspace ((int) *cp)))); + if (vStringLength (name) > 0) + { + attachParserFieldToCorkEntry (r, NsisFields[F_LANGID].ftype, + vStringValue (name)); + vStringClear (name); + } + } + out: + return cp; +} + +static void findNsisTags (void) +{ + int sectionGroupIndex = CORK_NIL; + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = readLineFromInputFile ()) != NULL) + { + const unsigned char* cp = line; + + while (isspace (*cp)) + cp++; + + if (*cp == '#' || *cp == ';') + continue; + + /* functions */ + if (lineStartingWith (cp, "function", false)) + { + cp += 8; + cp = skipWhitespace (cp); + + fillName (name, cp, + (isalnum ((int) *cp) || *cp == '_' || *cp == '-' || *cp == '.' 
|| *cp == '!')); + + makeSimpleTag (name, K_FUNCTION); + vStringClear (name); + } + /* variables */ + else if (lineStartingWith (cp, "var", false)) + { + cp += 3; + cp = skipWhitespace (cp); + cp = skipFlags (cp); + + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_')); + + makeSimpleTag (name, K_VARIABLE); + vStringClear (name); + } + /* section groups */ + else if (lineStartingWith (cp, "sectiongroup", false)) + { + cp += 12; + cp = parseSection (cp, name, K_SECTION_GROUP, CORK_NIL, §ionGroupIndex); + } + else if (lineStartingWith (cp, "sectiongroupend", true)) + { + cp += 15; + sectionGroupIndex = CORK_NIL; + } + /* sections */ + else if (lineStartingWith (cp, "section", false)) + { + cp += 7; + cp = parseSection (cp, name, K_SECTION, sectionGroupIndex, NULL); + } + /* LangString */ + else if (lineStartingWith (cp, "langstring", false)) + { + cp += 10; + cp = parseLangString (cp, name); + } + /* LicenseLangString */ + else if (lineStartingWith (cp, "licenselangstring", false)) + { + cp += 17; + cp = parseLangString (cp, name); + } + /* definitions */ + else if (lineStartingWith (cp, "!define", false)) + { + cp += 7; + cp = skipWhitespace (cp); + cp = skipFlags (cp); + + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_')); + + makeSimpleTag (name, K_DEFINITION); + vStringClear (name); + } + /* macro */ + else if (lineStartingWith (cp, "!macro", false)) + { + cp += 6; + cp = skipWhitespace (cp); + cp = skipFlags (cp); + + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_')); + + int index = makeSimpleTag (name, K_MACRO); + if (vStringLength (name) > 0) + { + while (1) + { + vStringClear (name); + cp = skipWhitespace (cp); + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_')); + if (vStringLength (name) == 0) + break; + makeSimpleTagWithScope (name, K_MACRO_PARAM, index); + } + } + } + /* include */ + else if (lineStartingWith (cp, "!include", false)) + { + cp += 8; + + /* !include [/NONFATAL] [/CHARSET=ACP|OEM|CP#|UTF8|UTF16LE|UTF16BE] file */ + 
cp = skipWhitespace (cp); + + /* /NONFATAL */ + cp = skipFlags (cp); + cp = skipWhitespace (cp); + + /* /CHARSET */ + cp = skipFlags (cp); + cp = skipWhitespace (cp); + + fillName (name, cp, (*cp != '\0' && *cp != ';' && *cp != '#')); + vStringStripTrailing (name); + + if (vStringLength (name) > 0) + { + makeSimpleRefTag (name, K_SCRIPT, NSIS_SCRIPT_INCLUDED); + vStringClear (name); + } + /* TODO: capture !addincludedir */ + } + } + vStringDelete (name); +} + +extern parserDefinition* NsisParser (void) +{ + static const char *const extensions [] = { + "nsi", "nsh", NULL + }; + parserDefinition* def = parserNew ("NSIS"); + def->kindTable = NsisKinds; + def->kindCount = ARRAY_SIZE (NsisKinds); + def->extensions = extensions; + def->fieldTable = NsisFields; + def->fieldCount = ARRAY_SIZE (NsisFields); + def->parser = findNsisTags; + def->useCork = CORK_QUEUE; + return def; +} diff --git a/ctags/parsers/geany_objc.c b/ctags/parsers/objc.c similarity index 74% rename from ctags/parsers/geany_objc.c rename to ctags/parsers/objc.c index 83966b8fe1..5efd852bfd 100644 --- a/ctags/parsers/geany_objc.c +++ b/ctags/parsers/objc.c @@ -16,11 +16,13 @@ #include #include "keyword.h" +#include "debug.h" #include "entry.h" #include "parse.h" -#include "options.h" #include "read.h" #include "routines.h" +#include "selectors.h" +#include "trashbox.h" #include "vstring.h" typedef enum { @@ -36,7 +38,8 @@ typedef enum { K_TYPEDEF, K_STRUCT, K_ENUM, - K_MACRO + K_MACRO, + K_CATEGORY, } objcKind; static kindDefinition ObjcKinds[] = { @@ -53,6 +56,7 @@ static kindDefinition ObjcKinds[] = { {true, 's', "struct", "A type structure"}, {true, 'e', "enum", "An enumeration"}, {true, 'M', "macro", "A preprocessor macro"}, + {true, 'C', "category", "categories"}, }; typedef enum { @@ -63,6 +67,7 @@ typedef enum { ObjcINTERFACE, ObjcPROTOCOL, ObjcENCODE, + ObjcEXTERN, ObjcSYNCHRONIZED, ObjcSELECTOR, ObjcPROPERTY, @@ -93,7 +98,11 @@ typedef enum { Tok_dpoint, /* ':' */ Tok_Sharp, /* '#' */ 
Tok_Backslash, /* '\\' */ + Tok_Asterisk, /* '*' */ + Tok_ANGLEL, /* '<' */ + Tok_ANGLER, /* '>' */ Tok_EOL, /* '\r''\n' */ + Tok_CSTRING, /* "..." */ Tok_any, Tok_EOF /* END of file */ @@ -105,6 +114,7 @@ static const keywordTable objcKeywordTable[] = { {"typedef", ObjcTYPEDEF}, {"struct", ObjcSTRUCT}, {"enum", ObjcENUM}, + {"extern", ObjcEXTERN}, {"@implementation", ObjcIMPLEMENTATION}, {"@interface", ObjcINTERFACE}, {"@protocol", ObjcPROTOCOL}, @@ -125,6 +135,24 @@ static const keywordTable objcKeywordTable[] = { {"@required", ObjcREQUIRED}, }; +typedef enum { + F_CATEGORY, + F_PROTOCOLS, +} objcField; + +static fieldDefinition ObjcFields [] = { + { + .name = "category", + .description = "category attached to the class", + .enabled = true, + }, + { + .name = "protocols", + .description = "protocols that the class (or category) confirms to", + .enabled = true, + }, +}; + static langType Lang_ObjectiveC; /*////////////////////////////////////////////////////////////////// @@ -176,12 +204,14 @@ static void eatWhiteSpace (lexingState * st) st->cp = cp; } -static void eatString (lexingState * st) +static void readCString (lexingState * st) { bool lastIsBackSlash = false; bool unfinished = true; const unsigned char *c = st->cp + 1; + vStringClear (st->name); + while (unfinished) { /* end of line should never happen. 
@@ -191,7 +221,10 @@ static void eatString (lexingState * st) else if (*c == '"' && !lastIsBackSlash) unfinished = false; else + { lastIsBackSlash = *c == '\\'; + vStringPut (st->name, (int) *c); + } c++; } @@ -282,7 +315,7 @@ static objcKeyword lex (lexingState * st) return Tok_EOL; } - if (isAlpha (*st->cp)) + if (isAlpha (*st->cp) || (*st->cp == '_')) { readIdentifier (st); retType = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC); @@ -373,14 +406,23 @@ static objcKeyword lex (lexingState * st) st->cp++; return Tok_dpoint; case '"': - eatString (st); - return Tok_any; + readCString (st); + return Tok_CSTRING; case '+': st->cp++; return Tok_PLUS; case '-': st->cp++; return Tok_MINUS; + case '*': + st->cp++; + return Tok_Asterisk; + case '<': + st->cp++; + return Tok_ANGLEL; + case '>': + st->cp++; + return Tok_ANGLER; default: st->cp++; @@ -419,6 +461,8 @@ static void parseImplemMethods (vString * const ident, objcToken what); static vString *tempName = NULL; static vString *parentName = NULL; static objcKind parentType = K_INTERFACE; +static int parentCorkIndex = CORK_NIL; +static int categoryCorkIndex = CORK_NIL; /* used to prepare tag for OCaml, just in case their is a need to * add additional information to the tag. 
*/ @@ -426,7 +470,7 @@ static void prepareTag (tagEntryInfo * tag, vString const *name, objcKind kind) { initTagEntry (tag, vStringValue (name), kind); - if (parentName != NULL) + if (vStringLength (parentName) > 0) { tag->extensionFields.scopeKindIndex = parentType; tag->extensionFields.scopeName = vStringValue (parentName); @@ -439,22 +483,39 @@ static void pushEnclosingContext (const vString * parent, objcKind type) parentType = type; } +static void pushEnclosingContextFull (const vString * parent, objcKind type, int corkIndex) +{ + pushEnclosingContext (parent, type); + parentCorkIndex = corkIndex; +} + static void popEnclosingContext (void) { vStringClear (parentName); + parentCorkIndex = CORK_NIL; +} + +static void pushCategoryContext (int category_index) +{ + categoryCorkIndex = category_index; +} + +static void popCategoryContext (void) +{ + categoryCorkIndex = CORK_NIL; } /* Used to centralise tag creation, and be able to add * more information to it in the future */ -static void addTag (vString * const ident, int kind) +static int addTag (vString * const ident, int kind) { tagEntryInfo toCreate; if (! ObjcKinds[kind].enabled) - return; + return CORK_NIL; prepareTag (&toCreate, ident, kind); - makeTagEntry (&toCreate); + return makeTagEntry (&toCreate); } static objcToken waitedToken, fallBackToken; @@ -541,24 +602,77 @@ static objcKind methodKind; static vString *fullMethodName; static vString *prevIdent; +static vString *signature; -static void parseMethodsName (vString * const ident, objcToken what) +static void tillTokenWithCapturingSignature (vString * const ident, objcToken what) { + tillToken (ident, what); + + if (what != waitedToken) + { + if (what == Tok_Asterisk) + vStringPut (signature, '*'); + else if (vStringLength (ident) > 0) + { + if (! 
(vStringLast (signature) == ',' + || vStringLast (signature) == '(' + || vStringLast (signature) == ' ')) + vStringPut (signature, ' '); + + vStringCat (signature, ident); + } + } +} + +static void parseMethodsNameCommon (vString * const ident, objcToken what, + parseNext reEnter, + parseNext nextAction) +{ + int index; + switch (what) { case Tok_PARL: toDoNext = &tillToken; - comeAfter = &parseMethodsName; + comeAfter = reEnter; waitedToken = Tok_PARR; + + if (! (vStringLength(prevIdent) == 0 + && vStringLength(fullMethodName) == 0)) + toDoNext = &tillTokenWithCapturingSignature; break; case Tok_dpoint: vStringCat (fullMethodName, prevIdent); - vStringCatS (fullMethodName, ":"); + vStringPut (fullMethodName, ':'); vStringClear (prevIdent); + + if (vStringLength (signature) > 1) + vStringPut (signature, ','); break; case ObjcIDENTIFIER: + if ((vStringLength (prevIdent) > 0 + /* "- initWithObject: o0 withAnotherObject: o1;" + Overwriting the last value of prevIdent ("o0"); + a parameter name ("o0") was stored to prevIdent, + and a part of selector("withAnotherObject") + overwrites it. + If type for the parameter specified explicitly, + the last char of signature should not be ',' nor + '('. In this case, "id" must be put as the type for + the parameter. */ + && (vStringLast (signature) == ',' + || vStringLast (signature) == '(')) + || (/* "- initWithObject: object;" + In this case no overwriting happens. + However, "id" for "object" is part + of signature. 
*/ + vStringLength (prevIdent) == 0 + && vStringLength (fullMethodName) > 0 + && vStringLast (signature) == '(')) + vStringCatS (signature, "id"); + vStringCopy (prevIdent, ident); break; @@ -567,15 +681,34 @@ static void parseMethodsName (vString * const ident, objcToken what) /* method name is not simple */ if (vStringLength (fullMethodName) != '\0') { - addTag (fullMethodName, methodKind); + index = addTag (fullMethodName, methodKind); vStringClear (fullMethodName); } else - addTag (prevIdent, methodKind); + index = addTag (prevIdent, methodKind); - toDoNext = &parseMethods; + toDoNext = nextAction; parseImplemMethods (ident, what); vStringClear (prevIdent); + + tagEntryInfo *e = getEntryInCorkQueue (index); + if (e) + { + if (vStringLast (signature) == ',') + vStringCatS (signature, "id"); + vStringPut (signature, ')'); + + e->extensionFields.signature = vStringStrdup (signature); + + vStringClear (signature); + vStringPut (signature, '('); + + tagEntryInfo *e_cat = getEntryInCorkQueue (categoryCorkIndex); + if (e_cat) + attachParserFieldToCorkEntry (index, + ObjcFields [F_CATEGORY].ftype, + e_cat->name); + } break; default: @@ -583,44 +716,34 @@ static void parseMethodsName (vString * const ident, objcToken what) } } -static void parseMethodsImplemName (vString * const ident, objcToken what) +static void parseMethodsName (vString * const ident, objcToken what) { - switch (what) - { - case Tok_PARL: - toDoNext = &tillToken; - comeAfter = &parseMethodsImplemName; - waitedToken = Tok_PARR; - break; - - case Tok_dpoint: - vStringCat (fullMethodName, prevIdent); - vStringCatS (fullMethodName, ":"); - vStringClear (prevIdent); - break; + parseMethodsNameCommon (ident, what, parseMethodsName, parseMethods); +} - case ObjcIDENTIFIER: - vStringCopy (prevIdent, ident); - break; +static void parseMethodsImplemName (vString * const ident, objcToken what) +{ + parseMethodsNameCommon (ident, what, parseMethodsImplemName, parseImplemMethods); +} - case Tok_CurlL: - case 
Tok_semi: - /* method name is not simple */ - if (vStringLength (fullMethodName) != '\0') +static void parseCategory (vString * const ident, objcToken what) +{ + if (what == ObjcIDENTIFIER) + { + tagEntryInfo *e = getEntryInCorkQueue (parentCorkIndex); + if (e) { - addTag (fullMethodName, methodKind); - vStringClear (fullMethodName); + attachParserFieldToCorkEntry (parentCorkIndex, + ObjcFields [F_CATEGORY].ftype, + vStringValue (ident)); + if (e->kindIndex == K_INTERFACE) + toDoNext = &parseMethods; + else + toDoNext = &parseImplemMethods; } - else - addTag (prevIdent, methodKind); - toDoNext = &parseImplemMethods; - parseImplemMethods (ident, what); - vStringClear (prevIdent); - break; - - default: - break; + int index = addTag (ident, K_CATEGORY); + pushCategoryContext (index); } } @@ -640,6 +763,7 @@ static void parseImplemMethods (vString * const ident, objcToken what) case ObjcEND: /* @end */ popEnclosingContext (); + popCategoryContext (); toDoNext = &globalScope; break; @@ -649,6 +773,10 @@ static void parseImplemMethods (vString * const ident, objcToken what) comeAfter = &parseImplemMethods; break; + case Tok_PARL: /* ( */ + toDoNext = &parseCategory; + break; + default: break; } @@ -681,6 +809,49 @@ static void parseProperty (vString * const ident, objcToken what) } } +static void parseInterfaceSuperclass (vString * const ident, objcToken what) +{ + tagEntryInfo *e = getEntryInCorkQueue (parentCorkIndex); + if (what == ObjcIDENTIFIER && e) + e->extensionFields.inheritance = vStringStrdup (ident); + + toDoNext = &parseMethods; +} + +static void parseInterfaceProtocolList (vString * const ident, objcToken what) +{ + static vString *protocol_list; + + if (parentCorkIndex == CORK_NIL) + { + toDoNext = &parseMethods; + return; + } + + if (protocol_list == NULL) + { + protocol_list = vStringNew (); + DEFAULT_TRASH_BOX(protocol_list, vStringDelete); + } + + if (what == ObjcIDENTIFIER) + vStringCat(protocol_list, ident); + else if (what == Tok_COMA) + vStringPut 
(protocol_list, ','); + else if (what == Tok_ANGLER) + { + attachParserFieldToCorkEntry (parentCorkIndex, + ObjcFields [F_PROTOCOLS].ftype, + vStringValue (protocol_list)); + if (categoryCorkIndex != CORK_NIL) + attachParserFieldToCorkEntry (categoryCorkIndex, + ObjcFields [F_PROTOCOLS].ftype, + vStringValue (protocol_list)); + vStringClear (protocol_list); + toDoNext = &parseMethods; + } +} + static void parseMethods (vString * const ident CTAGS_ATTR_UNUSED, objcToken what) { switch (what) @@ -701,6 +872,7 @@ static void parseMethods (vString * const ident CTAGS_ATTR_UNUSED, objcToken wha case ObjcEND: /* @end */ popEnclosingContext (); + popCategoryContext (); toDoNext = &globalScope; break; @@ -708,6 +880,18 @@ static void parseMethods (vString * const ident CTAGS_ATTR_UNUSED, objcToken wha toDoNext = &parseFields; break; + case Tok_dpoint: /* : */ + toDoNext = &parseInterfaceSuperclass; + break; + + case Tok_PARL: /* ( */ + toDoNext = &parseCategory; + break; + + case Tok_ANGLEL: /* < */ + toDoNext = &parseInterfaceProtocolList; + break; + default: break; } @@ -718,8 +902,8 @@ static void parseProtocol (vString * const ident, objcToken what) { if (what == ObjcIDENTIFIER) { - pushEnclosingContext (ident, K_PROTOCOL); - addTag (ident, K_PROTOCOL); + int index = addTag (ident, K_PROTOCOL); + pushEnclosingContextFull (ident, K_PROTOCOL, index); } toDoNext = &parseMethods; } @@ -728,8 +912,8 @@ static void parseImplementation (vString * const ident, objcToken what) { if (what == ObjcIDENTIFIER) { - addTag (ident, K_IMPLEMENTATION); - pushEnclosingContext (ident, K_IMPLEMENTATION); + int index = addTag (ident, K_IMPLEMENTATION); + pushEnclosingContextFull (ident, K_IMPLEMENTATION, index); } toDoNext = &parseImplemMethods; } @@ -738,8 +922,8 @@ static void parseInterface (vString * const ident, objcToken what) { if (what == ObjcIDENTIFIER) { - addTag (ident, K_INTERFACE); - pushEnclosingContext (ident, K_INTERFACE); + int index = addTag (ident, K_INTERFACE); + 
pushEnclosingContextFull (ident, K_INTERFACE, index); } toDoNext = &parseMethods; @@ -990,6 +1174,24 @@ static void parsePreproc (vString * const ident, objcToken what) } } +static void skipCurlL (vString * const ident, objcToken what) +{ + if (what == Tok_CurlL) + toDoNext = comeAfter; +} + +static void parseCPlusPlusCLinkage (vString * const ident, objcToken what) +{ + toDoNext = comeAfter; + + /* Linkage specification like "C" */ + if (what == Tok_CSTRING) + toDoNext = skipCurlL; + else + /* Force handle this ident in globalScope */ + globalScope (ident, what); +} + /* Handle the "strong" top levels, all 'big' declarations * happen here */ static void globalScope (vString * const ident, objcToken what) @@ -1044,6 +1246,11 @@ static void globalScope (vString * const ident, objcToken what) ignoreBalanced (ident, what); break; + case ObjcEXTERN: + comeAfter = &globalScope; + toDoNext = &parseCPlusPlusCLinkage; + break; + case ObjcEND: case ObjcPUBLIC: case ObjcPROTECTED: @@ -1068,6 +1275,7 @@ static void findObjcTags (void) tempName = vStringNew (); fullMethodName = vStringNew (); prevIdent = vStringNew (); + signature = vStringNewInit ("("); /* (Re-)initialize state variables, this might be a second file */ comeAfter = NULL; @@ -1096,10 +1304,14 @@ static void findObjcTags (void) vStringDelete (tempName); vStringDelete (fullMethodName); vStringDelete (prevIdent); + vStringDelete (signature); + signature = NULL; parentName = NULL; tempName = NULL; prevIdent = NULL; fullMethodName = NULL; + categoryCorkIndex = CORK_NIL; + parentCorkIndex = CORK_NIL; } static void objcInitialize (const langType language) @@ -1109,14 +1321,25 @@ static void objcInitialize (const langType language) extern parserDefinition *ObjcParser (void) { - static const char *const extensions[] = { "m", "h", NULL }; + static const char *const extensions[] = { "mm", "m", "h", + NULL }; + static const char *const aliases[] = { "objc", "objective-c", + NULL }; + static selectLanguage selectors[] = { 
selectByObjectiveCAndMatLabKeywords, + selectByObjectiveCKeywords, + NULL }; parserDefinition *def = parserNew ("ObjectiveC"); def->kindTable = ObjcKinds; def->kindCount = ARRAY_SIZE (ObjcKinds); def->extensions = extensions; + def->fieldTable = ObjcFields; + def->fieldCount = ARRAY_SIZE (ObjcFields); + def->aliases = aliases; def->parser = findObjcTags; def->initialize = objcInitialize; + def->selectLanguage = selectors; def->keywordTable = objcKeywordTable; def->keywordCount = ARRAY_SIZE (objcKeywordTable); + def->useCork = CORK_QUEUE; return def; } diff --git a/ctags/parsers/geany_pascal.c b/ctags/parsers/pascal.c similarity index 73% rename from ctags/parsers/geany_pascal.c rename to ctags/parsers/pascal.c index fd4fbdd19b..6d7497b4ca 100644 --- a/ctags/parsers/geany_pascal.c +++ b/ctags/parsers/pascal.c @@ -37,22 +37,14 @@ static kindDefinition PascalKinds [] = { * FUNCTION DEFINITIONS */ -static void createPascalTag (tagEntryInfo* const tag, - const vString* const name, const int kind, - const char *arglist, const char *vartype) +static void createPascalTag ( + tagEntryInfo* const tag, const vString* const name, const int kind) { if (PascalKinds [kind].enabled && name != NULL && vStringLength (name) > 0) - { initTagEntry (tag, vStringValue (name), kind); - - tag->extensionFields.signature = arglist; - tag->extensionFields.typeRef[1] = vartype; - } else - { /* TODO: Passing NULL as name makes an assertion behind initTagEntry failure */ - /* initTagEntry (tag, NULL, NULL); */ - } + initTagEntry (tag, NULL, KIND_GHOST_INDEX); } static void makePascalTag (const tagEntryInfo* const tag) @@ -82,69 +74,6 @@ static bool tail (const char *cp) return result; } -static void parseArglist(const char *buf, char **arglist, char **vartype) -{ - char *c, *start, *end; - int level; - - if (NULL == buf || NULL == arglist) - return; - - c = strdup(buf); - /* parse argument list which can be missing like in "function ginit:integer;" */ - if (NULL != (start = strchr(c, '('))) - { 
- for (level = 1, end = start + 1; level > 0; ++end) - { - if ('\0' == *end) - break; - else if ('(' == *end) - ++ level; - else if (')' == *end) - -- level; - } - } - else /* if no argument list was found, continue looking for a return value */ - { - start = "()"; - end = c; - } - - /* parse return type if requested by passing a non-NULL vartype argument */ - if (NULL != vartype) - { - char *var, *var_start; - - *vartype = NULL; - - if (NULL != (var = strchr(end, ':'))) - { - var++; /* skip ':' */ - while (isspace((int) *var)) - ++var; - - if (starttoken(*var)) - { - var_start = var; - var++; - while (intoken(*var)) - var++; - if (endtoken(*var)) - { - *var = '\0'; - *vartype = strdup(var_start); - } - } - } - } - - *end = '\0'; - *arglist = strdup(start); - - eFree(c); -} - - /* Algorithm adapted from from GNU etags. * Locates tags for procedures & functions. Doesn't do any type- or * var-definitions. It does look for the keyword "extern" or "forward" @@ -155,12 +84,10 @@ static void findPascalTags (void) { vString *name = vStringNew (); tagEntryInfo tag; - char *arglist = NULL; - char *vartype = NULL; pascalKind kind = K_FUNCTION; /* each of these flags is true iff: */ bool incomment = false; /* point is inside a comment */ - int comment_char = '\0'; /* type of current comment */ + int comment_char = '\0'; /* type of current comment */ bool inquote = false; /* point is inside '..' 
string */ bool get_tagname = false;/* point is after PROCEDURE/FUNCTION keyword, so next item = potential tag */ @@ -257,14 +184,6 @@ static void findPascalTags (void) verify_tag = false; } } - else if (tolower ((int) *dbp) == 't') - { - if (tail ("type")) /* check for forward reference */ - { - found_tag = false; - verify_tag = false; - } - } if (found_tag && verify_tag) /* not external proc, so make tag */ { found_tag = false; @@ -286,12 +205,7 @@ static void findPascalTags (void) for (cp = dbp ; *cp != '\0' && !endtoken (*cp) ; cp++) continue; vStringNCopyS (name, (const char*) dbp, cp - dbp); - if (arglist != NULL) - eFree(arglist); - if (kind == K_FUNCTION && vartype != NULL) - eFree(vartype); - parseArglist((const char*) cp, &arglist, (kind == K_FUNCTION) ? &vartype : NULL); - createPascalTag (&tag, name, kind, arglist, (kind == K_FUNCTION) ? vartype : NULL); + createPascalTag (&tag, name, kind); dbp = cp; /* set dbp to e-o-token */ get_tagname = false; found_tag = true; @@ -329,20 +243,9 @@ static void findPascalTags (void) kind = K_FUNCTION; } break; - case 't': - if (tail ("ype")) - { - get_tagname = true; - kind = K_FUNCTION; - } - break; } } /* while not eof */ } - if (arglist != NULL) - eFree(arglist); - if (vartype != NULL) - eFree(vartype); vStringDelete (name); } @@ -351,7 +254,7 @@ extern parserDefinition* PascalParser (void) static const char *const extensions [] = { "p", "pas", NULL }; parserDefinition* def = parserNew ("Pascal"); def->extensions = extensions; - def->kindTable = PascalKinds; + def->kindTable = PascalKinds; def->kindCount = ARRAY_SIZE (PascalKinds); def->parser = findPascalTags; return def; diff --git a/ctags/parsers/perl.c b/ctags/parsers/perl.c new file mode 100644 index 0000000000..9f51aa6138 --- /dev/null +++ b/ctags/parsers/perl.c @@ -0,0 +1,738 @@ +/* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your 
option) any later version. +* +* This module contains functions for generating tags for PERL language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "entry.h" +#include "perl.h" +#include "promise.h" +#include "read.h" +#include "routines.h" +#include "selectors.h" +#include "subparser.h" +#include "vstring.h" +#include "xtag.h" + +#define TRACE_PERL_C 0 +#define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf + +/* +* DATA DEFINITIONS +*/ +typedef enum PerlKindType perlKind; +typedef enum PerlModuleRoleType perlModuleRole; + +static roleDefinition PerlModuleRoles [] = { + { true, "used", "specified in `use' built-in function" }, + { true, "unused", "specified in `no' built-in function" }, +}; + +static kindDefinition PerlKinds [] = { + { true, 'c', "constant", "constants" }, + { true, 'f', "format", "formats" }, + { true, 'l', "label", "labels" }, + { true, 'p', "package", "packages" }, + { true, 's', "subroutine", "subroutines" }, + { false, 'd', "subroutineDeclaration", "subroutine declarations" }, + { false, 'M', "module", "modules", + .referenceOnly = true, ATTACH_ROLES(PerlModuleRoles)}, +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static void notifyEnteringPod () +{ + subparser *sub; + + foreachSubparser (sub, false) + { + perlSubparser *perlsub = (perlSubparser *)sub; + if (perlsub->enteringPodNotify) + { + enterSubparser (sub); + perlsub->enteringPodNotify (perlsub); + leaveSubparser (); + } + } +} + +static void notifyLeavingPod () +{ + subparser *sub; + + foreachSubparser (sub, false) + { + perlSubparser *perlsub = (perlSubparser *)sub; + if (perlsub->leavingPodNotify) + { + enterSubparser (sub); + perlsub->leavingPodNotify (perlsub); + leaveSubparser (); + } + } +} + +static void notifyFindingQuotedWord (int moduleIndex, + const char *qwd) +{ + subparser *sub; + + foreachSubparser (sub, false) + { + perlSubparser *perlsub = (perlSubparser *)sub; + if 
(perlsub->findingQuotedWordNotify) + { + enterSubparser (sub); + perlsub->findingQuotedWordNotify (perlsub, + moduleIndex, + qwd); + leaveSubparser (); + } + } +} + +static bool isIdentifier1 (int c) +{ + return (bool) (isalpha (c) || c == '_'); +} + +static bool isIdentifier (int c) +{ + return (bool) (isalnum (c) || c == '_'); +} + +static bool isPodWord (const char *word) +{ + /* Perl POD words are three to eight characters in size. We use this + * fact to find (or not find) the right side of the word and then + * perform comparisons, if necessary, of POD words of that size. + */ + size_t len; + for (len = 0; len < 9; ++len) + if ('\0' == word[len] || ' ' == word[len] || '\t' == word[len]) + break; + switch (len) { + case 3: + return 0 == strncmp(word, "end", 3) + || 0 == strncmp(word, "for", 3) + || 0 == strncmp(word, "pod", 3); + case 4: + return 0 == strncmp(word, "back", 4) + || 0 == strncmp(word, "item", 4) + || 0 == strncmp(word, "over", 4); + case 5: + return 0 == strncmp(word, "begin", 5) + || 0 == strncmp(word, "head1", 5) + || 0 == strncmp(word, "head2", 5) + || 0 == strncmp(word, "head3", 5) + || 0 == strncmp(word, "head4", 5); + case 8: + return 0 == strncmp(word, "encoding", 8); + default: + return false; + } +} + +/* + * Perl subroutine declaration may look like one of the following: + * + * sub abc; + * sub abc :attr; + * sub abc (proto); + * sub abc (proto) :attr; + * + * Note that there may be more than one attribute. Attributes may + * have things in parentheses (they look like arguments). Anything + * inside of those parentheses goes. Prototypes may contain semi-colons. + * The matching end when we encounter (outside of any parentheses) either + * a semi-colon (that'd be a declaration) or an left curly brace + * (definition). + * + * This is pretty complicated parsing (plus we all know that only perl can + * parse Perl), so we are only promising best effort here. 
+ * + * If we can't determine what this is (due to a file ending, for example), + * we will return false. + */ +static bool isSubroutineDeclaration (const unsigned char *cp) +{ + bool attr = false; + int nparens = 0; + + do { + for ( ; *cp; ++cp) { +SUB_DECL_SWITCH: + switch (*cp) { + case ':': + if (nparens) + break; + else if (true == attr) + return false; /* Invalid attribute name */ + else + attr = true; + break; + case '(': + ++nparens; + break; + case ')': + --nparens; + break; + case ' ': + case '\t': + break; + case ';': + if (!nparens) + return true; + case '{': + if (!nparens) + return false; + default: + if (attr) { + if (isIdentifier1(*cp)) { + cp++; + while (isIdentifier (*cp)) + cp++; + attr = false; + goto SUB_DECL_SWITCH; /* Instead of --cp; */ + } else { + return false; + } + } else if (nparens) { + break; + } else { + return false; + } + } + } + } while (NULL != (cp = readLineFromInputFile ())); + + return false; +} + +/* `end' points to the equal sign. Parse from right to left to get the + * identifier. Assume we're dealing with something of form \s*\w+\s*=> + */ +static void makeTagFromLeftSide (const char *begin, const char *end, + vString *name, vString *package) +{ + tagEntryInfo entry; + const char *b, *e; + if (! PerlKinds[KIND_PERL_CONSTANT].enabled) + return; + for (e = end - 1; e > begin && isspace(*e); --e) + ; + if (e < begin) + return; + for (b = e; b >= begin && isIdentifier(*b); --b) + ; + /* Identifier must be either beginning of line of have some whitespace + * on its left: + */ + if (b < begin || isspace(*b) || ',' == *b) + ++b; + else if (b != begin) + return; + if (e - b + 1 <= 0) + return; /* Left side of => has an invalid identifier. 
*/ + vStringClear(name); + vStringNCatS(name, b, e - b + 1); + initTagEntry(&entry, vStringValue(name), KIND_PERL_CONSTANT); + makeTagEntry(&entry); + if (isXtagEnabled (XTAG_QUALIFIED_TAGS) && package && vStringLength(package)) { + vStringClear(name); + vStringCopy(name, package); + vStringNCatS(name, b, e - b + 1); + initTagEntry(&entry, vStringValue(name), KIND_PERL_CONSTANT); + markTagExtraBit (&entry, XTAG_QUALIFIED_TAGS); + makeTagEntry(&entry); + } +} + +static int makeTagForModule (const char *name, int role) +{ + tagEntryInfo entry; + initRefTagEntry(&entry, name, KIND_PERL_MODULE, role); + return makeTagEntry(&entry); +} + +enum const_state { CONST_STATE_NEXT_LINE, CONST_STATE_HIT_END }; + +/* Parse a single line, find as many NAME => VALUE pairs as we can and try + * to detect the end of the hashref. + */ +static enum const_state parseConstantsFromLine (const char *cp, + vString *name, vString *package) +{ + while (1) { + const size_t sz = strcspn(cp, "#}="); + switch (cp[sz]) { + case '=': + if ('>' == cp[sz + 1]) + makeTagFromLeftSide(cp, cp + sz, name, package); + break; + case '}': /* Assume this is the end of the hashref. */ + return CONST_STATE_HIT_END; + case '\0': /* End of the line. */ + case '#': /* Assume this is a comment and thus end of the line. */ + return CONST_STATE_NEXT_LINE; + } + cp += sz + 1; + } +} + +/* Parse constants declared via hash reference, like this: + * use constant { + * A => 1, + * B => 2, + * }; + * The approach we take is simplistic, but it covers the vast majority of + * cases well. There can be some false positives. 
+ * Returns 0 if found the end of the hashref, -1 if we hit EOF + */ +static int parseConstantsFromHashRef (const unsigned char *cp, + vString *name, vString *package) +{ + while (1) { + enum const_state state = + parseConstantsFromLine((const char *) cp, name, package); + switch (state) { + case CONST_STATE_NEXT_LINE: + cp = readLineFromInputFile(); + if (cp) + break; + else + return -1; + case CONST_STATE_HIT_END: + return 0; + } + } +} + +static void parseQuotedWords(const unsigned char *cp, + vString *name, int moduleIndex) +{ + unsigned char end = *cp++; + switch (end) + { + case '[': end = ']'; break; + case '(': end = ')'; break; + case '{': end = '}'; break; + case '<': end = '>'; break; + } + + do { + while (*cp && *cp != end) + { + if (isspace(*cp)) + { + notifyFindingQuotedWord (moduleIndex, vStringValue(name)); + vStringClear(name); + cp++; + continue; + } + + if (*cp == '\\') + { + cp++; + if (*cp == '\0') + break; + } + + vStringPut(name, *cp); + cp++; + } + if (!vStringIsEmpty(name)) + notifyFindingQuotedWord (moduleIndex, vStringValue(name)); + + if (*cp == end) + break; + } while ((cp = readLineFromInputFile()) != NULL); +} + +/* Algorithm adapted from from GNU etags. + * Perl support by Bart Robinson + * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/ + */ +static void findPerlTags (void) +{ + vString *name = vStringNew (); + vString *package = NULL; + bool skipPodDoc = false; + const unsigned char *line; + unsigned long podStart = 0UL; + + /* A pod area can be after __END__ marker. + * Perl parser itself doesn't need to parse the area + * after the marker. Parsing the area is needed only + * if Perl parser runs Pod parser as a guest. + * This variable is set true when it is needed. + */ + bool parse_only_pod_area = false; + + /* Core modules AutoLoader and SelfLoader support delayed compilation + * by allowing Perl code that follows __END__ and __DATA__ tokens, + * respectively. 
When we detect that one of these modules is used + * in the file, we continue processing even after we see the + * corresponding token that would usually terminate parsing of the + * file. + */ + enum { + RESPECT_END = (1 << 0), + RESPECT_DATA = (1 << 1), + } respect_token = RESPECT_END | RESPECT_DATA; + + while ((line = readLineFromInputFile ()) != NULL) + { + bool spaceRequired = false; + bool qualified = false; + const unsigned char *cp = line; + perlKind kind = KIND_PERL_NONE; + tagEntryInfo e; + + if (skipPodDoc) + { + if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0) + { + skipPodDoc = false; + if (podStart != 0UL) + { + notifyLeavingPod (); + makePromise ("Pod", + podStart, 0, + getInputLineNumber(), 0, + getSourceLineNumber()); + podStart = 0UL; + } + } + continue; + } + else if (line [0] == '=') + { + skipPodDoc = isPodWord ((const char*)line + 1); + if (skipPodDoc) + { + podStart = getSourceLineNumber (); + notifyEnteringPod (); + } + continue; + } + else if (strcmp ((const char*) line, "__DATA__") == 0) + { + if (respect_token & RESPECT_DATA) + { + if (isXtagEnabled (XTAG_GUEST)) + parse_only_pod_area = true; + else + break; + } + else + continue; + } + else if (strcmp ((const char*) line, "__END__") == 0) + { + if (respect_token & RESPECT_END) + { + if (isXtagEnabled (XTAG_GUEST)) + parse_only_pod_area = true; + else + break; + } + else + continue; + } + else if (line [0] == '#') + continue; + + if (parse_only_pod_area) + continue; + + while (isspace (*cp)) + cp++; + + if (strncmp((const char*) cp, "sub", (size_t) 3) == 0) + { + TRACE("this looks like a sub\n"); + cp += 3; + kind = KIND_PERL_SUBROUTINE; + spaceRequired = true; + qualified = true; + } + else if (strncmp((const char*) cp, "use", (size_t) 3) == 0) + { + cp += 3; + if (!isspace(*cp)) + continue; + while (*cp && isspace (*cp)) + ++cp; + if (strncmp((const char*) cp, "AutoLoader", (size_t) 10) == 0) { + respect_token &= ~RESPECT_END; + makeTagForModule("AutoLoader", 
ROLE_PERL_MODULE_USED); + continue; + } + if (strncmp((const char*) cp, "SelfLoader", (size_t) 10) == 0) { + respect_token &= ~RESPECT_DATA; + makeTagForModule("SelfLoader", ROLE_PERL_MODULE_USED); + continue; + } + + vString *module = NULL; + while (isalnum(*cp) || *cp == ':' || *cp == '.') { + if (!module) + module = vStringNew(); + vStringPut(module, *cp); + ++cp; + } + if (!module) + continue; + + int q = makeTagForModule(vStringValue(module), ROLE_PERL_MODULE_USED); + bool isConstant = (strcmp(vStringValue(module), "constant") == 0); + vStringDelete(module); + if (!isConstant) + { + while (isspace(*cp)) + cp++; + if (strncmp("qw", (const char *)cp, 2) != 0) + continue; + cp += 2; + while (isspace(*cp)) + cp++; + if (*cp == '\0') + continue; + vStringClear (name); + + parseQuotedWords(cp, name, q); + vStringClear (name); + continue; + } + + /* Skip up to the first non-space character, skipping empty + * and comment lines. + */ + while (isspace(*cp)) + cp++; + while (!*cp || '#' == *cp) { + cp = readLineFromInputFile (); + if (!cp) + goto END_MAIN_WHILE; + while (isspace (*cp)) + cp++; + } + if ('{' == *cp) { + ++cp; + if (0 == parseConstantsFromHashRef(cp, name, package)) { + vStringClear(name); + continue; + } else + goto END_MAIN_WHILE; + } + kind = KIND_PERL_CONSTANT; + spaceRequired = false; + qualified = true; + } + else if (strncmp((const char*) cp, "no", (size_t) 2) == 0 && isspace(cp[2])) + { + cp += 3; + while (isspace (*cp)) + cp++; + vString *module = NULL; + while (isalnum(*cp) || *cp == ':' || *cp == '.') { + if (!module) + module = vStringNew(); + vStringPut(module, *cp); + ++cp; + } + if (module) { + makeTagForModule(vStringValue(module), ROLE_PERL_MODULE_UNUSED); + vStringDelete(module); + } + continue; + } + else if (strncmp((const char*) cp, "package", (size_t) 7) == 0 && + ('\0' == cp[7] || isspace(cp[7]))) + { + cp += 7; + while (isspace (*cp)) + cp++; + while (!*cp || '#' == *cp) { + cp = readLineFromInputFile (); + if (!cp) + goto 
END_MAIN_WHILE; + while (isspace (*cp)) + cp++; + } + if (package == NULL) + package = vStringNew (); + else + vStringClear (package); + const unsigned char *const first = cp; + while (*cp && (int) *cp != ';' && !isspace ((int) *cp)) + { + vStringPut (package, (int) *cp); + cp++; + } + vStringCatS (package, "::"); + + cp = first; /* Rewind */ + kind = KIND_PERL_PACKAGE; + spaceRequired = false; + qualified = true; + } + else if (strncmp((const char*) cp, "format", (size_t) 6) == 0) + { + cp += 6; + kind = KIND_PERL_FORMAT; + spaceRequired = true; + qualified = true; + } + else + { + if (isIdentifier1 (*cp)) + { + const unsigned char *p = cp; + while (isIdentifier (*p)) + ++p; + while (isspace (*p)) + ++p; + if ((int) *p == ':' && (int) *(p + 1) != ':') + kind = KIND_PERL_LABEL; + } + } + if (kind != KIND_PERL_NONE) + { + TRACE("cp0: %s\n", (const char *) cp); + if (spaceRequired && *cp && !isspace (*cp)) + continue; + + TRACE("cp1: %s\n", (const char *) cp); + while (isspace (*cp)) + cp++; + + while (!*cp || '#' == *cp) { /* Gobble up empty lines + and comments */ + cp = readLineFromInputFile (); + if (!cp) + goto END_MAIN_WHILE; + while (isspace (*cp)) + cp++; + } + + while (isIdentifier (*cp) || (KIND_PERL_PACKAGE == kind && ':' == *cp)) + { + vStringPut (name, (int) *cp); + cp++; + } + + if (KIND_PERL_FORMAT == kind && + vStringLength (name) == 0 && /* cp did not advance */ + '=' == *cp) + { + /* format's name is optional. If it's omitted, 'STDOUT' + is assumed. */ + vStringCatS (name, "STDOUT"); + } + + TRACE("name: %s\n", vStringValue (name)); + + if (0 == vStringLength(name)) { + vStringClear(name); + continue; + } + + if (KIND_PERL_SUBROUTINE == kind) + { + /* + * isSubroutineDeclaration() may consume several lines. So + * we record line positions. 
+ */ + initTagEntry(&e, vStringValue(name), KIND_GHOST_INDEX); + + if (true == isSubroutineDeclaration(cp)) { + if (true == PerlKinds[KIND_PERL_SUBROUTINE_DECLARATION].enabled) { + kind = KIND_PERL_SUBROUTINE_DECLARATION; + } else { + vStringClear (name); + continue; + } + } else if (! PerlKinds[kind].enabled) { + continue; + } + + e.kindIndex = kind; + + makeTagEntry(&e); + + if (isXtagEnabled (XTAG_QUALIFIED_TAGS) && qualified && + package != NULL && vStringLength (package) > 0) + { + vString *const qualifiedName = vStringNew (); + vStringCopy (qualifiedName, package); + vStringCat (qualifiedName, name); + e.name = vStringValue(qualifiedName); + markTagExtraBit (&e, XTAG_QUALIFIED_TAGS); + makeTagEntry(&e); + vStringDelete (qualifiedName); + } + } else if (vStringLength (name) > 0) + { + makeSimpleTag (name, kind); + if (isXtagEnabled(XTAG_QUALIFIED_TAGS) && qualified && + KIND_PERL_PACKAGE != kind && + package != NULL && vStringLength (package) > 0) + { + tagEntryInfo fqe; + vString *const qualifiedName = vStringNew (); + vStringCopy (qualifiedName, package); + vStringCat (qualifiedName, name); + initTagEntry (&fqe, vStringValue (qualifiedName), kind); + markTagExtraBit (&fqe, XTAG_QUALIFIED_TAGS); + makeTagEntry (&fqe); + vStringDelete (qualifiedName); + } + } + vStringClear (name); + } + } + +END_MAIN_WHILE: + vStringDelete (name); + if (package != NULL) + vStringDelete (package); +} + +extern parserDefinition* PerlParser (void) +{ + static const char *const extensions [] = { "pl", "pm", "ph", "plx", "perl", NULL }; + static const char *const aliases [] = { + /* cperl is an Emacs' editing mode for Perl source code */ + "cperl", + NULL }; + static selectLanguage selectors [] = { selectByPickingPerlVersion, + NULL }; + parserDefinition* def = parserNew ("Perl"); + def->kindTable = PerlKinds; + def->kindCount = ARRAY_SIZE (PerlKinds); + def->extensions = extensions; + def->parser = findPerlTags; + def->selectLanguage = selectors; + def->aliases = aliases; + + /* 
Subparsers need this */ + def->useCork = CORK_QUEUE; + + return def; +} diff --git a/ctags/parsers/perl.h b/ctags/parsers/perl.h new file mode 100644 index 0000000000..a100d54dad --- /dev/null +++ b/ctags/parsers/perl.h @@ -0,0 +1,44 @@ +/* +* Copyright (c) 2019, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +*/ +#ifndef CTAGS_PARSER_PERL_H +#define CTAGS_PARSER_PERL_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include "subparser.h" + +typedef struct sPerlSubparser perlSubparser; + +enum PerlModuleRoleType { + ROLE_PERL_MODULE_USED, + ROLE_PERL_MODULE_UNUSED, +}; + +enum PerlKindType { + KIND_PERL_NONE = -1, + KIND_PERL_CONSTANT, + KIND_PERL_FORMAT, + KIND_PERL_LABEL, + KIND_PERL_PACKAGE, + KIND_PERL_SUBROUTINE, + KIND_PERL_SUBROUTINE_DECLARATION, + KIND_PERL_MODULE, +}; + +struct sPerlSubparser { + subparser subparser; + void (* findingQuotedWordNotify) (perlSubparser *, + int moduleIndex, + const char *qwd); + void (* enteringPodNotify) (perlSubparser *); + void (* leavingPodNotify) (perlSubparser *); +}; + +#endif /* CTAGS_PARSER_PERL_H */ diff --git a/ctags/parsers/geany_php.c b/ctags/parsers/php.c similarity index 62% rename from ctags/parsers/geany_php.c rename to ctags/parsers/php.c index 5278a59e02..dd754913e0 100644 --- a/ctags/parsers/geany_php.c +++ b/ctags/parsers/php.c @@ -6,12 +6,17 @@ * * This module contains code for generating tags for the PHP scripting * language. 
+* +* The language reference: http://php.net/manual/en/langref.php */ /* * INCLUDE FILES */ #include "general.h" /* must always come first */ + +#include + #include "parse.h" #include "read.h" #include "vstring.h" @@ -19,10 +24,11 @@ #include "entry.h" #include "routines.h" #include "debug.h" +#include "objpool.h" - -#define SCOPE_SEPARATOR "::" - +#define isIdentChar(c) (isalnum (c) || (c) == '_' || (c) >= 0x80) +#define newToken() (objPoolGet (TokenPool)) +#define deleteToken(t) (objPoolPut (TokenPool, (t))) enum { KEYWORD_abstract, @@ -110,18 +116,35 @@ typedef enum { K_NAMESPACE, K_TRAIT, K_VARIABLE, + K_ALIAS, COUNT_KIND } phpKind; +#define NAMESPACE_SEPARATOR "\\" +static scopeSeparator PhpGenericSeparators [] = { + { K_NAMESPACE , NAMESPACE_SEPARATOR }, + { KIND_WILDCARD_INDEX, "::" }, +}; + static kindDefinition PhpKinds[COUNT_KIND] = { - { true, 'c', "class", "classes" }, - { true, 'd', "define", "constant definitions" }, - { true, 'f', "function", "functions" }, - { true, 'i', "interface", "interfaces" }, - { false, 'l', "local", "local variables" }, - { true, 'n', "namespace", "namespaces" }, - { true, 't', "trait", "traits" }, - { true, 'v', "variable", "variables" } + { true, 'c', "class", "classes", + ATTACH_SEPARATORS(PhpGenericSeparators) }, + { true, 'd', "define", "constant definitions", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 'f', "function", "functions", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 'i', "interface", "interfaces", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { false, 'l', "local", "local variables", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 'n', "namespace", "namespaces", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 't', "trait", "traits", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 'v', "variable", "variables", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 'a', "alias", "aliases", + ATTACH_SEPARATORS(PhpGenericSeparators)}, }; static const keywordTable 
PhpKeywordTable[] = { @@ -209,7 +232,9 @@ typedef enum eTokenType { TOKEN_OPEN_SQUARE, TOKEN_CLOSE_SQUARE, TOKEN_VARIABLE, - TOKEN_AMPERSAND + TOKEN_AMPERSAND, + TOKEN_BACKSLASH, + TOKEN_QMARK, } tokenType; typedef struct { @@ -220,12 +245,16 @@ typedef struct { unsigned long lineNumber; MIOPos filePosition; int parentKind; /* -1 if none */ + bool anonymous; /* true if token specifies + * an anonymous class */ } tokenInfo; static langType Lang_php; static langType Lang_zephir; static bool InPhp = false; /* whether we are between */ +/* whether the next token may be a keyword, e.g. not after "::" or "->" */ +static bool MayBeKeyword = true; /* current statement details */ static struct { @@ -234,8 +263,20 @@ static struct { } CurrentStatement; /* Current namespace */ -static vString *CurrentNamespace; +static vString *CurrentNamesapce; +/* Cache variable to build the tag's scope. It has no real meaning outside + * of initPhpEntry()'s scope. */ +static vString *FullScope; +/* The class name specified at "extends" keyword in the current class + * definition. Used to resolve "parent" in return type. 
*/ +static vString *ParentClass; +static objPool *TokenPool = NULL; + +static const char *phpScopeSeparatorFor (int kind, int upperScopeKind) +{ + return scopeSeparatorFor (getInputLanguage(), kind, upperScopeKind); +} static const char *accessToString (const accessType access) { @@ -266,18 +307,15 @@ static const char *implToString (const implType impl) static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token, const phpKind kind, const accessType access) { - static vString *fullScope = NULL; int parentKind = -1; - if (fullScope == NULL) - fullScope = vStringNew (); - else - vStringClear (fullScope); + vStringClear (FullScope); - if (vStringLength (CurrentNamespace) > 0) + if (vStringLength (CurrentNamesapce) > 0) { - vStringCopy (fullScope, CurrentNamespace); parentKind = K_NAMESPACE; + vStringCat (FullScope, CurrentNamesapce); + } initTagEntry (e, vStringValue (token->string), kind); @@ -290,31 +328,82 @@ static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token, if (vStringLength (token->scope) > 0) { parentKind = token->parentKind; - if (vStringLength (fullScope) > 0) - vStringCatS (fullScope, SCOPE_SEPARATOR); - vStringCat (fullScope, token->scope); + + if (vStringLength (FullScope) > 0) + { + const char* sep; + + sep = phpScopeSeparatorFor (parentKind, + K_NAMESPACE); + vStringCatS (FullScope, sep); + } + vStringCat (FullScope, token->scope); } - if (vStringLength (fullScope) > 0) + if (vStringLength (FullScope) > 0) { Assert (parentKind >= 0); e->extensionFields.scopeKindIndex = parentKind; - e->extensionFields.scopeName = vStringValue (fullScope); + e->extensionFields.scopeName = vStringValue (FullScope); } + + if (token->anonymous) + markTagExtraBit (e, XTAG_ANONYMOUS); } -static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind, - const accessType access) +static void makePhpTagEntry (tagEntryInfo *const e) +{ + makeTagEntry (e); + makeQualifiedTagEntry (e); +} + +static void fillTypeRefField 
(tagEntryInfo *const e, + const vString *const rtype, const tokenInfo *const token) +{ + if ((vStringLength (rtype) == 4) + && (strcmp (vStringValue (rtype), "self") == 0) + && vStringLength (token->scope) > 0) + { + if (token->parentKind == -1) + e->extensionFields.typeRef [0] = "unknown"; + else + e->extensionFields.typeRef [0] = PhpKinds [token->parentKind].name; + e->extensionFields.typeRef [1] = vStringValue (token->scope); + } + else if ((vStringLength (rtype) == 6) + && (strcmp (vStringValue (rtype), "parent") == 0) + && (ParentClass && vStringLength (ParentClass) > 0)) + { + e->extensionFields.typeRef [0] = "class"; + e->extensionFields.typeRef [1] = vStringValue (ParentClass); + } + else + { + e->extensionFields.typeRef [0] = "unknown"; + e->extensionFields.typeRef [1] = vStringValue (rtype); + } +} + +static void makeTypedPhpTag (const tokenInfo *const token, const phpKind kind, + const accessType access, vString* typeName) { if (PhpKinds[kind].enabled) { tagEntryInfo e; initPhpEntry (&e, token, kind, access); - makeTagEntry (&e); + if (typeName) + fillTypeRefField (&e, typeName, token); + makePhpTagEntry (&e); } } +static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind, + const accessType access) +{ + makeTypedPhpTag (token, kind, access, NULL); +} + static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name) { if (PhpKinds[K_NAMESPACE].enabled) @@ -326,7 +415,7 @@ static void makeNamespacePhpTag (const tokenInfo *const token, const vString *co e.lineNumber = token->lineNumber; e.filePosition = token->filePosition; - makeTagEntry (&e); + makePhpTagEntry (&e); } } @@ -344,12 +433,13 @@ static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const toke if (vStringLength (inheritance) > 0) e.extensionFields.inheritance = vStringValue (inheritance); - makeTagEntry (&e); + makePhpTagEntry (&e); } } static void makeFunctionTag (const tokenInfo *const token, const vString *const arglist, + 
const vString *const rtype, const accessType access, const implType impl) { if (PhpKinds[K_FUNCTION].enabled) @@ -362,28 +452,39 @@ static void makeFunctionTag (const tokenInfo *const token, e.extensionFields.implementation = implToString (impl); if (arglist) e.extensionFields.signature = vStringValue (arglist); + if (rtype) + fillTypeRefField (&e, rtype, token); - makeTagEntry (&e); + makePhpTagEntry (&e); } } -static tokenInfo *newToken (void) +static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED) +{ + tokenInfo *token = xMalloc (1, tokenInfo); + + token->string = vStringNew (); + token->scope = vStringNew (); + return token; +} + +static void clearPoolToken (void *data) { - tokenInfo *const token = xMalloc (1, tokenInfo); + tokenInfo *token = data; token->type = TOKEN_UNDEFINED; token->keyword = KEYWORD_NONE; - token->string = vStringNew (); - token->scope = vStringNew (); token->lineNumber = getInputLineNumber (); token->filePosition = getInputFilePosition (); token->parentKind = -1; - - return token; + token->anonymous = false; + vStringClear (token->string); + vStringClear (token->scope); } -static void deleteToken (tokenInfo *const token) +static void deletePoolToken (void *data) { + tokenInfo *token = data; vStringDelete (token->string); vStringDelete (token->scope); eFree (token); @@ -400,6 +501,7 @@ static void copyToken (tokenInfo *const dest, const tokenInfo *const src, dest->parentKind = src->parentKind; if (scope) vStringCopy(dest->scope, src->scope); + dest->anonymous = src->anonymous; } #if 0 @@ -469,16 +571,28 @@ static void printToken (const tokenInfo *const token) } #endif -static void addToScope (tokenInfo *const token, const vString *const extra) +static void addToScope (tokenInfo *const token, const vString *const extra, + int kindOfUpperScope) { if (vStringLength (token->scope) > 0) - vStringCatS (token->scope, SCOPE_SEPARATOR); - vStringCatS (token->scope, vStringValue (extra)); + { + const char* sep; + + sep = 
phpScopeSeparatorFor(token->parentKind, + kindOfUpperScope); + vStringCatS (token->scope, sep); + } + vStringCat (token->scope, extra); } -static bool isIdentChar (const int c) +static int skipToCharacter (const int c) { - return (isalnum (c) || c == '_' || c >= 0x80); + int d; + do + { + d = getcFromInputFile (); + } while (d != EOF && d != c); + return d; } static void parseString (vString *const string, const int delimiter) @@ -496,26 +610,69 @@ static void parseString (vString *const string, const int delimiter) } } -/* reads an HereDoc or a NowDoc (the part after the <<<). +/* Strips @indent_len characters from lines in @string to get the correct + * string value for an indented heredoc (PHP 7.3+). + * This doesn't handle invalid values specially and might yield surprising + * results with them, but it doesn't really matter as it's invalid anyway. */ +static void stripHeredocIndent (vString *const string, size_t indent_len) +{ + char *str = vStringValue (string); + size_t str_len = vStringLength (string); + char *p = str; + size_t new_len = str_len; + bool at_line_start = true; + + while (*p) + { + if (at_line_start) + { + size_t p_len; + size_t strip_len; + + p_len = str_len - (p - str); + strip_len = p_len < indent_len ? p_len : indent_len; + memmove (p, p + strip_len, p_len - strip_len); + p += strip_len; + new_len -= strip_len; + } + /* CRLF is already normalized as LF */ + at_line_start = (*p == '\r' || *p == '\n'); + p++; + } + vStringTruncate (string, new_len); +} + +/* reads a PHP >= 7.3 HereDoc or a NowDoc (the part after the <<<). * <<<[ \t]*(ID|'ID'|"ID") * ... - * ID;? + * [ \t]*ID[^:indent-char:];? * * note that: * 1) starting ID must be immediately followed by a newline; * 2) closing ID is the same as opening one; - * 3) closing ID must be immediately followed by a newline or a semicolon - * then a newline. 
+ * 3) closing ID must not be immediately followed by an identifier character; + * 4) optional indentation of the closing ID is stripped from body lines, + * which lines must have the exact same prefix indentation. * - * Example of a *single* valid heredoc: + * This is slightly relaxed from PHP < 7.3, where the closing ID had to be the + * only thing on its line, with the only exception of a semicolon right after + * the ID. + * + * Example of a single valid heredoc: * <<< FOO * something * something else - * FOO this is not an end - * FOO; this isn't either - * FOO; # neither this is + * FOO_this is not an end * FOO; * # previous line was the end, but the semicolon wasn't required + * + * Another example using indentation and more code after the heredoc: + * << 0) + stripHeredocIndent (string, indent_len); + break; } /* if we are here it wasn't a delimiter, so put everything in the * string */ - vStringPut (string, (char) nl); vStringNCatS (string, delimiter, len); - if (extra != EOF) - vStringPut (string, (char) extra); } } while (c != EOF); @@ -628,16 +774,6 @@ static void parseIdentifier (vString *const string, const int firstChar) ungetcToInputFile (c); } -static keywordId analyzeToken (vString *const name, langType language) -{ - vString *keyword = vStringNew (); - keywordId result; - vStringCopyToLower (keyword, name); - result = lookupKeyword (vStringValue (keyword), language); - vStringDelete (keyword); - return result; -} - static bool isSpace (int c) { return (c == '\t' || c == ' ' || c == '\v' || @@ -652,7 +788,7 @@ static int skipWhitespaces (int c) } /* - * + * * This is ugly, but the whole "