From ad1debc0ff45b1ee2dac117e7a494e43523723aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Tue, 7 Dec 2021 01:17:33 +0100 Subject: [PATCH] Sync ctags parsers with big changes with Geany (#2991) * Remove the geany_ prefix from selected parsers and use the corresponding uctags version * Update tag mappings in tm_parser.c * Update PHP and zephir context separator to "\\" which is used in uctags now * Update ruby unit tests, the tests differ only in the "()" added to all functions in the generated tags. * Update nsis unit tests, the section name seems to be parsed correctly now. * Update go unit tests, the new go tags contain extra scope information. * Update php and zephir unit tests, PHP and zephir now use "\\" as the context separator instead of "::". For zephir, return values of methods are now parsed. * Update Objective-C unit tests, the new parser also parses method arguments. * Update lua unit tests, scope information seems to be correctly parsed now. * Add uctags iniconf parser * Update tag mappings for the iniconf parser * Add a way to modify scope information if needed and use it to unify php separators This patch introduces a new function tm_parser_update_scope() which can be used to modify scope information when the provided value from ctags doesn't suit our needs. This patch uses it to replace the php scope separator \ with :: so there's a single scope separator for this language. * Remove comment regarding ruby scope separator "." is used as the context separator in ruby now, the comment is probably just some historic artifact. * Add comment explaining why we modify PHP scope separators * Add a mechanism to enable/disable roles for certain kinds This patch allows us to enable/disable ctags roles for certain languages and kinds. Roles are currently disabled only for the Go kind 'p' for the reasons mentioned in the comment in the patch.
* Add a mechanism to enable/disable some ctags kinds This patch is similar to the patch allowing us to enable/disable roles, it just does this for ctags kinds. This can be useful when there is a bug in a ctags parser causing incorrect parsing when a certain kind is enabled - which is what happens when parsing cython source files with the 'z' flag enabled. * Add LUA to the list of parsers returning full scope * Enable/disable kinds in ctags based on whether we use them or not Instead of enabling all kinds and then manually disabling them if there are problems with them, we can enable/disable them based on whether we actually use them - i.e., when they are mapped to something other than tm_tag_undef_t. --- ctags/Makefile.am | 33 +- ctags/parsers/basic.c | 206 +++ ctags/parsers/{geany_diff.c => diff.c} | 93 +- ctags/parsers/geany_basic.c | 252 --- ctags/parsers/geany_go.c | 831 ---------- ctags/parsers/geany_iniconf.c | 128 -- ctags/parsers/geany_lua.c | 120 -- ctags/parsers/geany_nsis.c | 142 -- ctags/parsers/geany_perl.c | 380 ----- ctags/parsers/geany_ruby.c | 564 ------- ctags/parsers/go.c | 1416 +++++++++++++++++ ctags/parsers/{geany_html.c => html.c} | 168 +- ctags/parsers/iniconf.c | 238 +++ ctags/parsers/iniconf.h | 41 + ctags/parsers/{geany_jscript.c => jscript.c} | 462 ++++-- ctags/parsers/lua.c | 257 +++ ctags/parsers/{geany_make.c => make.c} | 171 +- ctags/parsers/make.h | 34 + ctags/parsers/nsis.c | 394 +++++ ctags/parsers/{geany_objc.c => objc.c} | 333 +++- ctags/parsers/perl.c | 738 +++++++++ ctags/parsers/perl.h | 44 + ctags/parsers/{geany_php.c => php.c} | 806 +++++++--- ctags/parsers/ruby.c | 1108 +++++++++++++ ctags/parsers/{geany_rust.c => rust.c} | 95 +- ctags/parsers/{geany_sql.c => sql.c} | 1338 ++++++++++++---- src/tagmanager/tm_ctags.c | 34 +- src/tagmanager/tm_parser.c | 108 +- src/tagmanager/tm_parser.h | 6 + src/tagmanager/tm_parsers.h | 2 +- tests/ctags/bug1742588.rb.tags | 4 +- tests/ctags/bug2781264.rb.tags | 4 +- tests/ctags/geany.nsi.tags
| 2 +- tests/ctags/namespaces.php.tags | 8 +- tests/ctags/namespaces2.php.tags | 8 +- tests/ctags/objectivec_implementation.mm.tags | 22 +- tests/ctags/objectivec_interface.mm.tags | 20 +- tests/ctags/objectivec_property.mm.tags | 2 +- tests/ctags/objectivec_protocol.mm.tags | 4 +- tests/ctags/return-hint.zep.tags | 4 +- tests/ctags/return-types.go.tags | 8 +- tests/ctags/ruby-block-call.rb.tags | 4 +- tests/ctags/ruby-doc.rb.tags | 8 +- .../ruby-scope-after-anonymous-class.rb.tags | 4 +- tests/ctags/ruby-sf-bug-364.rb.tags | 12 +- tests/ctags/simple.lua.tags | 4 +- tests/ctags/simple.rb.tags | 14 +- tests/ctags/strings.rb.tags | 24 +- tests/ctags/test.go.tags | 74 +- 49 files changed, 7447 insertions(+), 3325 deletions(-) create mode 100644 ctags/parsers/basic.c rename ctags/parsers/{geany_diff.c => diff.c} (53%) delete mode 100644 ctags/parsers/geany_basic.c delete mode 100644 ctags/parsers/geany_go.c delete mode 100644 ctags/parsers/geany_iniconf.c delete mode 100644 ctags/parsers/geany_lua.c delete mode 100644 ctags/parsers/geany_nsis.c delete mode 100644 ctags/parsers/geany_perl.c delete mode 100644 ctags/parsers/geany_ruby.c create mode 100644 ctags/parsers/go.c rename ctags/parsers/{geany_html.c => html.c} (72%) create mode 100644 ctags/parsers/iniconf.c create mode 100644 ctags/parsers/iniconf.h rename ctags/parsers/{geany_jscript.c => jscript.c} (84%) create mode 100644 ctags/parsers/lua.c rename ctags/parsers/{geany_make.c => make.c} (55%) create mode 100644 ctags/parsers/make.h create mode 100644 ctags/parsers/nsis.c rename ctags/parsers/{geany_objc.c => objc.c} (74%) create mode 100644 ctags/parsers/perl.c create mode 100644 ctags/parsers/perl.h rename ctags/parsers/{geany_php.c => php.c} (62%) create mode 100644 ctags/parsers/ruby.c rename ctags/parsers/{geany_rust.c => rust.c} (88%) rename ctags/parsers/{geany_sql.c => sql.c} (64%) diff --git a/ctags/Makefile.am b/ctags/Makefile.am index db0581a5b3..f9587b9b8d 100644 --- a/ctags/Makefile.am +++ 
b/ctags/Makefile.am @@ -15,43 +15,46 @@ parsers = \ parsers/abc.c \ parsers/asciidoc.c \ parsers/geany_asm.c \ - parsers/geany_basic.c \ + parsers/basic.c \ parsers/bibtex.c \ parsers/geany_c.c \ parsers/cobol.c \ - parsers/geany_iniconf.c \ + parsers/iniconf.c \ + parsers/iniconf.h \ parsers/css.c \ - parsers/geany_diff.c \ + parsers/diff.c \ parsers/geany_docbook.c \ parsers/erlang.c \ parsers/flex.c \ parsers/geany_fortran.c \ - parsers/geany_go.c \ + parsers/go.c \ parsers/haskell.c \ parsers/haxe.c \ - parsers/geany_html.c \ - parsers/geany_jscript.c \ + parsers/html.c \ + parsers/jscript.c \ parsers/json.c \ parsers/julia.c \ parsers/geany_lcpp.c \ parsers/geany_lcpp.h \ - parsers/geany_lua.c \ - parsers/geany_make.c \ + parsers/lua.c \ + parsers/make.c \ + parsers/make.h \ parsers/geany_markdown.c \ parsers/geany_matlab.c \ - parsers/geany_nsis.c \ - parsers/geany_objc.c \ + parsers/nsis.c \ + parsers/objc.c \ parsers/geany_pascal.c \ - parsers/geany_perl.c \ - parsers/geany_php.c \ + parsers/perl.c \ + parsers/perl.h \ + parsers/php.c \ parsers/powershell.c \ parsers/geany_python.c \ parsers/geany_r.c \ parsers/rst.c \ - parsers/geany_ruby.c \ - parsers/geany_rust.c \ + parsers/ruby.c \ + parsers/rust.c \ parsers/geany_sh.c \ - parsers/geany_sql.c \ + parsers/sql.c \ parsers/geany_tcl.c \ parsers/geany_tex.c \ parsers/txt2tags.c \ diff --git a/ctags/parsers/basic.c b/ctags/parsers/basic.c new file mode 100644 index 0000000000..4a6cffa687 --- /dev/null +++ b/ctags/parsers/basic.c @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2000-2006, Darren Hiebert, Elias Pschernig + * + * This source code is released for free distribution under the terms of the + * GNU General Public License version 2 or (at your option) any later version. + * + * This module contains functions for generating tags for BlitzBasic + * (BlitzMax), PureBasic and FreeBasic language files. 
For now, this is kept + * quite simple - but feel free to ask for more things added any time - + * patches are of course most welcome. + */ + +/* + * INCLUDE FILES + */ +#include "general.h" /* must always come first */ + +#include <string.h> + +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* + * DATA DEFINITIONS + */ +typedef enum { + K_CONST, + K_FUNCTION, + K_LABEL, + K_TYPE, + K_VARIABLE, + K_ENUM +} BasicKind; + +typedef struct { + char const *token; + BasicKind kind; + int skip; +} KeyWord; + +static kindDefinition BasicKinds[] = { + {true, 'c', "constant", "constants"}, + {true, 'f', "function", "functions"}, + {true, 'l', "label", "labels"}, + {true, 't', "type", "types"}, + {true, 'v', "variable", "variables"}, + {true, 'g', "enum", "enumerations"} +}; + +static KeyWord blitzbasic_keywords[] = { + {"const", K_CONST, 0}, + {"global", K_VARIABLE, 0}, + {"dim", K_VARIABLE, 0}, + {"function", K_FUNCTION, 0}, + {"type", K_TYPE, 0}, + {NULL, 0, 0} +}; + +static KeyWord purebasic_keywords[] = { + {"newlist", K_VARIABLE, 0}, + {"global", K_VARIABLE, 0}, + {"dim", K_VARIABLE, 0}, + {"procedure", K_FUNCTION, 0}, + {"interface", K_TYPE, 0}, + {"structure", K_TYPE, 0}, + {NULL, 0, 0} +}; + +static KeyWord freebasic_keywords[] = { + {"const", K_CONST, 0}, + {"dim as", K_VARIABLE, 1}, + {"dim", K_VARIABLE, 0}, + {"common", K_VARIABLE, 0}, + {"function", K_FUNCTION, 0}, + {"sub", K_FUNCTION, 0}, + {"private sub", K_FUNCTION, 0}, + {"public sub", K_FUNCTION, 0}, + {"private function", K_FUNCTION, 0}, + {"public function", K_FUNCTION, 0}, + {"type", K_TYPE, 0}, + {"enum", K_ENUM, 0}, + {NULL, 0, 0} +}; + +/* + * FUNCTION DEFINITIONS + */ + +/* Match the name of a tag (function, variable, type, ...) starting at pos.
*/ +static char const *extract_name (char const *pos, vString * name) +{ + while (isspace (*pos)) + pos++; + vStringClear (name); + for (; *pos && !isspace (*pos) && *pos != '(' && *pos != ','; pos++) + vStringPut (name, *pos); + return pos; +} + +/* Match a keyword starting at p (case insensitive). */ +static int match_keyword (const char *p, KeyWord const *kw) +{ + vString *name; + size_t i; + int j; + for (i = 0; i < strlen (kw->token); i++) + { + if (tolower (p[i]) != kw->token[i]) + return 0; + } + name = vStringNew (); + p += i; + for (j = 0; j < 1 + kw->skip; j++) + { + p = extract_name (p, name); + } + makeSimpleTag (name, kw->kind); + vStringDelete (name); + return 1; +} + +/* Match a "label:" style label. */ +static void match_colon_label (char const *p) +{ + char const *end = p + strlen (p) - 1; + while (isspace (*end)) + end--; + if (*end == ':') + { + vString *name = vStringNew (); + vStringNCatS (name, p, end - p); + makeSimpleTag (name, K_LABEL); + vStringDelete (name); + } +} + +/* Match a ".label" style label. */ +static void match_dot_label (char const *p) +{ + if (*p == '.') + { + vString *name = vStringNew (); + extract_name (p + 1, name); + makeSimpleTag (name, K_LABEL); + vStringDelete (name); + } +} + +static void findBasicTags (void) +{ + const char *line; + const char *extension = fileExtension (getInputFileName ()); + KeyWord *keywords; + + if (strcmp (extension, "bb") == 0) + keywords = blitzbasic_keywords; + else if (strcmp (extension, "pb") == 0) + keywords = purebasic_keywords; + else + keywords = freebasic_keywords; + + while ((line = (const char *) readLineFromInputFile ()) != NULL) + { + const char *p = line; + KeyWord const *kw; + + while (isspace (*p)) + p++; + + /* Empty line? */ + if (!*p) + continue; + + /* REM comment? */ + if (strncasecmp (p, "REM", 3) == 0 && + (isspace (*(p + 3)) || *(p + 3) == '\0')) + continue; + + /* Single-quote comment? 
*/ + if (*p == '\'') + continue; + + /* In Basic, keywords always are at the start of the line. */ + for (kw = keywords; kw->token; kw++) + if (match_keyword (p, kw)) break; + + /* Is it a label? */ + if (strcmp (extension, "bb") == 0) + match_dot_label (p); + else + match_colon_label (p); + } +} + +parserDefinition *BasicParser (void) +{ + static char const *extensions[] = { "bas", "bi", "bm", "bb", "pb", NULL }; + parserDefinition *def = parserNew ("Basic"); + def->kindTable = BasicKinds; + def->kindCount = ARRAY_SIZE (BasicKinds); + def->extensions = extensions; + def->parser = findBasicTags; + return def; +} diff --git a/ctags/parsers/geany_diff.c b/ctags/parsers/diff.c similarity index 53% rename from ctags/parsers/geany_diff.c rename to ctags/parsers/diff.c index 372fb10135..27e79982d1 100644 --- a/ctags/parsers/geany_diff.c +++ b/ctags/parsers/diff.c @@ -16,6 +16,7 @@ #include <ctype.h> #include <string.h> +#include "entry.h" #include "parse.h" #include "routines.h" #include "read.h" @@ -25,11 +26,17 @@ * DATA DEFINITIONS */ typedef enum { - K_FUNCTION + K_MODIFIED_FILE, + K_NEW_FILE, + K_DELETED_FILE, + K_HUNK, } diffKind; static kindDefinition DiffKinds [] = { - { true, 'f', "function", "functions"} + { true, 'm', "modifiedFile", "modified files"}, + { true, 'n', "newFile", "newly created files"}, + { true, 'd', "deletedFile", "deleted files"}, + { true, 'h', "hunk", "hunks"}, }; enum { @@ -42,6 +49,11 @@ static const char *DiffDelims[2] = { "+++ " }; +static const char *HunkDelim[2] = { + "@@ ", + " @@", +}; + /* * FUNCTION DEFINITIONS */ @@ -76,11 +88,58 @@ static const unsigned char *stripAbsolute (const unsigned char *filename) return tmp; } +static int parseHunk (const unsigned char* cp, vString *hunk, int scope_index) +{ + /* + example input: @@ -0,0 +1,134 @@ + expected output: -0,0 +1,134 + */ + + const char *next_delim; + const char *start, *end; + const char *c; + int i = CORK_NIL; + + cp += 3; + start = (const char*)cp; + + if (*start != '-') + return i; + +
next_delim = strstr ((const char*)cp, HunkDelim[1]); + if ((next_delim == NULL) + || (! (start < next_delim ))) + return i; + end = next_delim; + if (! ( '0' <= *( end - 1 ) && *( end - 1 ) <= '9')) + return i; + for (c = start; c < end; c++) + if (*c == '\t') + return i; + vStringNCopyS (hunk, start, end - start); + i = makeSimpleTag (hunk, K_HUNK); + tagEntryInfo *e = getEntryInCorkQueue (i); + if (e && scope_index > CORK_NIL) + e->extensionFields.scopeIndex = scope_index; + return i; +} + +static void markTheLastTagAsDeletedFile (int scope_index) +{ + tagEntryInfo *e = getEntryInCorkQueue (scope_index); + + if (e) + e->kindIndex = K_DELETED_FILE; +} + static void findDiffTags (void) { vString *filename = vStringNew (); + vString *hunk = vStringNew (); const unsigned char *line, *tmp; int delim = DIFF_DELIM_MINUS; + diffKind kind; + int scope_index = CORK_NIL; while ((line = readLineFromInputFile ()) != NULL) { @@ -88,6 +147,7 @@ static void findDiffTags (void) if (strncmp ((const char*) cp, DiffDelims[delim], 4u) == 0) { + scope_index = CORK_NIL; cp += 4; if (isspace ((int) *cp)) continue; /* when original filename is /dev/null use the new one instead */ @@ -109,26 +169,45 @@ static void findDiffTags (void) tmp++; } - makeSimpleTag (filename, K_FUNCTION); + if (delim == DIFF_DELIM_PLUS) + kind = K_NEW_FILE; + else + kind = K_MODIFIED_FILE; + scope_index = makeSimpleTag (filename, kind); vStringClear (filename); } /* restore default delim */ delim = DIFF_DELIM_MINUS; } + else if ((scope_index > CORK_NIL) + && (strncmp ((const char*) cp, DiffDelims[1], 4u) == 0)) + { + cp += 4; + if (isspace ((int) *cp)) continue; + /* when modified filename is /dev/null, the original name is deleted. 
*/ + if (strncmp ((const char*) cp, "/dev/null", 9u) == 0 && + (cp[9] == 0 || isspace (cp[9]))) + markTheLastTagAsDeletedFile (scope_index); + } + else if (strncmp ((const char*) cp, HunkDelim[0], 3u) == 0) + { + if (parseHunk (cp, hunk, scope_index) != CORK_NIL) + vStringClear (hunk); + } } + vStringDelete (hunk); vStringDelete (filename); } extern parserDefinition* DiffParser (void) { - static const char *const patterns [] = { "*.diff", "*.patch", NULL }; - static const char *const extensions [] = { "diff", NULL }; + static const char *const extensions [] = { "diff", "patch", NULL }; parserDefinition* const def = parserNew ("Diff"); - def->kindTable = DiffKinds; + def->kindTable = DiffKinds; def->kindCount = ARRAY_SIZE (DiffKinds); - def->patterns = patterns; def->extensions = extensions; def->parser = findDiffTags; + def->useCork = CORK_QUEUE; return def; } diff --git a/ctags/parsers/geany_basic.c b/ctags/parsers/geany_basic.c deleted file mode 100644 index b2036a29eb..0000000000 --- a/ctags/parsers/geany_basic.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright (c) 2000-2006, Darren Hiebert, Elias Pschernig - * - * This source code is released for free distribution under the terms of the - * GNU General Public License version 2 or (at your option) any later version. - * - * This module contains functions for generating tags for BlitzBasic - * (BlitzMax), PureBasic and FreeBasic language files. For now, this is kept - * quite simple - but feel free to ask for more things added any time - - * patches are of course most welcome. 
- */ - -/* - * INCLUDE FILES - */ -#include "general.h" /* must always come first */ - -#include <string.h> - -#include "parse.h" -#include "read.h" -#include "routines.h" -#include "vstring.h" - -/* - * DATA DEFINITIONS - */ -typedef enum { - K_CONST, - K_FUNCTION, - K_LABEL, - K_TYPE, - K_VARIABLE, - K_ENUM -} BasicKind; - -typedef struct { - char const *token; - BasicKind kind; -} KeyWord; - -static kindDefinition BasicKinds[] = { - {true, 'c', "constant", "constants"}, - {true, 'f', "function", "functions"}, - {true, 'l', "label", "labels"}, - {true, 't', "type", "types"}, - {true, 'v', "variable", "variables"}, - {true, 'g', "enum", "enumerations"} -}; - -static KeyWord freebasic_keywords[] = { - {"dim", K_VARIABLE}, /* must always be the first */ - {"common", K_VARIABLE}, /* must always be the second */ - {"const", K_CONST}, /* must always be the third */ - {"function", K_FUNCTION}, - {"sub", K_FUNCTION}, - {"property", K_FUNCTION}, - {"constructor", K_FUNCTION}, - {"destructor", K_FUNCTION}, - {"private sub", K_FUNCTION}, - {"public sub", K_FUNCTION}, - {"private function", K_FUNCTION}, - {"public function", K_FUNCTION}, - {"type", K_TYPE}, - {"enum", K_ENUM}, - {NULL, 0} -}; - -/* - * FUNCTION DEFINITIONS - */ - -/* Match the name of a dim or const starting at pos.
*/ -static int extract_dim (char const *pos, vString * name, BasicKind kind) -{ - const char *old_pos = pos; - while (isspace (*pos)) - pos++; - - /* create tags only if there is some space between the keyword and the identifier */ - if (old_pos == pos) - return 0; - - vStringClear (name); - - if (strncasecmp (pos, "shared", 6) == 0) - pos += 6; /* skip keyword "shared" */ - - while (isspace (*pos)) - pos++; - - /* capture "dim as String str" */ - if (strncasecmp (pos, "as", 2) == 0) - { - pos += 2; /* skip keyword "as" */ - - while (isspace (*pos)) - pos++; - while (!isspace (*pos)) /* skip next part which is a type */ - pos++; - while (isspace (*pos)) - pos++; - /* now we are at the name */ - } - /* capture "dim as foo ptr bar" */ - if (strncasecmp (pos, "ptr", 3) == 0 && isspace(*(pos+4))) - { - pos += 3; /* skip keyword "ptr" */ - while (isspace (*pos)) - pos++; - } - /* capture "dim as string * 4096 chunk" */ - if (strncmp (pos, "*", 1) == 0) - { - pos += 1; /* skip "*" */ - while (isspace (*pos) || isdigit(*pos) || ispunct(*pos)) - pos++; - } - - for (; *pos && !isspace (*pos) && *pos != '(' && *pos != ',' && *pos != '='; pos++) - vStringPut (name, *pos); - makeSimpleTag (name, kind); - - /* if the line contains a ',', we have multiple declarations */ - while (*pos && strchr (pos, ',')) - { - /* skip all we don't need(e.g. "..., new_array(5), " we skip "(5)") */ - while (*pos != ',' && *pos != '\'') - pos++; - - if (*pos == '\'') - return 0; /* break if we are in a comment */ - - while (isspace (*pos) || *pos == ',') - pos++; - - if (*pos == '\'') - return 0; /* break if we are in a comment */ - - vStringClear (name); - for (; *pos && !isspace (*pos) && *pos != '(' && *pos != ',' && *pos != '='; pos++) - vStringPut (name, *pos); - makeSimpleTag (name, kind); - } - - vStringDelete (name); - return 1; -} - -/* Match the name of a tag (function, variable, type, ...) starting at pos. 
*/ -static char const *extract_name (char const *pos, vString * name) -{ - while (isspace (*pos)) - pos++; - vStringClear (name); - for (; *pos && !isspace (*pos) && *pos != '(' && *pos != ',' && *pos != '='; pos++) - vStringPut (name, *pos); - return pos; -} - -/* Match a keyword starting at p (case insensitive). */ -static int match_keyword (const char *p, KeyWord const *kw) -{ - vString *name; - size_t i; - int j; - const char *old_p; - for (i = 0; i < strlen (kw->token); i++) - { - if (tolower (p[i]) != kw->token[i]) - return 0; - } - name = vStringNew (); - p += i; - if (kw == &freebasic_keywords[0] || - kw == &freebasic_keywords[1] || - kw == &freebasic_keywords[2]) - return extract_dim (p, name, kw->kind); /* extract_dim adds the found tag(s) */ - - old_p = p; - while (isspace (*p)) - p++; - - /* create tags only if there is some space between the keyword and the identifier */ - if (old_p == p) - { - vStringDelete (name); - return 0; - } - - for (j = 0; j < 1; j++) - { - p = extract_name (p, name); - } - makeSimpleTag (name, kw->kind); - vStringDelete (name); - return 1; -} - -/* Match a "label:" style label. */ -static void match_colon_label (char const *p) -{ - char const *end = p + strlen (p) - 1; - while (isspace (*end)) - end--; - if (*end == ':') - { - vString *name = vStringNew (); - vStringNCatS (name, p, end - p); - makeSimpleTag (name, K_LABEL); - vStringDelete (name); - } -} - -static void findBasicTags (void) -{ - const char *line; - KeyWord *keywords; - - keywords = freebasic_keywords; - - while ((line = (const char *) readLineFromInputFile ()) != NULL) - { - const char *p = line; - KeyWord const *kw; - - while (isspace (*p)) - p++; - - /* Empty line or comment? */ - if (!*p || *p == '\'') - continue; - - /* In Basic, keywords always are at the start of the line. */ - for (kw = keywords; kw->token; kw++) - if (match_keyword (p, kw)) break; - - /* Is it a label? 
*/ - match_colon_label (p); - } -} - -parserDefinition *BasicParser (void) -{ - static char const *extensions[] = { "bas", "bi", "bb", "pb", NULL }; - parserDefinition *def = parserNew ("FreeBasic"); - def->kindTable = BasicKinds; - def->kindCount = ARRAY_SIZE (BasicKinds); - def->extensions = extensions; - def->parser = findBasicTags; - return def; -} diff --git a/ctags/parsers/geany_go.c b/ctags/parsers/geany_go.c deleted file mode 100644 index fc8921a768..0000000000 --- a/ctags/parsers/geany_go.c +++ /dev/null @@ -1,831 +0,0 @@ -/* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. -* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include "debug.h" -#include "entry.h" -#include "keyword.h" -#include "read.h" -#include "parse.h" -#include "routines.h" -#include "vstring.h" -#include "options.h" -#include "xtag.h" - -/* - * MACROS - */ -#define MAX_SIGNATURE_LENGTH 512 -#define isType(token,t) (bool) ((token)->type == (t)) -#define isKeyword(token,k) (bool) ((token)->keyword == (k)) - -/* - * DATA DECLARATIONS - */ - -enum eKeywordId { - KEYWORD_package, - KEYWORD_import, - KEYWORD_const, - KEYWORD_type, - KEYWORD_var, - KEYWORD_func, - KEYWORD_struct, - KEYWORD_interface, - KEYWORD_map, - KEYWORD_chan -}; -typedef int keywordId; /* to allow KEYWORD_NONE */ - -typedef enum eTokenType { - TOKEN_NONE = -1, - // Token not important for top-level Go parsing - TOKEN_OTHER, - TOKEN_KEYWORD, - TOKEN_IDENTIFIER, - TOKEN_STRING, - TOKEN_OPEN_PAREN, - TOKEN_CLOSE_PAREN, - TOKEN_OPEN_CURLY, - TOKEN_CLOSE_CURLY, - TOKEN_OPEN_SQUARE, - TOKEN_CLOSE_SQUARE, - TOKEN_SEMICOLON, - TOKEN_STAR, - TOKEN_LEFT_ARROW, - TOKEN_DOT, - TOKEN_COMMA, - TOKEN_EOF -} tokenType; - -typedef struct sTokenInfo { - tokenType type; - keywordId keyword; - vString *string; /* the name of the token */ - unsigned long lineNumber; /* line number of tag */ - MIOPos 
filePosition; /* file position of line containing name */ -} tokenInfo; - -/* -* DATA DEFINITIONS -*/ - -static int Lang_go; -static vString *scope; -static vString *signature = NULL; - -typedef enum { - GOTAG_UNDEFINED = -1, - GOTAG_PACKAGE, - GOTAG_FUNCTION, - GOTAG_CONST, - GOTAG_TYPE, - GOTAG_VAR, - GOTAG_STRUCT, - GOTAG_INTERFACE, - GOTAG_MEMBER -} goKind; - -static kindDefinition GoKinds[] = { - {true, 'p', "package", "packages"}, - {true, 'f', "func", "functions"}, - {true, 'c', "const", "constants"}, - {true, 't', "type", "types"}, - {true, 'v', "var", "variables"}, - {true, 's', "struct", "structs"}, - {true, 'i', "interface", "interfaces"}, - {true, 'm', "member", "struct members"} -}; - -static const keywordTable GoKeywordTable[] = { - {"package", KEYWORD_package}, - {"import", KEYWORD_import}, - {"const", KEYWORD_const}, - {"type", KEYWORD_type}, - {"var", KEYWORD_var}, - {"func", KEYWORD_func}, - {"struct", KEYWORD_struct}, - {"interface", KEYWORD_interface}, - {"map", KEYWORD_map}, - {"chan", KEYWORD_chan} -}; - -/* -* FUNCTION DEFINITIONS -*/ - -// XXX UTF-8 -static bool isStartIdentChar (const int c) -{ - return (bool) - (isalpha (c) || c == '_' || c > 128); -} - -static bool isIdentChar (const int c) -{ - return (bool) - (isStartIdentChar (c) || isdigit (c)); -} - -static void initialize (const langType language) -{ - Lang_go = language; -} - -static tokenInfo *newToken (void) -{ - tokenInfo *const token = xMalloc (1, tokenInfo); - token->type = TOKEN_NONE; - token->keyword = KEYWORD_NONE; - token->string = vStringNew (); - token->lineNumber = getInputLineNumber (); - token->filePosition = getInputFilePosition (); - return token; -} - -static tokenInfo *copyToken (tokenInfo *other) -{ - tokenInfo *const token = xMalloc (1, tokenInfo); - token->type = other->type; - token->keyword = other->keyword; - token->string = vStringNewCopy (other->string); - token->lineNumber = other->lineNumber; - token->filePosition = other->filePosition; - return token; 
-} - -static void deleteToken (tokenInfo * const token) -{ - if (token != NULL) - { - vStringDelete (token->string); - eFree (token); - } -} - -/* - * Parsing functions - */ - -static void parseString (vString *const string, const int delimiter) -{ - bool end = false; - while (!end) - { - int c = getcFromInputFile (); - if (c == EOF) - end = true; - else if (c == '\\' && delimiter != '`') - { - c = getcFromInputFile (); - if (c != '\'' && c != '\"') - vStringPut (string, '\\'); - vStringPut (string, c); - } - else if (c == delimiter) - end = true; - else - vStringPut (string, c); - } -} - -static void parseIdentifier (vString *const string, const int firstChar) -{ - int c = firstChar; - do - { - vStringPut (string, c); - c = getcFromInputFile (); - } while (isIdentChar (c)); - ungetcToInputFile (c); /* always unget, LF might add a semicolon */ -} - -static void readToken (tokenInfo *const token) -{ - int c; - static tokenType lastTokenType = TOKEN_NONE; - bool firstWhitespace = true; - bool whitespace; - - token->type = TOKEN_NONE; - token->keyword = KEYWORD_NONE; - vStringClear (token->string); - -getNextChar: - do - { - c = getcFromInputFile (); - token->lineNumber = getInputLineNumber (); - token->filePosition = getInputFilePosition (); - if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER || - lastTokenType == TOKEN_STRING || - lastTokenType == TOKEN_OTHER || - lastTokenType == TOKEN_CLOSE_PAREN || - lastTokenType == TOKEN_CLOSE_CURLY || - lastTokenType == TOKEN_CLOSE_SQUARE)) - { - c = ';'; // semicolon injection - } - whitespace = c == '\t' || c == ' ' || c == '\r' || c == '\n'; - if (signature && whitespace && firstWhitespace && vStringLength (signature) < MAX_SIGNATURE_LENGTH) - { - firstWhitespace = false; - vStringPut(signature, ' '); - } - } - while (whitespace); - - switch (c) - { - case EOF: - token->type = TOKEN_EOF; - break; - - case ';': - token->type = TOKEN_SEMICOLON; - break; - - case '/': - { - bool hasNewline = false; - int d = 
getcFromInputFile (); - switch (d) - { - case '/': - skipToCharacterInInputFile ('\n'); - /* Line comments start with the - * character sequence // and - * continue through the next - * newline. A line comment acts - * like a newline. */ - ungetcToInputFile ('\n'); - goto getNextChar; - case '*': - do - { - do - { - d = getcFromInputFile (); - if (d == '\n') - { - hasNewline = true; - } - } while (d != EOF && d != '*'); - - c = getcFromInputFile (); - if (c == '/') - break; - else - ungetcToInputFile (c); - } while (c != EOF && c != '\0'); - - ungetcToInputFile (hasNewline ? '\n' : ' '); - goto getNextChar; - default: - token->type = TOKEN_OTHER; - ungetcToInputFile (d); - break; - } - } - break; - - case '"': - case '\'': - case '`': - token->type = TOKEN_STRING; - parseString (token->string, c); - token->lineNumber = getInputLineNumber (); - token->filePosition = getInputFilePosition (); - break; - - case '<': - { - int d = getcFromInputFile (); - if (d == '-') - token->type = TOKEN_LEFT_ARROW; - else - { - ungetcToInputFile (d); - token->type = TOKEN_OTHER; - } - } - break; - - case '(': - token->type = TOKEN_OPEN_PAREN; - break; - - case ')': - token->type = TOKEN_CLOSE_PAREN; - break; - - case '{': - token->type = TOKEN_OPEN_CURLY; - break; - - case '}': - token->type = TOKEN_CLOSE_CURLY; - break; - - case '[': - token->type = TOKEN_OPEN_SQUARE; - break; - - case ']': - token->type = TOKEN_CLOSE_SQUARE; - break; - - case '*': - token->type = TOKEN_STAR; - break; - - case '.': - token->type = TOKEN_DOT; - break; - - case ',': - token->type = TOKEN_COMMA; - break; - - default: - if (isStartIdentChar (c)) - { - parseIdentifier (token->string, c); - token->lineNumber = getInputLineNumber (); - token->filePosition = getInputFilePosition (); - token->keyword = lookupKeyword (vStringValue (token->string), Lang_go); - if (isKeyword (token, KEYWORD_NONE)) - token->type = TOKEN_IDENTIFIER; - else - token->type = TOKEN_KEYWORD; - } - else - token->type = TOKEN_OTHER; - 
break; - } - - if (signature && vStringLength (signature) < MAX_SIGNATURE_LENGTH) - { - if (token->type == TOKEN_LEFT_ARROW) - vStringCatS(signature, "<-"); - else if (token->type == TOKEN_STRING) - { - // only struct member annotations can appear in function prototypes - // so only `` type strings are possible - vStringPut(signature, '`'); - vStringCat(signature, token->string); - vStringPut(signature, '`'); - } - else if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_KEYWORD) - vStringCat(signature, token->string); - else if (c != EOF) - vStringPut(signature, c); - } - - lastTokenType = token->type; -} - -static bool skipToMatchedNoRead (tokenInfo *const token) -{ - int nest_level = 0; - tokenType open_token = token->type; - tokenType close_token; - - switch (open_token) - { - case TOKEN_OPEN_PAREN: - close_token = TOKEN_CLOSE_PAREN; - break; - case TOKEN_OPEN_CURLY: - close_token = TOKEN_CLOSE_CURLY; - break; - case TOKEN_OPEN_SQUARE: - close_token = TOKEN_CLOSE_SQUARE; - break; - default: - return false; - } - - /* - * This routine will skip to a matching closing token. - * It will also handle nested tokens. - */ - nest_level++; - while (nest_level > 0 && !isType (token, TOKEN_EOF)) - { - readToken (token); - if (isType (token, open_token)) - nest_level++; - else if (isType (token, close_token)) - nest_level--; - } - - return true; -} - -static void skipToMatched (tokenInfo *const token) -{ - if (skipToMatchedNoRead (token)) - readToken (token); -} - -static bool skipType (tokenInfo *const token) -{ - // Type = TypeName | TypeLit | "(" Type ")" . - // Skips also function multiple return values "(" Type {"," Type} ")" - if (isType (token, TOKEN_OPEN_PAREN)) - { - skipToMatched (token); - return true; - } - - // TypeName = QualifiedIdent. - // QualifiedIdent = [ PackageName "." ] identifier . - // PackageName = identifier . 
- if (isType (token, TOKEN_IDENTIFIER)) - { - readToken (token); - if (isType (token, TOKEN_DOT)) - { - readToken (token); - if (isType (token, TOKEN_IDENTIFIER)) - readToken (token); - } - return true; - } - - // StructType = "struct" "{" { FieldDecl ";" } "}" - // InterfaceType = "interface" "{" { MethodSpec ";" } "}" . - if (isKeyword (token, KEYWORD_struct) || isKeyword (token, KEYWORD_interface)) - { - readToken (token); - // skip over "{}" - skipToMatched (token); - return true; - } - - // ArrayType = "[" ArrayLength "]" ElementType . - // SliceType = "[" "]" ElementType . - // ElementType = Type . - if (isType (token, TOKEN_OPEN_SQUARE)) - { - skipToMatched (token); - return skipType (token); - } - - // PointerType = "*" BaseType . - // BaseType = Type . - // ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType . - if (isType (token, TOKEN_STAR) || isKeyword (token, KEYWORD_chan) || isType (token, TOKEN_LEFT_ARROW)) - { - readToken (token); - return skipType (token); - } - - // MapType = "map" "[" KeyType "]" ElementType . - // KeyType = Type . - if (isKeyword (token, KEYWORD_map)) - { - readToken (token); - // skip over "[]" - skipToMatched (token); - return skipType (token); - } - - // FunctionType = "func" Signature . - // Signature = Parameters [ Result ] . - // Result = Parameters | Type . - // Parameters = "(" [ ParameterList [ "," ] ] ")" . - if (isKeyword (token, KEYWORD_func)) - { - readToken (token); - // Parameters, skip over "()" - skipToMatched (token); - // Result is parameters or type or nothing. skipType treats anything - // surrounded by parentheses as a type, and does nothing if what - // follows is not a type. 
- return skipType (token); - } - - return false; -} - -static void makeTag (tokenInfo *const token, const goKind kind, - tokenInfo *const parent_token, const goKind parent_kind, - const char *argList, const char *varType) -{ - const char *const name = vStringValue (token->string); - - tagEntryInfo e; - initTagEntry (&e, name, kind); - - if (!GoKinds [kind].enabled) - return; - - e.lineNumber = token->lineNumber; - e.filePosition = token->filePosition; - if (argList) - e.extensionFields.signature = argList; - if (varType) - e.extensionFields.typeRef[1] = varType; - - if (parent_kind != GOTAG_UNDEFINED && parent_token != NULL) - { - e.extensionFields.scopeKindIndex = parent_kind; - e.extensionFields.scopeName = vStringValue (parent_token->string); - } - makeTagEntry (&e); - - if (scope && isXtagEnabled(XTAG_QUALIFIED_TAGS)) - { - vString *qualifiedName = vStringNew (); - vStringCopy (qualifiedName, scope); - vStringCatS (qualifiedName, "."); - vStringCat (qualifiedName, token->string); - e.name = vStringValue (qualifiedName); - makeTagEntry (&e); - vStringDelete (qualifiedName); - } -} - -static void parsePackage (tokenInfo *const token) -{ - readToken (token); - if (isType (token, TOKEN_IDENTIFIER)) - { - makeTag (token, GOTAG_PACKAGE, NULL, GOTAG_UNDEFINED, NULL, NULL); - if (!scope && isXtagEnabled(XTAG_QUALIFIED_TAGS)) - { - scope = vStringNew (); - vStringCopy (scope, token->string); - } - } -} - -static void parseFunctionOrMethod (tokenInfo *const token) -{ - // FunctionDecl = "func" identifier Signature [ Body ] . - // Body = Block. - // - // MethodDecl = "func" Receiver MethodName Signature [ Body ] . - // Receiver = "(" [ identifier ] [ "*" ] BaseTypeName ")" . - // BaseTypeName = identifier . - - // Skip over receiver. 
- readToken (token); - if (isType (token, TOKEN_OPEN_PAREN)) - skipToMatched (token); - - if (isType (token, TOKEN_IDENTIFIER)) - { - vString *argList; - tokenInfo *functionToken = copyToken (token); - - // Start recording signature - signature = vStringNew (); - - // Skip over parameters. - readToken (token); - skipToMatchedNoRead (token); - - vStringStripLeading (signature); - vStringStripTrailing (signature); - argList = signature; - signature = vStringNew (); - - readToken (token); - - // Skip over result. - skipType (token); - - // Remove the extra { we have just read - vStringStripTrailing (signature); - vStringChop (signature); - - vStringStripLeading (signature); - vStringStripTrailing (signature); - makeTag (functionToken, GOTAG_FUNCTION, NULL, GOTAG_UNDEFINED, argList->buffer, signature->buffer); - deleteToken (functionToken); - vStringDelete(signature); - vStringDelete(argList); - - // Stop recording signature - signature = NULL; - - // Skip over function body. - if (isType (token, TOKEN_OPEN_CURLY)) - skipToMatched (token); - } -} - -static void parseStructMembers (tokenInfo *const token, tokenInfo *const parent_token) -{ - // StructType = "struct" "{" { FieldDecl ";" } "}" . - // FieldDecl = (IdentifierList Type | AnonymousField) [ Tag ] . - // AnonymousField = [ "*" ] TypeName . - // Tag = string_lit . 
- - readToken (token); - if (!isType (token, TOKEN_OPEN_CURLY)) - return; - - readToken (token); - while (!isType (token, TOKEN_EOF) && !isType (token, TOKEN_CLOSE_CURLY)) - { - tokenInfo *memberCandidate = NULL; - bool first = true; - - while (!isType (token, TOKEN_EOF)) - { - if (isType (token, TOKEN_IDENTIFIER)) - { - if (first) - { - // could be anonymous field like in 'struct {int}' - we don't know yet - memberCandidate = copyToken (token); - first = false; - } - else - { - if (memberCandidate) - { - // if we are here, there was a comma and memberCandidate isn't an anonymous field - makeTag (memberCandidate, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL); - deleteToken (memberCandidate); - memberCandidate = NULL; - } - makeTag (token, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL); - } - readToken (token); - } - if (!isType (token, TOKEN_COMMA)) - break; - readToken (token); - } - - // in the case of an anonymous field, we already read part of the - // type into memberCandidate and skipType() should return false so no tag should - // be generated in this case. - if (skipType (token) && memberCandidate) - makeTag (memberCandidate, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL); - - if (memberCandidate) - deleteToken (memberCandidate); - - while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_CURLY) - && !isType (token, TOKEN_EOF)) - { - readToken (token); - skipToMatched (token); - } - - if (!isType (token, TOKEN_CLOSE_CURLY)) - { - // we are at TOKEN_SEMICOLON - readToken (token); - } - } -} - -static void parseConstTypeVar (tokenInfo *const token, goKind kind) -{ - // ConstDecl = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) . - // ConstSpec = IdentifierList [ [ Type ] "=" ExpressionList ] . - // IdentifierList = identifier { "," identifier } . - // ExpressionList = Expression { "," Expression } . - // TypeDecl = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) . - // TypeSpec = identifier Type . 
- // VarDecl = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) . - // VarSpec = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) . - bool usesParens = false; - - readToken (token); - - if (isType (token, TOKEN_OPEN_PAREN)) - { - usesParens = true; - readToken (token); - } - - do - { - tokenInfo *typeToken = NULL; - - while (!isType (token, TOKEN_EOF)) - { - if (isType (token, TOKEN_IDENTIFIER)) - { - if (kind == GOTAG_TYPE) - { - typeToken = copyToken (token); - readToken (token); - if (isKeyword (token, KEYWORD_struct)) - makeTag (typeToken, GOTAG_STRUCT, NULL, GOTAG_UNDEFINED, NULL, NULL); - else if (isKeyword (token, KEYWORD_interface)) - makeTag (typeToken, GOTAG_INTERFACE, NULL, GOTAG_UNDEFINED, NULL, NULL); - else - makeTag (typeToken, kind, NULL, GOTAG_UNDEFINED, NULL, NULL); - break; - } - else - makeTag (token, kind, NULL, GOTAG_UNDEFINED, NULL, NULL); - readToken (token); - } - if (!isType (token, TOKEN_COMMA)) - break; - readToken (token); - } - - if (typeToken) - { - if (isKeyword (token, KEYWORD_struct)) - parseStructMembers (token, typeToken); - else - skipType (token); - deleteToken (typeToken); - } - else - skipType (token); - - while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_PAREN) - && !isType (token, TOKEN_EOF)) - { - readToken (token); - skipToMatched (token); - } - - if (usesParens && !isType (token, TOKEN_CLOSE_PAREN)) - { - // we are at TOKEN_SEMICOLON - readToken (token); - } - } - while (!isType (token, TOKEN_EOF) && - usesParens && !isType (token, TOKEN_CLOSE_PAREN)); -} - -static void parseGoFile (tokenInfo *const token) -{ - do - { - readToken (token); - - if (isType (token, TOKEN_KEYWORD)) - { - switch (token->keyword) - { - case KEYWORD_package: - parsePackage (token); - break; - case KEYWORD_func: - parseFunctionOrMethod (token); - break; - case KEYWORD_const: - parseConstTypeVar (token, GOTAG_CONST); - break; - case KEYWORD_type: - parseConstTypeVar (token, GOTAG_TYPE); - break; - case 
KEYWORD_var: - parseConstTypeVar (token, GOTAG_VAR); - break; - default: - break; - } - } - else if (isType (token, TOKEN_OPEN_PAREN) || isType (token, TOKEN_OPEN_CURLY) || - isType (token, TOKEN_OPEN_SQUARE)) - { - skipToMatched (token); - } - } while (token->type != TOKEN_EOF); -} - -static void findGoTags (void) -{ - tokenInfo *const token = newToken (); - - parseGoFile (token); - - deleteToken (token); - vStringDelete (scope); - scope = NULL; -} - -extern parserDefinition *GoParser (void) -{ - static const char *const extensions[] = { "go", NULL }; - parserDefinition *def = parserNew ("Go"); - def->kindTable = GoKinds; - def->kindCount = ARRAY_SIZE (GoKinds); - def->extensions = extensions; - def->parser = findGoTags; - def->initialize = initialize; - def->keywordTable = GoKeywordTable; - def->keywordCount = ARRAY_SIZE (GoKeywordTable); - return def; -} diff --git a/ctags/parsers/geany_iniconf.c b/ctags/parsers/geany_iniconf.c deleted file mode 100644 index 0f266df7c8..0000000000 --- a/ctags/parsers/geany_iniconf.c +++ /dev/null @@ -1,128 +0,0 @@ -/* -* -* Copyright (c) 2000-2001, Darren Hiebert -* -* This source code is released for free distribution under the terms of the -* GNU General Public License. -* -* This module contains functions for generating tags for config files. 
-*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include - -#include "parse.h" -#include "read.h" -#include "vstring.h" -#include "routines.h" -#include "entry.h" - -/* -* DATA DEFINITIONS -*/ -typedef enum { - K_SECTION, - K_KEY -} confKind; - -static kindDefinition ConfKinds [] = { - { true, 'n', "namespace", "sections"}, - { true, 'm', "macro", "keys"} -}; - -/* -* FUNCTION DEFINITIONS -*/ - -static bool isIdentifier (int c) -{ - /* allow whitespace within keys and sections */ - return (bool)(isalnum (c) || isspace (c) || c == '_'); -} - -static void findConfTags (void) -{ - vString *name = vStringNew (); - vString *scope = vStringNew (); - const unsigned char *line; - - while ((line = readLineFromInputFile ()) != NULL) - { - const unsigned char* cp = line; - bool possible = true; - - if (isspace ((int) *cp) || *cp == '#' || (*cp != '\0' && *cp == '/' && *(cp+1) == '/')) - continue; - - /* look for a section */ - if (*cp != '\0' && *cp == '[') - { - ++cp; - while (*cp != '\0' && *cp != ']') - { - vStringPut (name, (int) *cp); - ++cp; - } - makeSimpleTag (name, K_SECTION); - /* remember section name */ - vStringCopy (scope, name); - vStringClear (name); - continue; - } - - while (*cp != '\0') - { - /* We look for any sequence of identifier characters following a white space */ - if (possible && isIdentifier ((int) *cp)) - { - while (isIdentifier ((int) *cp)) - { - vStringPut (name, (int) *cp); - ++cp; - } - vStringStripTrailing (name); - while (isspace ((int) *cp)) - ++cp; - if (*cp == '=') - { - tagEntryInfo e; - initTagEntry (&e, vStringValue (name), K_KEY); - - if (vStringLength (scope) > 0) - { - e.extensionFields.scopeKindIndex = K_SECTION; - e.extensionFields.scopeName = vStringValue(scope); - } - makeTagEntry (&e); - } - vStringClear (name); - } - else if (isspace ((int) *cp)) - possible = true; - else - possible = false; - - if (*cp != '\0') - ++cp; - } - } - vStringDelete (name); - vStringDelete (scope); -} - -extern 
parserDefinition* ConfParser (void) -{ - static const char *const patterns [] = { "*.ini", "*.conf", NULL }; - static const char *const extensions [] = { "conf", NULL }; - parserDefinition* const def = parserNew ("Conf"); - def->kindTable = ConfKinds; - def->kindCount = ARRAY_SIZE (ConfKinds); - def->patterns = patterns; - def->extensions = extensions; - def->parser = findConfTags; - return def; -} diff --git a/ctags/parsers/geany_lua.c b/ctags/parsers/geany_lua.c deleted file mode 100644 index 608b71dfa0..0000000000 --- a/ctags/parsers/geany_lua.c +++ /dev/null @@ -1,120 +0,0 @@ -/* -* Copyright (c) 2000-2001, Max Ischenko . -* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. -* -* This module contains functions for generating tags for Lua language. -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include - -#include "parse.h" -#include "read.h" -#include "routines.h" -#include "vstring.h" - -/* -* DATA DEFINITIONS -*/ -typedef enum { - K_FUNCTION -} luaKind; - -static kindDefinition LuaKinds [] = { - { true, 'f', "function", "functions" } -}; - -/* -* FUNCTION DEFINITIONS -*/ - -/* - * Helper function. - * Returns 1 if line looks like a line of Lua code. - * - * TODO: Recognize UNIX bang notation. - * (Lua treat first line as a comment if it starts with #!) 
- * - */ -static bool is_a_code_line (const unsigned char *line) -{ - bool result; - const unsigned char *p = line; - while (isspace ((int) *p)) - p++; - if (p [0] == '\0') - result = false; - else if (p [0] == '-' && p [1] == '-') - result = false; - else - result = true; - return result; -} - -static void extract_name (const char *begin, const char *end, vString *name) -{ - if (begin != NULL && end != NULL && begin < end) - { - const char *cp; - - while (isspace ((int) *begin)) - begin++; - while (isspace ((int) *end)) - end--; - if (begin < end) - { - for (cp = begin ; cp != end; cp++) - vStringPut (name, (int) *cp); - - makeSimpleTag (name, K_FUNCTION); - vStringClear (name); - } - } -} - -static void findLuaTags (void) -{ - vString *name = vStringNew (); - const unsigned char *line; - - while ((line = readLineFromInputFile ()) != NULL) - { - const char *p, *q; - - if (! is_a_code_line (line)) - continue; - - p = (const char*) strstr ((const char*) line, "function"); - if (p == NULL) - continue; - - q = strchr ((const char*) line, '='); - - if (q == NULL) { - p = p + 9; /* skip the `function' word */ - q = strchr ((const char*) p, '('); - extract_name (p, q, name); - } else if (*(q+1) != '=') { /* ignore `if type(v) == "function" then ...' 
*/ - p = (const char*) &line[0]; - extract_name (p, q, name); - } - } - vStringDelete (name); -} - -extern parserDefinition* LuaParser (void) -{ - static const char* const extensions [] = { "lua", NULL }; - parserDefinition* def = parserNew ("Lua"); - def->kindTable = LuaKinds; - def->kindCount = ARRAY_SIZE (LuaKinds); - def->extensions = extensions; - def->parser = findLuaTags; - return def; -} diff --git a/ctags/parsers/geany_nsis.c b/ctags/parsers/geany_nsis.c deleted file mode 100644 index a06a81508b..0000000000 --- a/ctags/parsers/geany_nsis.c +++ /dev/null @@ -1,142 +0,0 @@ -/* -* Copyright (c) 2000-2002, Darren Hiebert -* Copyright (c) 2009-2011, Enrico Tröger -* -* This source code is released for free distribution under the terms of the -* GNU General Public License. -* -* This module contains functions for generating tags for NSIS scripts (based on sh.c). -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include - -#include "parse.h" -#include "read.h" -#include "vstring.h" -#include "routines.h" - -/* -* DATA DEFINITIONS -*/ -typedef enum { - K_SECTION, - K_FUNCTION, - K_VARIABLE -} NsisKind; - -static kindDefinition NsisKinds [] = { - { true, 'n', "namespace", "sections"}, - { true, 'f', "function", "functions"}, - { true, 'v', "variable", "variables"} -}; - -/* -* FUNCTION DEFINITIONS -*/ - -static void findNsisTags (void) -{ - vString *name = vStringNew (); - const unsigned char *line; - - while ((line = readLineFromInputFile ()) != NULL) - { - const unsigned char* cp = line; - - while (isspace (*cp)) - cp++; - - if (*cp == '#' || *cp == ';') - continue; - - /* functions */ - if (strncasecmp ((const char*) cp, "function", (size_t) 8) == 0 && - isspace ((int) cp [8])) - { - cp += 8; - /* skip all whitespace */ - while (isspace ((int) *cp)) - ++cp; - while (isalnum ((int) *cp) || *cp == '_' || *cp == '-' || *cp == '.' 
|| *cp == '!') - { - vStringPut (name, (int) *cp); - ++cp; - } - makeSimpleTag (name, K_FUNCTION); - vStringClear (name); - } - /* variables */ - else if (strncasecmp ((const char*) cp, "var", (size_t) 3) == 0 && - isspace ((int) cp [3])) - { - cp += 3; - /* skip all whitespace */ - while (isspace ((int) *cp)) - ++cp; - /* skip any flags */ - while (*cp == '/') - { - ++cp; - while (! isspace ((int) *cp)) - ++cp; - while (isspace ((int) *cp)) - ++cp; - } - while (isalnum ((int) *cp) || *cp == '_') - { - vStringPut (name, (int) *cp); - ++cp; - } - makeSimpleTag (name, K_VARIABLE); - vStringClear (name); - } - /* sections */ - else if (strncasecmp ((const char*) cp, "section", (size_t) 7) == 0 && - isspace ((int) cp [7])) - { - bool in_quotes = false; - cp += 7; - /* skip all whitespace */ - while (isspace ((int) *cp)) - ++cp; - while (isalnum ((int) *cp) || isspace ((int) *cp) || - *cp == '_' || *cp == '-' || *cp == '.' || *cp == '!' || *cp == '"') - { - if (*cp == '"') - { - if (in_quotes) - break; - else - { - in_quotes = true; - ++cp; - continue; - } - } - vStringPut (name, (int) *cp); - ++cp; - } - makeSimpleTag (name, K_SECTION); - vStringClear (name); - } - } - vStringDelete (name); -} - -extern parserDefinition* NsisParser (void) -{ - static const char *const extensions [] = { - "nsi", "nsh", NULL - }; - parserDefinition* def = parserNew ("NSIS"); - def->kindTable = NsisKinds; - def->kindCount = ARRAY_SIZE (NsisKinds); - def->extensions = extensions; - def->parser = findNsisTags; - return def; -} diff --git a/ctags/parsers/geany_perl.c b/ctags/parsers/geany_perl.c deleted file mode 100644 index 5e6c8e42aa..0000000000 --- a/ctags/parsers/geany_perl.c +++ /dev/null @@ -1,380 +0,0 @@ -/* -* Copyright (c) 2000-2003, Darren Hiebert -* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. 
-* -* This module contains functions for generating tags for PERL language -* files. -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ -#include "debug.h" - -#include - -#include "entry.h" -#include "promise.h" -#include "options.h" -#include "read.h" -#include "routines.h" -#include "vstring.h" -#include "xtag.h" - -#define TRACE_PERL_C 0 -#define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf - -/* -* DATA DEFINITIONS -*/ -typedef enum { - K_NONE = -1, - K_CONSTANT, - K_FORMAT, - K_LABEL, - K_PACKAGE, - K_SUBROUTINE, - K_SUBROUTINE_DECLARATION -} perlKind; - -static kindDefinition PerlKinds [] = { - { true, 'c', "constant", "constants" }, - { true, 'f', "format", "formats" }, - { true, 'l', "label", "labels" }, - { true, 'p', "package", "packages" }, - { true, 's', "subroutine", "subroutines" }, - { false, 'd', "subroutineDeclaration", "subroutine declarations" }, -}; - -/* -* FUNCTION DEFINITIONS -*/ - -static bool isIdentifier1 (int c) -{ - return (bool) (isalpha (c) || c == '_'); -} - -static bool isIdentifier (int c) -{ - return (bool) (isalnum (c) || c == '_'); -} - -static bool isPodWord (const char *word) -{ - bool result = false; - if (isalpha (*word)) - { - const char *const pods [] = { - "head1", "head2", "head3", "head4", "over", "item", "back", - "pod", "begin", "end", "for" - }; - const size_t count = ARRAY_SIZE (pods); - const char *white = strpbrk (word, " \t"); - const size_t len = (white!=NULL) ? (size_t)(white-word) : strlen (word); - char *const id = (char*) eMalloc (len + 1); - size_t i; - strncpy (id, word, len); - id [len] = '\0'; - for (i = 0 ; i < count && ! result ; ++i) - { - if (strcmp (id, pods [i]) == 0) - result = true; - } - eFree (id); - } - return result; -} - -/* - * Perl subroutine declaration may look like one of the following: - * - * sub abc; - * sub abc :attr; - * sub abc (proto); - * sub abc (proto) :attr; - * - * Note that there may be more than one attribute. 
Attributes may - * have things in parentheses (they look like arguments). Anything - * inside of those parentheses goes. Prototypes may contain semi-colons. - * The matching end when we encounter (outside of any parentheses) either - * a semi-colon (that'd be a declaration) or an left curly brace - * (definition). - * - * This is pretty complicated parsing (plus we all know that only perl can - * parse Perl), so we are only promising best effort here. - * - * If we can't determine what this is (due to a file ending, for example), - * we will return false. - */ -static bool isSubroutineDeclaration (const unsigned char *cp) -{ - bool attr = false; - int nparens = 0; - - do { - for ( ; *cp; ++cp) { -SUB_DECL_SWITCH: - switch (*cp) { - case ':': - if (nparens) - break; - else if (true == attr) - return false; /* Invalid attribute name */ - else - attr = true; - break; - case '(': - ++nparens; - break; - case ')': - --nparens; - break; - case ' ': - case '\t': - break; - case ';': - if (!nparens) - return true; - /* fall through */ - case '{': - if (!nparens) - return false; - /* fall through */ - default: - if (attr) { - if (isIdentifier1(*cp)) { - cp++; - while (isIdentifier (*cp)) - cp++; - attr = false; - goto SUB_DECL_SWITCH; /* Instead of --cp; */ - } else { - return false; - } - } else if (nparens) { - break; - } else { - return false; - } - } - } - } while (NULL != (cp = readLineFromInputFile ())); - - return false; -} - -/* Algorithm adapted from from GNU etags. 
- * Perl support by Bart Robinson - * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/ - */ -static void findPerlTags (void) -{ - vString *name = vStringNew (); - vString *package = NULL; - bool skipPodDoc = false; - const unsigned char *line; - - while ((line = readLineFromInputFile ()) != NULL) - { - bool spaceRequired = false; - bool qualified = false; - const unsigned char *cp = line; - perlKind kind = K_NONE; - tagEntryInfo e; - - if (skipPodDoc) - { - if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0) - skipPodDoc = false; - continue; - } - else if (line [0] == '=') - { - skipPodDoc = isPodWord ((const char*)line + 1); - continue; - } - else if (strcmp ((const char*) line, "__DATA__") == 0) - break; - else if (strcmp ((const char*) line, "__END__") == 0) - break; - else if (line [0] == '#') - continue; - - while (isspace (*cp)) - cp++; - - if (strncmp((const char*) cp, "sub", (size_t) 3) == 0) - { - TRACE("this looks like a sub\n"); - cp += 3; - kind = K_SUBROUTINE; - spaceRequired = true; - qualified = true; - } - else if (strncmp((const char*) cp, "use", (size_t) 3) == 0) - { - cp += 3; - if (!isspace(*cp)) - continue; - while (*cp && isspace (*cp)) - ++cp; - if (strncmp((const char*) cp, "constant", (size_t) 8) != 0) - continue; - cp += 8; - kind = K_CONSTANT; - spaceRequired = true; - qualified = true; - } - else if (strncmp((const char*) cp, "package", (size_t) 7) == 0) - { - /* This will point to space after 'package' so that a tag - can be made */ - const unsigned char *space = cp += 7; - - if (package == NULL) - package = vStringNew (); - else - vStringClear (package); - while (isspace (*cp)) - cp++; - while ((int) *cp != ';' && !isspace ((int) *cp)) - { - vStringPut (package, (int) *cp); - cp++; - } - vStringCatS (package, "::"); - - cp = space; /* Rewind */ - kind = K_PACKAGE; - spaceRequired = true; - qualified = true; - } - else if (strncmp((const char*) cp, "format", (size_t) 6) == 0) - { - cp += 6; - kind = K_FORMAT; - 
spaceRequired = true; - qualified = true; - } - else - { - if (isIdentifier1 (*cp)) - { - const unsigned char *p = cp; - while (isIdentifier (*p)) - ++p; - while (isspace (*p)) - ++p; - if ((int) *p == ':' && (int) *(p + 1) != ':') - kind = K_LABEL; - } - } - if (kind != K_NONE) - { - TRACE("cp0: %s\n", (const char *) cp); - if (spaceRequired && *cp && !isspace (*cp)) - continue; - - TRACE("cp1: %s\n", (const char *) cp); - while (isspace (*cp)) - cp++; - - while (!*cp || '#' == *cp) { /* Gobble up empty lines - and comments */ - cp = readLineFromInputFile (); - if (!cp) - goto END_MAIN_WHILE; - while (isspace (*cp)) - cp++; - } - - while (isIdentifier (*cp) || (K_PACKAGE == kind && ':' == *cp)) - { - vStringPut (name, (int) *cp); - cp++; - } - - if (K_FORMAT == kind && - vStringLength (name) == 0 && /* cp did not advance */ - '=' == *cp) - { - /* format's name is optional. If it's omitted, 'STDOUT' - is assumed. */ - vStringCatS (name, "STDOUT"); - } - - TRACE("name: %s\n", name->buffer); - - if (0 == vStringLength(name)) { - vStringClear(name); - continue; - } - - if (K_SUBROUTINE == kind) - { - /* - * isSubroutineDeclaration() may consume several lines. So - * we record line positions. 
- */ - initTagEntry(&e, vStringValue(name), kind); - - if (true == isSubroutineDeclaration(cp)) { - if (true == PerlKinds[K_SUBROUTINE_DECLARATION].enabled) { - kind = K_SUBROUTINE_DECLARATION; - e.kindIndex = kind; - } else { - vStringClear (name); - continue; - } - } - - makeTagEntry(&e); - - if (isXtagEnabled(XTAG_QUALIFIED_TAGS) && qualified && - package != NULL && vStringLength (package) > 0) - { - vString *const qualifiedName = vStringNew (); - vStringCopy (qualifiedName, package); - vStringCat (qualifiedName, name); - e.name = vStringValue(qualifiedName); - makeTagEntry(&e); - vStringDelete (qualifiedName); - } - } else if (vStringLength (name) > 0) - { - makeSimpleTag (name, kind); - if (isXtagEnabled(XTAG_QUALIFIED_TAGS) && qualified && - K_PACKAGE != kind && - package != NULL && vStringLength (package) > 0) - { - vString *const qualifiedName = vStringNew (); - vStringCopy (qualifiedName, package); - vStringCat (qualifiedName, name); - makeSimpleTag (qualifiedName, kind); - vStringDelete (qualifiedName); - } - } - vStringClear (name); - } - } - -END_MAIN_WHILE: - vStringDelete (name); - if (package != NULL) - vStringDelete (package); -} - -extern parserDefinition* PerlParser (void) -{ - static const char *const extensions [] = { "pl", "pm", "plx", "perl", NULL }; - parserDefinition* def = parserNew ("Perl"); - def->kindTable = PerlKinds; - def->kindCount = ARRAY_SIZE (PerlKinds); - def->extensions = extensions; - def->parser = findPerlTags; - return def; -} diff --git a/ctags/parsers/geany_ruby.c b/ctags/parsers/geany_ruby.c deleted file mode 100644 index 4f4fb8a98a..0000000000 --- a/ctags/parsers/geany_ruby.c +++ /dev/null @@ -1,564 +0,0 @@ -/* -* Copyright (c) 2000-2001, Thaddeus Covert -* Copyright (c) 2002 Matthias Veit -* Copyright (c) 2004 Elliott Hughes -* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. 
-* -* This module contains functions for generating tags for Ruby language -* files. -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include - -#include "debug.h" -#include "entry.h" -#include "parse.h" -#include "nestlevel.h" -#include "read.h" -#include "routines.h" -#include "vstring.h" - -/* -* DATA DECLARATIONS -*/ -typedef enum { - K_UNDEFINED = -1, K_CLASS, K_METHOD, K_MODULE, K_SINGLETON, -} rubyKind; - -/* -* DATA DEFINITIONS -*/ -static kindDefinition RubyKinds [] = { - { true, 'c', "class", "classes" }, - { true, 'f', "method", "methods" }, - { true, 'm', "module", "modules" }, - { true, 'S', "singletonMethod", "singleton methods" }, -#if 0 - /* Following two kinds are reserved. */ - { true, 'd', "describe", "describes and contexts for Rspec" }, - { true, 'C', "constant", "constants" }, -#endif -}; - -static NestingLevels* nesting = NULL; - -#define SCOPE_SEPARATOR '.' - -/* -* FUNCTION DEFINITIONS -*/ - -static void enterUnnamedScope (void); - -/* -* Returns a string describing the scope in 'nls'. -* We record the current scope as a list of entered scopes. -* Scopes corresponding to 'if' statements and the like are -* represented by empty strings. Scopes corresponding to -* modules and classes are represented by the name of the -* module or class. -*/ -static vString* nestingLevelsToScope (const NestingLevels* nls) -{ - int i; - unsigned int chunks_output = 0; - vString* result = vStringNew (); - for (i = 0; i < nls->n; ++i) - { - NestingLevel *nl = nestingLevelsGetNthFromRoot (nls, i); - tagEntryInfo *e = getEntryOfNestingLevel (nl); - if (e && strlen (e->name) > 0 && (!e->placeholder)) - { - if (chunks_output++ > 0) - vStringPut (result, SCOPE_SEPARATOR); - vStringCatS (result, e->name); - } - } - return result; -} - -/* -* Attempts to advance 's' past 'literal'. -* Returns true if it did, false (and leaves 's' where -* it was) otherwise. 
-*/ -static bool canMatch (const unsigned char** s, const char* literal, - bool (*end_check) (int)) -{ - const int literal_length = strlen (literal); - const int s_length = strlen ((const char *)*s); - - if (s_length < literal_length) - return false; - - const unsigned char next_char = *(*s + literal_length); - if (strncmp ((const char*) *s, literal, literal_length) != 0) - { - return false; - } - /* Additionally check that we're at the end of a token. */ - if (! end_check (next_char)) - { - return false; - } - *s += literal_length; - return true; -} - -static bool isIdentChar (int c) -{ - return (isalnum (c) || c == '_'); -} - -static bool notIdentChar (int c) -{ - return ! isIdentChar (c); -} - -static bool notOperatorChar (int c) -{ - return ! (c == '[' || c == ']' || - c == '=' || c == '!' || c == '~' || - c == '+' || c == '-' || - c == '@' || c == '*' || c == '/' || c == '%' || - c == '<' || c == '>' || - c == '&' || c == '^' || c == '|'); -} - -static bool isWhitespace (int c) -{ - return c == 0 || isspace (c); -} - -static bool canMatchKeyword (const unsigned char** s, const char* literal) -{ - return canMatch (s, literal, notIdentChar); -} - -/* -* Attempts to advance 'cp' past a Ruby operator method name. Returns -* true if successful (and copies the name into 'name'), false otherwise. -*/ -static bool parseRubyOperator (vString* name, const unsigned char** cp) -{ - static const char* RUBY_OPERATORS[] = { - "[]", "[]=", - "**", - "!", "~", "+@", "-@", - "*", "/", "%", - "+", "-", - ">>", "<<", - "&", - "^", "|", - "<=", "<", ">", ">=", - "<=>", "==", "===", "!=", "=~", "!~", - "`", - NULL - }; - int i; - for (i = 0; RUBY_OPERATORS[i] != NULL; ++i) - { - if (canMatch (cp, RUBY_OPERATORS[i], notOperatorChar)) - { - vStringCatS (name, RUBY_OPERATORS[i]); - return true; - } - } - return false; -} - -/* -* Emits a tag for the given 'name' of kind 'kind' at the current nesting. 
-*/ -static void emitRubyTag (vString* name, rubyKind kind) -{ - tagEntryInfo tag; - vString* scope; - tagEntryInfo *parent; - rubyKind parent_kind = K_UNDEFINED; - NestingLevel *lvl; - const char *unqualified_name; - const char *qualified_name; - int r; - - if (!RubyKinds[kind].enabled) { - return; - } - - scope = nestingLevelsToScope (nesting); - lvl = nestingLevelsGetCurrent (nesting); - parent = getEntryOfNestingLevel (lvl); - if (parent) - parent_kind = parent->kindIndex; - - qualified_name = vStringValue (name); - unqualified_name = strrchr (qualified_name, SCOPE_SEPARATOR); - if (unqualified_name && unqualified_name[1]) - { - if (unqualified_name > qualified_name) - { - if (vStringLength (scope) > 0) - vStringPut (scope, SCOPE_SEPARATOR); - vStringNCatS (scope, qualified_name, - unqualified_name - qualified_name); - /* assume module parent type for a lack of a better option */ - parent_kind = K_MODULE; - } - unqualified_name++; - } - else - unqualified_name = qualified_name; - - initTagEntry (&tag, unqualified_name, kind); - if (vStringLength (scope) > 0) { - Assert (0 <= parent_kind && - (size_t) parent_kind < (ARRAY_SIZE (RubyKinds))); - - tag.extensionFields.scopeKindIndex = parent_kind; - tag.extensionFields.scopeName = vStringValue (scope); - } - r = makeTagEntry (&tag); - - nestingLevelsPush (nesting, r); - - vStringClear (name); - vStringDelete (scope); -} - -/* Tests whether 'ch' is a character in 'list'. */ -static bool charIsIn (char ch, const char* list) -{ - return (strchr (list, ch) != NULL); -} - -/* Advances 'cp' over leading whitespace. */ -static void skipWhitespace (const unsigned char** cp) -{ - while (isspace (**cp)) - { - ++*cp; - } -} - -/* -* Copies the characters forming an identifier from *cp into -* name, leaving *cp pointing to the character after the identifier. -*/ -static rubyKind parseIdentifier ( - const unsigned char** cp, vString* name, rubyKind kind) -{ - /* Method names are slightly different to class and variable names. 
- * A method name may optionally end with a question mark, exclamation - * point or equals sign. These are all part of the name. - * A method name may also contain a period if it's a singleton method. - */ - bool had_sep = false; - const char* also_ok; - if (kind == K_METHOD) - { - also_ok = ".?!="; - } - else if (kind == K_SINGLETON) - { - also_ok = "?!="; - } - else - { - also_ok = ""; - } - - skipWhitespace (cp); - - /* Check for an anonymous (singleton) class such as "class << HTTP". */ - if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<') - { - return K_UNDEFINED; - } - - /* Check for operators such as "def []=(key, val)". */ - if (kind == K_METHOD || kind == K_SINGLETON) - { - if (parseRubyOperator (name, cp)) - { - return kind; - } - } - - /* Copy the identifier into 'name'. */ - while (**cp != 0 && (**cp == ':' || isIdentChar (**cp) || charIsIn (**cp, also_ok))) - { - char last_char = **cp; - - if (last_char == ':') - had_sep = true; - else - { - if (had_sep) - { - vStringPut (name, SCOPE_SEPARATOR); - had_sep = false; - } - vStringPut (name, last_char); - } - ++*cp; - - if (kind == K_METHOD) - { - /* Recognize singleton methods. */ - if (last_char == '.') - { - vStringClear (name); - return parseIdentifier (cp, name, K_SINGLETON); - } - } - - if (kind == K_METHOD || kind == K_SINGLETON) - { - /* Recognize characters which mark the end of a method name. */ - if (charIsIn (last_char, "?!=")) - { - break; - } - } - } - return kind; -} - -static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind) -{ - if (isspace (**cp)) - { - vString *name = vStringNew (); - rubyKind actual_kind = parseIdentifier (cp, name, expected_kind); - - if (actual_kind == K_UNDEFINED || vStringLength (name) == 0) - { - /* - * What kind of tags should we create for code like this? 
- * - * %w(self.clfloor clfloor).each do |name| - * module_eval <<-"end;" - * def #{name}(x, y=1) - * q, r = x.divmod(y) - * q = q.to_i - * return q, r - * end - * end; - * end - * - * Or this? - * - * class << HTTP - * - * For now, we don't create any. - */ - enterUnnamedScope (); - } - else - { - emitRubyTag (name, actual_kind); - } - vStringDelete (name); - } -} - -static void enterUnnamedScope (void) -{ - int r = CORK_NIL; - NestingLevel *parent = nestingLevelsGetCurrent (nesting); - tagEntryInfo *e_parent = getEntryOfNestingLevel (parent); - - if (e_parent) - { - tagEntryInfo e; - initTagEntry (&e, "", e_parent->kindIndex); - e.placeholder = 1; - r = makeTagEntry (&e); - } - nestingLevelsPush (nesting, r); -} - -static void findRubyTags (void) -{ - const unsigned char *line; - bool inMultiLineComment = false; - - nesting = nestingLevelsNew (0); - - /* FIXME: this whole scheme is wrong, because Ruby isn't line-based. - * You could perfectly well write: - * - * def - * method - * puts("hello") - * end - * - * if you wished, and this function would fail to recognize anything. 
- */ - while ((line = readLineFromInputFile ()) != NULL) - { - const unsigned char *cp = line; - /* if we expect a separator after a while, for, or until statement - * separators are "do", ";" or newline */ - bool expect_separator = false; - - if (canMatch (&cp, "=begin", isWhitespace)) - { - inMultiLineComment = true; - continue; - } - if (canMatch (&cp, "=end", isWhitespace)) - { - inMultiLineComment = false; - continue; - } - if (inMultiLineComment) - continue; - - skipWhitespace (&cp); - - /* Avoid mistakenly starting a scope for modifiers such as - * - * return if - * - * FIXME: this is fooled by code such as - * - * result = if - * - * else - * - * end - * - * FIXME: we're also fooled if someone does something heinous such as - * - * puts("hello") \ - * unless - */ - if (canMatchKeyword (&cp, "for") || - canMatchKeyword (&cp, "until") || - canMatchKeyword (&cp, "while")) - { - expect_separator = true; - enterUnnamedScope (); - } - else if (canMatchKeyword (&cp, "case") || - canMatchKeyword (&cp, "if") || - canMatchKeyword (&cp, "unless")) - { - enterUnnamedScope (); - } - - /* - * "module M", "class C" and "def m" should only be at the beginning - * of a line. - */ - if (canMatchKeyword (&cp, "module")) - { - readAndEmitTag (&cp, K_MODULE); - } - else if (canMatchKeyword (&cp, "class")) - { - readAndEmitTag (&cp, K_CLASS); - } - else if (canMatchKeyword (&cp, "def")) - { - rubyKind kind = K_METHOD; - NestingLevel *nl = nestingLevelsGetCurrent (nesting); - tagEntryInfo *e = getEntryOfNestingLevel (nl); - - /* if the def is inside an unnamed scope at the class level, assume - * it's from a singleton from a construct like this: - * - * class C - * class << self - * def singleton - * ... - * end - * end - * end - */ - if (e && e->kindIndex == K_CLASS && strlen (e->name) == 0) - kind = K_SINGLETON; - readAndEmitTag (&cp, kind); - } - while (*cp != '\0') - { - /* FIXME: we don't cope with here documents, - * or regular expression literals, or ... you get the idea. 
- * Hopefully, the restriction above that insists on seeing - * definitions at the starts of lines should keep us out of - * mischief. - */ - if (inMultiLineComment || isspace (*cp)) - { - ++cp; - } - else if (*cp == '#') - { - /* FIXME: this is wrong, but there *probably* won't be a - * definition after an interpolated string (where # doesn't - * mean 'comment'). - */ - break; - } - else if (canMatchKeyword (&cp, "begin")) - { - enterUnnamedScope (); - } - else if (canMatchKeyword (&cp, "do")) - { - if (! expect_separator) - enterUnnamedScope (); - else - expect_separator = false; - } - else if (canMatchKeyword (&cp, "end") && nesting->n > 0) - { - /* Leave the most recent scope. */ - nestingLevelsPop (nesting); - } - else if (*cp == '"') - { - /* Skip string literals. - * FIXME: should cope with escapes and interpolation. - */ - do { - ++cp; - } while (*cp != 0 && *cp != '"'); - if (*cp == '"') - cp++; /* skip the last found '"' */ - } - else if (*cp == ';') - { - ++cp; - expect_separator = false; - } - else if (*cp != '\0') - { - do - ++cp; - while (isIdentChar (*cp)); - } - } - } - nestingLevelsFree (nesting); -} - -extern parserDefinition* RubyParser (void) -{ - static const char *const extensions [] = { "rb", "ruby", NULL }; - parserDefinition* def = parserNew ("Ruby"); - def->kindTable = RubyKinds; - def->kindCount = ARRAY_SIZE (RubyKinds); - def->extensions = extensions; - def->parser = findRubyTags; - def->useCork = CORK_QUEUE; - return def; -} diff --git a/ctags/parsers/go.c b/ctags/parsers/go.c new file mode 100644 index 0000000000..d642d24b44 --- /dev/null +++ b/ctags/parsers/go.c @@ -0,0 +1,1416 @@ +/* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+*
+*   Reference:
+*       https://golang.org/ref/spec
+*/
+
+
+/*
+ *   INCLUDE FILES
+ */
+#include "general.h"        /* must always come first */
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "read.h"
+#include "numarray.h"
+#include "objpool.h"
+#include "parse.h"
+#include "routines.h"
+#include "vstring.h"
+#include "xtag.h"
+#include "field.h"
+#include "htable.h"
+
+#include <ctype.h>          /* isalpha, isdigit (identifier macros below) */
+#include <string.h>         /* strcmp */
+
+/*
+ *	 MACROS
+ */
+#define MAX_COLLECTOR_LENGTH 512
+#define isType(token,t) (bool) ((token)->type == (t))
+#define isKeyword(token,k) (bool) ((token)->keyword == (k))
+/* Every byte >= 0x80 is part of a UTF-8 multi-byte sequence.  All of them
+ * are accepted as identifier characters, including 0x80 itself, so that a
+ * non-ASCII identifier is not cut at a 0x80 continuation byte.
+ * Note: the argument is evaluated more than once. */
+#define isStartIdentChar(c) (isalpha (c) ||  (c) == '_' || (c) >= 128)
+#define isIdentChar(c) (isStartIdentChar (c) || isdigit (c))
+#define newToken() (objPoolGet (TokenPool))
+#define deleteToken(t) (objPoolPut (TokenPool, (t)))
+
+/*
+ *	 DATA DECLARATIONS
+ */
+
+enum eKeywordId {
+	KEYWORD_package,
+	KEYWORD_import,
+	KEYWORD_const,
+	KEYWORD_type,
+	KEYWORD_var,
+	KEYWORD_func,
+	KEYWORD_struct,
+	KEYWORD_interface,
+	KEYWORD_map,
+	KEYWORD_chan
+};
+typedef int keywordId; /* to allow KEYWORD_NONE */
+
+typedef enum eTokenType {
+	TOKEN_NONE = -1,
+	// Token not important for top-level Go parsing
+	TOKEN_OTHER,
+	TOKEN_KEYWORD,
+	TOKEN_IDENTIFIER,
+	TOKEN_STRING,
+	TOKEN_OPEN_PAREN,
+	TOKEN_CLOSE_PAREN,
+	TOKEN_OPEN_CURLY,
+	TOKEN_CLOSE_CURLY,
+	TOKEN_OPEN_SQUARE,
+	TOKEN_CLOSE_SQUARE,
+	TOKEN_SEMICOLON,
+	TOKEN_STAR,
+	TOKEN_LEFT_ARROW,
+	TOKEN_DOT,
+	TOKEN_COMMA,
+	TOKEN_EQUAL,
+	TOKEN_3DOTS,
+	TOKEN_EOF
+} tokenType;
+
+typedef struct sTokenInfo {
+	tokenType type;
+	keywordId keyword;
+	vString *string;		/* the name of the token */
+	unsigned long lineNumber;	/* line number of tag */
+	MIOPos filePosition;		/* file position of line containing name */
+	int c;				/* Used in AppendTokenToVString */
+} tokenInfo;
+
+typedef struct sCollector {
+	vString *str;
+	size_t last_len;
+} collector;
+
+/*
+*   DATA DEFINITIONS
+*/
+
+static int Lang_go;
+static objPool *TokenPool = NULL;
+
+typedef
enum { + GOTAG_UNDEFINED = -1, + GOTAG_PACKAGE, + GOTAG_FUNCTION, + GOTAG_CONST, + GOTAG_TYPE, + GOTAG_VAR, + GOTAG_STRUCT, + GOTAG_INTERFACE, + GOTAG_MEMBER, + GOTAG_ANONMEMBER, + GOTAG_METHODSPEC, + GOTAG_UNKNOWN, + GOTAG_PACKAGE_NAME, + GOTAG_TALIAS, + GOTAG_RECEIVER, +} goKind; + +typedef enum { + R_GOTAG_PACKAGE_IMPORTED, +} GoPackageRole; + +static roleDefinition GoPackageRoles [] = { + { true, "imported", "imported package" }, +}; + +typedef enum { + R_GOTAG_UNKNOWN_RECEIVER, +} GoUnknownRole; + +static roleDefinition GoUnknownRoles [] = { + { true, "receiverType", "receiver type" }, +}; + +static kindDefinition GoKinds[] = { + {true, 'p', "package", "packages", + .referenceOnly = false, ATTACH_ROLES (GoPackageRoles)}, + {true, 'f', "func", "functions"}, + {true, 'c', "const", "constants"}, + {true, 't', "type", "types"}, + {true, 'v', "var", "variables"}, + {true, 's', "struct", "structs"}, + {true, 'i', "interface", "interfaces"}, + {true, 'm', "member", "struct members"}, + {true, 'M', "anonMember", "struct anonymous members"}, + {true, 'n', "methodSpec", "interface method specification"}, + {true, 'u', "unknown", "unknown", + .referenceOnly = true, ATTACH_ROLES (GoUnknownRoles)}, + {true, 'P', "packageName", "name for specifying imported package"}, + {true, 'a', "talias", "type aliases"}, + {false,'R', "receiver", "receivers"}, +}; + +static const keywordTable GoKeywordTable[] = { + {"package", KEYWORD_package}, + {"import", KEYWORD_import}, + {"const", KEYWORD_const}, + {"type", KEYWORD_type}, + {"var", KEYWORD_var}, + {"func", KEYWORD_func}, + {"struct", KEYWORD_struct}, + {"interface", KEYWORD_interface}, + {"map", KEYWORD_map}, + {"chan", KEYWORD_chan} +}; + +typedef enum { + F_PACKAGE, + F_PACKAGE_NAME, + F_HOW_IMPORTED, +} goField; + +static fieldDefinition GoFields [] = { + { + .name = "package", + .description = "the real package specified by the package name", + .enabled = true, + }, + { + .name = "packageName", + .description = "the name for 
referring the package", + .enabled = true, + }, + { + .name = "howImported", + .description = "how the package is imported (\"inline\" for `.' or \"init\" for `_')", + .enabled = false, + }, +}; + + +/* +* FUNCTION DEFINITIONS +*/ + +static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + token->string = vStringNew (); + return token; +} + +static void clearPoolToken (void *data) +{ + tokenInfo *token = data; + + token->type = TOKEN_NONE; + token->keyword = KEYWORD_NONE; + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + vStringClear (token->string); +} + +static void copyToken (tokenInfo *const dest, const tokenInfo *const other) +{ + dest->type = other->type; + dest->keyword = other->keyword; + vStringCopy(dest->string, other->string); + dest->lineNumber = other->lineNumber; + dest->filePosition = other->filePosition; +} + +static void deletePoolToken (void* data) +{ + tokenInfo * const token = data; + + vStringDelete (token->string); + eFree (token); +} + +static void initialize (const langType language) +{ + Lang_go = language; + TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL); +} + +static void finalize (const langType language, bool initialized) +{ + if (!initialized) + return; + + objPoolDelete (TokenPool); +} + +/* + * Parsing functions + */ + +static void parseString (vString *const string, const int delimiter) +{ + bool end = false; + while (!end) + { + int c = getcFromInputFile (); + if (c == EOF) + end = true; + else if (c == '\\' && delimiter != '`') + { + c = getcFromInputFile (); + if (c != '\'' && c != '\"') + vStringPut (string, '\\'); + vStringPut (string, c); + } + else if (c == delimiter) + end = true; + else + vStringPut (string, c); + } +} + +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + do + { + vStringPut (string, c); + c = getcFromInputFile (); + } 
while (isIdentChar (c)); + ungetcToInputFile (c); /* always unget, LF might add a semicolon */ +} + +static bool collectorIsEmpty(collector *collector) +{ + return !vStringLength(collector->str); +} + +static void collectorPut (collector *collector, char c) +{ + if ((vStringLength(collector->str) > 2) + && strcmp (vStringValue (collector->str) + (vStringLength(collector->str) - 3), + "...") == 0 + && c == ' ') + return; + else if (vStringLength(collector->str) > 0) + { + if (vStringLast(collector->str) == '(' && c == ' ') + return; + else if (vStringLast(collector->str) == ' ' && c == ')') + vStringChop(collector->str); + } + + collector->last_len = vStringLength (collector->str); + vStringPut (collector->str, c); +} + +static void collectorCatS (collector *collector, char *cstr) +{ + collector->last_len = vStringLength (collector->str); + vStringCatS (collector->str, cstr); +} + +static void collectorCat (collector *collector, vString *str) +{ + collector->last_len = vStringLength (collector->str); + vStringCat (collector->str, str); +} + +static void collectorAppendToken (collector *collector, const tokenInfo *const token) +{ + if (token->type == TOKEN_LEFT_ARROW) + collectorCatS (collector, "<-"); + else if (token->type == TOKEN_STRING) + { + // only struct member annotations can appear in function prototypes + // so only `` type strings are possible + collector->last_len = vStringLength (collector->str); + vStringPut(collector->str, '`'); + vStringCat(collector->str, token->string); + vStringPut(collector->str, '`'); + } + else if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_KEYWORD) + collectorCat (collector, token->string); + else if (token->type == TOKEN_3DOTS) + { + if ((vStringLength (collector->str) > 0) + && vStringLast(collector->str) != ' ') + collectorPut (collector, ' '); + collectorCatS (collector, "..."); + } + else if (token->c != EOF) + collectorPut (collector, token->c); +} + +static void collectorTruncate (collector *collector, bool 
dropLast) +{ + if (dropLast) + vStringTruncate (collector->str, collector->last_len); + + vStringStripLeading (collector->str); + vStringStripTrailing (collector->str); +} + +static void readTokenFull (tokenInfo *const token, collector *collector) +{ + int c; + static tokenType lastTokenType = TOKEN_NONE; + bool firstWhitespace = true; + bool whitespace; + + token->c = EOF; + token->type = TOKEN_NONE; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + do + { + c = getcFromInputFile (); + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER || + lastTokenType == TOKEN_STRING || + lastTokenType == TOKEN_OTHER || + lastTokenType == TOKEN_CLOSE_PAREN || + lastTokenType == TOKEN_CLOSE_CURLY || + lastTokenType == TOKEN_CLOSE_SQUARE)) + { + c = ';'; // semicolon injection + } + whitespace = c == '\t' || c == ' ' || c == '\r' || c == '\n'; + if (collector && whitespace && firstWhitespace && vStringLength (collector->str) < MAX_COLLECTOR_LENGTH) + { + firstWhitespace = false; + collectorPut (collector, ' '); + } + } + while (whitespace); + + switch (c) + { + case EOF: + token->type = TOKEN_EOF; + break; + + case ';': + token->type = TOKEN_SEMICOLON; + break; + + case '/': + { + bool hasNewline = false; + int d = getcFromInputFile (); + switch (d) + { + case '/': + skipToCharacterInInputFile ('\n'); + /* Line comments start with the + * character sequence // and + * continue through the next + * newline. A line comment acts + * like a newline. */ + ungetcToInputFile ('\n'); + goto getNextChar; + case '*': + do + { + do + { + d = getcFromInputFile (); + if (d == '\n') + { + hasNewline = true; + } + } while (d != EOF && d != '*'); + + c = getcFromInputFile (); + if (c == '/') + break; + else + ungetcToInputFile (c); + } while (c != EOF && c != '\0'); + + ungetcToInputFile (hasNewline ? 
'\n' : ' '); + goto getNextChar; + default: + token->type = TOKEN_OTHER; + ungetcToInputFile (d); + break; + } + } + break; + + case '"': + case '\'': + case '`': + token->type = TOKEN_STRING; + parseString (token->string, c); + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '<': + { + int d = getcFromInputFile (); + if (d == '-') + token->type = TOKEN_LEFT_ARROW; + else + { + ungetcToInputFile (d); + token->type = TOKEN_OTHER; + } + } + break; + + case '(': + token->type = TOKEN_OPEN_PAREN; + break; + + case ')': + token->type = TOKEN_CLOSE_PAREN; + break; + + case '{': + token->type = TOKEN_OPEN_CURLY; + break; + + case '}': + token->type = TOKEN_CLOSE_CURLY; + break; + + case '[': + token->type = TOKEN_OPEN_SQUARE; + break; + + case ']': + token->type = TOKEN_CLOSE_SQUARE; + break; + + case '*': + token->type = TOKEN_STAR; + break; + + case '.': + { + int d, e; + d = getcFromInputFile (); + if (d == '.') + { + e = getcFromInputFile (); + if (e == '.') + { + token->type = TOKEN_3DOTS; + break; + } + else + { + ungetcToInputFile (e); + ungetcToInputFile (d); + } + } + else + ungetcToInputFile (d); + } + token->type = TOKEN_DOT; + break; + + case ',': + token->type = TOKEN_COMMA; + break; + + case '=': + token->type = TOKEN_EQUAL; + break; + + default: + if (isStartIdentChar (c)) + { + parseIdentifier (token->string, c); + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + token->keyword = lookupKeyword (vStringValue (token->string), Lang_go); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + else + token->type = TOKEN_OTHER; + break; + } + + token->c = c; + + if (collector && vStringLength (collector->str) < MAX_COLLECTOR_LENGTH) + collectorAppendToken (collector, token); + + lastTokenType = token->type; +} + +static void readToken (tokenInfo *const token) +{ + readTokenFull (token, NULL); +} + 
+static bool skipToMatchedNoRead (tokenInfo *const token, collector *collector) +{ + int nest_level = 0; + tokenType open_token = token->type; + tokenType close_token; + + switch (open_token) + { + case TOKEN_OPEN_PAREN: + close_token = TOKEN_CLOSE_PAREN; + break; + case TOKEN_OPEN_CURLY: + close_token = TOKEN_CLOSE_CURLY; + break; + case TOKEN_OPEN_SQUARE: + close_token = TOKEN_CLOSE_SQUARE; + break; + default: + return false; + } + + /* + * This routine will skip to a matching closing token. + * It will also handle nested tokens. + */ + nest_level++; + while (nest_level > 0 && !isType (token, TOKEN_EOF)) + { + readTokenFull (token, collector); + if (isType (token, open_token)) + nest_level++; + else if (isType (token, close_token)) + nest_level--; + } + + return true; +} + +static void skipToMatched (tokenInfo *const token, collector *collector) +{ + if (skipToMatchedNoRead (token, collector)) + readTokenFull (token, collector); +} + +static bool skipType (tokenInfo *const token, collector *collector) +{ + // Type = TypeName | TypeLit | "(" Type ")" . + // Skips also function multiple return values "(" Type {"," Type} ")" + if (isType (token, TOKEN_OPEN_PAREN)) + { + skipToMatched (token, collector); + return true; + } + + // TypeName = QualifiedIdent. + // QualifiedIdent = [ PackageName "." ] identifier . + // PackageName = identifier . + if (isType (token, TOKEN_IDENTIFIER)) + { + readTokenFull (token, collector); + if (isType (token, TOKEN_DOT)) + { + readTokenFull (token, collector); + if (isType (token, TOKEN_IDENTIFIER)) + readTokenFull (token, collector); + } + return true; + } + + // StructType = "struct" "{" { FieldDecl ";" } "}" + // InterfaceType = "interface" "{" { MethodSpec ";" } "}" . + if (isKeyword (token, KEYWORD_struct) || isKeyword (token, KEYWORD_interface)) + { + readTokenFull (token, collector); + // skip over "{}" + skipToMatched (token, collector); + return true; + } + + // ArrayType = "[" ArrayLength "]" ElementType . 
+ // SliceType = "[" "]" ElementType . + // ElementType = Type . + if (isType (token, TOKEN_OPEN_SQUARE)) + { + skipToMatched (token, collector); + return skipType (token, collector); + } + + // PointerType = "*" BaseType . + // BaseType = Type . + // ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType . + if (isType (token, TOKEN_STAR) || isKeyword (token, KEYWORD_chan) || isType (token, TOKEN_LEFT_ARROW)) + { + readTokenFull (token, collector); + return skipType (token, collector); + } + + // MapType = "map" "[" KeyType "]" ElementType . + // KeyType = Type . + if (isKeyword (token, KEYWORD_map)) + { + readTokenFull (token, collector); + // skip over "[]" + skipToMatched (token, collector); + return skipType (token, collector); + } + + // FunctionType = "func" Signature . + // Signature = Parameters [ Result ] . + // Result = Parameters | Type . + // Parameters = "(" [ ParameterList [ "," ] ] ")" . + if (isKeyword (token, KEYWORD_func)) + { + readTokenFull (token, collector); + // Parameters, skip over "()" + skipToMatched (token, collector); + // Result is parameters or type or nothing. skipType treats anything + // surrounded by parentheses as a type, and does nothing if what + // follows is not a type. 
+ return skipType (token, collector); + } + + return false; +} + +static int makeTagFull (tokenInfo *const token, const goKind kind, + const int scope, const char *argList, const char *typeref, + const int role) +{ + const char *const name = vStringValue (token->string); + + tagEntryInfo e; + + /* Don't record `_' placeholder variable */ + if (kind == GOTAG_VAR && name[0] == '_' && name[1] == '\0') + return CORK_NIL; + + initRefTagEntry (&e, name, kind, role); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + if (argList) + e.extensionFields.signature = argList; + if (typeref) + { + /* Follows Cxx parser convention */ + e.extensionFields.typeRef [0] = "typename"; + e.extensionFields.typeRef [1] = typeref; + } + + e.extensionFields.scopeIndex = scope; + return makeTagEntry (&e); +} + +static int makeTag (tokenInfo *const token, const goKind kind, + const int scope, const char *argList, const char *typeref) +{ + return makeTagFull (token, kind, scope, argList, typeref, + ROLE_DEFINITION_INDEX); +} + +static int makeRefTag (tokenInfo *const token, const goKind kind, + const int role) +{ + return makeTagFull (token, kind, CORK_NIL, NULL, NULL, role); +} + +static int parsePackage (tokenInfo *const token) +{ + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + { + return makeTag (token, GOTAG_PACKAGE, CORK_NIL, NULL, NULL); + } + else + return CORK_NIL; +} + +static tokenInfo * parseReceiver (tokenInfo *const token, int *corkIndex) +{ + tokenInfo *receiver_type_token = NULL; + int nest_level = 1; + + *corkIndex = CORK_NIL; + + /* Looking for an identifier before ')'. 
*/ + while (nest_level > 0 && !isType (token, TOKEN_EOF)) + { + if (isType (token, TOKEN_IDENTIFIER)) + { + if (*corkIndex == CORK_NIL) + *corkIndex = makeTag (token, GOTAG_RECEIVER, CORK_NIL, NULL, NULL); + if (!receiver_type_token) + receiver_type_token = newToken (); + copyToken (receiver_type_token, token); + } + + readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + nest_level++; + else if (isType (token, TOKEN_CLOSE_PAREN)) + nest_level--; + } + + if (nest_level > 0 && receiver_type_token) + { + deleteToken (receiver_type_token); + receiver_type_token = NULL; + } + + if (receiver_type_token) + { + tagEntryInfo *e = getEntryInCorkQueue (*corkIndex); + if (e) + { + e->extensionFields.typeRef [0] = eStrdup ("typename"); + e->extensionFields.typeRef [1] = vStringStrdup (receiver_type_token->string); + } + } + readToken (token); + return receiver_type_token; +} + +static void parseFunctionOrMethod (tokenInfo *const token, const int scope) +{ + int receiver_cork = CORK_NIL; + tokenInfo *receiver_type_token = NULL; + + // FunctionDecl = "func" identifier Signature [ Body ] . + // Body = Block. + // + // MethodDecl = "func" Receiver MethodName Signature [ Body ] . + // Receiver = "(" [ identifier ] [ "*" ] BaseTypeName ")" . + // BaseTypeName = identifier . + + // Pick up receiver type. + readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + receiver_type_token = parseReceiver (token, &receiver_cork); + + if (isType (token, TOKEN_IDENTIFIER)) + { + int cork; + tagEntryInfo *e = NULL; + tokenInfo *functionToken = newToken (); + int func_scope; + + copyToken (functionToken, token); + + // Start recording signature + vString *buffer = vStringNew (); + collector collector = { .str = buffer, .last_len = 0, }; + + // Skip over parameters. 
+ readTokenFull (token, &collector); + skipToMatchedNoRead (token, &collector); + + collectorTruncate (&collector, false); + if (receiver_type_token) + { + func_scope = anyEntryInScope (scope, vStringValue (receiver_type_token->string)); + if (func_scope == CORK_NIL) + func_scope = makeTagFull(receiver_type_token, GOTAG_UNKNOWN, + scope, NULL, NULL, + R_GOTAG_UNKNOWN_RECEIVER); + } + else + func_scope = scope; + + cork = makeTag (functionToken, GOTAG_FUNCTION, + func_scope, vStringValue (buffer), NULL); + if ((e = getEntryInCorkQueue (cork))) + { + tagEntryInfo *receiver = getEntryInCorkQueue (receiver_cork); + if (receiver) + receiver->extensionFields.scopeIndex = cork; + } + + deleteToken (functionToken); + + vStringClear (collector.str); + collector.last_len = 0; + + readTokenFull (token, &collector); + + // Skip over result. + skipType (token, &collector); + + // Neither "{" nor " {". + if (!(isType (token, TOKEN_OPEN_CURLY) && collector.last_len < 2)) + { + collectorTruncate(&collector, isType (token, TOKEN_OPEN_CURLY)); + if (e) + { + e->extensionFields.typeRef [0] = eStrdup ("typename"); + e->extensionFields.typeRef [1] = vStringDeleteUnwrap (buffer); + buffer = NULL; + } + } + + if (buffer) + vStringDelete (buffer); + + // Skip over function body. 
+		if (isType (token, TOKEN_OPEN_CURLY))
+		{
+			skipToMatched (token, NULL);
+			if (e)
+				e->extensionFields.endLine = getInputLineNumber ();
+		}
+	}
+
+	if (receiver_type_token)
+		deleteToken(receiver_type_token);
+}
+
+/*
+ * Fills the typeref fields of every cork entry listed in `corks' with the
+ * type text `type'.  The first typeref component is the kind name of the
+ * entry that anyEntryInScope () returns for `type' in `scope', or
+ * "typename" when no such entry is available.  Indices in `corks' that do
+ * not resolve to an entry are skipped.
+ */
+static void attachTypeRefField (int scope, intArray *corks, const char *const type)
+{
+	int type_cork = anyEntryInScope (scope, type);
+	tagEntryInfo *type_e = getEntryInCorkQueue (type_cork);
+
+	for (unsigned int i = 0; i < intArrayCount (corks); i++)
+	{
+		int cork = intArrayItem (corks, i);
+		tagEntryInfo *e = getEntryInCorkQueue (cork);
+		if (!e)
+			continue;
+		e->extensionFields.typeRef [0] = eStrdup (type_e
+							  ?GoKinds[type_e->kindIndex].name
+							  :"typename");
+		e->extensionFields.typeRef [1] = eStrdup (type);
+	}
+}
+
+/*
+ * Parses the body of an interface type.
+ *
+ * On entry `token' is the `interface' keyword; the next token must be "{",
+ * otherwise the function returns without emitting anything.  For each
+ * "Name (...)" entry a methodSpec tag scoped to `scope' is made, with the
+ * parameter list as signature and the result (if any) as typeref.  Names of
+ * embedded interfaces ("Name" or "pkg.Name") are collected, separated by
+ * commas, and stored in the inheritance field of the `scope' entry.
+ * On return `token' is the closing "}" or EOF.
+ */
+static void parseInterfaceMethods (tokenInfo *const token, const int scope)
+{
+	// InterfaceType      = "interface" "{" { MethodSpec ";" } "}" .
+	// MethodSpec         = MethodName Signature | InterfaceTypeName .
+	// MethodName         = identifier .
+	// InterfaceTypeName  = TypeName .
+
+	readToken (token);
+	if (!isType (token, TOKEN_OPEN_CURLY))
+		return;
+
+	// Allocate only after the "{" check so that the early return above
+	// has nothing to release.
+	vString *inheritsBuf = vStringNew ();
+	collector inherits = { .str = inheritsBuf, .last_len = 0, };
+
+	readToken (token);
+	while (!isType (token, TOKEN_EOF) && !isType (token, TOKEN_CLOSE_CURLY))
+	{
+		if (isType (token, TOKEN_IDENTIFIER))
+		{
+			tokenInfo * headToken = newToken();
+			copyToken (headToken, token);
+
+			readToken (token);
+			if(isType (token, TOKEN_DOT))
+			{
+				// Embedded interface given as a qualified name: pkg.Name
+				if (!collectorIsEmpty(&inherits))
+					collectorPut (&inherits, ',');
+				collectorAppendToken (&inherits, headToken);
+				readTokenFull (token, NULL);
+				if (isType (token, TOKEN_IDENTIFIER))
+				{
+					collectorPut (&inherits, '.');
+					collectorAppendToken (&inherits, token);
+					readToken (token);
+				}
+				/* If the token is not an identifier, the input
+				   may be wrong. */
+			}
+			else if (isType (token, TOKEN_SEMICOLON))
+			{
+				// Embedded interface given as a plain name
+				if (!collectorIsEmpty(&inherits))
+					collectorPut (&inherits, ',');
+				collectorAppendToken (&inherits, headToken);
+				readToken (token);
+			}
+			else if (isType (token, TOKEN_OPEN_PAREN))
+			{
+				// => Signature
+				// Signature = Parameters [ Result ] .
+				vString *pbuf = vStringNew ();
+				collector pcol = { .str = pbuf, .last_len = 0, };
+				vString *rbuf = NULL;
+				collector rcol = { .str = NULL, .last_len = 0, };
+
+				// Parameters
+				collectorPut (&pcol, '(');
+				skipToMatched (token, &pcol);
+				collectorTruncate(&pcol, true);
+
+				if (!isType (token, TOKEN_SEMICOLON))
+				{
+					// Result
+					rbuf = vStringNew ();
+					rcol.str = rbuf;
+
+					collectorAppendToken (&rcol, token);
+					skipType (token, &rcol);
+					collectorTruncate(&rcol, true);
+				}
+
+				makeTag (headToken, GOTAG_METHODSPEC, scope,
+					 vStringValue (pbuf),
+					 rbuf? vStringValue(rbuf): NULL);
+
+				if (rbuf)
+					vStringDelete (rbuf);
+				vStringDelete (pbuf);
+			}
+			deleteToken (headToken);
+		}
+		else
+			readToken (token);
+	}
+
+	if (!collectorIsEmpty(&inherits))
+	{
+		tagEntryInfo *e = getEntryInCorkQueue (scope);
+		if (e)
+		{
+			// The tag entry takes over the collected string.
+			e->extensionFields.inheritance = vStringDeleteUnwrap (inheritsBuf);
+			inheritsBuf = NULL;
+		}
+	}
+	vStringDelete (inheritsBuf);
+}
+
+static void parseStructMembers (tokenInfo *const token, const int scope)
+{
+	// StructType     = "struct" "{" { FieldDecl ";" } "}" .
+	// FieldDecl      = (IdentifierList Type | AnonymousField) [ Tag ] .
+	// AnonymousField = [ "*" ] TypeName .
+	// Tag            = string_lit .
+ + readToken (token); + if (!isType (token, TOKEN_OPEN_CURLY)) + return; + + vString *typeForAnonMember = vStringNew (); + intArray *corkForFields = intArrayNew (); + + readToken (token); + while (!isType (token, TOKEN_EOF) && !isType (token, TOKEN_CLOSE_CURLY)) + { + tokenInfo *memberCandidate = NULL; + bool first = true; + + while (!isType (token, TOKEN_EOF)) + { + if (isType (token, TOKEN_IDENTIFIER)) + { + if (first) + { + // could be anonymous field like in 'struct {int}' - we don't know yet + memberCandidate = newToken (); + copyToken (memberCandidate, token); + first = false; + } + else + { + int cork; + if (memberCandidate) + { + // if we are here, there was a comma and memberCandidate isn't an anonymous field + cork = makeTag (memberCandidate, GOTAG_MEMBER, scope, NULL, NULL); + deleteToken (memberCandidate); + memberCandidate = NULL; + intArrayAdd (corkForFields, cork); + } + cork = makeTag (token, GOTAG_MEMBER, scope, NULL, NULL); + intArrayAdd (corkForFields, cork); + } + readToken (token); + } + if (!isType (token, TOKEN_COMMA)) + break; + readToken (token); + } + + if (first && isType (token, TOKEN_STAR)) + { + vStringPut (typeForAnonMember, '*'); + readToken (token); + } + else if (memberCandidate && + (isType (token, TOKEN_DOT) || + isType (token, TOKEN_STRING) || + isType (token, TOKEN_SEMICOLON))) + // memberCandidate is part of anonymous type + vStringCat (typeForAnonMember, memberCandidate->string); + + // the above two cases that set typeForAnonMember guarantee + // this is an anonymous member + if (vStringLength (typeForAnonMember) > 0) + { + tokenInfo *anonMember = NULL; + + if (memberCandidate) + { + anonMember = newToken (); + copyToken (anonMember, memberCandidate); + } + + // TypeName of AnonymousField has a dot like package"."type. + // Pick up the last package component, and store it to + // memberCandidate. 
+			while (isType (token, TOKEN_IDENTIFIER) ||
+				   isType (token, TOKEN_DOT))
+			{
+				if (isType (token, TOKEN_IDENTIFIER))
+				{
+					if (!anonMember)
+						anonMember = newToken ();
+					copyToken (anonMember, token);
+					vStringCat (typeForAnonMember, anonMember->string);
+				}
+				else if (isType (token, TOKEN_DOT))
+					vStringPut (typeForAnonMember, '.');
+				readToken (token);
+			}
+
+			// optional tag
+			if (isType (token, TOKEN_STRING))
+				readToken (token);
+
+			if (anonMember)
+			{
+				makeTag (anonMember, GOTAG_ANONMEMBER, scope, NULL,
+						 vStringValue (typeForAnonMember));
+				deleteToken (anonMember);
+			}
+		}
+		else
+		{
+			vString *typeForMember = vStringNew ();
+			collector collector = { .str = typeForMember, .last_len = 0, };
+
+			collectorAppendToken (&collector, token);
+			skipType (token, &collector);
+			collectorTruncate (&collector, true);
+
+			if (memberCandidate)
+				makeTag (memberCandidate, GOTAG_MEMBER, scope, NULL,
+						 vStringValue (typeForMember));
+
+			attachTypeRefField (scope, corkForFields, vStringValue (typeForMember));
+			intArrayClear (corkForFields);
+			vStringDelete (typeForMember);
+		}
+
+		if (memberCandidate)
+			deleteToken (memberCandidate);
+
+		vStringClear (typeForAnonMember);
+
+		while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_CURLY)
+			   && !isType (token, TOKEN_EOF))
+		{
+			readToken (token);
+			skipToMatched (token, NULL);
+		}
+
+		if (!isType (token, TOKEN_CLOSE_CURLY))
+		{
+			// we are at TOKEN_SEMICOLON
+			readToken (token);
+		}
+	}
+
+	intArrayDelete (corkForFields);
+	vStringDelete (typeForAnonMember);
+}
+
+/*
+ * Parses one "const", "type" or "var" declaration, either a single spec or
+ * a parenthesized group of specs.
+ *
+ * `kind' selects the declaration form (GOTAG_CONST, GOTAG_TYPE or
+ * GOTAG_VAR) and is never modified, so every spec of a group is handled
+ * the same way.  Tags are made in `scope'.
+ *
+ * - const/var: one tag per identifier; the type text that follows the
+ *   identifier list, if any, is attached to all of them as typeref.
+ * - type: one tag per spec.  "type T struct" / "type T interface" produce
+ *   a struct / interface tag and their bodies are parsed with that tag as
+ *   scope; "type T = U" produces a talias tag; anything else produces a
+ *   type tag.  For the latter two the type text becomes the typeref.
+ */
+static void parseConstTypeVar (tokenInfo *const token, goKind kind, const int scope)
+{
+	// ConstDecl      = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) .
+	// ConstSpec      = IdentifierList [ [ Type ] "=" ExpressionList ] .
+	// IdentifierList = identifier { "," identifier } .
+	// ExpressionList = Expression { "," Expression } .
+	// TypeDecl     = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) .
+	// TypeSpec     = identifier [ "=" ] Type .
+	// VarDecl     = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) .
+	// VarSpec     = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) .
+	bool usesParens = false;
+	// Cork indices of the identifiers of the current const/var spec;
+	// not used for type declarations.
+	intArray *corks
+		= (kind == GOTAG_VAR || kind == GOTAG_CONST)? intArrayNew (): NULL;
+
+	readToken (token);
+
+	if (isType (token, TOKEN_OPEN_PAREN))
+	{
+		usesParens = true;
+		readToken (token);
+	}
+
+	do
+	{
+		tokenInfo *typeToken = NULL;
+		int member_scope = scope;
+
+		while (!isType (token, TOKEN_EOF))
+		{
+			if (isType (token, TOKEN_IDENTIFIER))
+			{
+				if (kind == GOTAG_TYPE)
+				{
+					// Kind of this TypeSpec only.  Writing the alias
+					// kind back into `kind' would make every later spec
+					// of a "type ( ... )" group skip this branch.
+					goKind specKind = GOTAG_TYPE;
+
+					typeToken = newToken ();
+					copyToken (typeToken, token);
+					readToken (token);
+					if (isType (token, TOKEN_EQUAL))
+					{
+						specKind = GOTAG_TALIAS;
+						readToken (token);
+					}
+
+					if (isKeyword (token, KEYWORD_struct))
+						member_scope = makeTag (typeToken, GOTAG_STRUCT,
+												scope, NULL, NULL);
+					else if (isKeyword (token, KEYWORD_interface))
+						member_scope = makeTag (typeToken, GOTAG_INTERFACE,
+												scope, NULL, NULL);
+					else
+						member_scope = makeTag (typeToken, specKind,
+												scope, NULL, NULL);
+
+					if (member_scope != CORK_NIL)
+						registerEntry (member_scope);
+					break;
+				}
+				else
+				{
+					int c = makeTag (token, kind, scope, NULL, NULL);
+					if (c != CORK_NIL && corks)
+						intArrayAdd (corks, c);
+				}
+				readToken (token);
+			}
+			if (!isType (token, TOKEN_COMMA))
+				break;
+			readToken (token);
+		}
+
+		if (typeToken)
+		{
+			if (isKeyword (token, KEYWORD_struct))
+				parseStructMembers (token, member_scope);
+			else if (isKeyword (token, KEYWORD_interface))
+				parseInterfaceMethods (token, member_scope);
+			else
+			{
+				/* Filling "typeref:" field of typeToken. */
+				vString *buffer = vStringNew ();
+				collector collector = { .str = buffer, .last_len = 0, };
+				tagEntryInfo *e = NULL;
+
+				collectorAppendToken (&collector, token);
+				skipType (token, &collector);
+				collectorTruncate (&collector, true);
+
+				if ((member_scope != CORK_NIL) && !vStringIsEmpty (buffer))
+					e = getEntryInCorkQueue (member_scope);
+
+				if (e)
+				{
+					// The tag entry takes over the collected string.
+					e->extensionFields.typeRef [0] = eStrdup ("typename");
+					e->extensionFields.typeRef [1] = vStringDeleteUnwrap (buffer);
+				}
+				else
+					vStringDelete (buffer);
+			}
+			deleteToken (typeToken);
+		}
+		else if (corks)
+		{
+			vString *buffer = vStringNew ();
+			collector collector = { .str = buffer, .last_len = 0, };
+
+			collectorAppendToken (&collector, token);
+			skipType (token, &collector);
+			collectorTruncate (&collector, true);
+
+			if (!vStringIsEmpty (buffer))
+				attachTypeRefField (scope, corks, vStringValue (buffer));
+			vStringDelete (buffer);
+			intArrayClear (corks);
+		}
+		else
+			skipType (token, NULL);
+
+		// Skip the rest of the spec (e.g. the initializer expressions).
+		while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_PAREN)
+				&& !isType (token, TOKEN_EOF))
+		{
+			readToken (token);
+			skipToMatched (token, NULL);
+		}
+
+		if (member_scope != scope && member_scope != CORK_NIL)
+		{
+			tagEntryInfo *e = getEntryInCorkQueue (member_scope);
+			if (e)
+				e->extensionFields.endLine = getInputLineNumber ();
+		}
+
+		if (usesParens && !isType (token, TOKEN_CLOSE_PAREN))
+		{
+			// we are at TOKEN_SEMICOLON
+			readToken (token);
+		}
+	}
+	while (!isType (token, TOKEN_EOF) &&
+		   usesParens && !isType (token, TOKEN_CLOSE_PAREN));
+
+	intArrayDelete (corks);
+}
+
+static void parseImportSpec (tokenInfo *const token)
+{
+	// ImportSpec       = [ "." | PackageName ] ImportPath .
+	// ImportPath       = string_lit .
+ + int packageName_cork = CORK_NIL; + char *how_imported = NULL; + if (isType (token, TOKEN_IDENTIFIER)) + { + if (strcmp(vStringValue (token->string), "_") == 0) + how_imported = "init"; + else + { + packageName_cork = makeTag (token, GOTAG_PACKAGE_NAME, + CORK_NIL, NULL, NULL); + } + readToken (token); + } + else if (isType (token, TOKEN_DOT)) + { + how_imported = "inline"; + readToken (token); + } + + if (isType (token, TOKEN_STRING)) + { + int package_cork = + makeRefTag (token, GOTAG_PACKAGE, R_GOTAG_PACKAGE_IMPORTED); + + if (package_cork != CORK_NIL && how_imported) + attachParserFieldToCorkEntry (package_cork, + GoFields [F_HOW_IMPORTED].ftype, + how_imported); + + if (packageName_cork != CORK_NIL) + { + attachParserFieldToCorkEntry (packageName_cork, + GoFields [F_PACKAGE].ftype, + vStringValue (token->string)); + if (package_cork != CORK_NIL) + { + tagEntryInfo *e = getEntryInCorkQueue (packageName_cork); + if (e) + attachParserFieldToCorkEntry (package_cork, + GoFields [F_PACKAGE_NAME].ftype, + e->name); + } + } + } +} + +static void parseImport (tokenInfo *const token) +{ + // ImportDecl = "import" ( ImportSpec | "(" { ImportSpec ";" } ")" ) . 
+ + readToken (token); + if (isType (token, TOKEN_EOF)) + return; + + if (isType (token, TOKEN_OPEN_PAREN)) + { + do + { + parseImportSpec (token); + readToken (token); + } while (!isType (token, TOKEN_EOF) && + !isType (token, TOKEN_CLOSE_PAREN)); + } + else + { + parseImportSpec (token); + return; + } +} + +static void parseGoFile (tokenInfo *const token) +{ + int scope = CORK_NIL; + do + { + readToken (token); + + if (isType (token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_package: + scope = parsePackage (token); + break; + case KEYWORD_func: + parseFunctionOrMethod (token, scope); + break; + case KEYWORD_const: + parseConstTypeVar (token, GOTAG_CONST, scope); + break; + case KEYWORD_type: + parseConstTypeVar (token, GOTAG_TYPE, scope); + break; + case KEYWORD_var: + parseConstTypeVar (token, GOTAG_VAR, scope); + break; + case KEYWORD_import: + parseImport (token); + break; + default: + break; + } + } + else if (isType (token, TOKEN_OPEN_PAREN) || isType (token, TOKEN_OPEN_CURLY) || + isType (token, TOKEN_OPEN_SQUARE)) + { + skipToMatched (token, NULL); + } + } while (token->type != TOKEN_EOF); +} + +static void findGoTags (void) +{ + tokenInfo *const token = newToken (); + + parseGoFile (token); + + deleteToken (token); +} + +extern parserDefinition *GoParser (void) +{ + static const char *const extensions[] = { "go", NULL }; + parserDefinition *def = parserNew ("Go"); + def->kindTable = GoKinds; + def->kindCount = ARRAY_SIZE (GoKinds); + def->extensions = extensions; + def->parser = findGoTags; + def->initialize = initialize; + def->finalize = finalize; + def->keywordTable = GoKeywordTable; + def->keywordCount = ARRAY_SIZE (GoKeywordTable); + def->fieldTable = GoFields; + def->fieldCount = ARRAY_SIZE (GoFields); + def->useCork = CORK_QUEUE | CORK_SYMTAB; + def->requestAutomaticFQTag = true; + return def; +} diff --git a/ctags/parsers/geany_html.c b/ctags/parsers/html.c similarity index 72% rename from ctags/parsers/geany_html.c rename 
to ctags/parsers/html.c index 8d2a4abfef..56b3b3f332 100644 --- a/ctags/parsers/geany_html.c +++ b/ctags/parsers/html.c @@ -28,17 +28,52 @@ typedef enum { K_ANCHOR, + K_CLASS, K_HEADING1, K_HEADING2, - K_HEADING3 + K_HEADING3, + K_STYELSHEET, + K_ID, + K_SCRIPT, } htmlKind; +typedef enum { + CLASS_KIND_ATTRIBUTE_ROLE, +} ClassRole; + +typedef enum { + SCRIPT_KIND_EXTERNAL_FILE_ROLE, +} ScriptRole; + +typedef enum { + STYLESHEET_KIND_EXTERNAL_FILE_ROLE, +} StylesheetRole; + +static roleDefinition ClassRoles [] = { + { true, "attribute", "assigned as attributes" }, +}; + +static roleDefinition ScriptRoles [] = { + { true, "extFile", "referenced as external files" }, +}; + +static roleDefinition StylesheetRoles [] = { + { true, "extFile", "referenced as external files" }, +}; + static kindDefinition HtmlKinds [] = { { true, 'a', "anchor", "named anchors" }, + { true, 'c', "class", "classes", + .referenceOnly = true, ATTACH_ROLES (ClassRoles)}, { true, 'h', "heading1", "H1 headings" }, { true, 'i', "heading2", "H2 headings" }, - { true, 'j', "heading3", "H3 headings" } + { true, 'j', "heading3", "H3 headings" }, + { true, 'C', "stylesheet", "stylesheets", + .referenceOnly = true, ATTACH_ROLES (StylesheetRoles)}, + { true, 'I', "id", "identifiers" }, + { true, 'J', "script", "scripts", + .referenceOnly = true, ATTACH_ROLES (ScriptRoles)}, }; typedef enum { @@ -54,17 +89,22 @@ typedef enum { KEYWORD_area, KEYWORD_base, KEYWORD_br, + KEYWORD_class, KEYWORD_col, KEYWORD_command, KEYWORD_embed, KEYWORD_hr, + KEYWORD_href, + KEYWORD_id, KEYWORD_img, KEYWORD_input, KEYWORD_keygen, KEYWORD_link, KEYWORD_meta, KEYWORD_param, + KEYWORD_rel, KEYWORD_source, + KEYWORD_src, KEYWORD_track, KEYWORD_wbr } keywordId; @@ -82,17 +122,22 @@ static const keywordTable HtmlKeywordTable[] = { {"area", KEYWORD_area}, {"base", KEYWORD_base}, {"br", KEYWORD_br}, + {"class", KEYWORD_class}, {"col", KEYWORD_col}, {"command", KEYWORD_command}, {"embed", KEYWORD_embed}, {"hr", KEYWORD_hr}, + 
{"href", KEYWORD_href}, + {"id", KEYWORD_id}, {"img", KEYWORD_img}, {"input", KEYWORD_input}, {"keygen", KEYWORD_keygen}, {"link", KEYWORD_link}, {"meta", KEYWORD_meta}, {"param", KEYWORD_param}, + {"rel", KEYWORD_rel}, {"source", KEYWORD_source}, + {"src", KEYWORD_src}, {"track", KEYWORD_track}, {"wbr", KEYWORD_wbr}, }; @@ -141,6 +186,7 @@ static int Lang_html; static void readTag (tokenInfo *token, vString *text, int depth); #ifdef DEBUG +#if 0 static void dumpToken (tokenInfo *token, const char *context, const char* extra_context) { fprintf (stderr, "[%7s] %-20s@%s.%s\n", @@ -148,6 +194,7 @@ static void dumpToken (tokenInfo *token, const char *context, const char* extra_ context, extra_context? extra_context: "_"); } #endif +#endif static void readTokenText (tokenInfo *const token, bool collectText) { @@ -345,8 +392,8 @@ static bool skipScriptContent (tokenInfo *token, long *line, long *lineOffset) bool found_start = false; bool found_script = false; - long line_tmp[2]; - long lineOffset_tmp[2]; + long line_tmp[2] = {0}; + long lineOffset_tmp[2] = {0}; tokenType type; @@ -380,6 +427,30 @@ static bool skipScriptContent (tokenInfo *token, long *line, long *lineOffset) return found_script; } +static void makeClassRefTags (const char *classes) +{ + vString *klass = vStringNew (); + + do + { + if (*classes && !isspace (*classes)) + vStringPut (klass, *classes); + else if (!vStringIsEmpty (klass)) + { + makeSimpleRefTag (klass, K_CLASS, + CLASS_KIND_ATTRIBUTE_ROLE); + vStringClear (klass); + } + + if (!*classes) + break; + + classes++; + } while (1); + + vStringDelete (klass); +} + static void readTag (tokenInfo *token, vString *text, int depth) { bool textCreated = false; @@ -390,6 +461,8 @@ static void readTag (tokenInfo *token, vString *text, int depth) keywordId startTag; bool isHeading; bool isVoid; + vString *stylesheet = NULL; + bool stylesheet_expectation = false; startTag = lookupKeyword (vStringValue (token->string), Lang_html); isHeading = (startTag == 
KEYWORD_h1 || startTag == KEYWORD_h2 || startTag == KEYWORD_h3); @@ -402,26 +475,101 @@ static void readTag (tokenInfo *token, vString *text, int depth) do { + keywordId attribute = KEYWORD_NONE; + readToken (token, true); - if (startTag == KEYWORD_a && token->type == TOKEN_NAME) - { - keywordId attribute = lookupKeyword (vStringValue (token->string), Lang_html); + if (token->type == TOKEN_NAME) + attribute = lookupKeyword (vStringValue (token->string), Lang_html); - if (attribute == KEYWORD_name) + if (attribute == KEYWORD_class) + { + readToken (token, true); + if (token->type == TOKEN_EQUAL) + { + readToken (token, true); + if (token->type == TOKEN_STRING) + makeClassRefTags (vStringValue (token->string)); + } + } + else if (attribute == KEYWORD_id) + { + readToken (token, true); + if (token->type == TOKEN_EQUAL) + { + readToken (token, true); + if (token->type == TOKEN_STRING) + makeSimpleTag (token->string, K_ID); + } + } + else if (startTag == KEYWORD_a && attribute == KEYWORD_name) + { + readToken (token, true); + if (token->type == TOKEN_EQUAL) + { + readToken (token, true); + if (token->type == TOKEN_STRING || token->type == TOKEN_NAME) + makeSimpleTag (token->string, K_ANCHOR); + } + } + else if (startTag == KEYWORD_script && attribute == KEYWORD_src) + { + readToken (token, true); + if (token->type == TOKEN_EQUAL) + { + readToken (token, true); + if (token->type == TOKEN_STRING) + makeSimpleRefTag (token->string, K_SCRIPT, + SCRIPT_KIND_EXTERNAL_FILE_ROLE); + } + } + else if (startTag == KEYWORD_link) + { + if (attribute == KEYWORD_rel) + { + readToken (token, true); + if (token->type == TOKEN_EQUAL) + { + readToken (token, true); + if (token->type == TOKEN_STRING && + /* strcmp is not enough: + * e.g. 
*/ + vStringLength(token->string) >= 10 && + strstr (vStringValue (token->string), "stylesheet")) + stylesheet_expectation = true; + } + } + else if (attribute == KEYWORD_href) { readToken (token, true); if (token->type == TOKEN_EQUAL) { readToken (token, true); - if (token->type == TOKEN_STRING || token->type == TOKEN_NAME) - makeSimpleTag (token->string, K_ANCHOR); + if (token->type == TOKEN_STRING) + { + if (stylesheet == NULL) + stylesheet = vStringNewCopy (token->string); + else + vStringCopy (stylesheet, token->string); + } } } + if (stylesheet_expectation && stylesheet && !vStringIsEmpty (stylesheet)) + { + makeSimpleRefTag (stylesheet, K_STYELSHEET, + STYLESHEET_KIND_EXTERNAL_FILE_ROLE); + stylesheet_expectation = false; + if (stylesheet) + vStringClear (stylesheet); + } } } while (token->type != TOKEN_TAG_END && token->type != TOKEN_TAG_END2 && token->type != TOKEN_EOF); + vStringDelete (stylesheet); + stylesheet = NULL; + if (!isVoid && token->type == TOKEN_TAG_END && depth < MAX_DEPTH) { long startSourceLineNumber = getSourceLineNumber (); diff --git a/ctags/parsers/iniconf.c b/ctags/parsers/iniconf.c new file mode 100644 index 0000000000..830d427479 --- /dev/null +++ b/ctags/parsers/iniconf.c @@ -0,0 +1,238 @@ +/* +* +* Copyright (c) 2000-2001, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for ini/config files. 
+*/ + +/* + * This is based on geany's conf.c: + * -------------------------------- + * commit 3af538fa65f8b17897259080db8144b1edc43470 + * Author: Enrico Tröger + * Date: Sun Nov 27 20:39:57 2005 +0000 + * + * added tag support for filetype Conf + * + * + * git-svn-id: https://geany.svn.sourceforge.net/svnroot/geany/trunk@15 ea778897-0a13-0410-b9d1-a72fbfd435f5 + * + */ + +#include "general.h" /* must always come first */ + +#include "entry.h" +#include "htable.h" +#include "iniconf.h" +#include "parse.h" +#include "read.h" +#include "subparser.h" +#include "vstring.h" + +static bool isIdentifier (int c) +{ + /* allow whitespace within keys and sections */ + return (bool)(isalnum (c) || isspace (c) || c == '_'); +} + +static bool isValue (int c) +{ + return (c != '\0'); +} + +static iniconfSubparser *maySwitchLanguage (const char *section, const char *key, const char *value) +{ + iniconfSubparser *iniconf_subparser = NULL; + subparser *sub; + + foreachSubparser (sub, false) + { + iniconfSubparser *s = (iniconfSubparser *)sub; + if ((sub->direction & SUBPARSER_BASE_RUNS_SUB) + && s->probeLanguage) + { + bool r; + + enterSubparser ((subparser *)s); + r = s->probeLanguage(section, key, value); + leaveSubparser (); + if (r) + { + iniconf_subparser = s; + chooseExclusiveSubparser (sub, NULL); + break; + } + } + } + + return iniconf_subparser; +} + +typedef enum { + K_SECTION, + K_KEY, +} makeKind; + +static kindDefinition IniconfKinds [] = { + { true, 's', "section", "sections"}, + { true, 'k', "key", "keys"}, +}; + +static void makeIniconfTagMaybe (const char *section, const char *key, const char *value CTAGS_ATTR_UNUSED, int *index) +{ + tagEntryInfo e; + + if (!isLanguageEnabled (getInputLanguage ())) + return; + + if (key) + { + initTagEntry (&e, key, K_KEY); + e.extensionFields.scopeIndex = *index; + makeTagEntry (&e); + } + else + { + tagEntryInfo *last = getEntryInCorkQueue (*index); + if (last) + last->extensionFields.endLine = getInputLineNumber (); + + 
initTagEntry (&e, section, K_SECTION); + *index = makeTagEntry (&e); + } +} + +static void findIniconfTags (void) +{ + const unsigned char *line; + vString *val = vStringNew (); + vString *name = vStringNew (); + vString *scope = vStringNew (); + iniconfSubparser *sub; + int sectionCorkIndex = CORK_NIL; + + + sub = (iniconfSubparser *)getSubparserRunningBaseparser(); + if (sub) + chooseExclusiveSubparser ((subparser *)sub, NULL); + + while ((line = readLineFromInputFile ()) != NULL) + { + const unsigned char* cp = line; + bool possible = true; + + if (isspace ((int) *cp) || *cp == '#' || *cp == ';' || *cp == '\0' + || (*cp == '/' && *(cp+1) == '/')) + continue; + + /* look for a section */ + if (*cp == '[') + { + ++cp; + while (*cp != '\0' && *cp != ']') + { + vStringPut (name, (int) *cp); + ++cp; + } + + makeIniconfTagMaybe (vStringValue (name), NULL, NULL, + &sectionCorkIndex); + + + if (!sub) + sub = maySwitchLanguage (vStringValue (name), NULL, NULL); + + if (sub) + { + enterSubparser((subparser *)sub); + sub->newDataNotify (sub, vStringValue (name), NULL, NULL); + leaveSubparser (); + } + + vStringCopy (scope, name); + vStringClear (name); + continue; + } + + while (*cp != '\0') + { + /* We look for any sequence of identifier characters following a white space */ + if (possible && isIdentifier ((int) *cp)) + { + while (isIdentifier ((int) *cp)) + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringStripTrailing (name); + while (isspace ((int) *cp)) + ++cp; + if (*cp == '=' || *cp == ':') + { + + cp++; + while (isspace ((int) *cp)) + ++cp; + while (isValue ((int) *cp)) + { + vStringPut (val, (int) *cp); + ++cp; + } + vStringStripTrailing (val); + + makeIniconfTagMaybe ((vStringLength (scope) > 0) + ? vStringValue (scope) + : NULL, + vStringValue (name), + vStringValue (val), + &sectionCorkIndex); + if (!sub) + sub = maySwitchLanguage ((vStringLength (scope) > 0) + ? 
vStringValue (scope) + : NULL, + vStringValue (name), + vStringValue (val)); + if (sub) + { + enterSubparser ((subparser *)sub); + sub->newDataNotify (sub, + (vStringLength (scope) > 0) + ? vStringValue (scope) + : NULL, + vStringValue (name), + vStringValue (val)); + leaveSubparser (); + } + vStringClear (val); + } + vStringClear (name); + } + else + possible = !!(isspace ((int) *cp)); + + if (*cp != '\0') + ++cp; + } + } + + vStringDelete (name); + vStringDelete (scope); + vStringDelete (val); +} + +extern parserDefinition* IniconfParser (void) +{ + static const char *const extensions [] = { "ini", "conf", NULL }; + parserDefinition* const def = parserNew ("Iniconf"); + + def->kindTable = IniconfKinds; + def->kindCount = ARRAY_SIZE (IniconfKinds); + def->extensions = extensions; + def->parser = findIniconfTags; + def->useCork = CORK_QUEUE; + + return def; +} diff --git a/ctags/parsers/iniconf.h b/ctags/parsers/iniconf.h new file mode 100644 index 0000000000..3854d1054a --- /dev/null +++ b/ctags/parsers/iniconf.h @@ -0,0 +1,41 @@ +/* +* +* Copyright (c) 2000-2001, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for ini/config files. 
+*/ + +/* + * This is based on geany's conf.c: + * -------------------------------- + * commit 3af538fa65f8b17897259080db8144b1edc43470 + * Author: Enrico Tröger + * Date: Sun Nov 27 20:39:57 2005 +0000 + * + * added tag support for filetype Conf +lang * + * + * git-svn-id: https://geany.svn.sourceforge.net/svnroot/geany/trunk@15 ea778897-0a13-0410-b9d1-a72fbfd435f5 + * + */ + +#ifndef CTAGS_INITCONF_H +#define CTAGS_INITCONF_H + +#include "general.h" + +#include "subparser.h" + +typedef struct sIniconfSubparser iniconfSubparser; +struct sIniconfSubparser { + subparser subparser; + + bool (* probeLanguage) (const char *section, const char *key, const char *value); + void (* newDataNotify) (iniconfSubparser *s, + const char *section, const char *key, const char *value); +}; + +#endif diff --git a/ctags/parsers/geany_jscript.c b/ctags/parsers/jscript.c similarity index 84% rename from ctags/parsers/geany_jscript.c rename to ctags/parsers/jscript.c index 458b5cb14e..df572df1cf 100644 --- a/ctags/parsers/geany_jscript.c +++ b/ctags/parsers/jscript.c @@ -60,52 +60,6 @@ #define newToken() (objPoolGet (TokenPool)) #define deleteToken(t) (objPoolPut (TokenPool, (t))) -/* - * Debugging - * - * Uncomment this to enable extensive debugging to stderr in jscript code. - * Please note that TRACING_ENABLED should be #defined in main/trace.h - * for this to work. - * - */ -//#define JSCRIPT_DEBUGGING_ENABLED 1 - -#if defined(DO_TRACING) && defined(JSCRIPT_DEBUGGING_ENABLED) - #define JSCRIPT_DO_DEBUGGING -#endif - -#ifdef JSCRIPT_DO_DEBUGGING - -#define JSCRIPT_DEBUG_ENTER() TRACE_ENTER() -#define JSCRIPT_DEBUG_LEAVE() TRACE_LEAVE() - -#define JSCRIPT_DEBUG_ENTER_TEXT(_szFormat,...) \ - TRACE_ENTER_TEXT(_szFormat,## __VA_ARGS__) - -#define JSCRIPT_DEBUG_LEAVE_TEXT(_szFormat,...) \ - TRACE_LEAVE_TEXT(_szFormat,## __VA_ARGS__) - -#define JSCRIPT_DEBUG_PRINT(_szFormat,...) \ - TRACE_PRINT(_szFormat,## __VA_ARGS__) - -#define JSCRIPT_DEBUG_ASSERT(_condition,_szFormat,...) 
\ - TRACE_ASSERT(_condition,_szFormat,## __VA_ARGS__) - -#else //!JSCRIPT_DO_DEBUGGING - -#define JSCRIPT_DEBUG_ENTER() do { } while(0) -#define JSCRIPT_DEBUG_LEAVE() do { } while(0) - -#define JSCRIPT_DEBUG_ENTER_TEXT(_szFormat,...) do { } while(0) -#define JSCRIPT_DEBUG_LEAVE_TEXT(_szFormat,...) do { } while(0) - -#define JSCRIPT_DEBUG_PRINT(_szFormat,...) do { } while(0) - -#define JSCRIPT_DEBUG_ASSERT(_condition,_szFormat,...) do { } while(0) - -#endif //!JSCRIPT_DO_DEBUGGING - - /* * DATA DECLARATIONS */ @@ -145,6 +99,8 @@ enum eKeywordId { KEYWORD_default, KEYWORD_export, KEYWORD_async, + KEYWORD_get, + KEYWORD_set, }; typedef int keywordId; /* to allow KEYWORD_NONE */ @@ -170,6 +126,9 @@ typedef enum eTokenType { TOKEN_REGEXP, TOKEN_POSTFIX_OPERATOR, TOKEN_STAR, + /* To handle Babel's decorators. + * Used only in readTokenFull or lower functions. */ + TOKEN_ATMARK, TOKEN_BINARY_OPERATOR } tokenType; @@ -181,7 +140,7 @@ typedef struct sTokenInfo { unsigned long lineNumber; MIOPos filePosition; int nestLevel; - bool ignoreTag; + bool dynamicProp; } tokenInfo; /* @@ -207,6 +166,9 @@ typedef enum { JSTAG_CONSTANT, JSTAG_VARIABLE, JSTAG_GENERATOR, + JSTAG_GETTER, + JSTAG_SETTER, + JSTAG_FIELD, JSTAG_COUNT } jsKind; @@ -217,7 +179,10 @@ static kindDefinition JsKinds [] = { { true, 'p', "property", "properties" }, { true, 'C', "constant", "constants" }, { true, 'v', "variable", "global variables" }, - { true, 'g', "generator", "generators" } + { true, 'g', "generator", "generators" }, + { true, 'G', "getter", "getters" }, + { true, 'S', "setter", "setters" }, + { true, 'M', "field", "fields" }, }; static const keywordTable JsKeywordTable [] = { @@ -248,6 +213,8 @@ static const keywordTable JsKeywordTable [] = { { "default", KEYWORD_default }, { "export", KEYWORD_export }, { "async", KEYWORD_async }, + { "get", KEYWORD_get }, + { "set", KEYWORD_set }, }; /* @@ -256,11 +223,18 @@ static const keywordTable JsKeywordTable [] = { /* Recursive functions */ static void 
readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr); +static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr); static void parseFunction (tokenInfo *const token); static bool parseBlock (tokenInfo *const token, const vString *const parentScope); static bool parseLine (tokenInfo *const token, bool is_inside_class); static void parseUI5 (tokenInfo *const token); +#ifdef DO_TRACING +//static void dumpToken (const tokenInfo *const token); +static const char *tokenTypeName(enum eTokenType e); +//static const char *keywordName(enum eKeywordId e); +#endif + static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED) { tokenInfo *token = xMalloc (1, tokenInfo); @@ -278,7 +252,7 @@ static void clearPoolToken (void *data) token->type = TOKEN_UNDEFINED; token->keyword = KEYWORD_NONE; token->nestLevel = 0; - token->ignoreTag = false; + token->dynamicProp = false; token->lineNumber = getInputLineNumber (); token->filePosition = getInputFilePosition (); vStringClear (token->string); @@ -300,6 +274,7 @@ static void copyToken (tokenInfo *const dest, const tokenInfo *const src, dest->filePosition = src->filePosition; dest->type = src->type; dest->keyword = src->keyword; + dest->dynamicProp = src->dynamicProp; vStringCopy(dest->string, src->string); if (include_non_read_info) { @@ -308,6 +283,13 @@ static void copyToken (tokenInfo *const dest, const tokenInfo *const src, } } +static void injectDynamicName (tokenInfo *const token, vString *newName) +{ + token->dynamicProp = true; + vStringDelete (token->string); + token->string = newName; +} + /* * Tag generation functions */ @@ -316,14 +298,14 @@ static void makeJsTagCommon (const tokenInfo *const token, const jsKind kind, vString *const signature, vString *const inheritance, bool anonymous) { - if (JsKinds [kind].enabled && ! 
token->ignoreTag ) + if (JsKinds [kind].enabled ) { const char *name = vStringValue (token->string); vString *fullscope = vStringNewCopy (token->scope); const char *p; tagEntryInfo e; - if (kind != JSTAG_PROPERTY && (p = strrchr (name, '.')) != NULL ) + if (!token->dynamicProp && kind != JSTAG_PROPERTY && (p = strrchr (name, '.')) != NULL ) { if (vStringLength (fullscope) > 0) vStringPut (fullscope, '.'); @@ -333,7 +315,7 @@ static void makeJsTagCommon (const tokenInfo *const token, const jsKind kind, initTagEntry (&e, name, kind); - JSCRIPT_DEBUG_PRINT("Emitting tag for symbol '%s' of kind %02x with scope '%s'",name,kind,vStringValue(fullscope)); + TRACE_PRINT("Emitting tag for symbol '%s' of kind %02x with scope '%s'",name,kind,vStringValue(fullscope)); e.lineNumber = token->lineNumber; e.filePosition = token->filePosition; @@ -364,9 +346,9 @@ static void makeJsTagCommon (const tokenInfo *const token, const jsKind kind, * characters. */ for (i = 0; i < signature->length; i++) { - unsigned char c = (unsigned char) signature->buffer[i]; + unsigned char c = (unsigned char) vStringChar (signature, i); if (c < 0x20 /* below space */ || c == 0x7F /* DEL */) - signature->buffer[i] = ' '; + vStringChar (signature, i) = ' '; } e.extensionFields.signature = vStringValue(signature); } @@ -391,11 +373,10 @@ static void makeJsTag (const tokenInfo *const token, const jsKind kind, static void makeClassTagCommon (tokenInfo *const token, vString *const signature, vString *const inheritance, bool anonymous) { - vString * fulltag; - if ( ! token->ignoreTag ) + { - fulltag = vStringNew (); + vString * fulltag = vStringNew (); if (vStringLength (token->scope) > 0) { vStringCopy(fulltag, token->scope); @@ -425,11 +406,8 @@ static void makeClassTag (tokenInfo *const token, vString *const signature, static void makeFunctionTagCommon (tokenInfo *const token, vString *const signature, bool generator, bool anonymous) { - vString * fulltag; - - if ( ! 
token->ignoreTag ) { - fulltag = vStringNew (); + vString * fulltag = vStringNew (); if (vStringLength (token->scope) > 0) { vStringCopy(fulltag, token->scope); @@ -482,10 +460,10 @@ static int handleUnicodeCodePoint (uint32_t point) /* 4 bytes should be enough for any encoding (it's how much UTF-32 * would need). */ /* FIXME: actually iconv has a tendency to output a BOM for Unicode - * encodings where it matters when the endianess is not specified in + * encodings where it matters when the endianness is not specified in * the target encoding name. E.g., if the target encoding is "UTF-32" * or "UTF-16" it will output 2 code points, the BOM (U+FEFF) and the - * one we expect. This does not happen if the endianess is specified + * one we expect. This does not happen if the endianness is specified * explicitly, e.g. with "UTF-32LE", or "UTF-16BE". * However, it's not very relevant for the moment as nothing in CTags * cope well (if at all) with non-ASCII-compatible encodings like @@ -828,7 +806,7 @@ static void parseTemplateString (vString *const string) while (c != EOF); } -static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr) +static void readTokenFullRaw (tokenInfo *const token, bool include_newlines, vString *const repr) { int c; int i; @@ -965,18 +943,10 @@ static void readTokenFull (tokenInfo *const token, bool include_newlines, vStrin else { if (repr) /* remove the / we added */ - repr->buffer[--repr->length] = 0; + vStringChop(repr); if (d == '*') { - do - { - skipToCharacterInInputFile ('*'); - c = getcFromInputFile (); - if (c == '/') - break; - else - ungetcToInputFile (c); - } while (c != EOF && c != '\0'); + skipToCharacterInInputFile2('*', '/'); goto getNextChar; } else if (d == '/') /* is this the start of a comment? 
*/ @@ -1007,6 +977,10 @@ static void readTokenFull (tokenInfo *const token, bool include_newlines, vStrin } break; + case '@': + token->type = TOKEN_ATMARK; + break; + case '\\': c = readUnicodeEscapeSequence (c); /* fallthrough */ @@ -1083,12 +1057,71 @@ static void readTokenFull (tokenInfo *const token, bool include_newlines, vStrin LastTokenType = token->type; } +/* See https://babeljs.io/blog/2018/09/17/decorators */ +static void skipBabelDecorator (tokenInfo *token, bool include_newlines, vString *const repr) +{ + readTokenFullRaw (token, include_newlines, repr); + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* @(complex ? dec1 : dec2) */ + skipArgumentList (token, include_newlines, repr); + TRACE_PRINT ("found @(...) style decorator"); + } + else if (isType (token, TOKEN_IDENTIFIER)) + { + /* @namespace.foo (...) */ + bool found_period = false; + while (1) + { + readTokenFullRaw (token, include_newlines, repr); + if (isType (token, TOKEN_IDENTIFIER)) + { + if (!found_period) + { + TRACE_PRINT("found @namespace.bar style decorator"); + break; + } + found_period = false; + } + else if (isType (token, TOKEN_PERIOD)) + found_period = true; + else if (isType (token, TOKEN_OPEN_PAREN)) + { + skipArgumentList (token, include_newlines, repr); + TRACE_PRINT("found @foo(...) style decorator"); + break; + } + else + { + TRACE_PRINT("found @foo style decorator"); + break; + } + } + } + else + /* Unexpected token after @ */ + TRACE_PRINT("found unexpected token during skipping a decorator"); +} + +static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr) +{ + readTokenFullRaw (token, include_newlines, repr); + + while (1) + { + if (!isType (token, TOKEN_ATMARK)) + break; + skipBabelDecorator (token, include_newlines, repr); + /* @decorator0 @decorator1 ... There can be more than one decorator. 
*/ + } +} + #ifdef JSCRIPT_DO_DEBUGGING /* trace readTokenFull() */ static void readTokenFullDebug (tokenInfo *const token, bool include_newlines, vString *const repr) { readTokenFull (token, include_newlines, repr); - JSCRIPT_DEBUG_PRINT("token '%s' of type %02x with scope '%s'",vStringValue(token->string),token->type, vStringValue(token->scope)); + TRACE_PRINT("token '%s' of type %02x with scope '%s'",vStringValue(token->string),token->type, vStringValue(token->scope)); } # define readTokenFull readTokenFullDebug #endif @@ -1150,6 +1183,19 @@ static void skipArrayList (tokenInfo *const token, bool include_newlines) } } +static void skipQualifiedIdentifier (tokenInfo *const token) +{ + /* Skip foo.bar.baz */ + while (isType (token, TOKEN_IDENTIFIER)) + { + readToken (token); + if (isType (token, TOKEN_PERIOD)) + readToken (token); + else + break; + } +} + static void addContext (tokenInfo* const parent, const tokenInfo* const child) { if (vStringLength (parent->string) > 0) @@ -1388,6 +1434,8 @@ static bool parseIf (tokenInfo *const token) static void parseFunction (tokenInfo *const token) { + TRACE_ENTER(); + tokenInfo *const name = newToken (); vString *const signature = vStringNew (); bool is_class = false; @@ -1445,13 +1493,15 @@ static void parseFunction (tokenInfo *const token) cleanUp: vStringDelete (signature); deleteToken (name); + + TRACE_LEAVE(); } /* Parses a block surrounded by curly braces. 
* @p parentScope is the scope name for this block, or NULL for unnamed scopes */ static bool parseBlock (tokenInfo *const token, const vString *const parentScope) { - JSCRIPT_DEBUG_ENTER(); + TRACE_ENTER(); bool is_class = false; bool read_next_token = true; @@ -1540,7 +1590,7 @@ static bool parseBlock (tokenInfo *const token, const vString *const parentScope if (parentScope) token->nestLevel--; - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE(); return is_class; } @@ -1548,7 +1598,10 @@ static bool parseBlock (tokenInfo *const token, const vString *const parentScope static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, const bool is_es6_class) { - JSCRIPT_DEBUG_ENTER(); + TRACE_ENTER_TEXT("token is '%s' of type %s in classToken '%s' of type %s (es6: %s)", + vStringValue(token->string), tokenTypeName (token->type), + vStringValue(class->string), tokenTypeName (class->type), + is_es6_class? "yes": "no"); tokenInfo *const name = newToken (); bool has_methods = false; @@ -1563,18 +1616,36 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, * validMethod : function(a,b) {} * 'validMethod2' : function(a,b) {} * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false} + * get prop() {} + * set prop(val) {} * * ES6 methods: * property(...) {} * *generator() {} - * FIXME: what to do with computed names? + * + * ES6 computed name: * [property]() {} + * get [property]() {} + * set [property]() {} * *[generator]() {} + * + * tc39/proposal-class-fields + * field0 = function(a,b) {} + * field1 = 1 + * The parser extracts field0 as a method because the left value + * is a function (kind propagation), and field1 as a field. 
*/ + bool dont_read = false; do { - readToken (token); + bool is_setter = false; + bool is_getter = false; + + if (!dont_read) + readToken (token); + dont_read = false; + if (isType (token, TOKEN_CLOSE_CURLY)) { goto cleanUp; @@ -1582,12 +1653,28 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, if (isKeyword (token, KEYWORD_async)) readToken (token); + else if (isType(token, TOKEN_KEYWORD) && isKeyword (token, KEYWORD_get)) + { + is_getter = true; + readToken (token); + } + else if (isType(token, TOKEN_KEYWORD) && isKeyword (token, KEYWORD_set)) + { + is_setter = true; + readToken (token); + } if (! isType (token, TOKEN_KEYWORD) && ! isType (token, TOKEN_SEMICOLON)) { bool is_generator = false; bool is_shorthand = false; /* ES6 shorthand syntax */ + bool is_computed_name = false; /* ES6 computed property name */ + bool is_dynamic_prop = false; + vString *dprop = NULL; /* is_computed_name is true but + * the name is not represented in + * a string literal. The expressions + * go this string. */ if (isType (token, TOKEN_STAR)) /* shorthand generator */ { @@ -1595,11 +1682,47 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, readToken (token); } + if (isType (token, TOKEN_OPEN_SQUARE)) + { + is_computed_name = true; + dprop = vStringNewInit ("["); + readTokenFull (token, false, dprop); + } + copyToken(name, token, true); + if (is_computed_name && ! isType (token, TOKEN_STRING)) + is_dynamic_prop = true; + + readTokenFull (token, false, dprop); + + if (is_computed_name) + { + int depth = 1; + do + { + if (isType (token, TOKEN_CLOSE_SQUARE)) + depth--; + else + { + is_dynamic_prop = true; + if (isType (token, TOKEN_OPEN_SQUARE)) + depth++; + } + readTokenFull (token, false, (is_dynamic_prop && depth != 0)? dprop: NULL); + } while (! 
isType (token, TOKEN_EOF) && depth > 0); + } + + if (is_dynamic_prop) + { + injectDynamicName (name, dprop); + dprop = NULL; + } + else + vStringDelete (dprop); - readToken (token); is_shorthand = isType (token, TOKEN_OPEN_PAREN); - if ( isType (token, TOKEN_COLON) || is_shorthand ) + bool can_be_field = isType (token, TOKEN_EQUAL_SIGN); + if ( isType (token, TOKEN_COLON) || can_be_field || is_shorthand ) { if (! is_shorthand) { @@ -1609,7 +1732,7 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, } if ( is_shorthand || isKeyword (token, KEYWORD_function) ) { - JSCRIPT_DEBUG_PRINT("Seems to be a function or shorthand"); + TRACE_PRINT("Seems to be a function or shorthand"); vString *const signature = vStringNew (); if (! is_shorthand) @@ -1630,7 +1753,16 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, if (isType (token, TOKEN_OPEN_CURLY)) { has_methods = true; - makeJsTag (name, is_generator ? JSTAG_GENERATOR : JSTAG_METHOD, signature, NULL); + + int kind = JSTAG_METHOD; + if (is_generator) + kind = JSTAG_GENERATOR; + else if (is_getter) + kind = JSTAG_GETTER; + else if (is_setter) + kind = JSTAG_SETTER; + + makeJsTag (name, kind, signature, NULL); parseBlock (token, name->string); /* @@ -1680,12 +1812,23 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, else makeJsTag (name, JSTAG_PROPERTY, NULL, NULL); } + else if (can_be_field) + { + makeJsTag (name, JSTAG_FIELD, NULL, NULL); + parseLine (token, true); + } + } + else + { + makeJsTag (name, JSTAG_FIELD, NULL, NULL); + if (!isType (token, TOKEN_SEMICOLON)) + dont_read = true; } } } while ( isType(token, TOKEN_COMMA) || ( is_es6_class && ! 
isType(token, TOKEN_EOF) ) ); - JSCRIPT_DEBUG_PRINT("Finished parsing methods"); + TRACE_PRINT("Finished parsing methods"); findCmdTerm (token, false, false); @@ -1694,14 +1837,14 @@ static bool parseMethods (tokenInfo *const token, const tokenInfo *const class, vStringDelete (saveScope); deleteToken (name); - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE_TEXT("found method(s): %s", has_methods? "yes": "no"); return has_methods; } static bool parseES6Class (tokenInfo *const token, const tokenInfo *targetName) { - JSCRIPT_DEBUG_ENTER(); + TRACE_ENTER(); tokenInfo * className = newToken (); vString *inheritance = NULL; @@ -1745,7 +1888,7 @@ static bool parseES6Class (tokenInfo *const token, const tokenInfo *targetName) vStringStripLeading (inheritance); } - JSCRIPT_DEBUG_PRINT("Emitting tag for class '%s'", vStringValue(targetName->string)); + TRACE_PRINT("Emitting tag for class '%s'", vStringValue(targetName->string)); makeJsTagCommon (targetName, JSTAG_CLASS, NULL, inheritance, (is_anonymous && (targetName == className))); @@ -1769,13 +1912,13 @@ static bool parseES6Class (tokenInfo *const token, const tokenInfo *targetName) deleteToken (className); - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE(); return true; } static bool parseStatement (tokenInfo *const token, bool is_inside_class) { - JSCRIPT_DEBUG_ENTER(); + TRACE_ENTER_TEXT("is_inside_class: %s", is_inside_class? 
"yes": "no"); tokenInfo *const name = newToken (); tokenInfo *const secondary_name = newToken (); @@ -1827,7 +1970,7 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) isKeyword(token, KEYWORD_let) || isKeyword(token, KEYWORD_const) ) { - JSCRIPT_DEBUG_PRINT("var/let/const case"); + TRACE_PRINT("var/let/const case"); is_const = isKeyword(token, KEYWORD_const); /* * Only create variables for global scope @@ -1842,7 +1985,7 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) nextVar: if ( isKeyword(token, KEYWORD_this) ) { - JSCRIPT_DEBUG_PRINT("found 'this' keyword"); + TRACE_PRINT("found 'this' keyword"); readToken(token); if (isType (token, TOKEN_PERIOD)) @@ -1852,7 +1995,8 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) } copyToken(name, token, true); - JSCRIPT_DEBUG_PRINT("name becomes '%s'",vStringValue(name->string)); + TRACE_PRINT("name becomes '%s' of type %s", + vStringValue(token->string), tokenTypeName (token->type)); while (! isType (token, TOKEN_CLOSE_CURLY) && ! isType (token, TOKEN_SEMICOLON) && @@ -1990,7 +2134,6 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) * Find to the end of the statement */ findCmdTerm (token, false, false); - token->ignoreTag = false; is_terminated = true; goto cleanUp; } @@ -2133,7 +2276,7 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) || isType (name, TOKEN_KEYWORD) ) ) { /* Unexpected input. Try to reset the parsing. */ - JSCRIPT_DEBUG_PRINT("Unexpected input, trying to reset"); + TRACE_PRINT("Unexpected input, trying to reset"); vStringDelete (signature); goto cleanUp; } @@ -2166,9 +2309,12 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) * Or checks if this is a hash variable. 
* var z = {}; */ + bool anonClass = vStringIsEmpty (name->string); + if (anonClass) + anonGenerate (name->string, "AnonymousClass", JSTAG_CLASS); has_methods = parseMethods(token, name, false); if (has_methods) - makeJsTag (name, JSTAG_CLASS, NULL, NULL); + makeJsTagCommon (name, JSTAG_CLASS, NULL, NULL, anonClass); else { /* @@ -2229,7 +2375,11 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) if ( isKeyword (token, KEYWORD_capital_object) ) is_class = true; - readToken (token); + if (is_var) + skipQualifiedIdentifier (token); + else + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) skipArgumentList(token, true, NULL); @@ -2241,17 +2391,16 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) { makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL); } + else if ( is_class ) + { + makeClassTag (name, NULL, NULL); + } else { - if ( is_class ) - { - makeClassTag (name, NULL, NULL); - } else { - /* FIXME: we cannot really get a meaningful - * signature from a `new Function()` call, - * so for now just don't set any */ - makeFunctionTag (name, NULL, false); - } + /* FIXME: we cannot really get a meaningful + * signature from a `new Function()` call, + * so for now just don't set any */ + makeFunctionTag (name, NULL, false); } } } @@ -2346,7 +2495,7 @@ static bool parseStatement (tokenInfo *const token, bool is_inside_class) deleteToken (method_body_token); vStringDelete(saveScope); - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE(); return is_terminated; } @@ -2403,7 +2552,8 @@ static void parseUI5 (tokenInfo *const token) static bool parseLine (tokenInfo *const token, bool is_inside_class) { - JSCRIPT_DEBUG_ENTER_TEXT("token is '%s' of type %02x",vStringValue(token->string),token->type); + TRACE_ENTER_TEXT("token is '%s' of type %s", + vStringValue(token->string), tokenTypeName (token->type)); bool is_terminated = true; /* @@ -2462,14 +2612,14 @@ static bool parseLine (tokenInfo *const token, bool 
is_inside_class) is_terminated = parseStatement (token, is_inside_class); } - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE(); return is_terminated; } static void parseJsFile (tokenInfo *const token) { - JSCRIPT_DEBUG_ENTER(); + TRACE_ENTER(); do { @@ -2484,9 +2634,91 @@ static void parseJsFile (tokenInfo *const token) parseLine (token, false); } while (! isType (token, TOKEN_EOF)); - JSCRIPT_DEBUG_LEAVE(); + TRACE_LEAVE(); } +#ifdef DO_TRACING +#if 0 +static void dumpToken (const tokenInfo *const token) +{ + fprintf(stderr, "Token <%p>: %s: %s\n", + token, + tokenTypeName (token->type), + (token->type == TOKEN_KEYWORD ? keywordName (token->keyword): + token->type == TOKEN_IDENTIFIER? vStringValue (token->string): + "")); +} +#endif + +static const char *tokenTypeName(enum eTokenType e) +{ /* Generated by misc/enumstr.sh with cmdline "parsers/jscript.c" "eTokenType" "tokenTypeName" */ + switch (e) + { + case TOKEN_BINARY_OPERATOR: return "TOKEN_BINARY_OPERATOR"; + case TOKEN_CHARACTER: return "TOKEN_CHARACTER"; + case TOKEN_CLOSE_CURLY: return "TOKEN_CLOSE_CURLY"; + case TOKEN_CLOSE_PAREN: return "TOKEN_CLOSE_PAREN"; + case TOKEN_CLOSE_SQUARE: return "TOKEN_CLOSE_SQUARE"; + case TOKEN_COLON: return "TOKEN_COLON"; + case TOKEN_COMMA: return "TOKEN_COMMA"; + case TOKEN_EOF: return "TOKEN_EOF"; + case TOKEN_EQUAL_SIGN: return "TOKEN_EQUAL_SIGN"; + case TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER"; + case TOKEN_KEYWORD: return "TOKEN_KEYWORD"; + case TOKEN_OPEN_CURLY: return "TOKEN_OPEN_CURLY"; + case TOKEN_OPEN_PAREN: return "TOKEN_OPEN_PAREN"; + case TOKEN_OPEN_SQUARE: return "TOKEN_OPEN_SQUARE"; + case TOKEN_PERIOD: return "TOKEN_PERIOD"; + case TOKEN_POSTFIX_OPERATOR: return "TOKEN_POSTFIX_OPERATOR"; + case TOKEN_REGEXP: return "TOKEN_REGEXP"; + case TOKEN_SEMICOLON: return "TOKEN_SEMICOLON"; + case TOKEN_STAR: return "TOKEN_STAR"; + case TOKEN_STRING: return "TOKEN_STRING"; + case TOKEN_TEMPLATE_STRING: return "TOKEN_TEMPLATE_STRING"; + case TOKEN_UNDEFINED: return 
"TOKEN_UNDEFINED"; + default: return "UNKNOWN"; + } +} + +#if 0 +static const char *keywordName(enum eKeywordId e) +{ /* Generated by misc/enumstr.sh with cmdline "parsers/jscript.c" "eKeywordId" "keywordName" */ + switch (e) + { + case KEYWORD_async: return "KEYWORD_async"; + case KEYWORD_capital_function: return "KEYWORD_capital_function"; + case KEYWORD_capital_object: return "KEYWORD_capital_object"; + case KEYWORD_catch: return "KEYWORD_catch"; + case KEYWORD_class: return "KEYWORD_class"; + case KEYWORD_const: return "KEYWORD_const"; + case KEYWORD_default: return "KEYWORD_default"; + case KEYWORD_do: return "KEYWORD_do"; + case KEYWORD_else: return "KEYWORD_else"; + case KEYWORD_export: return "KEYWORD_export"; + case KEYWORD_extends: return "KEYWORD_extends"; + case KEYWORD_finally: return "KEYWORD_finally"; + case KEYWORD_for: return "KEYWORD_for"; + case KEYWORD_function: return "KEYWORD_function"; + case KEYWORD_get: return "KEYWORD_get"; + case KEYWORD_if: return "KEYWORD_if"; + case KEYWORD_let: return "KEYWORD_let"; + case KEYWORD_new: return "KEYWORD_new"; + case KEYWORD_prototype: return "KEYWORD_prototype"; + case KEYWORD_return: return "KEYWORD_return"; + case KEYWORD_sap: return "KEYWORD_sap"; + case KEYWORD_set: return "KEYWORD_set"; + case KEYWORD_static: return "KEYWORD_static"; + case KEYWORD_switch: return "KEYWORD_switch"; + case KEYWORD_this: return "KEYWORD_this"; + case KEYWORD_try: return "KEYWORD_try"; + case KEYWORD_var: return "KEYWORD_var"; + case KEYWORD_while: return "KEYWORD_while"; + default: return "UNKNOWN"; + } +} +#endif +#endif + static void initialize (const langType language) { Assert (ARRAY_SIZE (JsKinds) == JSTAG_COUNT); @@ -2537,9 +2769,13 @@ extern parserDefinition* JavaScriptParser (void) { // .jsx files are JSX: https://facebook.github.io/jsx/ // which have JS function definitions, so we just use the JS parser - static const char *const extensions [] = { "js", "jsx", NULL }; + static const char *const extensions [] 
= { "js", "jsx", "mjs", NULL }; static const char *const aliases [] = { "js", "node", "nodejs", - "seed", "gjs", NULL }; + "seed", "gjs", + /* Used in PostgreSQL + * https://github.com/plv8/plv8 */ + "v8", + NULL }; parserDefinition *const def = parserNew ("JavaScript"); def->extensions = extensions; def->aliases = aliases; diff --git a/ctags/parsers/lua.c b/ctags/parsers/lua.c new file mode 100644 index 0000000000..2d7efed950 --- /dev/null +++ b/ctags/parsers/lua.c @@ -0,0 +1,257 @@ +/* +* Copyright (c) 2000-2001, Max Ischenko . +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for Lua language. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include <string.h> + +#include "debug.h" +#include "entry.h" +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_FUNCTION, + K_UNKNOWN, +} luaKind; + +typedef enum { + LUA_UNKNOWN_REFERENCED, +} luaUnknownRole; + +static roleDefinition LuaUnknownRoles [] = { + { false, "referenced", "referenced somehow" }, +}; + +static kindDefinition LuaKinds [] = { + { true, 'f', "function", "functions" }, + + /* `unknown' is a kind just for making FQ tag for functions. */ + { false, 'X', "unknown", "unknown language object", + .referenceOnly = true, ATTACH_ROLES(LuaUnknownRoles) }, +}; + +/* +* FUNCTION DEFINITIONS +*/ + +/* + * Helper function. + * Returns 1 if line looks like a line of Lua code. + * + * TODO: Recognize UNIX bang notation. + * (Lua treat first line as a comment if it starts with #!) 
+ * + */ +static bool is_a_code_line (const unsigned char *line) +{ + bool result; + const unsigned char *p = line; + while (isspace ((int) *p)) + p++; + if (p [0] == '\0') + result = false; + else if (p [0] == '-' && p [1] == '-') + result = false; + else + result = true; + return result; +} + +static bool isLuaIdentifier (char c) +{ + return (bool) !(isspace(c) || c == '(' || c == ')' || c == '=' || c == '.' || c == ':'); +} + +static void set_scope (int child, int parent) +{ + if (parent == CORK_NIL || child == CORK_NIL) + return; + + tagEntryInfo *e = getEntryInCorkQueue (child); + if (!e) + return; + + e->extensionFields.scopeIndex = parent; +} + +static void extract_next_token (const char *begin, const char *end_sentinel, vString *name) +{ + if (begin == NULL || end_sentinel == NULL) + return; + + Assert (begin <= end_sentinel); + + /* Both on '(' */ + if (begin == end_sentinel) + return; + + /* Trim prefixed white spaces */ + while (isspace ((int) *begin)) + begin++; + + /* Both on '(' */ + if (begin == end_sentinel) + return; + + const char *end = end_sentinel - 1; + + /* Trim suffixed white spaces */ + while (isspace ((int) *end)) + end--; + + Assert (begin <= end); + + int lastCorkIndx = CORK_NIL; + for (const char *c = begin; c <= end; ++c) + { + if (*c == '.' || *c == ':') + { + int r = makeSimpleRefTag(name, + K_UNKNOWN, LUA_UNKNOWN_REFERENCED); + set_scope(r, lastCorkIndx); + lastCorkIndx = r; + + /* Do not include module names in function name */ + vStringClear (name); + } + else if (isLuaIdentifier (*c)) + vStringPut (name, (int) *c); + else + { + /* An unexpected character is found + * between "function" and "(" */ + vStringClear (name); + return; + } + } + + int d = makeSimpleTag (name, K_FUNCTION); + set_scope(d, lastCorkIndx); + vStringClear (name); +} + +static void extract_prev_token (const char *end, const char *begin_sentinel, vString *name) +{ + const char *begin; + + if (end == NULL || begin_sentinel == NULL) + return; + + if (! 
(begin_sentinel <= end)) + return; + + while (isspace ((int) *end)) + { + end--; + if (! (begin_sentinel <= end)) + return; + } + + begin = end; + while (begin_sentinel <= begin && isLuaIdentifier (*begin)) + begin--; + + int targetCorkIndex = CORK_NIL; + if (end - begin) + { + vStringNCatS (name, begin + 1, end - begin); + targetCorkIndex = makeSimpleTag (name, K_FUNCTION); + vStringClear (name); + } + + if (targetCorkIndex == CORK_NIL || begin_sentinel == begin) + return; + + /* Fill the scope field of the function. */ + end = begin; + while (begin_sentinel <= (begin + 1)) + { + bool on_boundary = false; + if (begin < begin_sentinel || !isLuaIdentifier (*begin)) + { + if (end - begin) + { + vStringNCatS (name, begin + 1, end - begin); + int r = makeSimpleRefTag (name, + K_UNKNOWN, LUA_UNKNOWN_REFERENCED); + set_scope (targetCorkIndex, r); + targetCorkIndex = r; + vStringClear (name); + } + if (begin_sentinel <= begin && ! (*begin == ':' || *begin == '.')) + break; + on_boundary = true; + } + begin--; + + if(on_boundary) + end = begin; + } +} + +static void findLuaTags (void) +{ + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = readLineFromInputFile ()) != NULL) + { + const char *p, *q; + + if (! is_a_code_line (line)) + continue; + + p = (const char*) strstr ((const char*) line, "function"); + if (p == NULL) + continue; + + q = strchr ((const char*) line, '='); + + if (q == NULL) { + p = p + 8; /* skip the `function' word */ + + /* We expect [ \t(] */ + if (! (*p == '(' || isspace ((int)*p))) + continue; + q = strchr ((const char*) p, '('); + if (q) + extract_next_token (p, q, name); + } else if ( + (*(q+1) != '=') /* ignore `if type(v) == "function" then ...' 
*/ + && (q < p) /* ignore "function" ~= */ + ) { + p = (const char*) &line[0]; + if (p < q) + extract_prev_token (q - 1, p, name); + } + } + vStringDelete (name); +} + +extern parserDefinition* LuaParser (void) +{ + static const char* const extensions [] = { "lua", NULL }; + parserDefinition* def = parserNew ("Lua"); + def->kindTable = LuaKinds; + def->kindCount = ARRAY_SIZE (LuaKinds); + def->extensions = extensions; + def->parser = findLuaTags; + def->useCork = CORK_QUEUE; + def->requestAutomaticFQTag = true; + return def; +} diff --git a/ctags/parsers/geany_make.c b/ctags/parsers/make.c similarity index 55% rename from ctags/parsers/geany_make.c rename to ctags/parsers/make.c index d49c42108d..8f3180495f 100644 --- a/ctags/parsers/geany_make.c +++ b/ctags/parsers/make.c @@ -12,11 +12,12 @@ */ #include "general.h" /* must always come first */ -#include #include #include -#include "options.h" +#include "make.h" + +#include "kind.h" #include "parse.h" #include "read.h" #include "routines.h" @@ -24,18 +25,32 @@ #include "vstring.h" #include "xtag.h" + /* * DATA DEFINITIONS */ typedef enum { - K_MACRO, K_TARGET -} shKind; + K_MACRO, K_TARGET, K_INCLUDE, +} makeKind; + +typedef enum { + R_INCLUDE_GENERIC, + R_INCLUDE_OPTIONAL, +} makeMakefileRole; + +static roleDefinition MakeMakefileRoles [] = { + { true, "included", "included" }, + { true, "optional", "optionally included"}, +}; static kindDefinition MakeKinds [] = { { true, 'm', "macro", "macros"}, - { true, 't', "target", "targets"} + { true, 't', "target", "targets"}, + { true, 'I', "makefile", "makefiles", + .referenceOnly = true, ATTACH_ROLES(MakeMakefileRoles)}, }; + /* * FUNCTION DEFINITIONS */ @@ -91,6 +106,23 @@ static bool isSpecialTarget (vString *const name) return true; } +static void makeSimpleMakeTag (vString *const name, makeKind kind) +{ + if (!isLanguageEnabled (getInputLanguage ())) + return; + + makeSimpleTag (name, kind); +} + +static void makeSimpleMakeRefTag (const vString* const name, const 
int kind, + int roleIndex) +{ + if (!isLanguageEnabled (getInputLanguage ())) + return; + + makeSimpleRefTag (name, kind, roleIndex); +} + static void newTarget (vString *const name) { /* Ignore GNU Make's "special targets". */ @@ -98,12 +130,63 @@ static void newTarget (vString *const name) { return; } - makeSimpleTag (name, K_TARGET); + makeSimpleMakeTag (name, K_TARGET); +} + +static void newMacro (vString *const name, bool with_define_directive, bool appending) +{ + subparser *s; + + if (!appending) + makeSimpleMakeTag (name, K_MACRO); + + foreachSubparser(s, false) + { + makeSubparser *m = (makeSubparser *)s; + enterSubparser(s); + if (m->newMacroNotify) + m->newMacroNotify (m, vStringValue(name), with_define_directive, appending); + leaveSubparser(); + } +} + +static void valueFound (vString *const name) +{ + subparser *s; + foreachSubparser(s, false) + { + makeSubparser *m = (makeSubparser *)s; + enterSubparser(s); + if (m->valueNotify) + m->valueNotify (m, vStringValue (name)); + leaveSubparser(); + } } -static void newMacro (vString *const name) +static void directiveFound (vString *const name) { - makeSimpleTag (name, K_MACRO); + subparser *s; + foreachSubparser (s, false) + { + makeSubparser *m = (makeSubparser *)s; + enterSubparser(s); + if (m->directiveNotify) + m->directiveNotify (m, vStringValue (name)); + leaveSubparser(); + } +} + +static void newInclude (vString *const name, bool optional) +{ + makeSimpleMakeRefTag (name, K_INCLUDE, + optional? 
R_INCLUDE_OPTIONAL: R_INCLUDE_GENERIC); +} + +static bool isAcceptableAsInclude (vString *const name) +{ + if (strcmp (vStringValue (name), "$") == 0) + return false; + return true; } static void readIdentifier (const int first, vString *const id) @@ -128,9 +211,16 @@ static void findMakeTags (void) stringList *identifiers = stringListNew (); bool newline = true; bool in_define = false; + bool in_value = false; bool in_rule = false; bool variable_possible = true; + bool appending = false; int c; + subparser *sub; + + sub = getSubparserRunningBaseparser(); + if (sub) + chooseExclusiveSubparser (sub, NULL); while ((c = nextChar ()) != EOF) { @@ -146,6 +236,9 @@ static void findMakeTags (void) else if (c != '\n') in_rule = false; } + else if (in_value) + in_value = false; + stringListClear (identifiers); variable_possible = (bool)(!in_rule); newline = false; @@ -162,7 +255,14 @@ static void findMakeTags (void) ungetcToInputFile (c); variable_possible = (c == '='); } - else if (variable_possible && c == ':' && + else if (variable_possible && c == '+') + { + c = nextChar (); + ungetcToInputFile (c); + variable_possible = (c == '='); + appending = true; + } + else if ((! in_value) && variable_possible && c == ':' && stringListCount (identifiers) > 0) { c = nextChar (); @@ -179,9 +279,11 @@ static void findMakeTags (void) else if (variable_possible && c == '=' && stringListCount (identifiers) == 1) { - newMacro (stringListItem (identifiers, 0)); - skipLine (); + newMacro (stringListItem (identifiers, 0), false, appending); + + in_value = true; in_rule = false; + appending = false; } else if (variable_possible && isIdentifier (c)) { @@ -189,6 +291,9 @@ static void findMakeTags (void) readIdentifier (c, name); stringListAdd (identifiers, name); + if (in_value) + valueFound(name); + if (stringListCount (identifiers) == 1) { if (in_define && ! 
strcmp (vStringValue (name), "endef")) @@ -209,27 +314,67 @@ static void findMakeTags (void) if (c == '\n') ungetcToInputFile (c); vStringStripTrailing (name); - newMacro (name); + + newMacro (name, true, false); } else if (! strcmp (vStringValue (name), "export")) stringListClear (identifiers); + else if (! strcmp (vStringValue (name), "include") + || ! strcmp (vStringValue (name), "sinclude") + || ! strcmp (vStringValue (name), "-include")) + { + bool optional = (vStringValue (name)[0] == 'i')? false: true; + while (1) + { + c = skipToNonWhite (nextChar ()); + readIdentifier (c, name); + vStringStripTrailing (name); + if (isAcceptableAsInclude(name)) + newInclude (name, optional); + + /* non-space characters after readIdentifier() may + * be rejected by the function: + * e.g. + * include $* + * + * Here, remove such characters from input stream. + */ + do + c = nextChar (); + while (c != EOF && c != '\n' && (!isspace (c))); + if (c == '\n') + ungetcToInputFile (c); + + if (c == EOF || c == '\n') + break; + } + } + else + directiveFound (name); } } else variable_possible = false; } + stringListDelete (identifiers); } + extern parserDefinition* MakefileParser (void) { static const char *const patterns [] = { "[Mm]akefile", "GNUmakefile", NULL }; static const char *const extensions [] = { "mak", "mk", NULL }; + static const char *const aliases [] = { + /* the mode name in emacs */ + "makefile", + NULL }; parserDefinition* const def = parserNew ("Make"); - def->kindTable = MakeKinds; + def->kindTable = MakeKinds; def->kindCount = ARRAY_SIZE (MakeKinds); def->patterns = patterns; def->extensions = extensions; + def->aliases = aliases; def->parser = findMakeTags; return def; } diff --git a/ctags/parsers/make.h b/ctags/parsers/make.h new file mode 100644 index 0000000000..fc327ebf94 --- /dev/null +++ b/ctags/parsers/make.h @@ -0,0 +1,34 @@ +/* +* Copyright (c) 2016, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU 
General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for makefiles. +*/ + +#ifndef CTAGS_PARSER_MAKE_H +#define CTAGS_PARSER_MAKE_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include "subparser.h" +#include "vstring.h" + +typedef struct sMakeSubparser makeSubparser; + +struct sMakeSubparser { + subparser subparser; + + void (* valueNotify) (makeSubparser *s, char* name); + void (* directiveNotify) (makeSubparser *s, char* name); + void (* newMacroNotify) (makeSubparser *s, + char* name, + bool withDefineDirective, + bool appending); +}; + +#endif diff --git a/ctags/parsers/nsis.c b/ctags/parsers/nsis.c new file mode 100644 index 0000000000..8e64196b30 --- /dev/null +++ b/ctags/parsers/nsis.c @@ -0,0 +1,394 @@ +/* +* Copyright (c) 2000-2002, Darren Hiebert +* Copyright (c) 2009-2011, Enrico Tröger +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for NSIS scripts +* (https://en.wikipedia.org/wiki/Nullsoft_Scriptable_Install_System). +* +* Based on sh.c. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include <string.h> + +#include "entry.h" +#include "parse.h" +#include "read.h" +#include "vstring.h" +#include "routines.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_SECTION, + K_FUNCTION, + K_VARIABLE, + K_DEFINITION, + K_MACRO, + K_SECTION_GROUP, + K_MACRO_PARAM, + K_LANGSTR, + K_SCRIPT, +} NsisKind; + +typedef enum { + NSIS_SCRIPT_INCLUDED, +} nsisScriptRole; + +static roleDefinition NsisScriptRoles [] = { + { true, "included", "included with !include" }, +}; + +static kindDefinition NsisKinds [] = { + { true, 's', "section", "sections"}, + { true, 'f', "function", "functions"}, + { true, 'v', "variable", "variables"}, + { true, 'd', "definition", "definitions"}, + { true, 'm', "macro", "macros"}, + { true, 'S', "sectionGroup", "section groups"}, + { false, 'p', "macroparam", "macro parameters"}, + { true, 'l', "langstr", "language strings"}, + { true, 'i', "script", "NSIS scripts", + .referenceOnly = true, ATTACH_ROLES(NsisScriptRoles)}, +}; + +typedef enum { + F_LANGID, +} nsisField; + +static fieldDefinition NsisFields[] = { + { .name = "langid", + .description = "language identifier specified in (License)LangString commands", + .enabled = true }, +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static const unsigned char* skipWhitespace (const unsigned char* cp) +{ + while (isspace ((int) *cp)) + ++cp; + return cp; +} + +static const unsigned char* skipFlags (const unsigned char* cp) +{ + while (*cp == '/') + { + ++cp; + while (! isspace ((int) *cp)) + ++cp; + while (isspace ((int) *cp)) + ++cp; + } + return cp; +} + +static int makeSimpleTagWithScope(vString *name, int kindIndex, int parentCorkIndex) +{ + tagEntryInfo e; + initTagEntry (&e, vStringValue (name), kindIndex); + e.extensionFields.scopeIndex = parentCorkIndex; + return makeTagEntry (&e); +} + +#define lineStartingWith(CP,EXPECTED,EOL) \ + (strncasecmp ((const char*) CP, EXPECTED, strlen(EXPECTED)) == 0 \ + && (EOL ? 
(isspace ((int) CP [strlen(EXPECTED)]) || CP [strlen(EXPECTED)] == '\0') \ + : isspace ((int) CP [strlen(EXPECTED)]))) + +#define fillName(NAME,CP,CONDITION) \ + while (CONDITION) \ + { \ + vStringPut ((NAME), (int) *(CP)); \ + ++(CP); \ + } \ + do {} while (0) + +static const unsigned char* parseSection (const unsigned char* cp, vString *name, + int kindIndex, int scopeIndex, int *corkIndex) +{ + cp = skipWhitespace (cp); + cp = skipFlags (cp); + cp = skipWhitespace (cp); + + if (corkIndex) + *corkIndex = CORK_NIL; + + if (strpbrk((const char *)cp, "'`\"")) + { + const unsigned char terminator = *cp; + + cp++; + if (*cp == terminator) + { + /* An empty section. + * See https://nsis.sourceforge.io/Docs/Chapter4.html#sectionsettext + */ + anonGenerate (name, + (kindIndex == K_SECTION + ? "AnonymousSection" + : "AnonymousSectionGroup"), + kindIndex); + cp++; + } + else if (*cp == '\0') + return cp; + else + { + int in_escape = 0; + do + { + vStringPut (name, (int) *cp); + ++cp; + + if (*cp == '\0') + break; + + /* + * Ignore `"' in `$\"' as the terminator of quotation. + */ + if (*cp == '$' && in_escape == 0) + in_escape++; + else if (*cp == '\\' && in_escape == 1) + in_escape++; + else if (*cp == terminator && in_escape == 2) + /* + * This `"' is not a terminator of quotation; + * set in_escape to 3. + */ + in_escape++; + else + in_escape = 0; + + if ((in_escape != 3) && *cp == terminator) + { + ++cp; + break; + } + } + while (1); + } + } + else + { + while (isalnum ((int) *cp) + || *cp == '_' || *cp == '-' || *cp == '.' || *cp == '!' + || *cp == '$' || *cp == '{' || *cp == '}' || *cp == '(' || *cp == ')') + { + vStringPut (name, (int) *cp); + ++cp; + } + } + int r = makeSimpleTagWithScope (name, kindIndex, scopeIndex); + if (corkIndex) + *corkIndex = r; + if (vStringLength (name) > 0) + { + /* + * Try to capture section_index_output. 
+ */ + vStringClear (name); + cp = skipWhitespace (cp); + + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_')); + + if (vStringLength (name) > 0) + { + makeSimpleTag (name, K_DEFINITION); + vStringClear (name); + } + } + return cp; +} + +static const unsigned char* parseLangString (const unsigned char* cp, vString *name) +{ + cp = skipWhitespace (cp); + + /* `^' is not explained in the nsis reference manual. However, it is used + * in gvim. + * e.g. + * https://github.com/vim/vim/blob/3dabd718f4b2d8e09de9e2ec73832620b91c2f79/nsis/lang/english.nsi + */ + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_' || *cp == '^')); + + if (vStringLength (name) > 0) + { + int r = makeSimpleTag (name, K_LANGSTR); + if (r == CORK_NIL) + goto out; + vStringClear (name); + + cp = skipWhitespace (cp); + fillName (name, cp, ((*cp != '\0') && (!isspace ((int) *cp)))); + if (vStringLength (name) > 0) + { + attachParserFieldToCorkEntry (r, NsisFields[F_LANGID].ftype, + vStringValue (name)); + vStringClear (name); + } + } + out: + return cp; +} + +static void findNsisTags (void) +{ + int sectionGroupIndex = CORK_NIL; + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = readLineFromInputFile ()) != NULL) + { + const unsigned char* cp = line; + + while (isspace (*cp)) + cp++; + + if (*cp == '#' || *cp == ';') + continue; + + /* functions */ + if (lineStartingWith (cp, "function", false)) + { + cp += 8; + cp = skipWhitespace (cp); + + fillName (name, cp, + (isalnum ((int) *cp) || *cp == '_' || *cp == '-' || *cp == '.' 
|| *cp == '!')); + + makeSimpleTag (name, K_FUNCTION); + vStringClear (name); + } + /* variables */ + else if (lineStartingWith (cp, "var", false)) + { + cp += 3; + cp = skipWhitespace (cp); + cp = skipFlags (cp); + + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_')); + + makeSimpleTag (name, K_VARIABLE); + vStringClear (name); + } + /* section groups */ + else if (lineStartingWith (cp, "sectiongroup", false)) + { + cp += 12; + cp = parseSection (cp, name, K_SECTION_GROUP, CORK_NIL, &sectionGroupIndex); + } + else if (lineStartingWith (cp, "sectiongroupend", true)) + { + cp += 15; + sectionGroupIndex = CORK_NIL; + } + /* sections */ + else if (lineStartingWith (cp, "section", false)) + { + cp += 7; + cp = parseSection (cp, name, K_SECTION, sectionGroupIndex, NULL); + } + /* LangString */ + else if (lineStartingWith (cp, "langstring", false)) + { + cp += 10; + cp = parseLangString (cp, name); + } + /* LicenseLangString */ + else if (lineStartingWith (cp, "licenselangstring", false)) + { + cp += 17; + cp = parseLangString (cp, name); + } + /* definitions */ + else if (lineStartingWith (cp, "!define", false)) + { + cp += 7; + cp = skipWhitespace (cp); + cp = skipFlags (cp); + + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_')); + + makeSimpleTag (name, K_DEFINITION); + vStringClear (name); + } + /* macro */ + else if (lineStartingWith (cp, "!macro", false)) + { + cp += 6; + cp = skipWhitespace (cp); + cp = skipFlags (cp); + + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_')); + + int index = makeSimpleTag (name, K_MACRO); + if (vStringLength (name) > 0) + { + while (1) + { + vStringClear (name); + cp = skipWhitespace (cp); + fillName (name, cp, (isalnum ((int) *cp) || *cp == '_')); + if (vStringLength (name) == 0) + break; + makeSimpleTagWithScope (name, K_MACRO_PARAM, index); + } + } + } + /* include */ + else if (lineStartingWith (cp, "!include", false)) + { + cp += 8; + + /* !include [/NONFATAL] [/CHARSET=ACP|OEM|CP#|UTF8|UTF16LE|UTF16BE] file */ + 
cp = skipWhitespace (cp); + + /* /NONFATAL */ + cp = skipFlags (cp); + cp = skipWhitespace (cp); + + /* /CHARSET */ + cp = skipFlags (cp); + cp = skipWhitespace (cp); + + fillName (name, cp, (*cp != '\0' && *cp != ';' && *cp != '#')); + vStringStripTrailing (name); + + if (vStringLength (name) > 0) + { + makeSimpleRefTag (name, K_SCRIPT, NSIS_SCRIPT_INCLUDED); + vStringClear (name); + } + /* TODO: capture !addincludedir */ + } + } + vStringDelete (name); +} + +extern parserDefinition* NsisParser (void) +{ + static const char *const extensions [] = { + "nsi", "nsh", NULL + }; + parserDefinition* def = parserNew ("NSIS"); + def->kindTable = NsisKinds; + def->kindCount = ARRAY_SIZE (NsisKinds); + def->extensions = extensions; + def->fieldTable = NsisFields; + def->fieldCount = ARRAY_SIZE (NsisFields); + def->parser = findNsisTags; + def->useCork = CORK_QUEUE; + return def; +} diff --git a/ctags/parsers/geany_objc.c b/ctags/parsers/objc.c similarity index 74% rename from ctags/parsers/geany_objc.c rename to ctags/parsers/objc.c index 83966b8fe1..5efd852bfd 100644 --- a/ctags/parsers/geany_objc.c +++ b/ctags/parsers/objc.c @@ -16,11 +16,13 @@ #include #include "keyword.h" +#include "debug.h" #include "entry.h" #include "parse.h" -#include "options.h" #include "read.h" #include "routines.h" +#include "selectors.h" +#include "trashbox.h" #include "vstring.h" typedef enum { @@ -36,7 +38,8 @@ typedef enum { K_TYPEDEF, K_STRUCT, K_ENUM, - K_MACRO + K_MACRO, + K_CATEGORY, } objcKind; static kindDefinition ObjcKinds[] = { @@ -53,6 +56,7 @@ static kindDefinition ObjcKinds[] = { {true, 's', "struct", "A type structure"}, {true, 'e', "enum", "An enumeration"}, {true, 'M', "macro", "A preprocessor macro"}, + {true, 'C', "category", "categories"}, }; typedef enum { @@ -63,6 +67,7 @@ typedef enum { ObjcINTERFACE, ObjcPROTOCOL, ObjcENCODE, + ObjcEXTERN, ObjcSYNCHRONIZED, ObjcSELECTOR, ObjcPROPERTY, @@ -93,7 +98,11 @@ typedef enum { Tok_dpoint, /* ':' */ Tok_Sharp, /* '#' */ 
Tok_Backslash, /* '\\' */ + Tok_Asterisk, /* '*' */ + Tok_ANGLEL, /* '<' */ + Tok_ANGLER, /* '>' */ Tok_EOL, /* '\r''\n' */ + Tok_CSTRING, /* "..." */ Tok_any, Tok_EOF /* END of file */ @@ -105,6 +114,7 @@ static const keywordTable objcKeywordTable[] = { {"typedef", ObjcTYPEDEF}, {"struct", ObjcSTRUCT}, {"enum", ObjcENUM}, + {"extern", ObjcEXTERN}, {"@implementation", ObjcIMPLEMENTATION}, {"@interface", ObjcINTERFACE}, {"@protocol", ObjcPROTOCOL}, @@ -125,6 +135,24 @@ static const keywordTable objcKeywordTable[] = { {"@required", ObjcREQUIRED}, }; +typedef enum { + F_CATEGORY, + F_PROTOCOLS, +} objcField; + +static fieldDefinition ObjcFields [] = { + { + .name = "category", + .description = "category attached to the class", + .enabled = true, + }, + { + .name = "protocols", + .description = "protocols that the class (or category) confirms to", + .enabled = true, + }, +}; + static langType Lang_ObjectiveC; /*////////////////////////////////////////////////////////////////// @@ -176,12 +204,14 @@ static void eatWhiteSpace (lexingState * st) st->cp = cp; } -static void eatString (lexingState * st) +static void readCString (lexingState * st) { bool lastIsBackSlash = false; bool unfinished = true; const unsigned char *c = st->cp + 1; + vStringClear (st->name); + while (unfinished) { /* end of line should never happen. 
@@ -191,7 +221,10 @@ static void eatString (lexingState * st) else if (*c == '"' && !lastIsBackSlash) unfinished = false; else + { lastIsBackSlash = *c == '\\'; + vStringPut (st->name, (int) *c); + } c++; } @@ -282,7 +315,7 @@ static objcKeyword lex (lexingState * st) return Tok_EOL; } - if (isAlpha (*st->cp)) + if (isAlpha (*st->cp) || (*st->cp == '_')) { readIdentifier (st); retType = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC); @@ -373,14 +406,23 @@ static objcKeyword lex (lexingState * st) st->cp++; return Tok_dpoint; case '"': - eatString (st); - return Tok_any; + readCString (st); + return Tok_CSTRING; case '+': st->cp++; return Tok_PLUS; case '-': st->cp++; return Tok_MINUS; + case '*': + st->cp++; + return Tok_Asterisk; + case '<': + st->cp++; + return Tok_ANGLEL; + case '>': + st->cp++; + return Tok_ANGLER; default: st->cp++; @@ -419,6 +461,8 @@ static void parseImplemMethods (vString * const ident, objcToken what); static vString *tempName = NULL; static vString *parentName = NULL; static objcKind parentType = K_INTERFACE; +static int parentCorkIndex = CORK_NIL; +static int categoryCorkIndex = CORK_NIL; /* used to prepare tag for OCaml, just in case their is a need to * add additional information to the tag. 
*/ @@ -426,7 +470,7 @@ static void prepareTag (tagEntryInfo * tag, vString const *name, objcKind kind) { initTagEntry (tag, vStringValue (name), kind); - if (parentName != NULL) + if (vStringLength (parentName) > 0) { tag->extensionFields.scopeKindIndex = parentType; tag->extensionFields.scopeName = vStringValue (parentName); @@ -439,22 +483,39 @@ static void pushEnclosingContext (const vString * parent, objcKind type) parentType = type; } +static void pushEnclosingContextFull (const vString * parent, objcKind type, int corkIndex) +{ + pushEnclosingContext (parent, type); + parentCorkIndex = corkIndex; +} + static void popEnclosingContext (void) { vStringClear (parentName); + parentCorkIndex = CORK_NIL; +} + +static void pushCategoryContext (int category_index) +{ + categoryCorkIndex = category_index; +} + +static void popCategoryContext (void) +{ + categoryCorkIndex = CORK_NIL; } /* Used to centralise tag creation, and be able to add * more information to it in the future */ -static void addTag (vString * const ident, int kind) +static int addTag (vString * const ident, int kind) { tagEntryInfo toCreate; if (! ObjcKinds[kind].enabled) - return; + return CORK_NIL; prepareTag (&toCreate, ident, kind); - makeTagEntry (&toCreate); + return makeTagEntry (&toCreate); } static objcToken waitedToken, fallBackToken; @@ -541,24 +602,77 @@ static objcKind methodKind; static vString *fullMethodName; static vString *prevIdent; +static vString *signature; -static void parseMethodsName (vString * const ident, objcToken what) +static void tillTokenWithCapturingSignature (vString * const ident, objcToken what) { + tillToken (ident, what); + + if (what != waitedToken) + { + if (what == Tok_Asterisk) + vStringPut (signature, '*'); + else if (vStringLength (ident) > 0) + { + if (! 
(vStringLast (signature) == ',' + || vStringLast (signature) == '(' + || vStringLast (signature) == ' ')) + vStringPut (signature, ' '); + + vStringCat (signature, ident); + } + } +} + +static void parseMethodsNameCommon (vString * const ident, objcToken what, + parseNext reEnter, + parseNext nextAction) +{ + int index; + switch (what) { case Tok_PARL: toDoNext = &tillToken; - comeAfter = &parseMethodsName; + comeAfter = reEnter; waitedToken = Tok_PARR; + + if (! (vStringLength(prevIdent) == 0 + && vStringLength(fullMethodName) == 0)) + toDoNext = &tillTokenWithCapturingSignature; break; case Tok_dpoint: vStringCat (fullMethodName, prevIdent); - vStringCatS (fullMethodName, ":"); + vStringPut (fullMethodName, ':'); vStringClear (prevIdent); + + if (vStringLength (signature) > 1) + vStringPut (signature, ','); break; case ObjcIDENTIFIER: + if ((vStringLength (prevIdent) > 0 + /* "- initWithObject: o0 withAnotherObject: o1;" + Overwriting the last value of prevIdent ("o0"); + a parameter name ("o0") was stored to prevIdent, + and a part of selector("withAnotherObject") + overwrites it. + If type for the parameter specified explicitly, + the last char of signature should not be ',' nor + '('. In this case, "id" must be put as the type for + the parameter. */ + && (vStringLast (signature) == ',' + || vStringLast (signature) == '(')) + || (/* "- initWithObject: object;" + In this case no overwriting happens. + However, "id" for "object" is part + of signature. 
*/ + vStringLength (prevIdent) == 0 + && vStringLength (fullMethodName) > 0 + && vStringLast (signature) == '(')) + vStringCatS (signature, "id"); + vStringCopy (prevIdent, ident); break; @@ -567,15 +681,34 @@ static void parseMethodsName (vString * const ident, objcToken what) /* method name is not simple */ if (vStringLength (fullMethodName) != '\0') { - addTag (fullMethodName, methodKind); + index = addTag (fullMethodName, methodKind); vStringClear (fullMethodName); } else - addTag (prevIdent, methodKind); + index = addTag (prevIdent, methodKind); - toDoNext = &parseMethods; + toDoNext = nextAction; parseImplemMethods (ident, what); vStringClear (prevIdent); + + tagEntryInfo *e = getEntryInCorkQueue (index); + if (e) + { + if (vStringLast (signature) == ',') + vStringCatS (signature, "id"); + vStringPut (signature, ')'); + + e->extensionFields.signature = vStringStrdup (signature); + + vStringClear (signature); + vStringPut (signature, '('); + + tagEntryInfo *e_cat = getEntryInCorkQueue (categoryCorkIndex); + if (e_cat) + attachParserFieldToCorkEntry (index, + ObjcFields [F_CATEGORY].ftype, + e_cat->name); + } break; default: @@ -583,44 +716,34 @@ static void parseMethodsName (vString * const ident, objcToken what) } } -static void parseMethodsImplemName (vString * const ident, objcToken what) +static void parseMethodsName (vString * const ident, objcToken what) { - switch (what) - { - case Tok_PARL: - toDoNext = &tillToken; - comeAfter = &parseMethodsImplemName; - waitedToken = Tok_PARR; - break; - - case Tok_dpoint: - vStringCat (fullMethodName, prevIdent); - vStringCatS (fullMethodName, ":"); - vStringClear (prevIdent); - break; + parseMethodsNameCommon (ident, what, parseMethodsName, parseMethods); +} - case ObjcIDENTIFIER: - vStringCopy (prevIdent, ident); - break; +static void parseMethodsImplemName (vString * const ident, objcToken what) +{ + parseMethodsNameCommon (ident, what, parseMethodsImplemName, parseImplemMethods); +} - case Tok_CurlL: - case 
Tok_semi: - /* method name is not simple */ - if (vStringLength (fullMethodName) != '\0') +static void parseCategory (vString * const ident, objcToken what) +{ + if (what == ObjcIDENTIFIER) + { + tagEntryInfo *e = getEntryInCorkQueue (parentCorkIndex); + if (e) { - addTag (fullMethodName, methodKind); - vStringClear (fullMethodName); + attachParserFieldToCorkEntry (parentCorkIndex, + ObjcFields [F_CATEGORY].ftype, + vStringValue (ident)); + if (e->kindIndex == K_INTERFACE) + toDoNext = &parseMethods; + else + toDoNext = &parseImplemMethods; } - else - addTag (prevIdent, methodKind); - toDoNext = &parseImplemMethods; - parseImplemMethods (ident, what); - vStringClear (prevIdent); - break; - - default: - break; + int index = addTag (ident, K_CATEGORY); + pushCategoryContext (index); } } @@ -640,6 +763,7 @@ static void parseImplemMethods (vString * const ident, objcToken what) case ObjcEND: /* @end */ popEnclosingContext (); + popCategoryContext (); toDoNext = &globalScope; break; @@ -649,6 +773,10 @@ static void parseImplemMethods (vString * const ident, objcToken what) comeAfter = &parseImplemMethods; break; + case Tok_PARL: /* ( */ + toDoNext = &parseCategory; + break; + default: break; } @@ -681,6 +809,49 @@ static void parseProperty (vString * const ident, objcToken what) } } +static void parseInterfaceSuperclass (vString * const ident, objcToken what) +{ + tagEntryInfo *e = getEntryInCorkQueue (parentCorkIndex); + if (what == ObjcIDENTIFIER && e) + e->extensionFields.inheritance = vStringStrdup (ident); + + toDoNext = &parseMethods; +} + +static void parseInterfaceProtocolList (vString * const ident, objcToken what) +{ + static vString *protocol_list; + + if (parentCorkIndex == CORK_NIL) + { + toDoNext = &parseMethods; + return; + } + + if (protocol_list == NULL) + { + protocol_list = vStringNew (); + DEFAULT_TRASH_BOX(protocol_list, vStringDelete); + } + + if (what == ObjcIDENTIFIER) + vStringCat(protocol_list, ident); + else if (what == Tok_COMA) + vStringPut 
(protocol_list, ','); + else if (what == Tok_ANGLER) + { + attachParserFieldToCorkEntry (parentCorkIndex, + ObjcFields [F_PROTOCOLS].ftype, + vStringValue (protocol_list)); + if (categoryCorkIndex != CORK_NIL) + attachParserFieldToCorkEntry (categoryCorkIndex, + ObjcFields [F_PROTOCOLS].ftype, + vStringValue (protocol_list)); + vStringClear (protocol_list); + toDoNext = &parseMethods; + } +} + static void parseMethods (vString * const ident CTAGS_ATTR_UNUSED, objcToken what) { switch (what) @@ -701,6 +872,7 @@ static void parseMethods (vString * const ident CTAGS_ATTR_UNUSED, objcToken wha case ObjcEND: /* @end */ popEnclosingContext (); + popCategoryContext (); toDoNext = &globalScope; break; @@ -708,6 +880,18 @@ static void parseMethods (vString * const ident CTAGS_ATTR_UNUSED, objcToken wha toDoNext = &parseFields; break; + case Tok_dpoint: /* : */ + toDoNext = &parseInterfaceSuperclass; + break; + + case Tok_PARL: /* ( */ + toDoNext = &parseCategory; + break; + + case Tok_ANGLEL: /* < */ + toDoNext = &parseInterfaceProtocolList; + break; + default: break; } @@ -718,8 +902,8 @@ static void parseProtocol (vString * const ident, objcToken what) { if (what == ObjcIDENTIFIER) { - pushEnclosingContext (ident, K_PROTOCOL); - addTag (ident, K_PROTOCOL); + int index = addTag (ident, K_PROTOCOL); + pushEnclosingContextFull (ident, K_PROTOCOL, index); } toDoNext = &parseMethods; } @@ -728,8 +912,8 @@ static void parseImplementation (vString * const ident, objcToken what) { if (what == ObjcIDENTIFIER) { - addTag (ident, K_IMPLEMENTATION); - pushEnclosingContext (ident, K_IMPLEMENTATION); + int index = addTag (ident, K_IMPLEMENTATION); + pushEnclosingContextFull (ident, K_IMPLEMENTATION, index); } toDoNext = &parseImplemMethods; } @@ -738,8 +922,8 @@ static void parseInterface (vString * const ident, objcToken what) { if (what == ObjcIDENTIFIER) { - addTag (ident, K_INTERFACE); - pushEnclosingContext (ident, K_INTERFACE); + int index = addTag (ident, K_INTERFACE); + 
pushEnclosingContextFull (ident, K_INTERFACE, index); } toDoNext = &parseMethods; @@ -990,6 +1174,24 @@ static void parsePreproc (vString * const ident, objcToken what) } } +static void skipCurlL (vString * const ident, objcToken what) +{ + if (what == Tok_CurlL) + toDoNext = comeAfter; +} + +static void parseCPlusPlusCLinkage (vString * const ident, objcToken what) +{ + toDoNext = comeAfter; + + /* Linkage specification like "C" */ + if (what == Tok_CSTRING) + toDoNext = skipCurlL; + else + /* Force handle this ident in globalScope */ + globalScope (ident, what); +} + /* Handle the "strong" top levels, all 'big' declarations * happen here */ static void globalScope (vString * const ident, objcToken what) @@ -1044,6 +1246,11 @@ static void globalScope (vString * const ident, objcToken what) ignoreBalanced (ident, what); break; + case ObjcEXTERN: + comeAfter = &globalScope; + toDoNext = &parseCPlusPlusCLinkage; + break; + case ObjcEND: case ObjcPUBLIC: case ObjcPROTECTED: @@ -1068,6 +1275,7 @@ static void findObjcTags (void) tempName = vStringNew (); fullMethodName = vStringNew (); prevIdent = vStringNew (); + signature = vStringNewInit ("("); /* (Re-)initialize state variables, this might be a second file */ comeAfter = NULL; @@ -1096,10 +1304,14 @@ static void findObjcTags (void) vStringDelete (tempName); vStringDelete (fullMethodName); vStringDelete (prevIdent); + vStringDelete (signature); + signature = NULL; parentName = NULL; tempName = NULL; prevIdent = NULL; fullMethodName = NULL; + categoryCorkIndex = CORK_NIL; + parentCorkIndex = CORK_NIL; } static void objcInitialize (const langType language) @@ -1109,14 +1321,25 @@ static void objcInitialize (const langType language) extern parserDefinition *ObjcParser (void) { - static const char *const extensions[] = { "m", "h", NULL }; + static const char *const extensions[] = { "mm", "m", "h", + NULL }; + static const char *const aliases[] = { "objc", "objective-c", + NULL }; + static selectLanguage selectors[] = { 
selectByObjectiveCAndMatLabKeywords, + selectByObjectiveCKeywords, + NULL }; parserDefinition *def = parserNew ("ObjectiveC"); def->kindTable = ObjcKinds; def->kindCount = ARRAY_SIZE (ObjcKinds); def->extensions = extensions; + def->fieldTable = ObjcFields; + def->fieldCount = ARRAY_SIZE (ObjcFields); + def->aliases = aliases; def->parser = findObjcTags; def->initialize = objcInitialize; + def->selectLanguage = selectors; def->keywordTable = objcKeywordTable; def->keywordCount = ARRAY_SIZE (objcKeywordTable); + def->useCork = CORK_QUEUE; return def; } diff --git a/ctags/parsers/perl.c b/ctags/parsers/perl.c new file mode 100644 index 0000000000..9f51aa6138 --- /dev/null +++ b/ctags/parsers/perl.c @@ -0,0 +1,738 @@ +/* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for PERL language +* files. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "entry.h" +#include "perl.h" +#include "promise.h" +#include "read.h" +#include "routines.h" +#include "selectors.h" +#include "subparser.h" +#include "vstring.h" +#include "xtag.h" + +#define TRACE_PERL_C 0 +#define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf + +/* +* DATA DEFINITIONS +*/ +typedef enum PerlKindType perlKind; +typedef enum PerlModuleRoleType perlModuleRole; + +static roleDefinition PerlModuleRoles [] = { + { true, "used", "specified in `use' built-in function" }, + { true, "unused", "specified in `no' built-in function" }, +}; + +static kindDefinition PerlKinds [] = { + { true, 'c', "constant", "constants" }, + { true, 'f', "format", "formats" }, + { true, 'l', "label", "labels" }, + { true, 'p', "package", "packages" }, + { true, 's', "subroutine", "subroutines" }, + { false, 'd', "subroutineDeclaration", "subroutine declarations" }, + { false, 'M', "module", "modules", + .referenceOnly = true, ATTACH_ROLES(PerlModuleRoles)}, +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static void notifyEnteringPod () +{ + subparser *sub; + + foreachSubparser (sub, false) + { + perlSubparser *perlsub = (perlSubparser *)sub; + if (perlsub->enteringPodNotify) + { + enterSubparser (sub); + perlsub->enteringPodNotify (perlsub); + leaveSubparser (); + } + } +} + +static void notifyLeavingPod () +{ + subparser *sub; + + foreachSubparser (sub, false) + { + perlSubparser *perlsub = (perlSubparser *)sub; + if (perlsub->leavingPodNotify) + { + enterSubparser (sub); + perlsub->leavingPodNotify (perlsub); + leaveSubparser (); + } + } +} + +static void notifyFindingQuotedWord (int moduleIndex, + const char *qwd) +{ + subparser *sub; + + foreachSubparser (sub, false) + { + perlSubparser *perlsub = (perlSubparser *)sub; + if (perlsub->findingQuotedWordNotify) + { + enterSubparser (sub); + perlsub->findingQuotedWordNotify (perlsub, + moduleIndex, + qwd); + 
leaveSubparser (); + } + } +} + +static bool isIdentifier1 (int c) +{ + return (bool) (isalpha (c) || c == '_'); +} + +static bool isIdentifier (int c) +{ + return (bool) (isalnum (c) || c == '_'); +} + +static bool isPodWord (const char *word) +{ + /* Perl POD words are three to eight characters in size. We use this + * fact to find (or not find) the right side of the word and then + * perform comparisons, if necessary, of POD words of that size. + */ + size_t len; + for (len = 0; len < 9; ++len) + if ('\0' == word[len] || ' ' == word[len] || '\t' == word[len]) + break; + switch (len) { + case 3: + return 0 == strncmp(word, "end", 3) + || 0 == strncmp(word, "for", 3) + || 0 == strncmp(word, "pod", 3); + case 4: + return 0 == strncmp(word, "back", 4) + || 0 == strncmp(word, "item", 4) + || 0 == strncmp(word, "over", 4); + case 5: + return 0 == strncmp(word, "begin", 5) + || 0 == strncmp(word, "head1", 5) + || 0 == strncmp(word, "head2", 5) + || 0 == strncmp(word, "head3", 5) + || 0 == strncmp(word, "head4", 5); + case 8: + return 0 == strncmp(word, "encoding", 8); + default: + return false; + } +} + +/* + * Perl subroutine declaration may look like one of the following: + * + * sub abc; + * sub abc :attr; + * sub abc (proto); + * sub abc (proto) :attr; + * + * Note that there may be more than one attribute. Attributes may + * have things in parentheses (they look like arguments). Anything + * inside of those parentheses goes. Prototypes may contain semi-colons. + * The matching end when we encounter (outside of any parentheses) either + * a semi-colon (that'd be a declaration) or an left curly brace + * (definition). + * + * This is pretty complicated parsing (plus we all know that only perl can + * parse Perl), so we are only promising best effort here. + * + * If we can't determine what this is (due to a file ending, for example), + * we will return false. 
+ */ +static bool isSubroutineDeclaration (const unsigned char *cp) +{ + bool attr = false; + int nparens = 0; + + do { + for ( ; *cp; ++cp) { +SUB_DECL_SWITCH: + switch (*cp) { + case ':': + if (nparens) + break; + else if (true == attr) + return false; /* Invalid attribute name */ + else + attr = true; + break; + case '(': + ++nparens; + break; + case ')': + --nparens; + break; + case ' ': + case '\t': + break; + case ';': + if (!nparens) + return true; + case '{': + if (!nparens) + return false; + default: + if (attr) { + if (isIdentifier1(*cp)) { + cp++; + while (isIdentifier (*cp)) + cp++; + attr = false; + goto SUB_DECL_SWITCH; /* Instead of --cp; */ + } else { + return false; + } + } else if (nparens) { + break; + } else { + return false; + } + } + } + } while (NULL != (cp = readLineFromInputFile ())); + + return false; +} + +/* `end' points to the equal sign. Parse from right to left to get the + * identifier. Assume we're dealing with something of form \s*\w+\s*=> + */ +static void makeTagFromLeftSide (const char *begin, const char *end, + vString *name, vString *package) +{ + tagEntryInfo entry; + const char *b, *e; + if (! PerlKinds[KIND_PERL_CONSTANT].enabled) + return; + for (e = end - 1; e > begin && isspace(*e); --e) + ; + if (e < begin) + return; + for (b = e; b >= begin && isIdentifier(*b); --b) + ; + /* Identifier must be either beginning of line of have some whitespace + * on its left: + */ + if (b < begin || isspace(*b) || ',' == *b) + ++b; + else if (b != begin) + return; + if (e - b + 1 <= 0) + return; /* Left side of => has an invalid identifier. 
*/ + vStringClear(name); + vStringNCatS(name, b, e - b + 1); + initTagEntry(&entry, vStringValue(name), KIND_PERL_CONSTANT); + makeTagEntry(&entry); + if (isXtagEnabled (XTAG_QUALIFIED_TAGS) && package && vStringLength(package)) { + vStringClear(name); + vStringCopy(name, package); + vStringNCatS(name, b, e - b + 1); + initTagEntry(&entry, vStringValue(name), KIND_PERL_CONSTANT); + markTagExtraBit (&entry, XTAG_QUALIFIED_TAGS); + makeTagEntry(&entry); + } +} + +static int makeTagForModule (const char *name, int role) +{ + tagEntryInfo entry; + initRefTagEntry(&entry, name, KIND_PERL_MODULE, role); + return makeTagEntry(&entry); +} + +enum const_state { CONST_STATE_NEXT_LINE, CONST_STATE_HIT_END }; + +/* Parse a single line, find as many NAME => VALUE pairs as we can and try + * to detect the end of the hashref. + */ +static enum const_state parseConstantsFromLine (const char *cp, + vString *name, vString *package) +{ + while (1) { + const size_t sz = strcspn(cp, "#}="); + switch (cp[sz]) { + case '=': + if ('>' == cp[sz + 1]) + makeTagFromLeftSide(cp, cp + sz, name, package); + break; + case '}': /* Assume this is the end of the hashref. */ + return CONST_STATE_HIT_END; + case '\0': /* End of the line. */ + case '#': /* Assume this is a comment and thus end of the line. */ + return CONST_STATE_NEXT_LINE; + } + cp += sz + 1; + } +} + +/* Parse constants declared via hash reference, like this: + * use constant { + * A => 1, + * B => 2, + * }; + * The approach we take is simplistic, but it covers the vast majority of + * cases well. There can be some false positives. 
+ * Returns 0 if found the end of the hashref, -1 if we hit EOF + */ +static int parseConstantsFromHashRef (const unsigned char *cp, + vString *name, vString *package) +{ + while (1) { + enum const_state state = + parseConstantsFromLine((const char *) cp, name, package); + switch (state) { + case CONST_STATE_NEXT_LINE: + cp = readLineFromInputFile(); + if (cp) + break; + else + return -1; + case CONST_STATE_HIT_END: + return 0; + } + } +} + +static void parseQuotedWords(const unsigned char *cp, + vString *name, int moduleIndex) +{ + unsigned char end = *cp++; + switch (end) + { + case '[': end = ']'; break; + case '(': end = ')'; break; + case '{': end = '}'; break; + case '<': end = '>'; break; + } + + do { + while (*cp && *cp != end) + { + if (isspace(*cp)) + { + notifyFindingQuotedWord (moduleIndex, vStringValue(name)); + vStringClear(name); + cp++; + continue; + } + + if (*cp == '\\') + { + cp++; + if (*cp == '\0') + break; + } + + vStringPut(name, *cp); + cp++; + } + if (!vStringIsEmpty(name)) + notifyFindingQuotedWord (moduleIndex, vStringValue(name)); + + if (*cp == end) + break; + } while ((cp = readLineFromInputFile()) != NULL); +} + +/* Algorithm adapted from from GNU etags. + * Perl support by Bart Robinson + * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/ + */ +static void findPerlTags (void) +{ + vString *name = vStringNew (); + vString *package = NULL; + bool skipPodDoc = false; + const unsigned char *line; + unsigned long podStart = 0UL; + + /* A pod area can be after __END__ marker. + * Perl parser itself doesn't need to parse the area + * after the marker. Parsing the area is needed only + * if Perl parser runs Pod parser as a guest. + * This variable is set true when it is needed. + */ + bool parse_only_pod_area = false; + + /* Core modules AutoLoader and SelfLoader support delayed compilation + * by allowing Perl code that follows __END__ and __DATA__ tokens, + * respectively. 
When we detect that one of these modules is used + * in the file, we continue processing even after we see the + * corresponding token that would usually terminate parsing of the + * file. + */ + enum { + RESPECT_END = (1 << 0), + RESPECT_DATA = (1 << 1), + } respect_token = RESPECT_END | RESPECT_DATA; + + while ((line = readLineFromInputFile ()) != NULL) + { + bool spaceRequired = false; + bool qualified = false; + const unsigned char *cp = line; + perlKind kind = KIND_PERL_NONE; + tagEntryInfo e; + + if (skipPodDoc) + { + if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0) + { + skipPodDoc = false; + if (podStart != 0UL) + { + notifyLeavingPod (); + makePromise ("Pod", + podStart, 0, + getInputLineNumber(), 0, + getSourceLineNumber()); + podStart = 0UL; + } + } + continue; + } + else if (line [0] == '=') + { + skipPodDoc = isPodWord ((const char*)line + 1); + if (skipPodDoc) + { + podStart = getSourceLineNumber (); + notifyEnteringPod (); + } + continue; + } + else if (strcmp ((const char*) line, "__DATA__") == 0) + { + if (respect_token & RESPECT_DATA) + { + if (isXtagEnabled (XTAG_GUEST)) + parse_only_pod_area = true; + else + break; + } + else + continue; + } + else if (strcmp ((const char*) line, "__END__") == 0) + { + if (respect_token & RESPECT_END) + { + if (isXtagEnabled (XTAG_GUEST)) + parse_only_pod_area = true; + else + break; + } + else + continue; + } + else if (line [0] == '#') + continue; + + if (parse_only_pod_area) + continue; + + while (isspace (*cp)) + cp++; + + if (strncmp((const char*) cp, "sub", (size_t) 3) == 0) + { + TRACE("this looks like a sub\n"); + cp += 3; + kind = KIND_PERL_SUBROUTINE; + spaceRequired = true; + qualified = true; + } + else if (strncmp((const char*) cp, "use", (size_t) 3) == 0) + { + cp += 3; + if (!isspace(*cp)) + continue; + while (*cp && isspace (*cp)) + ++cp; + if (strncmp((const char*) cp, "AutoLoader", (size_t) 10) == 0) { + respect_token &= ~RESPECT_END; + makeTagForModule("AutoLoader", 
ROLE_PERL_MODULE_USED); + continue; + } + if (strncmp((const char*) cp, "SelfLoader", (size_t) 10) == 0) { + respect_token &= ~RESPECT_DATA; + makeTagForModule("SelfLoader", ROLE_PERL_MODULE_USED); + continue; + } + + vString *module = NULL; + while (isalnum(*cp) || *cp == ':' || *cp == '.') { + if (!module) + module = vStringNew(); + vStringPut(module, *cp); + ++cp; + } + if (!module) + continue; + + int q = makeTagForModule(vStringValue(module), ROLE_PERL_MODULE_USED); + bool isConstant = (strcmp(vStringValue(module), "constant") == 0); + vStringDelete(module); + if (!isConstant) + { + while (isspace(*cp)) + cp++; + if (strncmp("qw", (const char *)cp, 2) != 0) + continue; + cp += 2; + while (isspace(*cp)) + cp++; + if (*cp == '\0') + continue; + vStringClear (name); + + parseQuotedWords(cp, name, q); + vStringClear (name); + continue; + } + + /* Skip up to the first non-space character, skipping empty + * and comment lines. + */ + while (isspace(*cp)) + cp++; + while (!*cp || '#' == *cp) { + cp = readLineFromInputFile (); + if (!cp) + goto END_MAIN_WHILE; + while (isspace (*cp)) + cp++; + } + if ('{' == *cp) { + ++cp; + if (0 == parseConstantsFromHashRef(cp, name, package)) { + vStringClear(name); + continue; + } else + goto END_MAIN_WHILE; + } + kind = KIND_PERL_CONSTANT; + spaceRequired = false; + qualified = true; + } + else if (strncmp((const char*) cp, "no", (size_t) 2) == 0 && isspace(cp[2])) + { + cp += 3; + while (isspace (*cp)) + cp++; + vString *module = NULL; + while (isalnum(*cp) || *cp == ':' || *cp == '.') { + if (!module) + module = vStringNew(); + vStringPut(module, *cp); + ++cp; + } + if (module) { + makeTagForModule(vStringValue(module), ROLE_PERL_MODULE_UNUSED); + vStringDelete(module); + } + continue; + } + else if (strncmp((const char*) cp, "package", (size_t) 7) == 0 && + ('\0' == cp[7] || isspace(cp[7]))) + { + cp += 7; + while (isspace (*cp)) + cp++; + while (!*cp || '#' == *cp) { + cp = readLineFromInputFile (); + if (!cp) + goto 
END_MAIN_WHILE; + while (isspace (*cp)) + cp++; + } + if (package == NULL) + package = vStringNew (); + else + vStringClear (package); + const unsigned char *const first = cp; + while (*cp && (int) *cp != ';' && !isspace ((int) *cp)) + { + vStringPut (package, (int) *cp); + cp++; + } + vStringCatS (package, "::"); + + cp = first; /* Rewind */ + kind = KIND_PERL_PACKAGE; + spaceRequired = false; + qualified = true; + } + else if (strncmp((const char*) cp, "format", (size_t) 6) == 0) + { + cp += 6; + kind = KIND_PERL_FORMAT; + spaceRequired = true; + qualified = true; + } + else + { + if (isIdentifier1 (*cp)) + { + const unsigned char *p = cp; + while (isIdentifier (*p)) + ++p; + while (isspace (*p)) + ++p; + if ((int) *p == ':' && (int) *(p + 1) != ':') + kind = KIND_PERL_LABEL; + } + } + if (kind != KIND_PERL_NONE) + { + TRACE("cp0: %s\n", (const char *) cp); + if (spaceRequired && *cp && !isspace (*cp)) + continue; + + TRACE("cp1: %s\n", (const char *) cp); + while (isspace (*cp)) + cp++; + + while (!*cp || '#' == *cp) { /* Gobble up empty lines + and comments */ + cp = readLineFromInputFile (); + if (!cp) + goto END_MAIN_WHILE; + while (isspace (*cp)) + cp++; + } + + while (isIdentifier (*cp) || (KIND_PERL_PACKAGE == kind && ':' == *cp)) + { + vStringPut (name, (int) *cp); + cp++; + } + + if (KIND_PERL_FORMAT == kind && + vStringLength (name) == 0 && /* cp did not advance */ + '=' == *cp) + { + /* format's name is optional. If it's omitted, 'STDOUT' + is assumed. */ + vStringCatS (name, "STDOUT"); + } + + TRACE("name: %s\n", vStringValue (name)); + + if (0 == vStringLength(name)) { + vStringClear(name); + continue; + } + + if (KIND_PERL_SUBROUTINE == kind) + { + /* + * isSubroutineDeclaration() may consume several lines. So + * we record line positions. 
+ */ + initTagEntry(&e, vStringValue(name), KIND_GHOST_INDEX); + + if (true == isSubroutineDeclaration(cp)) { + if (true == PerlKinds[KIND_PERL_SUBROUTINE_DECLARATION].enabled) { + kind = KIND_PERL_SUBROUTINE_DECLARATION; + } else { + vStringClear (name); + continue; + } + } else if (! PerlKinds[kind].enabled) { + continue; + } + + e.kindIndex = kind; + + makeTagEntry(&e); + + if (isXtagEnabled (XTAG_QUALIFIED_TAGS) && qualified && + package != NULL && vStringLength (package) > 0) + { + vString *const qualifiedName = vStringNew (); + vStringCopy (qualifiedName, package); + vStringCat (qualifiedName, name); + e.name = vStringValue(qualifiedName); + markTagExtraBit (&e, XTAG_QUALIFIED_TAGS); + makeTagEntry(&e); + vStringDelete (qualifiedName); + } + } else if (vStringLength (name) > 0) + { + makeSimpleTag (name, kind); + if (isXtagEnabled(XTAG_QUALIFIED_TAGS) && qualified && + KIND_PERL_PACKAGE != kind && + package != NULL && vStringLength (package) > 0) + { + tagEntryInfo fqe; + vString *const qualifiedName = vStringNew (); + vStringCopy (qualifiedName, package); + vStringCat (qualifiedName, name); + initTagEntry (&fqe, vStringValue (qualifiedName), kind); + markTagExtraBit (&fqe, XTAG_QUALIFIED_TAGS); + makeTagEntry (&fqe); + vStringDelete (qualifiedName); + } + } + vStringClear (name); + } + } + +END_MAIN_WHILE: + vStringDelete (name); + if (package != NULL) + vStringDelete (package); +} + +extern parserDefinition* PerlParser (void) +{ + static const char *const extensions [] = { "pl", "pm", "ph", "plx", "perl", NULL }; + static const char *const aliases [] = { + /* cperl is an Emacs' editing mode for Perl source code */ + "cperl", + NULL }; + static selectLanguage selectors [] = { selectByPickingPerlVersion, + NULL }; + parserDefinition* def = parserNew ("Perl"); + def->kindTable = PerlKinds; + def->kindCount = ARRAY_SIZE (PerlKinds); + def->extensions = extensions; + def->parser = findPerlTags; + def->selectLanguage = selectors; + def->aliases = aliases; + + /* 
Subparsers need this */ + def->useCork = CORK_QUEUE; + + return def; +} diff --git a/ctags/parsers/perl.h b/ctags/parsers/perl.h new file mode 100644 index 0000000000..a100d54dad --- /dev/null +++ b/ctags/parsers/perl.h @@ -0,0 +1,44 @@ +/* +* Copyright (c) 2019, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +*/ +#ifndef CTAGS_PARSER_PERL_H +#define CTAGS_PARSER_PERL_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include "subparser.h" + +typedef struct sPerlSubparser perlSubparser; + +enum PerlModuleRoleType { + ROLE_PERL_MODULE_USED, + ROLE_PERL_MODULE_UNUSED, +}; + +enum PerlKindType { + KIND_PERL_NONE = -1, + KIND_PERL_CONSTANT, + KIND_PERL_FORMAT, + KIND_PERL_LABEL, + KIND_PERL_PACKAGE, + KIND_PERL_SUBROUTINE, + KIND_PERL_SUBROUTINE_DECLARATION, + KIND_PERL_MODULE, +}; + +struct sPerlSubparser { + subparser subparser; + void (* findingQuotedWordNotify) (perlSubparser *, + int moduleIndex, + const char *qwd); + void (* enteringPodNotify) (perlSubparser *); + void (* leavingPodNotify) (perlSubparser *); +}; + +#endif /* CTAGS_PARSER_PERL_H */ diff --git a/ctags/parsers/geany_php.c b/ctags/parsers/php.c similarity index 62% rename from ctags/parsers/geany_php.c rename to ctags/parsers/php.c index 5278a59e02..dd754913e0 100644 --- a/ctags/parsers/geany_php.c +++ b/ctags/parsers/php.c @@ -6,12 +6,17 @@ * * This module contains code for generating tags for the PHP scripting * language. 
+* +* The language reference: http://php.net/manual/en/langref.php */ /* * INCLUDE FILES */ #include "general.h" /* must always come first */ + +#include + #include "parse.h" #include "read.h" #include "vstring.h" @@ -19,10 +24,11 @@ #include "entry.h" #include "routines.h" #include "debug.h" +#include "objpool.h" - -#define SCOPE_SEPARATOR "::" - +#define isIdentChar(c) (isalnum (c) || (c) == '_' || (c) >= 0x80) +#define newToken() (objPoolGet (TokenPool)) +#define deleteToken(t) (objPoolPut (TokenPool, (t))) enum { KEYWORD_abstract, @@ -110,18 +116,35 @@ typedef enum { K_NAMESPACE, K_TRAIT, K_VARIABLE, + K_ALIAS, COUNT_KIND } phpKind; +#define NAMESPACE_SEPARATOR "\\" +static scopeSeparator PhpGenericSeparators [] = { + { K_NAMESPACE , NAMESPACE_SEPARATOR }, + { KIND_WILDCARD_INDEX, "::" }, +}; + static kindDefinition PhpKinds[COUNT_KIND] = { - { true, 'c', "class", "classes" }, - { true, 'd', "define", "constant definitions" }, - { true, 'f', "function", "functions" }, - { true, 'i', "interface", "interfaces" }, - { false, 'l', "local", "local variables" }, - { true, 'n', "namespace", "namespaces" }, - { true, 't', "trait", "traits" }, - { true, 'v', "variable", "variables" } + { true, 'c', "class", "classes", + ATTACH_SEPARATORS(PhpGenericSeparators) }, + { true, 'd', "define", "constant definitions", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 'f', "function", "functions", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 'i', "interface", "interfaces", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { false, 'l', "local", "local variables", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 'n', "namespace", "namespaces", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 't', "trait", "traits", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 'v', "variable", "variables", + ATTACH_SEPARATORS(PhpGenericSeparators)}, + { true, 'a', "alias", "aliases", + ATTACH_SEPARATORS(PhpGenericSeparators)}, }; static const keywordTable 
PhpKeywordTable[] = { @@ -209,7 +232,9 @@ typedef enum eTokenType { TOKEN_OPEN_SQUARE, TOKEN_CLOSE_SQUARE, TOKEN_VARIABLE, - TOKEN_AMPERSAND + TOKEN_AMPERSAND, + TOKEN_BACKSLASH, + TOKEN_QMARK, } tokenType; typedef struct { @@ -220,12 +245,16 @@ typedef struct { unsigned long lineNumber; MIOPos filePosition; int parentKind; /* -1 if none */ + bool anonymous; /* true if token specifies + * an anonymous class */ } tokenInfo; static langType Lang_php; static langType Lang_zephir; static bool InPhp = false; /* whether we are between <?php and ?> */ +/* whether the next token may be a keyword, e.g. not after "::" or "->" */ +static bool MayBeKeyword = true; /* current statement details */ static struct { @@ -234,8 +263,20 @@ static struct { } CurrentStatement; /* Current namespace */ -static vString *CurrentNamespace; +static vString *CurrentNamesapce; +/* Cache variable to build the tag's scope. It has no real meaning outside + * of initPhpEntry()'s scope. */ +static vString *FullScope; +/* The class name specified at "extends" keyword in the current class + * definition. Used to resolve "parent" in return type.
*/ +static vString *ParentClass; +static objPool *TokenPool = NULL; + +static const char *phpScopeSeparatorFor (int kind, int upperScopeKind) +{ + return scopeSeparatorFor (getInputLanguage(), kind, upperScopeKind); +} static const char *accessToString (const accessType access) { @@ -266,18 +307,15 @@ static const char *implToString (const implType impl) static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token, const phpKind kind, const accessType access) { - static vString *fullScope = NULL; int parentKind = -1; - if (fullScope == NULL) - fullScope = vStringNew (); - else - vStringClear (fullScope); + vStringClear (FullScope); - if (vStringLength (CurrentNamespace) > 0) + if (vStringLength (CurrentNamesapce) > 0) { - vStringCopy (fullScope, CurrentNamespace); parentKind = K_NAMESPACE; + vStringCat (FullScope, CurrentNamesapce); + } initTagEntry (e, vStringValue (token->string), kind); @@ -290,31 +328,82 @@ static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token, if (vStringLength (token->scope) > 0) { parentKind = token->parentKind; - if (vStringLength (fullScope) > 0) - vStringCatS (fullScope, SCOPE_SEPARATOR); - vStringCat (fullScope, token->scope); + + if (vStringLength (FullScope) > 0) + { + const char* sep; + + sep = phpScopeSeparatorFor (parentKind, + K_NAMESPACE); + vStringCatS (FullScope, sep); + } + vStringCat (FullScope, token->scope); } - if (vStringLength (fullScope) > 0) + if (vStringLength (FullScope) > 0) { Assert (parentKind >= 0); e->extensionFields.scopeKindIndex = parentKind; - e->extensionFields.scopeName = vStringValue (fullScope); + e->extensionFields.scopeName = vStringValue (FullScope); } + + if (token->anonymous) + markTagExtraBit (e, XTAG_ANONYMOUS); } -static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind, - const accessType access) +static void makePhpTagEntry (tagEntryInfo *const e) +{ + makeTagEntry (e); + makeQualifiedTagEntry (e); +} + +static void fillTypeRefField 
(tagEntryInfo *const e, + const vString *const rtype, const tokenInfo *const token) +{ + if ((vStringLength (rtype) == 4) + && (strcmp (vStringValue (rtype), "self") == 0) + && vStringLength (token->scope) > 0) + { + if (token->parentKind == -1) + e->extensionFields.typeRef [0] = "unknown"; + else + e->extensionFields.typeRef [0] = PhpKinds [token->parentKind].name; + e->extensionFields.typeRef [1] = vStringValue (token->scope); + } + else if ((vStringLength (rtype) == 6) + && (strcmp (vStringValue (rtype), "parent") == 0) + && (ParentClass && vStringLength (ParentClass) > 0)) + { + e->extensionFields.typeRef [0] = "class"; + e->extensionFields.typeRef [1] = vStringValue (ParentClass); + } + else + { + e->extensionFields.typeRef [0] = "unknown"; + e->extensionFields.typeRef [1] = vStringValue (rtype); + } +} + +static void makeTypedPhpTag (const tokenInfo *const token, const phpKind kind, + const accessType access, vString* typeName) { if (PhpKinds[kind].enabled) { tagEntryInfo e; initPhpEntry (&e, token, kind, access); - makeTagEntry (&e); + if (typeName) + fillTypeRefField (&e, typeName, token); + makePhpTagEntry (&e); } } +static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind, + const accessType access) +{ + makeTypedPhpTag (token, kind, access, NULL); +} + static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name) { if (PhpKinds[K_NAMESPACE].enabled) @@ -326,7 +415,7 @@ static void makeNamespacePhpTag (const tokenInfo *const token, const vString *co e.lineNumber = token->lineNumber; e.filePosition = token->filePosition; - makeTagEntry (&e); + makePhpTagEntry (&e); } } @@ -344,12 +433,13 @@ static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const toke if (vStringLength (inheritance) > 0) e.extensionFields.inheritance = vStringValue (inheritance); - makeTagEntry (&e); + makePhpTagEntry (&e); } } static void makeFunctionTag (const tokenInfo *const token, const vString *const arglist, + 
const vString *const rtype, const accessType access, const implType impl) { if (PhpKinds[K_FUNCTION].enabled) @@ -362,28 +452,39 @@ static void makeFunctionTag (const tokenInfo *const token, e.extensionFields.implementation = implToString (impl); if (arglist) e.extensionFields.signature = vStringValue (arglist); + if (rtype) + fillTypeRefField (&e, rtype, token); - makeTagEntry (&e); + makePhpTagEntry (&e); } } -static tokenInfo *newToken (void) +static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED) +{ + tokenInfo *token = xMalloc (1, tokenInfo); + + token->string = vStringNew (); + token->scope = vStringNew (); + return token; +} + +static void clearPoolToken (void *data) { - tokenInfo *const token = xMalloc (1, tokenInfo); + tokenInfo *token = data; token->type = TOKEN_UNDEFINED; token->keyword = KEYWORD_NONE; - token->string = vStringNew (); - token->scope = vStringNew (); token->lineNumber = getInputLineNumber (); token->filePosition = getInputFilePosition (); token->parentKind = -1; - - return token; + token->anonymous = false; + vStringClear (token->string); + vStringClear (token->scope); } -static void deleteToken (tokenInfo *const token) +static void deletePoolToken (void *data) { + tokenInfo *token = data; vStringDelete (token->string); vStringDelete (token->scope); eFree (token); @@ -400,6 +501,7 @@ static void copyToken (tokenInfo *const dest, const tokenInfo *const src, dest->parentKind = src->parentKind; if (scope) vStringCopy(dest->scope, src->scope); + dest->anonymous = src->anonymous; } #if 0 @@ -469,16 +571,28 @@ static void printToken (const tokenInfo *const token) } #endif -static void addToScope (tokenInfo *const token, const vString *const extra) +static void addToScope (tokenInfo *const token, const vString *const extra, + int kindOfUpperScope) { if (vStringLength (token->scope) > 0) - vStringCatS (token->scope, SCOPE_SEPARATOR); - vStringCatS (token->scope, vStringValue (extra)); + { + const char* sep; + + sep = 
phpScopeSeparatorFor(token->parentKind, + kindOfUpperScope); + vStringCatS (token->scope, sep); + } + vStringCat (token->scope, extra); } -static bool isIdentChar (const int c) +static int skipToCharacter (const int c) { - return (isalnum (c) || c == '_' || c >= 0x80); + int d; + do + { + d = getcFromInputFile (); + } while (d != EOF && d != c); + return d; } static void parseString (vString *const string, const int delimiter) @@ -496,26 +610,69 @@ static void parseString (vString *const string, const int delimiter) } } -/* reads an HereDoc or a NowDoc (the part after the <<<). +/* Strips up to @indent_len leading blanks (spaces or tabs) from each line in + * @string to get the correct string value for an indented heredoc (PHP 7.3+). + * Stripping never crosses a line end, so blank or short lines keep their + * newline; invalid (under-indented) lines only lose the blanks they have. */ +static void stripHeredocIndent (vString *const string, size_t indent_len) +{ + char *str = vStringValue (string); + const char *src = str; + char *dst = str; + bool at_line_start = true; + + /* compact in place: src scans the input, dst receives what is kept */ + while (*src) + { + if (at_line_start) + { + size_t i; + + /* drop at most indent_len blanks, never going past the line end */ + for (i = 0; i < indent_len && (*src == ' ' || *src == '\t'); i++) + src++; + if (! *src) + break; + } + /* CRLF is already normalized as LF */ + at_line_start = (*src == '\r' || *src == '\n'); + *dst++ = *src++; + } + /* everything from dst on is stale input left over by the compaction */ + vStringTruncate (string, (size_t) (dst - str)); +} + +/* reads a PHP >= 7.3 HereDoc or a NowDoc (the part after the <<<). * <<<[ \t]*(ID|'ID'|"ID") * ... - * ID;? + * [ \t]*ID[^:indent-char:];? * * note that: * 1) starting ID must be immediately followed by a newline; * 2) closing ID is the same as opening one; - * 3) closing ID must be immediately followed by a newline or a semicolon - * then a newline.
+ * 3) closing ID must not be immediately followed by an identifier character; + * 4) optional indentation of the closing ID is stripped from body lines, + * which lines must have the exact same prefix indentation. * - * Example of a *single* valid heredoc: + * This is slightly relaxed from PHP < 7.3, where the closing ID had to be the + * only thing on its line, with the only exception of a semicolon right after + * the ID. + * + * Example of a single valid heredoc: * <<< FOO * something * something else - * FOO this is not an end - * FOO; this isn't either - * FOO; # neither this is + * FOO_this is not an end * FOO; * # previous line was the end, but the semicolon wasn't required + * + * Another example using indentation and more code after the heredoc: + * << 0) + stripHeredocIndent (string, indent_len); + break; } /* if we are here it wasn't a delimiter, so put everything in the * string */ - vStringPut (string, (char) nl); vStringNCatS (string, delimiter, len); - if (extra != EOF) - vStringPut (string, (char) extra); } } while (c != EOF); @@ -628,16 +774,6 @@ static void parseIdentifier (vString *const string, const int firstChar) ungetcToInputFile (c); } -static keywordId analyzeToken (vString *const name, langType language) -{ - vString *keyword = vStringNew (); - keywordId result; - vStringCopyToLower (keyword, name); - result = lookupKeyword (vStringValue (keyword), language); - vStringDelete (keyword); - return result; -} - static bool isSpace (int c) { return (c == '\t' || c == ' ' || c == '\v' || @@ -652,7 +788,7 @@ static int skipWhitespaces (int c) } /* - * + * * This is ugly, but the whole "