From d440a81166e62d41e2b06d555b471db5fdd8d80d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= Date: Mon, 1 Aug 2016 23:48:06 +0200 Subject: [PATCH] Add tagRegexTable to parserDefinition and use it to define regex parsers Also whitespace-format regexes so they match uctags. --- ctags/main/parse.c | 32 ++++++++++++--- ctags/main/parse.h | 22 ++++++++++- ctags/parsers/actionscript.c | 62 +++++++++++++++--------------- ctags/parsers/cobol.c | 38 +++++++++--------- ctags/parsers/html.c | 49 ++++++++++------------- src/tagmanager/tm_ctags_wrappers.c | 2 +- 6 files changed, 121 insertions(+), 84 deletions(-) diff --git a/ctags/main/parse.c b/ctags/main/parse.c index ac76596591..bb068c7e43 100644 --- a/ctags/main/parse.c +++ b/ctags/main/parse.c @@ -324,6 +324,7 @@ static void initializeParserOne (langType lang) parserDefinition *const parser = LanguageTable [lang]; installKeywordTable (lang); + installTagRegexTable (lang); if ((parser->initialize != NULL) && (parser->initialized == FALSE)) { @@ -355,7 +356,7 @@ extern void initializeParsing (void) boolean accepted = FALSE; if (def->name == NULL || def->name[0] == '\0') error (FATAL, "parser definition must contain name\n"); - else if (def->regex) + else if (def->method & METHOD_REGEX) { #ifdef HAVE_REGEX def->parser = findRegexTags; @@ -414,7 +415,7 @@ extern void processLanguageDefineOption (const char *const option, def->parser = findRegexTags; def->currentPatterns = stringListNew (); def->currentExtensions = stringListNew (); - def->regex = TRUE; + def->method = METHOD_NOT_CRAFTED; def->enabled = TRUE; def->id = i; LanguageTable = xRealloc (LanguageTable, i + 1, parserDefinition*); @@ -482,7 +483,7 @@ extern void processLegacyKindOption (const char *const parameter) static void disableLanguageKinds (const langType language) { - if (LanguageTable [language]->regex) + if (LanguageTable [language]->method & METHOD_REGEX) #ifdef HAVE_REGEX disableRegexKinds (language); #else @@ -500,7 +501,7 @@ static 
boolean enableLanguageKind (const langType language, const int kind, const boolean mode) { boolean result = FALSE; - if (LanguageTable [language]->regex) + if (LanguageTable [language]->method & METHOD_REGEX) #ifdef HAVE_REGEX result = enableRegexKind (language, kind, mode); #else @@ -578,7 +579,7 @@ static void printLangugageKindOptions (const langType language) const parserDefinition* lang; Assert (0 <= language && language < (int) LanguageCount); lang = LanguageTable [language]; - if (lang->kinds != NULL || lang->regex) + if (lang->kinds != NULL || lang->method & METHOD_NOT_CRAFTED) { unsigned int i; char* const name = newLowerString (lang->name); @@ -690,6 +691,27 @@ extern boolean parseFile (const char *const fileName) return tagFileResized; } +extern void installTagRegexTable (const langType language) +{ + parserDefinition* lang; + unsigned int i; + + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + + + if ((lang->tagRegexTable != NULL) && (lang->tagRegexInstalled == FALSE)) + { + for (i = 0; i < lang->tagRegexCount; ++i) + addTagRegex (language, + lang->tagRegexTable [i].regex, + lang->tagRegexTable [i].name, + lang->tagRegexTable [i].kinds, + lang->tagRegexTable [i].flags); + lang->tagRegexInstalled = TRUE; + } +} + extern void installKeywordTable (const langType language) { parserDefinition* lang; diff --git a/ctags/main/parse.h b/ctags/main/parse.h index b1b7622923..6b1ba50321 100644 --- a/ctags/main/parse.h +++ b/ctags/main/parse.h @@ -36,6 +36,22 @@ typedef boolean (*rescanParser) (const unsigned int passCount); typedef void (*parserInitialize) (langType language); typedef int (*tagEntryFunction) (const tagEntryInfo *const tag, void *user_data); +typedef enum { + METHOD_NOT_CRAFTED = 1 << 0, + METHOD_REGEX = 1 << 1, + METHOD_XCMD = 1 << 2, + METHOD_XCMD_AVAILABLE = 1 << 3, + METHOD_XPATH = 1 << 4, +} parsingMethod; + +typedef struct { + const char *const regex; + const char* const name; + const char* const 
kinds; + const char *const flags; + boolean *disabled; +} tagRegexTable; + typedef struct { const char *name; const int id; @@ -52,17 +68,20 @@ typedef struct { parserInitialize initialize; /* initialization routine, if needed */ simpleParser parser; /* simple parser (common case) */ rescanParser parser2; /* rescanning parser (unusual case) */ - boolean regex; /* is this a regex parser? */ + unsigned int method; /* See PARSE__... definitions above */ /* used internally */ unsigned int id; /* id assigned to language */ boolean enabled; /* currently enabled? */ stringList* currentPatterns; /* current list of file name patterns */ stringList* currentExtensions; /* current list of extensions */ + tagRegexTable *tagRegexTable; + unsigned int tagRegexCount; const keywordTable *keywordTable; unsigned int keywordCount; unsigned int initialized:1; /* initialize() is called or not */ + unsigned int tagRegexInstalled:1; /* tagRegexTable is installed or not. */ unsigned int keywordInstalled:1; /* keywordTable is installed or not. 
*/ } parserDefinition; @@ -124,6 +143,7 @@ extern boolean matchRegex (const vString* const line, const langType language); #endif extern boolean processRegexOption (const char *const option, const char *const parameter); extern void addLanguageRegex (const langType language, const char* const regex); +extern void installTagRegexTable (const langType language); extern void addTagRegex (const langType language, const char* const regex, const char* const name, const char* const kinds, const char* const flags); extern void addCallbackRegex (const langType language, const char* const regex, const char* flags, const regexCallback callback); extern void disableRegexKinds (const langType UNUSED language); diff --git a/ctags/parsers/actionscript.c b/ctags/parsers/actionscript.c index a018f1962b..cbec0e4f3d 100644 --- a/ctags/parsers/actionscript.c +++ b/ctags/parsers/actionscript.c @@ -36,64 +36,64 @@ */ #include "general.h" /* must always come first */ #include "parse.h" +#include "routines.h" -/* -* FUNCTION DEFINITIONS -* -*/ - -static void installActionScriptRegex (const langType language) -{ +static tagRegexTable actionscriptTagRegexTable[] = { /* Functions */ - addTagRegex (language, "^[ \t]*[(private|public|static|protected|internal|final|override)( \t)]*function[ \t]+([A-Za-z0-9_]+)[ \t]*\\(([^\\{]*)", - "\\1 (\\2", "f,function,functions,methods", NULL); + {"^[ \t]*[(private|public|static|protected|internal|final|override)( \t)]*function[ \t]+([A-Za-z0-9_]+)[ \t]*\\(([^\\{]*)", + "\\1 (\\2", "f,function,functions,methods", NULL}, /* Getters and setters */ - addTagRegex (language, "^[ \t]*[(public|static|internal|final|override)( \t)]*function[ \t]+(set|get)[ \t]+([A-Za-z0-9_]+)[ \t]*\\(", - "\\2 \\1", "l,field,fields", NULL); + {"^[ \t]*[(public|static|internal|final|override)( \t)]*function[ \t]+(set|get)[ \t]+([A-Za-z0-9_]+)[ \t]*\\(", + "\\2 \\1", "l,field,fields", NULL}, /* Variables */ - addTagRegex (language, "^[ 
\t]*[(private|public|static|protected|internal)( \t)]*var[ \t]+([A-Za-z0-9_]+)([ \t]*\\:[ \t]*([A-Za-z0-9_]+))*[ \t]*", - "\\1 \\: \\3", "v,variable,variables", NULL); + {"^[ \t]*[(private|public|static|protected|internal)( \t)]*var[ \t]+([A-Za-z0-9_]+)([ \t]*\\:[ \t]*([A-Za-z0-9_]+))*[ \t]*", + "\\1 \\: \\3", "v,variable,variables", NULL}, /* Constants */ - addTagRegex (language, "^[ \t]*[(private|public|static|protected|internal)( \t)]*const[ \t]+([A-Za-z0-9_]+)([ \t]*\\:[ \t]*([A-Za-z0-9_]+))*[ \t]*", - "\\1 : \\3", "m,macro,macros", NULL); + {"^[ \t]*[(private|public|static|protected|internal)( \t)]*const[ \t]+([A-Za-z0-9_]+)([ \t]*\\:[ \t]*([A-Za-z0-9_]+))*[ \t]*", + "\\1 : \\3", "m,macro,macros", NULL}, /* Classes */ - addTagRegex (language, "^[ \t]*[(private|public|static|dynamic|final|internal)( \t)]*class[ \t]+([A-Za-z0-9_]+)[ \t]*([^\\{]*)", - "\\1 (\\2)", "c,class,classes", NULL); + {"^[ \t]*[(private|public|static|dynamic|final|internal)( \t)]*class[ \t]+([A-Za-z0-9_]+)[ \t]*([^\\{]*)", + "\\1 (\\2)", "c,class,classes", NULL}, /* Interfaces */ - addTagRegex (language, "^[ \t]*[(private|public|static|dynamic|final|internal)( \t)]*interface[ \t]+([A-Za-z0-9_]+)[ \t]*([^\\{]*)", - "\\1 (\\2)", "i,interface,interfaces", NULL); + {"^[ \t]*[(private|public|static|dynamic|final|internal)( \t)]*interface[ \t]+([A-Za-z0-9_]+)[ \t]*([^\\{]*)", + "\\1 (\\2)", "i,interface,interfaces", NULL}, /* Packages */ - addTagRegex (language, "^[ \t]*[(private|public|static)( \t)]*package[ \t]+([A-Za-z0-9_.]+)[ \t]*", - "\\1", "p,package", NULL); + {"^[ \t]*[(private|public|static)( \t)]*package[ \t]+([A-Za-z0-9_.]+)[ \t]*", + "\\1", "p,package", NULL}, /* Notes */ - addTagRegex (language, "\\/\\/[ \t]*(NOTE|note|Note)[ \t]*\\:*(.*)", - "\\2", "o,other", NULL); + {"\\/\\/[ \t]*(NOTE|note|Note)[ \t]*\\:*(.*)", + "\\2", "o,other"}, /* Todos */ - addTagRegex (language, "\\/\\/[ \t]*(TODO|todo|ToDo|Todo)[ \t]*\\:*(.*)", - "\\2", "o,other", NULL); + {"\\/\\/[ 
\t]*(TODO|todo|ToDo|Todo)[ \t]*\\:*(.*)", + "\\2", "o,other"}, /* Prototypes (Put this in for AS1 compatibility...) */ - addTagRegex (language, ".*\\.prototype\\.([A-Za-z0-9 ]+)[ \t]*\\=([ \t]*)function( [ \t]?)*\\(", - "\\1", "r,prototype", NULL); -} + {".*\\.prototype\\.([A-Za-z0-9 ]+)[ \t]*\\=([ \t]*)function( [ \t]?)*\\(", + "\\1", "r,prototype"} +}; + +/* +* FUNCTION DEFINITIONS +* +*/ /* Create parser definition structure */ extern parserDefinition* ActionScriptParser (void) - { static const char *const extensions [] = { "as", NULL }; parserDefinition *const def = parserNew ("ActionScript"); def->extensions = extensions; - def->initialize = installActionScriptRegex; - def->regex = TRUE; + def->tagRegexTable = actionscriptTagRegexTable; + def->tagRegexCount = ARRAY_SIZE (actionscriptTagRegexTable); + def->method = METHOD_NOT_CRAFTED|METHOD_REGEX; return def; } diff --git a/ctags/parsers/cobol.c b/ctags/parsers/cobol.c index b86467b4d7..9a5d9b0a23 100644 --- a/ctags/parsers/cobol.c +++ b/ctags/parsers/cobol.c @@ -15,34 +15,36 @@ #include "parse.h" #include "routines.h" +static tagRegexTable cobolTagRegexTable[] = { + {"^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)[ \t]+(" + "BLANK|OCCURS|IS|JUST|PIC|REDEFINES|RENAMES|SIGN|SYNC|USAGE|VALUE" + ")", "\\1", + "d,data,data items", "i"}, + {"^[ \t]*[FSR]D[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", "\\1", + "f,file,file descriptions (FD, SD, RD)", "i"}, + {"^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", "\\1", + "g,group,group items", "i"}, + {"^[ \t]*([A-Z0-9][A-Z0-9-]*)\\.", "\\1", + "p,paragraph,paragraphs", "i"}, + {"^[ \t]*PROGRAM-ID\\.[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", "\\1", + "P,program,program ids", "i"}, + {"^[ \t]*([A-Z0-9][A-Z0-9-]*)[ \t]+SECTION\\.", "\\1", + "s,section,sections", "i"}, +}; + /* * FUNCTION DEFINITIONS */ -static void installCobolRegex (const langType language) -{ - addTagRegex (language, "^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)[ \t]+(BLANK|OCCURS|IS|JUST|PIC|REDEFINES|RENAMES|SIGN|SYNC|USAGE|VALUE)", - "\\1", 
"d,data,data items", "i"); - addTagRegex (language, "^[ \t]*[FSR]D[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", - "\\1", "f,file,file descriptions (FD, SD, RD)", "i"); - addTagRegex (language, "^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", - "\\1", "g,group,group items", "i"); - addTagRegex (language, "^[ \t]*([A-Z0-9][A-Z0-9-]*)\\.", - "\\1", "p,paragraph,paragraphs", "i"); - addTagRegex (language, "^[ \t]*PROGRAM-ID\\.[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", - "\\1", "P,program,program ids", "i"); - addTagRegex (language, "^[ \t]*([A-Z0-9][A-Z0-9-]*)[ \t]+SECTION\\.", - "\\1", "s,section,sections", "i"); -} - extern parserDefinition* CobolParser (void) { static const char *const extensions [] = { "cbl", "cob", "cpy", "CBL", "COB", NULL }; parserDefinition* def = parserNew ("Cobol"); def->extensions = extensions; - def->initialize = installCobolRegex; - def->regex = TRUE; + def->tagRegexTable = cobolTagRegexTable; + def->tagRegexCount = ARRAY_SIZE (cobolTagRegexTable); + def->method = METHOD_NOT_CRAFTED|METHOD_REGEX; return def; } diff --git a/ctags/parsers/html.c b/ctags/parsers/html.c index ec2123ab84..44decec237 100644 --- a/ctags/parsers/html.c +++ b/ctags/parsers/html.c @@ -15,24 +15,15 @@ #include "parse.h" #include "routines.h" -/* -* FUNCTION DEFINITIONS -*/ - -static void installHtmlRegex (const langType language) -{ +static tagRegexTable htmlTagRegexTable [] = { #define POSSIBLE_ATTRIBUTES "([ \t]+[a-z]+=\"?[^>\"]*\"?)*" - addTagRegex (language, - "\"]+)\"?" - POSSIBLE_ATTRIBUTES - "[ \t]*>", - "\\2", "a,anchor,named anchors", "i"); - - addTagRegex (language, "^[ \t]*function[ \t]*([A-Za-z0-9_]+)[ \t]*\\(", - "\\1", "f,function,JavaScript functions", NULL); + {"\"]+)\"?" POSSIBLE_ATTRIBUTES + "[ \t]*>", "\\2", + "a,anchor,named anchors", "i"}, + {"^[ \t]*function[ \t]*([A-Za-z0-9_]+)[ \t]*\\(", "\\1", + "f,function,JavaScript functions", NULL}, /* the following matches headings with tags inside like *

<h1><a href="#id109"><i>Some Text</i></a></h1>

@@ -43,18 +34,19 @@ static void installHtmlRegex (const langType language) #define INNER_HEADING \ ATTRS ">" SPACES "(<" ATTRS ">" SPACES ")*([^<]+).*" - addTagRegex (language, - "<h1" INNER_HEADING "</h1>", - "\\2", "n,namespace,H1 heading", "i"); + {"<h1" INNER_HEADING "</h1>", "\\2", + "n,namespace,H1 heading", "i"}, - addTagRegex (language, - "<h2" INNER_HEADING "</h2>", - "\\2", "c,class,H2 heading", "i"); + {"<h2" INNER_HEADING "</h2>", "\\2", + "c,class,H2 heading", "i"}, - addTagRegex (language, - "<h3" INNER_HEADING "</h3>", - "\\2", "v,variable,H3 heading", "i"); -} + {"<h3" INNER_HEADING "</h3>", "\\2", + "v,variable,H3 heading", "i"}, +}; + +/* +* FUNCTION DEFINITIONS +*/ /* Create parser definition structure */ extern parserDefinition* HtmlParser (void) @@ -62,8 +54,9 @@ extern parserDefinition* HtmlParser (void) static const char *const extensions [] = { "htm", "html", NULL }; parserDefinition *const def = parserNew ("HTML"); def->extensions = extensions; - def->initialize = installHtmlRegex; - def->regex = TRUE; + def->tagRegexTable = htmlTagRegexTable; + def->tagRegexCount = ARRAY_SIZE (htmlTagRegexTable); + def->method = METHOD_NOT_CRAFTED|METHOD_REGEX; return def; } diff --git a/src/tagmanager/tm_ctags_wrappers.c b/src/tagmanager/tm_ctags_wrappers.c index 385ba35860..378033da84 100644 --- a/src/tagmanager/tm_ctags_wrappers.c +++ b/src/tagmanager/tm_ctags_wrappers.c @@ -161,7 +161,7 @@ gchar tm_ctags_get_kind_from_name(const gchar *name, TMParserType lang) gboolean tm_ctags_is_using_regex_parser(TMParserType lang) { - return LanguageTable[lang]->regex; + return LanguageTable[lang]->method & METHOD_REGEX; }