Skip to content

Commit

Permalink
Add tagRegexTable to parserDefinition and use it to define regex parsers
Browse files Browse the repository at this point in the history
Also whitespace-format regexes so they match uctags.
  • Loading branch information
techee committed Aug 1, 2016
1 parent 1b32ac5 commit d440a81
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 84 deletions.
32 changes: 27 additions & 5 deletions ctags/main/parse.c
Expand Up @@ -324,6 +324,7 @@ static void initializeParserOne (langType lang)
parserDefinition *const parser = LanguageTable [lang];

installKeywordTable (lang);
installTagRegexTable (lang);

if ((parser->initialize != NULL) && (parser->initialized == FALSE))
{
Expand Down Expand Up @@ -355,7 +356,7 @@ extern void initializeParsing (void)
boolean accepted = FALSE;
if (def->name == NULL || def->name[0] == '\0')
error (FATAL, "parser definition must contain name\n");
else if (def->regex)
else if (def->method & METHOD_REGEX)
{
#ifdef HAVE_REGEX
def->parser = findRegexTags;
Expand Down Expand Up @@ -414,7 +415,7 @@ extern void processLanguageDefineOption (const char *const option,
def->parser = findRegexTags;
def->currentPatterns = stringListNew ();
def->currentExtensions = stringListNew ();
def->regex = TRUE;
def->method = METHOD_NOT_CRAFTED;
def->enabled = TRUE;
def->id = i;
LanguageTable = xRealloc (LanguageTable, i + 1, parserDefinition*);
Expand Down Expand Up @@ -482,7 +483,7 @@ extern void processLegacyKindOption (const char *const parameter)

static void disableLanguageKinds (const langType language)
{
if (LanguageTable [language]->regex)
if (LanguageTable [language]->method & METHOD_REGEX)
#ifdef HAVE_REGEX
disableRegexKinds (language);
#else
Expand All @@ -500,7 +501,7 @@ static boolean enableLanguageKind (const langType language,
const int kind, const boolean mode)
{
boolean result = FALSE;
if (LanguageTable [language]->regex)
if (LanguageTable [language]->method & METHOD_REGEX)
#ifdef HAVE_REGEX
result = enableRegexKind (language, kind, mode);
#else
Expand Down Expand Up @@ -578,7 +579,7 @@ static void printLangugageKindOptions (const langType language)
const parserDefinition* lang;
Assert (0 <= language && language < (int) LanguageCount);
lang = LanguageTable [language];
if (lang->kinds != NULL || lang->regex)
if (lang->kinds != NULL || lang->method & METHOD_NOT_CRAFTED)
{
unsigned int i;
char* const name = newLowerString (lang->name);
Expand Down Expand Up @@ -690,6 +691,27 @@ extern boolean parseFile (const char *const fileName)
return tagFileResized;
}

extern void installTagRegexTable (const langType language)
{
parserDefinition* lang;
unsigned int i;

Assert (0 <= language && language < (int) LanguageCount);
lang = LanguageTable [language];


if ((lang->tagRegexTable != NULL) && (lang->tagRegexInstalled == FALSE))
{
for (i = 0; i < lang->tagRegexCount; ++i)
addTagRegex (language,
lang->tagRegexTable [i].regex,
lang->tagRegexTable [i].name,
lang->tagRegexTable [i].kinds,
lang->tagRegexTable [i].flags);
lang->tagRegexInstalled = TRUE;
}
}

extern void installKeywordTable (const langType language)
{
parserDefinition* lang;
Expand Down
22 changes: 21 additions & 1 deletion ctags/main/parse.h
Expand Up @@ -36,6 +36,22 @@ typedef boolean (*rescanParser) (const unsigned int passCount);
typedef void (*parserInitialize) (langType language);
typedef int (*tagEntryFunction) (const tagEntryInfo *const tag, void *user_data);

/*
 * Bit flags describing how a parser produces its tags. The flags are OR-ed
 * together and stored in parserDefinition.method, e.g.
 * METHOD_NOT_CRAFTED|METHOD_REGEX for the table-driven regex parsers.
 */
typedef enum {
	/* NOTE(review): presumably "not a hand-written parser"; it is set for
	 * regex-table parsers and for languages created by
	 * processLanguageDefineOption() -- confirm the intended meaning. */
	METHOD_NOT_CRAFTED    = 0x01,
	/* Tags come from regex patterns; initializeParsing() selects
	 * findRegexTags as the parser when this bit is set. */
	METHOD_REGEX          = 0x02,
	/* NOTE(review): the three flags below are not tested anywhere in this
	 * change; presumably reserved for external-command and XPath based
	 * parsers -- confirm before relying on them. */
	METHOD_XCMD           = 0x04,
	METHOD_XCMD_AVAILABLE = 0x08,
	METHOD_XPATH          = 0x10
} parsingMethod;

/*
 * One row of a parser's static regex table. installTagRegexTable() passes
 * the first four members, in this order, to addTagRegex().
 */
typedef struct {
	const char *const regex;	/* pattern, passed as addTagRegex()'s regex argument */
	const char *const name;		/* passed as addTagRegex()'s name argument */
	const char *const kinds;	/* passed as addTagRegex()'s kinds argument */
	const char *const flags;	/* passed as addTagRegex()'s flags argument; may be NULL */
	/* NOTE(review): not read by installTagRegexTable(); presumably lets a
	 * single entry be switched off -- confirm against the regex code. */
	boolean *disabled;
} tagRegexTable;

typedef struct {
const char *name;
const int id;
Expand All @@ -52,17 +68,20 @@ typedef struct {
parserInitialize initialize; /* initialization routine, if needed */
simpleParser parser; /* simple parser (common case) */
rescanParser parser2; /* rescanning parser (unusual case) */
boolean regex; /* is this a regex parser? */
unsigned int method; /* bitwise OR of METHOD_... flags (see parsingMethod above) */

/* used internally */
unsigned int id; /* id assigned to language */
boolean enabled; /* currently enabled? */
stringList* currentPatterns; /* current list of file name patterns */
stringList* currentExtensions; /* current list of extensions */
tagRegexTable *tagRegexTable;
unsigned int tagRegexCount;
const keywordTable *keywordTable;
unsigned int keywordCount;

unsigned int initialized:1; /* initialize() is called or not */
unsigned int tagRegexInstalled:1; /* tagRegexTable is installed or not. */
unsigned int keywordInstalled:1; /* keywordTable is installed or not. */
} parserDefinition;

Expand Down Expand Up @@ -124,6 +143,7 @@ extern boolean matchRegex (const vString* const line, const langType language);
#endif
extern boolean processRegexOption (const char *const option, const char *const parameter);
extern void addLanguageRegex (const langType language, const char* const regex);
extern void installTagRegexTable (const langType language);
extern void addTagRegex (const langType language, const char* const regex, const char* const name, const char* const kinds, const char* const flags);
extern void addCallbackRegex (const langType language, const char* const regex, const char* flags, const regexCallback callback);
extern void disableRegexKinds (const langType UNUSED language);
Expand Down
62 changes: 31 additions & 31 deletions ctags/parsers/actionscript.c
Expand Up @@ -36,64 +36,64 @@
*/
#include "general.h" /* must always come first */
#include "parse.h"
#include "routines.h"

/*
* FUNCTION DEFINITIONS
*
*/

static void installActionScriptRegex (const langType language)
{
static tagRegexTable actionscriptTagRegexTable[] = {
/* Functions */
addTagRegex (language, "^[ \t]*[(private|public|static|protected|internal|final|override)( \t)]*function[ \t]+([A-Za-z0-9_]+)[ \t]*\\(([^\\{]*)",
"\\1 (\\2", "f,function,functions,methods", NULL);
{"^[ \t]*[(private|public|static|protected|internal|final|override)( \t)]*function[ \t]+([A-Za-z0-9_]+)[ \t]*\\(([^\\{]*)",
"\\1 (\\2", "f,function,functions,methods", NULL},

/* Getters and setters */
addTagRegex (language, "^[ \t]*[(public|static|internal|final|override)( \t)]*function[ \t]+(set|get)[ \t]+([A-Za-z0-9_]+)[ \t]*\\(",
"\\2 \\1", "l,field,fields", NULL);
{"^[ \t]*[(public|static|internal|final|override)( \t)]*function[ \t]+(set|get)[ \t]+([A-Za-z0-9_]+)[ \t]*\\(",
"\\2 \\1", "l,field,fields", NULL},

/* Variables */
addTagRegex (language, "^[ \t]*[(private|public|static|protected|internal)( \t)]*var[ \t]+([A-Za-z0-9_]+)([ \t]*\\:[ \t]*([A-Za-z0-9_]+))*[ \t]*",
"\\1 \\: \\3", "v,variable,variables", NULL);
{"^[ \t]*[(private|public|static|protected|internal)( \t)]*var[ \t]+([A-Za-z0-9_]+)([ \t]*\\:[ \t]*([A-Za-z0-9_]+))*[ \t]*",
"\\1 \\: \\3", "v,variable,variables", NULL},

/* Constants */
addTagRegex (language, "^[ \t]*[(private|public|static|protected|internal)( \t)]*const[ \t]+([A-Za-z0-9_]+)([ \t]*\\:[ \t]*([A-Za-z0-9_]+))*[ \t]*",
"\\1 : \\3", "m,macro,macros", NULL);
{"^[ \t]*[(private|public|static|protected|internal)( \t)]*const[ \t]+([A-Za-z0-9_]+)([ \t]*\\:[ \t]*([A-Za-z0-9_]+))*[ \t]*",
"\\1 : \\3", "m,macro,macros", NULL},

/* Classes */
addTagRegex (language, "^[ \t]*[(private|public|static|dynamic|final|internal)( \t)]*class[ \t]+([A-Za-z0-9_]+)[ \t]*([^\\{]*)",
"\\1 (\\2)", "c,class,classes", NULL);
{"^[ \t]*[(private|public|static|dynamic|final|internal)( \t)]*class[ \t]+([A-Za-z0-9_]+)[ \t]*([^\\{]*)",
"\\1 (\\2)", "c,class,classes", NULL},

/* Interfaces */
addTagRegex (language, "^[ \t]*[(private|public|static|dynamic|final|internal)( \t)]*interface[ \t]+([A-Za-z0-9_]+)[ \t]*([^\\{]*)",
"\\1 (\\2)", "i,interface,interfaces", NULL);
{"^[ \t]*[(private|public|static|dynamic|final|internal)( \t)]*interface[ \t]+([A-Za-z0-9_]+)[ \t]*([^\\{]*)",
"\\1 (\\2)", "i,interface,interfaces", NULL},

/* Packages */
addTagRegex (language, "^[ \t]*[(private|public|static)( \t)]*package[ \t]+([A-Za-z0-9_.]+)[ \t]*",
"\\1", "p,package", NULL);
{"^[ \t]*[(private|public|static)( \t)]*package[ \t]+([A-Za-z0-9_.]+)[ \t]*",
"\\1", "p,package", NULL},

/* Notes */
addTagRegex (language, "\\/\\/[ \t]*(NOTE|note|Note)[ \t]*\\:*(.*)",
"\\2", "o,other", NULL);
{"\\/\\/[ \t]*(NOTE|note|Note)[ \t]*\\:*(.*)",
"\\2", "o,other"},

/* Todos */
addTagRegex (language, "\\/\\/[ \t]*(TODO|todo|ToDo|Todo)[ \t]*\\:*(.*)",
"\\2", "o,other", NULL);
{"\\/\\/[ \t]*(TODO|todo|ToDo|Todo)[ \t]*\\:*(.*)",
"\\2", "o,other"},

/* Prototypes (Put this in for AS1 compatibility...) */
addTagRegex (language, ".*\\.prototype\\.([A-Za-z0-9 ]+)[ \t]*\\=([ \t]*)function( [ \t]?)*\\(",
"\\1", "r,prototype", NULL);
}
{".*\\.prototype\\.([A-Za-z0-9 ]+)[ \t]*\\=([ \t]*)function( [ \t]?)*\\(",
"\\1", "r,prototype"}
};

/*
* FUNCTION DEFINITIONS
*
*/

/* Create parser definition structure */
extern parserDefinition* ActionScriptParser (void)

{
static const char *const extensions [] = { "as", NULL };
parserDefinition *const def = parserNew ("ActionScript");
def->extensions = extensions;
def->initialize = installActionScriptRegex;
def->regex = TRUE;
def->tagRegexTable = actionscriptTagRegexTable;
def->tagRegexCount = ARRAY_SIZE (actionscriptTagRegexTable);
def->method = METHOD_NOT_CRAFTED|METHOD_REGEX;
return def;
}

38 changes: 20 additions & 18 deletions ctags/parsers/cobol.c
Expand Up @@ -15,34 +15,36 @@
#include "parse.h"
#include "routines.h"

/*
 * Regex tag definitions for the Cobol parser, attached to the parser
 * definition through def->tagRegexTable in CobolParser().
 *
 * Each entry lists, in member order: the pattern, the tag name template,
 * the kind specification and the regex flags. The trailing `disabled`
 * member is omitted from every initializer and is therefore
 * zero-initialized (NULL).
 *
 * NOTE(review): the kind strings look like "letter,name,description" and the
 * flag "i" presumably requests case-insensitive matching -- confirm against
 * addTagRegex().
 */
static tagRegexTable cobolTagRegexTable[] = {
/* data items: level number, item name, then one of the listed clause keywords */
{"^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)[ \t]+("
"BLANK|OCCURS|IS|JUST|PIC|REDEFINES|RENAMES|SIGN|SYNC|USAGE|VALUE"
")", "\\1",
"d,data,data items", "i"},
/* file descriptions: FD, SD or RD followed by a name and a period */
{"^[ \t]*[FSR]D[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", "\\1",
"f,file,file descriptions (FD, SD, RD)", "i"},
/* group items: level number and name terminated directly by a period */
{"^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", "\\1",
"g,group,group items", "i"},
/* paragraphs: a bare name terminated by a period */
{"^[ \t]*([A-Z0-9][A-Z0-9-]*)\\.", "\\1",
"p,paragraph,paragraphs", "i"},
/* program ids: name following PROGRAM-ID. */
{"^[ \t]*PROGRAM-ID\\.[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", "\\1",
"P,program,program ids", "i"},
/* sections: name followed by SECTION. */
{"^[ \t]*([A-Z0-9][A-Z0-9-]*)[ \t]+SECTION\\.", "\\1",
"s,section,sections", "i"},
};

/*
* FUNCTION DEFINITIONS
*/

static void installCobolRegex (const langType language)
{
addTagRegex (language, "^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)[ \t]+(BLANK|OCCURS|IS|JUST|PIC|REDEFINES|RENAMES|SIGN|SYNC|USAGE|VALUE)",
"\\1", "d,data,data items", "i");
addTagRegex (language, "^[ \t]*[FSR]D[ \t]+([A-Z0-9][A-Z0-9-]*)\\.",
"\\1", "f,file,file descriptions (FD, SD, RD)", "i");
addTagRegex (language, "^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)\\.",
"\\1", "g,group,group items", "i");
addTagRegex (language, "^[ \t]*([A-Z0-9][A-Z0-9-]*)\\.",
"\\1", "p,paragraph,paragraphs", "i");
addTagRegex (language, "^[ \t]*PROGRAM-ID\\.[ \t]+([A-Z0-9][A-Z0-9-]*)\\.",
"\\1", "P,program,program ids", "i");
addTagRegex (language, "^[ \t]*([A-Z0-9][A-Z0-9-]*)[ \t]+SECTION\\.",
"\\1", "s,section,sections", "i");
}

extern parserDefinition* CobolParser (void)
{
static const char *const extensions [] = {
"cbl", "cob", "cpy", "CBL", "COB", NULL };
parserDefinition* def = parserNew ("Cobol");
def->extensions = extensions;
def->initialize = installCobolRegex;
def->regex = TRUE;
def->tagRegexTable = cobolTagRegexTable;
def->tagRegexCount = ARRAY_SIZE (cobolTagRegexTable);
def->method = METHOD_NOT_CRAFTED|METHOD_REGEX;
return def;
}

Expand Down
49 changes: 21 additions & 28 deletions ctags/parsers/html.c
Expand Up @@ -15,24 +15,15 @@
#include "parse.h"
#include "routines.h"

/*
* FUNCTION DEFINITIONS
*/

static void installHtmlRegex (const langType language)
{
static tagRegexTable htmlTagRegexTable [] = {
#define POSSIBLE_ATTRIBUTES "([ \t]+[a-z]+=\"?[^>\"]*\"?)*"

addTagRegex (language,
"<a"
POSSIBLE_ATTRIBUTES
"[ \t]+name=\"?([^>\"]+)\"?"
POSSIBLE_ATTRIBUTES
"[ \t]*>",
"\\2", "a,anchor,named anchors", "i");

addTagRegex (language, "^[ \t]*function[ \t]*([A-Za-z0-9_]+)[ \t]*\\(",
"\\1", "f,function,JavaScript functions", NULL);
{"<a"
POSSIBLE_ATTRIBUTES "[ \t]+name=\"?([^>\"]+)\"?" POSSIBLE_ATTRIBUTES
"[ \t]*>", "\\2",
"a,anchor,named anchors", "i"},
{"^[ \t]*function[ \t]*([A-Za-z0-9_]+)[ \t]*\\(", "\\1",
"f,function,JavaScript functions", NULL},

/* the following matches headings with tags inside like
* <h1><a href="#id109"><i>Some Text</i></a></h1>
Expand All @@ -43,27 +34,29 @@ static void installHtmlRegex (const langType language)
#define INNER_HEADING \
ATTRS ">" SPACES "(<" ATTRS ">" SPACES ")*([^<]+).*"

addTagRegex (language,
"<h1" INNER_HEADING "</h1>",
"\\2", "n,namespace,H1 heading", "i");
{"<h1" INNER_HEADING "</h1>", "\\2",
"n,namespace,H1 heading", "i"},

addTagRegex (language,
"<h2" INNER_HEADING "</h2>",
"\\2", "c,class,H2 heading", "i");
{"<h2" INNER_HEADING "</h2>", "\\2",
"c,class,H2 heading", "i"},

addTagRegex (language,
"<h3" INNER_HEADING "</h3>",
"\\2", "v,variable,H3 heading", "i");
}
{"<h3" INNER_HEADING "</h3>", "\\2",
"v,variable,H3 heading", "i"},
};

/*
* FUNCTION DEFINITIONS
*/

/* Create parser definition structure */
extern parserDefinition* HtmlParser (void)
{
static const char *const extensions [] = { "htm", "html", NULL };
parserDefinition *const def = parserNew ("HTML");
def->extensions = extensions;
def->initialize = installHtmlRegex;
def->regex = TRUE;
def->tagRegexTable = htmlTagRegexTable;
def->tagRegexCount = ARRAY_SIZE (htmlTagRegexTable);
def->method = METHOD_NOT_CRAFTED|METHOD_REGEX;
return def;
}

Expand Down
2 changes: 1 addition & 1 deletion src/tagmanager/tm_ctags_wrappers.c
Expand Up @@ -161,7 +161,7 @@ gchar tm_ctags_get_kind_from_name(const gchar *name, TMParserType lang)

gboolean tm_ctags_is_using_regex_parser(TMParserType lang)
{
return LanguageTable[lang]->regex;
return LanguageTable[lang]->method & METHOD_REGEX;
}


Expand Down

0 comments on commit d440a81

Please sign in to comment.