diff --git a/ctags/Makefile.am b/ctags/Makefile.am index ee2e283102..a06490168b 100644 --- a/ctags/Makefile.am +++ b/ctags/Makefile.am @@ -82,6 +82,7 @@ parsers = \ parsers/geany_matlab.c \ parsers/nsis.c \ parsers/objc.c \ + parsers/ocaml.c \ parsers/pascal.c \ parsers/perl.c \ parsers/perl.h \ diff --git a/ctags/parsers/ocaml.c b/ctags/parsers/ocaml.c new file mode 100644 index 0000000000..d4af643d96 --- /dev/null +++ b/ctags/parsers/ocaml.c @@ -0,0 +1,2094 @@ +/* +* Copyright (c) 2009, Vincent Berthoux +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for Objective Caml +* language files. +*/ +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "debug.h" +#include "entry.h" +#include "keyword.h" +#include "options.h" +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +#define OCAML_MAX_STACK_SIZE 256 + +typedef enum { + K_CLASS, /* OCaml class, relatively rare */ + K_METHOD, /* class method */ + K_MODULE, /* OCaml module OR functor */ + K_VARIABLE, + K_VAL, + K_TYPE, /* name of an OCaml type */ + K_FUNCTION, + K_CONSTRUCTOR, /* Constructor of a sum type */ + K_RECORDFIELD, + K_EXCEPTION, +} ocamlKind; + +static kindDefinition OcamlKinds[] = { + {true, 'c', "class", "classes"}, + {true, 'm', "method", "Object's method"}, + {true, 'M', "module", "Module or functor"}, + {true, 'v', "var", "Global variable"}, + {true, 'p', "val", "Signature item"}, + {true, 't', "type", "Type name"}, + {true, 'f', "function", "A function"}, + {true, 'C', "Constructor", "A constructor"}, + {true, 'r', "RecordField", "A 'structure' field"}, + {true, 'e', "Exception", "An exception"}, +}; + +typedef enum { + OcaKEYWORD_and, + OcaKEYWORD_begin, + OcaKEYWORD_class, + OcaKEYWORD_do, + OcaKEYWORD_done, + OcaKEYWORD_else, + OcaKEYWORD_end, + OcaKEYWORD_exception, + OcaKEYWORD_for, + OcaKEYWORD_functor, + OcaKEYWORD_fun, + OcaKEYWORD_function, + OcaKEYWORD_if, + OcaKEYWORD_in, + OcaKEYWORD_let, + OcaKEYWORD_value, + OcaKEYWORD_match, + OcaKEYWORD_method, + OcaKEYWORD_module, + OcaKEYWORD_mutable, + OcaKEYWORD_object, + OcaKEYWORD_of, + OcaKEYWORD_rec, + OcaKEYWORD_sig, + OcaKEYWORD_struct, + OcaKEYWORD_then, + OcaKEYWORD_try, + OcaKEYWORD_type, + OcaKEYWORD_val, + OcaKEYWORD_virtual, + OcaKEYWORD_while, + OcaKEYWORD_with, + + OcaIDENTIFIER, + Tok_PARL, /* '(' */ + Tok_PARR, /* ')' */ + Tok_BRL, /* '[' */ + Tok_BRR, /* ']' */ + Tok_CurlL, /* '{' */ + Tok_CurlR, /* '}' */ + Tok_Prime, /* '\'' */ + Tok_Pipe, /* '|' */ + Tok_EQ, /* '=' */ + Tok_Val, /* string/number/poo */ + Tok_Op, /* any operator recognized by the language */ + Tok_semi, /* ';' */ + Tok_comma, /* ',' */ + Tok_To, /* '->' */ + Tok_Of, /* ':' */ + Tok_Sharp, /* '#' */ + Tok_Backslash, /* '\\' */ + + Tok_EOF /* END of file */ +} ocamlKeyword; + +typedef struct sOcaKeywordDesc { + const char *name; + ocamlKeyword id; +} ocaKeywordDesc; + +typedef ocamlKeyword ocaToken; + +static const keywordTable OcamlKeywordTable[] = { + { "and" , OcaKEYWORD_and }, + { "begin" , OcaKEYWORD_begin }, + { "class" , OcaKEYWORD_class }, + { "do" , OcaKEYWORD_do }, + { "done" , OcaKEYWORD_done }, + { "else" , OcaKEYWORD_else }, + { "end" , OcaKEYWORD_end }, + { "exception" , OcaKEYWORD_exception }, + { "for" , OcaKEYWORD_for }, + { "fun" , OcaKEYWORD_fun }, + { "function" , OcaKEYWORD_fun }, + { "functor" , OcaKEYWORD_functor }, + { "if" , OcaKEYWORD_if }, + { "in" , OcaKEYWORD_in }, + { "let" , OcaKEYWORD_let }, + { "match" , OcaKEYWORD_match }, + { "method" , OcaKEYWORD_method }, + { "module" , OcaKEYWORD_module }, + { "mutable" , OcaKEYWORD_mutable }, + { "object" , OcaKEYWORD_object }, + { "of" , OcaKEYWORD_of }, + { "rec" , OcaKEYWORD_rec }, + { "sig" , OcaKEYWORD_sig }, + { "struct" , OcaKEYWORD_struct }, + { "then" , OcaKEYWORD_then }, + { "try" , OcaKEYWORD_try }, + { "type" , OcaKEYWORD_type }, + { "val" , OcaKEYWORD_val }, + { "value" , OcaKEYWORD_value }, /* just to handle revised syntax */ + { "virtual" , OcaKEYWORD_virtual }, + { "while" , OcaKEYWORD_while }, + { "with" , OcaKEYWORD_with }, + + { "or" , Tok_Op }, + { "mod " , Tok_Op }, + { "land " , Tok_Op }, + { "lor " , Tok_Op }, + { "lxor " , Tok_Op }, + { "lsl " , Tok_Op }, + { "lsr " , Tok_Op }, + { "asr" , Tok_Op }, + { "->" , Tok_To }, + { ":" , Tok_Of }, + { "true" , Tok_Val }, + { "false" , Tok_Val } +}; + +static langType Lang_Ocaml; + +static bool exportLocalInfo = false; + +/*////////////////////////////////////////////////////////////////// +//// lexingInit */ +typedef struct _lexingState { + vString *name; /* current parsed identifier/operator */ + const unsigned char *cp; /* position in stream */ +} lexingState; + +/* array of the size of all possible value for a char */ +static bool isOperator[1 << (8 * sizeof (char))] = { false }; + +/* definition of all the operator in OCaml, + * /!\ certain operator get special treatment + * in regards of their role in OCaml grammar : + * '|' ':' '=' '~' and '?' */ +static void initOperatorTable ( void ) +{ + isOperator['!'] = true; + isOperator['$'] = true; + isOperator['%'] = true; + isOperator['&'] = true; + isOperator['*'] = true; + isOperator['+'] = true; + isOperator['-'] = true; + isOperator['.'] = true; + isOperator['/'] = true; + isOperator[':'] = true; + isOperator['<'] = true; + isOperator['='] = true; + isOperator['>'] = true; + isOperator['?'] = true; + isOperator['@'] = true; + isOperator['^'] = true; + isOperator['~'] = true; + isOperator['|'] = true; +} + +/*////////////////////////////////////////////////////////////////////// +//// Lexing */ +static bool isNum (char c) +{ + return c >= '0' && c <= '9'; +} + +static bool isLowerAlpha (char c) +{ + return c >= 'a' && c <= 'z'; +} + +static bool isUpperAlpha (char c) +{ + return c >= 'A' && c <= 'Z'; +} + +static bool isAlpha (char c) +{ + return isLowerAlpha (c) || isUpperAlpha (c); +} + +static bool isIdent (char c) +{ + return isNum (c) || isAlpha (c) || c == '_' || c == '\''; +} + +static bool isSpace (char c) +{ + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; +} + +static void eatWhiteSpace (lexingState * st) +{ + const unsigned char *cp = st->cp; + while (isSpace (*cp)) + cp++; + + st->cp = cp; +} + +static void eatString (lexingState * st) +{ + bool lastIsBackSlash = false; + bool unfinished = true; + const unsigned char *c = st->cp + 1; + + while (unfinished) + { + /* end of line should never happen. + * we tolerate it */ + if (c == NULL || c[0] == '\0') + break; + else if (*c == '"' && !lastIsBackSlash) + unfinished = false; + else + lastIsBackSlash = *c == '\\'; + + c++; + } + + st->cp = c; +} + +static void eatComment (lexingState * st) +{ + bool unfinished = true; + bool lastIsStar = false; + const unsigned char *c = st->cp + 2; + + while (unfinished) + { + /* we've reached the end of the line.. + * so we have to reload a line... */ + if (c == NULL || *c == '\0') + { + st->cp = readLineFromInputFile (); + /* WOOPS... no more input... + * we return, next lexing read + * will be null and ok */ + if (st->cp == NULL) + return; + c = st->cp; + } + /* we've reached the end of the comment */ + else if (*c == ')' && lastIsStar) + { + unfinished = false; + c++; + } + /* here we deal with imbricated comment, which + * are allowed in OCaml */ + else if (c[0] == '(' && c[1] == '*') + { + st->cp = c; + eatComment (st); + + c = st->cp; + if (c == NULL) + return; + + lastIsStar = false; + c++; + } + /* OCaml has a rule which says : + * + * "Comments do not occur inside string or character literals. + * Nested comments are handled correctly." + * + * So if we encounter a string beginning, we must parse it to + * get a good comment nesting (bug ID: 3117537) + */ + else if (*c == '"') + { + st->cp = c; + eatString (st); + c = st->cp; + } + else + { + lastIsStar = '*' == *c; + c++; + } + } + + st->cp = c; +} + +static void readIdentifier (lexingState * st) +{ + const unsigned char *p; + vStringClear (st->name); + + /* first char is a simple letter */ + if (isAlpha (*st->cp) || *st->cp == '_') + vStringPut (st->name, (int) *st->cp); + + /* Go till you get identifier chars */ + for (p = st->cp + 1; isIdent (*p); p++) + vStringPut (st->name, (int) *p); + + st->cp = p; +} + +static ocamlKeyword eatNumber (lexingState * st) +{ + while (isNum (*st->cp)) + st->cp++; + return Tok_Val; +} + +/* Operator can be defined in OCaml as a function + * so we must be ample enough to parse them normally */ +static ocamlKeyword eatOperator (lexingState * st) +{ + int count = 0; + const unsigned char *root = st->cp; + + vStringClear (st->name); + + while (isOperator[st->cp[count]]) + { + vStringPut (st->name, st->cp[count]); + count++; + } + + st->cp += count; + if (count <= 1) + { + switch (root[0]) + { + case '|': + return Tok_Pipe; + case '=': + return Tok_EQ; + case ':': + return Tok_Of; + default: + return Tok_Op; + } + } + else if (count == 2 && root[0] == '-' && root[1] == '>') + return Tok_To; + else if (count == 2 && root[0] == '|' && root[1] == '>') + return Tok_Op; + else + return Tok_Op; +} + +/* The lexer is in charge of reading the file. + * Some of sub-lexer (like eatComment) also read file. + * lexing is finished when the lexer return Tok_EOF */ +static ocamlKeyword lex (lexingState * st) +{ + int retType; + /* handling data input here */ + while (st->cp == NULL || st->cp[0] == '\0') + { + st->cp = readLineFromInputFile (); + if (st->cp == NULL) + return Tok_EOF; + } + + if (isAlpha (*st->cp)) + { + readIdentifier (st); + retType = lookupKeyword (vStringValue (st->name), Lang_Ocaml); + + if (retType == -1) /* If it's not a keyword */ + { + return OcaIDENTIFIER; + } + else + { + return retType; + } + } + else if (isNum (*st->cp)) + return eatNumber (st); + else if (isSpace (*st->cp)) + { + eatWhiteSpace (st); + return lex (st); + } + else if (*st->cp == '_') + { // special + readIdentifier (st); + return Tok_Val; + } + + /* OCaml permit the definition of our own operators + * so here we check all the consecutive chars which + * are operators to discard them. */ + else if (isOperator[*st->cp]) + return eatOperator (st); + else + { + switch (*st->cp) + { + case '(': + if (st->cp[1] == '*') /* ergl, a comment */ + { + eatComment (st); + return lex (st); + } + else + { + st->cp++; + return Tok_PARL; + } + + case ')': + st->cp++; + return Tok_PARR; + case '[': + st->cp++; + return Tok_BRL; + case ']': + st->cp++; + return Tok_BRR; + case '{': + st->cp++; + return Tok_CurlL; + case '}': + st->cp++; + return Tok_CurlR; + case '\'': + st->cp++; + return Tok_Prime; + case ',': + st->cp++; + return Tok_comma; + case '=': + st->cp++; + return Tok_EQ; + case ';': + st->cp++; + return Tok_semi; + case '"': + eatString (st); + return Tok_Val; + case '#': + st->cp++; + return Tok_Sharp; + case '\\': + st->cp++; + return Tok_Backslash; + default: + st->cp++; + break; + } + } + /* default return if nothing is recognized, + * shouldn't happen, but at least, it will + * be handled without destroying the parsing. */ + return Tok_Val; +} + +/*////////////////////////////////////////////////////////////////////// +//// Parsing */ +typedef void (*parseNext) (vString * const ident, ocaToken what, ocaToken whatNext); + +/********** Helpers */ +/* This variable hold the 'parser' which is going to + * handle the next token */ +static parseNext toDoNext; + +/* Special variable used by parser eater to + * determine which action to put after their + * job is finished. */ +static parseNext comeAfter; + +/* If a token put an end to current declaration/ + * statement */ +static ocaToken terminatingToken; + +/* Token to be searched by the different + * parser eater. */ +static ocaToken waitedToken; + +/* name of the last class, used for + * context stacking. */ +static vString *lastClass; + +typedef enum _sContextKind { + ContextStrong, + ContextSoft +} contextKind; + +typedef enum _sContextType { + ContextType, + ContextModule, + ContextClass, + ContextValue, + ContextFunction, + ContextMethod, + ContextBlock, + ContextMatch +} contextType; + +typedef struct _sOcamlContext { + contextKind kind; /* well if the context is strong or not */ + contextType type; + parseNext callback; /* what to do when a context is pop'd */ + vString *contextName; /* name, if any, of the surrounding context */ +} ocamlContext; + +/* context stack, can be used to output scope information + * into the tag file. */ +static ocamlContext stack[OCAML_MAX_STACK_SIZE]; +/* current position in the tag */ +static int stackIndex; + +/* special function, often recalled, so putting it here */ +static void globalScope (vString * const ident, ocaToken what, ocaToken whatNext); + +/* Return : index of the last named context if one + * is found, -1 otherwise */ +static int getLastNamedIndex ( void ) +{ + int i; + + for (i = stackIndex - 1; i >= 0; --i) + { + if (vStringLength (stack[i].contextName) > 0) + { + return i; + } + } + + return -1; +} + +static int contextDescription (contextType t) +{ + switch (t) + { + case ContextFunction: + return K_FUNCTION; + case ContextMethod: + return K_METHOD; + case ContextValue: + return K_VAL; + case ContextModule: + return K_MODULE; + case ContextType: + return K_TYPE; + case ContextClass: + return K_CLASS; + default: + AssertNotReached(); + return KIND_GHOST_INDEX; + } +} + +static char contextTypeSuffix (contextType t) +{ + switch (t) + { + case ContextFunction: + case ContextMethod: + case ContextValue: + case ContextModule: + return '/'; + case ContextType: + return '.'; + case ContextClass: + return '#'; + case ContextBlock: + return ' '; + case ContextMatch: + return '|'; + default: + return '$'; + } +} + +/* Push a new context, handle null string */ +static void pushContext (contextKind kind, contextType type, parseNext after, + vString const *contextName) +{ + int parentIndex; + + if (stackIndex >= OCAML_MAX_STACK_SIZE) + { + verbose ("OCaml Maximum depth reached"); + return; + } + + stack[stackIndex].kind = kind; + stack[stackIndex].type = type; + stack[stackIndex].callback = after; + + parentIndex = getLastNamedIndex (); + if (contextName == NULL) + { + vStringClear (stack[stackIndex++].contextName); + return; + } + + if (parentIndex >= 0) + { + vStringCopy (stack[stackIndex].contextName, + stack[parentIndex].contextName); + vStringPut (stack[stackIndex].contextName, + contextTypeSuffix (stack[parentIndex].type)); + + vStringCat (stack[stackIndex].contextName, contextName); + } + else + vStringCopy (stack[stackIndex].contextName, contextName); + + stackIndex++; +} + +static void pushStrongContext (vString * name, contextType type) +{ + pushContext (ContextStrong, type, &globalScope, name); +} + +static void pushSoftContext (parseNext continuation, + vString * name, contextType type) +{ + pushContext (ContextSoft, type, continuation, name); +} + +static void pushEmptyContext (parseNext continuation) +{ + pushContext (ContextSoft, ContextValue, continuation, NULL); +} + +/* unroll the stack until the last named context. + * then discard it. Used to handle the : + * let f x y = ... + * in ... + * where the context is reseted after the in. Context may have + * been really nested before that. */ +static void popLastNamed ( void ) +{ + int i = getLastNamedIndex (); + + if (i >= 0) + { + stackIndex = i; + toDoNext = stack[i].callback; + vStringClear (stack[i].contextName); + } + else + { + /* ok, no named context found... + * (should not happen). */ + stackIndex = 0; + toDoNext = &globalScope; + } +} + +/* pop a context without regarding it's content + * (beside handling empty stack case) */ +static void popSoftContext ( void ) +{ + if (stackIndex <= 0) + { + toDoNext = &globalScope; + } + else + { + stackIndex--; + toDoNext = stack[stackIndex].callback; + vStringClear (stack[stackIndex].contextName); + } +} + +/* Reset everything until the last global space. + * a strong context can be : + * - module + * - class definition + * - the initial global space + * - a _global_ declaration (let at global scope or in a module). + * Created to exit quickly deeply nested context */ +static contextType popStrongContext ( void ) +{ + int i; + + for (i = stackIndex - 1; i >= 0; --i) + { + if (stack[i].kind == ContextStrong) + { + stackIndex = i; + toDoNext = stack[i].callback; + vStringClear (stack[i].contextName); + return stack[i].type; + } + } + /* ok, no strong context found... */ + stackIndex = 0; + toDoNext = &globalScope; + return -1; +} + +/* Reset everything before the last match. */ +static void jumpToMatchContext ( void ) +{ + int i; + for (i = stackIndex - 1; i >= 0; --i) + { + if (stack[i].type == ContextMatch) + { + stackIndex = i + 1; + toDoNext = stack[i].callback; // this should always be + // matchPattern + stack[i + 1].callback = NULL; + vStringClear (stack[i + 1].contextName); + return; + } + } +} + +/* Ignore everything till waitedToken and jump to comeAfter. + * If the "end" keyword is encountered break, doesn't remember + * why though. */ +static void tillToken (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED) +{ + if (what == waitedToken) + toDoNext = comeAfter; + else if (what == OcaKEYWORD_end) + { + popStrongContext (); + toDoNext = &globalScope; + } +} + +/* Ignore everything till a waitedToken is seen, but + * take care of balanced parentheses/bracket use */ +static void contextualTillToken (vString * const ident, ocaToken what, ocaToken whatNext) +{ + static int parentheses = 0; + static int bracket = 0; + static int curly = 0; + + switch (what) + { + case Tok_PARL: + parentheses--; + break; + case Tok_PARR: + parentheses++; + break; + case Tok_CurlL: + curly--; + break; + case Tok_CurlR: + curly++; + break; + case Tok_BRL: + bracket--; + break; + case Tok_BRR: + bracket++; + break; + + default: /* other token are ignored */ + break; + } + + if (what == waitedToken && parentheses == 0 && bracket == 0 && curly == 0) + toDoNext = comeAfter; + else if (what == OcaKEYWORD_end) + globalScope (ident, what, whatNext); +} + +/* Wait for waitedToken and jump to comeAfter or let + * the globalScope handle declarations */ +static void tillTokenOrFallback (vString * const ident, ocaToken what, ocaToken whatNext) +{ + if (what == waitedToken) + toDoNext = comeAfter; + else + globalScope (ident, what, whatNext); +} + +/* ignore token till waitedToken, or give up if find + * terminatingToken. Use globalScope to handle new + * declarations. */ +static void tillTokenOrTerminatingOrFallback (vString * const ident, ocaToken what, ocaToken whatNext) +{ + if (what == waitedToken) + toDoNext = comeAfter; + else if (what == terminatingToken) + toDoNext = globalScope; + else + globalScope (ident, what, whatNext); +} + +/* ignore the next token in the stream and jump to the + * given comeAfter state */ +static void ignoreToken (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what CTAGS_ATTR_UNUSED, ocaToken whatNext CTAGS_ATTR_UNUSED) +{ + toDoNext = comeAfter; +} + +/********** Grammar */ +/* the purpose of each function is detailed near their + * implementation */ + +static contextType killCurrentState ( void ) +{ + contextType popped = popStrongContext (); + + /* Tracking the kind of previous strong + * context, if it doesn't match with a + * really strong entity, repop */ + switch (popped) + { + case ContextValue: + popped = popStrongContext (); + break; + case ContextFunction: + popped = popStrongContext (); + break; + case ContextMethod: + popped = popStrongContext (); + break; + case ContextType: + popped = popStrongContext (); + break; + case ContextMatch: + popped = popStrongContext (); + break; + case ContextBlock: + break; + case ContextModule: + break; + case ContextClass: + break; + default: + /* nothing more */ + break; + } + return popped; +} + +/* Keep track of our _true_ line number and file pos, + * as the lookahead token gives us false values. */ +static unsigned long ocaLineNumber; +static MIOPos ocaFilePosition; + +/* Used to prepare an OCaml tag, just in case there is a need to + * add additional information to the tag. */ +static void prepareTag (tagEntryInfo * tag, vString const *name, int kind) +{ + int parentIndex; + + initTagEntry (tag, vStringValue (name), kind); + /* Ripped out of read.h initTagEntry, because of line number + * shenanigans. + * Ugh. Lookahead is harder than I expected. */ + tag->lineNumber = ocaLineNumber; + tag->filePosition = ocaFilePosition; + + parentIndex = getLastNamedIndex (); + if (parentIndex >= 0) + { + tag->extensionFields.scopeKindIndex = + contextDescription (stack[parentIndex].type); + tag->extensionFields.scopeName = + vStringValue (stack[parentIndex].contextName); + } +} + +/* Used to centralise tag creation, and be able to add + * more information to it in the future */ +static void addTag (vString * const ident, int kind) +{ + if (OcamlKinds [kind].enabled && ident != NULL && vStringLength (ident) > 0) + { + tagEntryInfo toCreate; + prepareTag (&toCreate, ident, kind); + makeTagEntry (&toCreate); + } +} + +static bool needStrongPoping = false; +static void requestStrongPoping ( void ) +{ + needStrongPoping = true; +} + +static void cleanupPreviousParser ( void ) +{ + if (needStrongPoping) + { + needStrongPoping = false; + popStrongContext (); + } +} + +/* Due to some circular dependencies, the following functions + * must be forward-declared. */ +static void letParam (vString * const ident, ocaToken what, ocaToken whatNext); +static void localScope (vString * const ident, ocaToken what, ocaToken whatNext); +static void mayRedeclare (vString * const ident, ocaToken what, ocaToken whatNext); +static void typeSpecification (vString * const ident, ocaToken what, ocaToken whatNext); + +/* + * Parse a record type + * type ident = // parsed previously + * { + * ident1: type1; + * ident2: type2; + * } + */ +static void typeRecord (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED) +{ + switch (what) + { + case OcaIDENTIFIER: + addTag (ident, K_RECORDFIELD); + terminatingToken = Tok_CurlR; + waitedToken = Tok_semi; + comeAfter = &typeRecord; + toDoNext = &tillTokenOrTerminatingOrFallback; + break; + + case OcaKEYWORD_mutable: + /* ignore it */ + break; + + case Tok_CurlR: + popStrongContext (); + // don't pop the module context when going to another expression + needStrongPoping = false; + toDoNext = &globalScope; + break; + + default: /* don't care */ + break; + } +} + +/* handle : + * exception ExceptionName of ... */ +static void exceptionDecl (vString * const ident, ocaToken what, ocaToken whatNext) +{ + if (what == OcaIDENTIFIER) + { + addTag (ident, K_EXCEPTION); + } + else /* probably ill-formed, give back to global scope */ + { + globalScope (ident, what, whatNext); + } + toDoNext = &globalScope; +} + +static tagEntryInfo tempTag; +static vString *tempIdent; + +/* Ensure a constructor is not a type path beginning + * with a module */ +static void constructorValidation (vString * const ident, ocaToken what, ocaToken whatNext) +{ + switch (what) + { + case Tok_Op: /* if we got a '.' which is an operator */ + toDoNext = &globalScope; + popStrongContext (); + needStrongPoping = false; + break; + + case OcaKEYWORD_of: /* OK, it must be a constructor :) */ + if (vStringLength (tempIdent) > 0) + { + makeTagEntry (&tempTag); + vStringClear (tempIdent); + } + toDoNext = &tillTokenOrFallback; + comeAfter = &typeSpecification; + waitedToken = Tok_Pipe; + break; + + case Tok_Pipe: /* OK, it was a constructor :) */ + if (vStringLength (tempIdent) > 0) + { + makeTagEntry (&tempTag); + vStringClear (tempIdent); + } + toDoNext = &typeSpecification; + break; + + default: /* and mean that we're not facing a module name */ + if (vStringLength (tempIdent) > 0) + { + makeTagEntry (&tempTag); + vStringClear (tempIdent); + } + toDoNext = &tillTokenOrFallback; + comeAfter = &typeSpecification; + waitedToken = Tok_Pipe; + + popStrongContext (); + + // don't pop the module context when going to another expression + needStrongPoping = false; + + /* to be sure we use this token */ + globalScope (ident, what, whatNext); + } +} + +/* Parse beginning of type definition + * type 'avar ident = + * or + * type ('var1, 'var2) ident = + */ +static void typeDecl (vString * const ident, ocaToken what, ocaToken whatNext) +{ + switch (what) + { + /* parameterized */ + case Tok_Prime: + comeAfter = &typeDecl; + toDoNext = &ignoreToken; + break; + /* LOTS of parameters */ + case Tok_PARL: + comeAfter = &typeDecl; + waitedToken = Tok_PARR; + toDoNext = &tillToken; + break; + + case OcaIDENTIFIER: + addTag (ident, K_TYPE); + // true type declaration + if (whatNext == Tok_EQ) + { + pushStrongContext (ident, ContextType); + requestStrongPoping (); + toDoNext = &typeSpecification; + } + else // we're in a sig + toDoNext = &globalScope; + break; + + default: + globalScope (ident, what, whatNext); + } +} + +/** handle 'val' signatures in sigs and .mli files + * val ident : String.t -> Val.t + * Eventually, this will do cool things to annotate + * functions with their actual signatures. But for now, + * it's basically globalLet */ +static void val (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED) +{ + switch (what) + { + case Tok_PARL: + case OcaKEYWORD_rec: + break; + + case Tok_Op: + /* we are defining a new operator, it's a + * function definition */ + addTag (ident, K_VAL); + toDoNext = &globalScope; + break; + + case Tok_Val: /* Can be a weiiird binding, or an '_' */ + case OcaIDENTIFIER: + addTag (ident, K_VAL); + toDoNext = &globalScope; // sig parser ? + break; + + default: + toDoNext = &globalScope; + break; + } +} + +/* Parse type of kind + * type bidule = Ctor1 of ... + * | Ctor2 + * | Ctor3 of ... + * or + * type bidule = | Ctor1 of ... | Ctor2 + * + * when type bidule = { ... } is detected, + * let typeRecord handle it. */ +static void typeSpecification (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED) +{ + switch (what) + { + case OcaIDENTIFIER: + if (isUpperAlpha (vStringChar (ident, 0))) + { + /* here we handle type aliases of type + * type foo = AnotherModule.bar + * AnotherModule can mistakenly be took + * for a constructor. */ + if (! OcamlKinds[K_CONSTRUCTOR].enabled) + vStringClear (tempIdent); + else + { + vStringCopy (tempIdent, ident); + prepareTag (&tempTag, tempIdent, K_CONSTRUCTOR); + } + toDoNext = &constructorValidation; + } + else + { + toDoNext = &tillTokenOrFallback; + comeAfter = &typeSpecification; + waitedToken = Tok_Pipe; + } + break; + + case OcaKEYWORD_and: + toDoNext = &typeDecl; + break; + + case OcaKEYWORD_val: + toDoNext = &val; + break; + + case Tok_BRL: /* the '[' & ']' are ignored to accommodate */ + case Tok_BRR: /* with the revised syntax */ + case Tok_Pipe: + /* just ignore it */ + break; + + case Tok_CurlL: + toDoNext = &typeRecord; + break; + + default: /* don't care */ + break; + } +} + + +static bool dirtySpecialParam = false; + +/* parse the ~label and ~label:type parameter */ +static void parseLabel (vString * const ident, ocaToken what, ocaToken whatNext) +{ + static int parCount = 0; + + switch (what) + { + case OcaIDENTIFIER: + if (!dirtySpecialParam) + { + if (exportLocalInfo) + addTag (ident, K_VARIABLE); + + dirtySpecialParam = true; + } + break; + + case Tok_PARL: + parCount++; + break; + + case Tok_PARR: + parCount--; + if (parCount == 0) + toDoNext = &letParam; + break; + + case Tok_Op: + if (vStringChar(ident, 0) == ':') + { + toDoNext = &ignoreToken; + comeAfter = &letParam; + } + else if (parCount == 0 && dirtySpecialParam) + { + toDoNext = &letParam; + letParam (ident, what, whatNext); + } + break; + + default: + if (parCount == 0 && dirtySpecialParam) + { + toDoNext = &letParam; + letParam (ident, what, whatNext); + } + break; + } +} + +/* Optional argument with syntax like this : + * ?(foo = value) */ +static void parseOptionnal (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED) +{ + static int parCount = 0; + + switch (what) + { + case OcaIDENTIFIER: + if (!dirtySpecialParam) + { + if (exportLocalInfo) + addTag (ident, K_VARIABLE); + + dirtySpecialParam = true; + + if (parCount == 0) + toDoNext = &letParam; + } + break; + + case Tok_PARL: + parCount++; + break; + + case Tok_PARR: + parCount--; + if (parCount == 0) + toDoNext = &letParam; + break; + + default: /* don't care */ + break; + } +} + +/** handle let inside functions (so like it's name + * say : local let */ +static void localLet (vString * const ident, ocaToken what, ocaToken whatNext) +{ + switch (what) + { + case Tok_PARL: + /* We ignore this token to be able to parse such + * declarations : + * let (ident : type) = ... + */ + break; + + case OcaKEYWORD_rec: + /* just ignore to be able to parse such declarations: + * let rec ident = ... */ + break; + + case Tok_Op: + /* we are defining a new operator, it's a + * function definition */ + if (exportLocalInfo) + addTag (ident, K_FUNCTION); + pushSoftContext (mayRedeclare, ident, ContextFunction); + toDoNext = &letParam; + break; + + case Tok_Val: /* Can be a weiiird binding, or an '_' */ + case OcaIDENTIFIER: + // if we're an identifier, and the next token is too, then + // we're definitely a function. + if (whatNext == OcaIDENTIFIER || whatNext == Tok_PARL) + { + if (exportLocalInfo) + addTag (ident, K_FUNCTION); + pushSoftContext (mayRedeclare, ident, ContextFunction); + } + else + { + if (exportLocalInfo) + addTag (ident, K_VARIABLE); + pushSoftContext (mayRedeclare, ident, ContextValue); + } + toDoNext = &letParam; + break; + + case OcaKEYWORD_end: + localScope (ident, what, whatNext); + break; + + default: + toDoNext = &localScope; + break; + } +} + +/* parse : + * | pattern pattern -> ... + * or + * pattern apttern apttern -> ... + * we ignore all identifiers declared in the pattern, + * because their scope is likely to be even more limited + * than the let definitions. + * Used after a match ... with, or a function ... + * because their syntax is similar. */ +static void matchPattern (vString * const ident, ocaToken what, ocaToken whatNext) +{ + /* keep track of [], as it + * can be used in patterns and can + * mean the end of match expression in + * revised syntax */ + static int braceCount = 0; + + switch (what) + { + case Tok_To: + pushEmptyContext (&matchPattern); + toDoNext = &mayRedeclare; + break; + + case Tok_BRL: + braceCount++; + break; + + case OcaKEYWORD_value: + popLastNamed (); + case OcaKEYWORD_and: + case OcaKEYWORD_end: + // why was this global? matches only make sense in local scope + localScope (ident, what, whatNext); + break; + + case OcaKEYWORD_in: + popLastNamed (); + break; + + default: + break; + } +} + +/* Used at the beginning of a new scope (begin of a + * definition, parenthesis...) to catch inner let + * definition that may be in. */ +static void mayRedeclare (vString * const ident, ocaToken what, ocaToken whatNext) +{ + switch (what) + { + case OcaKEYWORD_value: + /* let globalScope handle it */ + globalScope (ident, what, whatNext); + + case OcaKEYWORD_let: + toDoNext = &localLet; + break; + + case OcaKEYWORD_val: + toDoNext = &val; + break; + + case OcaKEYWORD_object: + vStringClear (lastClass); + pushContext (ContextStrong, ContextClass, + &localScope, NULL); + needStrongPoping = false; + toDoNext = &globalScope; + break; + + case OcaKEYWORD_for: + case OcaKEYWORD_while: + toDoNext = &tillToken; + waitedToken = OcaKEYWORD_do; + comeAfter = &mayRedeclare; + break; + + case OcaKEYWORD_try: + toDoNext = &mayRedeclare; + pushSoftContext (&matchPattern, ident, ContextFunction); + break; + + case OcaKEYWORD_function: + toDoNext = &matchPattern; + pushSoftContext (&matchPattern, NULL, ContextMatch); + break; + + case OcaKEYWORD_fun: + toDoNext = &letParam; + break; + + /* Handle the special ;; from the OCaml + * Top level */ + case Tok_semi: + default: + toDoNext = &localScope; + localScope (ident, what, whatNext); + } +} + +/* parse : + * p1 p2 ... pn = ... + * or + * ?(p1=v) p2 ~p3 ~pn:ja ... = ... */ +static void letParam (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED) +{ + switch (what) + { + case Tok_To: + case Tok_EQ: + toDoNext = &mayRedeclare; + break; + + case OcaIDENTIFIER: + if (exportLocalInfo) + addTag (ident, K_VARIABLE); + break; + + case Tok_Op: + switch (vStringChar (ident, 0)) + { + case ':': + /*popSoftContext(); */ + /* we got a type signature */ + comeAfter = &mayRedeclare; + toDoNext = &tillTokenOrFallback; + waitedToken = Tok_EQ; + break; + + /* parse something like + * ~varname:type + * or + * ~varname + * or + * ~(varname: long type) */ + case '~': + toDoNext = &parseLabel; + dirtySpecialParam = false; + break; + + /* Optional argument with syntax like this : + * ?(bla = value) + * or + * ?bla */ + case '?': + toDoNext = &parseOptionnal; + dirtySpecialParam = false; + break; + + default: + break; + } + break; + + default: /* don't care */ + break; + } +} + +/* parse object ... + * used to be sure the class definition is not a type + * alias */ +static void classSpecif (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED) +{ + switch (what) + { + case OcaKEYWORD_object: + pushStrongContext (lastClass, ContextClass); + toDoNext = &globalScope; + break; + + default: + vStringClear (lastClass); + toDoNext = &globalScope; + } +} + +/* Handle a method ... class declaration. + * nearly a copy/paste of globalLet. */ +static void methodDecl (vString * const ident, ocaToken what, ocaToken whatNext) +{ + switch (what) + { + case Tok_PARL: + /* We ignore this token to be able to parse such + * declarations : + * let (ident : type) = ... */ + break; + + case OcaKEYWORD_mutable: + case OcaKEYWORD_virtual: + case OcaKEYWORD_rec: + /* just ignore to be able to parse such declarations: + * let rec ident = ... */ + break; + + case OcaIDENTIFIER: + addTag (ident, K_METHOD); + /* Normal pushing to get good subs */ + pushStrongContext (ident, ContextMethod); + /*pushSoftContext( globalScope, ident, ContextMethod ); */ + toDoNext = &letParam; + break; + + case OcaKEYWORD_end: + localScope (ident, what, whatNext); + break; + + default: + toDoNext = &globalScope; + break; + } +} + +/* name of the last module, used for + * context stacking. */ +static vString *lastModule; + +/* parse + * ... struct (* new global scope *) end + * or + * ... sig (* new global scope *) end + * or + * functor ... -> moduleSpecif + */ +static void moduleSpecif (vString * const ident, ocaToken what, ocaToken whatNext) +{ + switch (what) + { + case OcaKEYWORD_functor: + toDoNext = &contextualTillToken; + waitedToken = Tok_To; + comeAfter = &moduleSpecif; + break; + + case OcaKEYWORD_struct: + case OcaKEYWORD_sig: + pushStrongContext (lastModule, ContextModule); + toDoNext = &globalScope; + needStrongPoping = false; + break; + + case Tok_PARL: /* ( */ + toDoNext = &contextualTillToken; + comeAfter = &globalScope; + waitedToken = Tok_PARR; + contextualTillToken (ident, what, whatNext); + break; + + case Tok_Of: + case Tok_EQ: + break; + + default: + vStringClear (lastModule); + toDoNext = &globalScope; + break; + } +} + +/* parse : + * module name = ... + * then pass the token stream to moduleSpecif */ +static void moduleDecl (vString * const ident, ocaToken what, ocaToken whatNext) +{ + switch (what) + { + case OcaKEYWORD_rec: + /* recursive modules are _weird_, but they happen */ + case OcaKEYWORD_type: + /* this is technically a special type, but whatever */ + break; + + case OcaIDENTIFIER: + addTag (ident, K_MODULE); + vStringCopy (lastModule, ident); + if (whatNext == Tok_Of || whatNext == Tok_EQ) + toDoNext = &moduleSpecif; + else + { + // default to waiting on a '=' since + // module M : sig ... end = struct ... end + // is rarer + waitedToken = Tok_EQ; + comeAfter = &moduleSpecif; + toDoNext = &contextualTillToken; + } + break; + + default: /* don't care */ + break; + } +} + +/* parse : + * class name = ... + * or + * class virtual ['a,'b] classname = ... */ +static void classDecl (vString * const ident, ocaToken what, ocaToken whatNext CTAGS_ATTR_UNUSED) +{ + switch (what) + { + case OcaIDENTIFIER: + addTag (ident, K_CLASS); + vStringCopy (lastClass, ident); + toDoNext = &contextualTillToken; + waitedToken = Tok_EQ; + comeAfter = &classSpecif; + break; + + case Tok_BRL: + toDoNext = &tillToken; + waitedToken = Tok_BRR; + comeAfter = &classDecl; + break; + + default: + break; + } +} + +/* Handle a global + * let ident ... + * or + * let rec ident ... */ +static void globalLet (vString * const ident, ocaToken what, ocaToken whatNext) +{ + switch (what) + { + case Tok_PARL: + /* We ignore this token to be able to parse such + * declarations : + * let (ident : type) = ... + * but () is the toplevel function name, so fake ourselves + * as an ident and make a new function */ + if (whatNext == Tok_PARR) + { + vString *fakeIdent = vStringNewInit ("()"); + addTag (fakeIdent, K_FUNCTION); + pushStrongContext (fakeIdent, ContextFunction); + vStringDelete (fakeIdent); + requestStrongPoping (); + toDoNext = &letParam; + } + break; + + case OcaKEYWORD_mutable: + case OcaKEYWORD_virtual: + case OcaKEYWORD_rec: + /* just ignore to be able to parse such declarations: + * let rec ident = ... */ + break; + + case Tok_Op: + /* we are defining a new operator, it's a + * function definition */ + addTag (ident, K_FUNCTION); + pushStrongContext (ident, ContextFunction); + toDoNext = &letParam; + break; + + case Tok_Val: + if (vStringValue (ident)[0] == '_') + addTag (ident, K_FUNCTION); + pushStrongContext (ident, ContextFunction); + requestStrongPoping (); + toDoNext = &letParam; + break; + + case OcaIDENTIFIER: + // if we're an identifier, and the next token is too, then + // we're definitely a function. + if (whatNext == OcaIDENTIFIER || whatNext == Tok_PARL) + { + addTag (ident, K_FUNCTION); + pushStrongContext (ident, ContextFunction); + } + else + { + addTag (ident, K_VARIABLE); + pushStrongContext (ident, ContextValue); + } + requestStrongPoping (); + toDoNext = &letParam; + break; + + case OcaKEYWORD_end: + globalScope (ident, what, whatNext); + break; + + default: + toDoNext = &globalScope; + break; + } +} + +/* Handle the "strong" top levels, all 'big' declarations + * happen here */ +static void globalScope (vString * const ident CTAGS_ATTR_UNUSED, ocaToken what, ocaToken whatNext) +{ + /* Do not touch, this is used only by the global scope + * to handle an 'and' */ + static parseNext previousParser = &globalScope; + + switch (what) + { + case OcaKEYWORD_and: + cleanupPreviousParser (); + // deal with module M = struct ... end _and_ N = struct ... end + toDoNext = previousParser; + break; + + case OcaKEYWORD_type: + cleanupPreviousParser (); + toDoNext = &typeDecl; + previousParser = &typeDecl; + break; + + case OcaKEYWORD_class: + cleanupPreviousParser (); + toDoNext = &classDecl; + previousParser = &classDecl; + break; + + case OcaKEYWORD_module: + cleanupPreviousParser (); + toDoNext = &moduleDecl; + previousParser = &moduleDecl; + break; + + case OcaKEYWORD_end:; + contextType popped = killCurrentState (); + + /** so here, end can legally be followed by = or and in the + * situation of + * module M : sig ... end = struct ... end and + * module M struct ... end and N = struct ... end + * and we need to make sure we know we're still inside of a + * struct */ + if (whatNext == Tok_EQ && popped == ContextModule) + { + previousParser = &moduleDecl; + toDoNext = &moduleSpecif; + } + else if (whatNext == OcaKEYWORD_and && popped == ContextModule) + toDoNext = &moduleDecl; + needStrongPoping = false; + break; + + case OcaKEYWORD_method: + cleanupPreviousParser (); + toDoNext = &methodDecl; + /* and is not allowed in methods */ + break; + + case OcaKEYWORD_val: + toDoNext = &val; + /* and is not allowed in sigs */ + break; + + case OcaKEYWORD_let: + cleanupPreviousParser (); + toDoNext = &globalLet; + previousParser = &globalLet; + break; + + case OcaKEYWORD_exception: + cleanupPreviousParser (); + toDoNext = &exceptionDecl; + previousParser = &globalScope; + break; + + /* must be a #line directive, discard the + * whole line. */ + case Tok_Sharp: + /* ignore */ + break; + + default: + /* we don't care */ + break; + } +} + +/* Parse expression. Well ignore it is more the case, + * ignore all tokens except "shocking" keywords */ +static void localScope (vString * const ident, ocaToken what, ocaToken whatNext) +{ + switch (what) + { + + // we're probably in a match, so let's go to the last one + case Tok_Pipe: + jumpToMatchContext (); + break; + + case Tok_PARR: + case Tok_BRR: + case Tok_CurlR: + popSoftContext (); + break; + + /* Everything that `begin` has an `end` + * as end is overloaded and signal many end + * of things, we add an empty strong context to + * avoid problem with the end. + */ + case OcaKEYWORD_begin: + pushContext (ContextStrong, ContextBlock, &mayRedeclare, NULL); + toDoNext = &mayRedeclare; + break; + + /* An in keyword signals the end of the previous context and the + * start of a new one. */ + case OcaKEYWORD_in: + popLastNamed (); + pushEmptyContext (&localScope); + toDoNext = &mayRedeclare; + break; + + /* Ok, we got a '{', which is much likely to create + * a record. We cannot treat it like other [ && (, + * because it may contain the 'with' keyword and screw + * everything else. */ + case Tok_CurlL: + toDoNext = &contextualTillToken; + waitedToken = Tok_CurlR; + comeAfter = &localScope; + contextualTillToken (ident, what, whatNext); + break; + + /* Yeah imperative feature of OCaml, + * a ';' like in C */ + case Tok_semi: + /* ';;' case should end all scopes */ + if (whatNext == Tok_semi) + { + popStrongContext (); + toDoNext = &globalScope; + break; + } /* else fallthrough */ + + /* Every standard operator has very high precedence + * e.g. expr * expr needs no parentheses */ + case Tok_Op: + toDoNext = &mayRedeclare; + break; + + case Tok_PARL: + case Tok_BRL: + pushEmptyContext (&localScope); + toDoNext = &mayRedeclare; + break; + + case OcaKEYWORD_and: + if (toDoNext == &mayRedeclare) + { + popSoftContext (); + pushEmptyContext (localScope); + toDoNext = &localLet; + } + else + { + /* a local 'and' keyword jumps up a context to the last + * named. For ex + * in `with let IDENT ... and IDENT2 ...` ident and + * ident2 are on + * same level, the same as `let IDENT ... in let IDENT2 + * ...` + * a 'let' is the only 'and'-chainable construct allowed + * locally + * (thus we had to be one to get here), so we either go + * to + * globalLet or localLet depending on our scope. */ + popLastNamed (); + toDoNext = stackIndex == 0 ? &globalLet : &localLet; + } + break; + + case OcaKEYWORD_else: + case OcaKEYWORD_then: + popSoftContext (); + pushEmptyContext (&localScope); + toDoNext = &mayRedeclare; + break; + + case OcaKEYWORD_if: + pushEmptyContext (&localScope); + toDoNext = &mayRedeclare; + break; + + case OcaKEYWORD_match: + pushEmptyContext (&localScope); + toDoNext = &mayRedeclare; + break; + + case OcaKEYWORD_with: + popSoftContext (); + toDoNext = &matchPattern; + pushSoftContext (&matchPattern, NULL, ContextMatch); + break; + + case OcaKEYWORD_fun: + toDoNext = &letParam; + break; + + case OcaKEYWORD_done: + /* doesn't care */ + break; + + default: + requestStrongPoping (); + globalScope (ident, what, whatNext); + break; + } +} + +/*//////////////////////////////////////////////////////////////// +//// Deal with the system */ +/* in OCaml the file name is the module name used in the language + * with it first letter put in upper case */ +static void computeModuleName ( void ) +{ + /* in OCaml the file name define a module. + * so we define a module if the file has + * things in it. =) + */ + const char *filename = getInputFileName (); + + int beginIndex = 0; + int endIndex = strlen (filename) - 1; + vString *moduleName = vStringNew (); + + while (filename[endIndex] != '.' && endIndex > 0) + endIndex--; + + /* avoid problem with path in front of filename */ + beginIndex = endIndex; + while (beginIndex > 0) + { + if (filename[beginIndex] == '\\' || filename[beginIndex] == '/') + { + beginIndex++; + break; + } + + beginIndex--; + } + + vStringNCopyS (moduleName, &filename[beginIndex], endIndex - beginIndex); + + if (isLowerAlpha (vStringChar (moduleName, 0))) + vStringChar (moduleName, 0) += ('A' - 'a'); + + addTag (moduleName, K_MODULE); + vStringDelete (moduleName); +} + +/* Allocate all string of the context stack */ +static void initStack ( void ) +{ + int i; + for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i) + stack[i].contextName = vStringNew (); + stackIndex = 0; +} + +static void clearStack ( void ) +{ + int i; + for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i) + vStringDelete (stack[i].contextName); +} + +static void findOcamlTags (void) +{ + lexingState st; + ocaToken tok; + + /* One-token lookahead gives us the ability to + * do much more accurate analysis */ + lexingState nextSt; + ocaToken nextTok; + + initStack (); + + tempIdent = vStringNew (); + lastModule = vStringNew (); + lastClass = vStringNew (); + vString *temp_cp = vStringNew (); + + nextSt.name = vStringNew (); + nextSt.cp = readLineFromInputFile (); + ocaLineNumber = getInputLineNumber(); + ocaFilePosition = getInputFilePosition(); + toDoNext = &globalScope; + nextTok = lex (&nextSt); + + if (nextTok != Tok_EOF) + computeModuleName (); + + /* prime the lookahead token */ + st = nextSt; // preserve the old state for our first token + st.name = vStringNewCopy (st.name); + st.cp = (const unsigned char *) vStringValue (temp_cp); + tok = nextTok; + ocaLineNumber = getInputLineNumber(); /* ??? getSourceLineNumber() */ + ocaFilePosition = getInputFilePosition(); + nextTok = lex (&nextSt); + + /* main loop */ + while (tok != Tok_EOF) + { + (*toDoNext) (st.name, tok, nextTok); + + tok = nextTok; + ocaLineNumber = getInputLineNumber(); /* ??? */ + ocaFilePosition = getInputFilePosition(); + + if (nextTok != Tok_EOF) + { + vStringCopyS (temp_cp, (const char *) nextSt.cp); + st.cp = (const unsigned char *) vStringValue (temp_cp); + vStringCopy (st.name, nextSt.name); + nextTok = lex (&nextSt); + } + else + break; + } + + vStringDelete (st.name); + vStringDelete (nextSt.name); + vStringDelete (temp_cp); + vStringDelete (tempIdent); + vStringDelete (lastModule); + vStringDelete (lastClass); + clearStack (); +} + +static void ocamlInitialize (const langType language) +{ + Lang_Ocaml = language; + + initOperatorTable (); +} + +extern parserDefinition *OcamlParser (void) +{ + static const char *const extensions[] = { "ml", "mli", "aug", NULL }; + static const char *const aliases[] = { "tuareg", /* mode name of emacs */ + "caml", /* mode name of emacs */ + NULL }; + parserDefinition *def = parserNew ("OCaml"); + def->kindTable = OcamlKinds; + def->kindCount = ARRAY_SIZE (OcamlKinds); + def->extensions = extensions; + def->aliases = aliases; + def->parser = findOcamlTags; + def->initialize = ocamlInitialize; + def->keywordTable = OcamlKeywordTable; + def->keywordCount = ARRAY_SIZE (OcamlKeywordTable); + return def; +} diff --git a/meson.build b/meson.build index 394ef619ac..1966deeca9 100644 --- a/meson.build +++ b/meson.build @@ -660,6 +660,7 @@ ctags = static_library('ctags', 'ctags/parsers/markdown.h', 'ctags/parsers/nsis.c', 'ctags/parsers/objc.c', + 'ctags/parsers/ocaml.c', 'ctags/parsers/pascal.c', 'ctags/parsers/perl.c', 'ctags/parsers/perl.h', diff --git a/src/filetypes.c b/src/filetypes.c index b0a52a1eec..b1913e3689 100644 --- a/src/filetypes.c +++ b/src/filetypes.c @@ -138,7 +138,7 @@ static void init_builtin_filetypes(void) FT_INIT( FORTRAN, FORTRAN, "Fortran", "Fortran (F90)", SOURCE_FILE, COMPILED ); FT_INIT( F77, FORTRAN, "F77", "Fortran (F77)", SOURCE_FILE, COMPILED ); FT_INIT( GLSL, C, "GLSL", NULL, SOURCE_FILE, COMPILED ); - FT_INIT( CAML, NONE, "CAML", "(O)Caml", SOURCE_FILE, COMPILED ); + FT_INIT( CAML, OCAML, "CAML", "(O)Caml", SOURCE_FILE, COMPILED ); FT_INIT( PERL, PERL, "Perl", NULL, SOURCE_FILE, SCRIPT ); FT_INIT( PHP, PHP, "PHP", NULL, SOURCE_FILE, SCRIPT ); FT_INIT( JS, JAVASCRIPT, "Javascript", NULL, SOURCE_FILE, SCRIPT ); diff --git a/src/tagmanager/tm_parser.c b/src/tagmanager/tm_parser.c index c120b6117b..54ffd3b35b 100644 --- a/src/tagmanager/tm_parser.c +++ b/src/tagmanager/tm_parser.c @@ -1114,6 +1114,26 @@ static TMParserMapGroup group_RAKU[] = { {N_("Rules / Tokens"), TM_ICON_VAR, tm_tag_variable_t}, }; +static TMParserMapEntry map_OCAML[] = { + {'c', tm_tag_class_t}, // class + {'m', tm_tag_method_t}, // method + {'M', tm_tag_package_t}, // module + {'v', tm_tag_variable_t}, // var + {'p', tm_tag_undef_t}, // val + {'t', tm_tag_typedef_t}, // type + {'f', tm_tag_function_t}, // function + {'C', tm_tag_undef_t}, // Constructor + {'r', tm_tag_undef_t}, // RecordField + {'e', tm_tag_undef_t}, // Exception +}; +static TMParserMapGroup group_OCAML[] = { + {N_("Modules"), TM_ICON_NAMESPACE, tm_tag_package_t}, + {N_("Classes"), TM_ICON_CLASS, tm_tag_class_t}, + {N_("Types"), TM_ICON_STRUCT, tm_tag_typedef_t}, + {N_("Functions"), TM_ICON_METHOD, tm_tag_method_t | tm_tag_function_t}, + {N_("Variables"), TM_ICON_VAR, tm_tag_variable_t}, +}; + typedef struct { TMParserMapEntry *entries; @@ -1187,6 +1207,7 @@ static TMParserMap parser_map[] = { MAP_ENTRY(BATCH), MAP_ENTRY(AUTOIT), MAP_ENTRY(RAKU), + MAP_ENTRY(OCAML), }; /* make sure the parser map is consistent and complete */ G_STATIC_ASSERT(G_N_ELEMENTS(parser_map) == TM_PARSER_COUNT); @@ -1728,6 +1749,7 @@ gboolean tm_parser_has_full_scope(TMParserType lang) case TM_PARSER_ERLANG: case TM_PARSER_FORTRAN: case TM_PARSER_OBJC: + case TM_PARSER_OCAML: case TM_PARSER_REST: /* Other parsers don't use scope at all (or should be somewhere above) */ default: diff --git a/src/tagmanager/tm_parser.h b/src/tagmanager/tm_parser.h index 7672880932..043edc7713 100644 --- a/src/tagmanager/tm_parser.h +++ b/src/tagmanager/tm_parser.h @@ -120,6 +120,7 @@ enum TM_PARSER_BATCH, TM_PARSER_AUTOIT, TM_PARSER_RAKU, + TM_PARSER_OCAML, TM_PARSER_COUNT }; diff --git a/src/tagmanager/tm_parsers.h b/src/tagmanager/tm_parsers.h index 535a496ee1..808cd419a3 100644 --- a/src/tagmanager/tm_parsers.h +++ b/src/tagmanager/tm_parsers.h @@ -74,6 +74,7 @@ TypeScriptParser, \ DosBatchParser, \ AutoItParser, \ - Perl6Parser + Perl6Parser, \ + OcamlParser #endif diff --git a/tests/ctags/Makefile.am b/tests/ctags/Makefile.am index 8bdea43729..641368a961 100644 --- a/tests/ctags/Makefile.am +++ b/tests/ctags/Makefile.am @@ -313,6 +313,7 @@ test_sources = \ simple.lua \ simple.mak \ simple.md \ + simple.ml \ simple.php \ simple.pl \ simple.ps1 \ diff --git a/tests/ctags/simple.ml b/tests/ctags/simple.ml new file mode 100644 index 0000000000..373f3a99c5 --- /dev/null +++ b/tests/ctags/simple.ml @@ -0,0 +1,35 @@ +module ModuleFoo = struct + type foobar = + ConstructorFoo + | ConstructorBar of int * char list +end + +type 'a foorecord = + { foofield : 'a; + barfield : int; + mutable foobarfield : list char -> int -> unit } + +(* op redif *) +let (+-) a b = + let aplus = a + b + and aminus = a - b + in + (aplus, aminus) + +let shall_appear () = + let sub_not 1 = 2 + and shall_not_either fu = () in + let nope = 3 +and must_appear_also 4 = () + +let y = 4 + +let foo_function a b = (a, b) + +class fooClass = +object (self) + val x = () + method fooMethod = x +end + +exception ConnectionNotReachable diff --git a/tests/ctags/simple.ml.tags b/tests/ctags/simple.ml.tags new file mode 100644 index 0000000000..6e4160efc1 --- /dev/null +++ b/tests/ctags/simple.ml.tags @@ -0,0 +1,20 @@ ++-Ì16Ö0 +function: +- +ModuleFooÌ512Ö0 +package: ModuleFoo +SimpleÌ512Ö0 +package: Simple +fooClassÌ1Ö0 +class: fooClass +fooMethodÌ128ÎfooClassÖ0 +method: fooClass :: fooMethod +foo_functionÌ16Ö0 +function: foo_function +foobarÌ4096ÎModuleFooÖ0 +typedef: ModuleFoo :: foobar +foorecordÌ4096Ö0 +typedef: foorecord +shall_appearÌ16Ö0 +function: shall_appear +yÌ16384Ö0 +variable: y diff --git a/tests/meson.build b/tests/meson.build index 74cc7b322a..7aa9b91d7e 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -310,6 +310,7 @@ ctags_tests = files([ 'ctags/simple.lua.tags', 'ctags/simple.mak.tags', 'ctags/simple.md.tags', + 'ctags/simple.ml.tags', 'ctags/simple.php.tags', 'ctags/simple.pl.tags', 'ctags/simple.ps1.tags',