From b3dab3b9a8d1bd7b10b78819a57ec2540bbb8ddf Mon Sep 17 00:00:00 2001 From: getzze Date: Fri, 11 Sep 2020 19:18:04 +0100 Subject: [PATCH 1/2] Create julia parser, ctags and lexilla --- ctags/Makefile.am | 1 + ctags/parsers/geany_julia.c | 1554 +++++++++++++++++++++++++++++ data/Makefile.am | 1 + data/filedefs/filetypes.common | 3 + data/filedefs/filetypes.julia | 81 ++ data/filetype_extensions.conf | 1 + scintilla/Makefile.am | 1 + scintilla/include/SciLexer.h | 23 + scintilla/include/Scintilla.iface | 25 + scintilla/julia_lexilla_v5.patch | 36 + scintilla/lexers/LexJulia.cxx | 1269 +++++++++++++++++++++++ scintilla/src/Catalogue.cxx | 1 + src/filetypes.c | 1 + src/filetypes.h | 1 + src/highlighting.c | 12 + src/highlightingmappings.h | 36 + src/symbols.c | 14 + src/tagmanager/tm_parser.c | 14 + src/tagmanager/tm_parser.h | 1 + src/tagmanager/tm_parsers.h | 1 + 20 files changed, 3076 insertions(+) create mode 100644 ctags/parsers/geany_julia.c create mode 100644 data/filedefs/filetypes.julia create mode 100644 scintilla/julia_lexilla_v5.patch create mode 100644 scintilla/lexers/LexJulia.cxx diff --git a/ctags/Makefile.am b/ctags/Makefile.am index 5517a6c5e2..eceed2fba4 100644 --- a/ctags/Makefile.am +++ b/ctags/Makefile.am @@ -31,6 +31,7 @@ parsers = \ parsers/geany_html.c \ parsers/geany_jscript.c \ parsers/geany_json.c \ + parsers/geany_julia.c \ parsers/geany_lcpp.c \ parsers/geany_lcpp.h \ parsers/geany_lua.c \ diff --git a/ctags/parsers/geany_julia.c b/ctags/parsers/geany_julia.c new file mode 100644 index 0000000000..3f433e6a15 --- /dev/null +++ b/ctags/parsers/geany_julia.c @@ -0,0 +1,1554 @@ +/* +* Copyright (c) 2020-2021, getzze +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for Julia files. 
+* +* Documented 'kinds': +* https://docs.julialang.org/en/v1/manual/documentation/#Syntax-Guide +* Language parser in Scheme: +* https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "keyword.h" +#include "parse.h" +#include "entry.h" +#include "options.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" +#include "xtag.h" + +/* +* MACROS +*/ +#define MAX_STRING_LENGTH 256 + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_CONSTANT, + K_FUNCTION, + K_FIELD, + K_MACRO, + K_MODULE, + K_STRUCT, + K_TYPE, + K_UNKNOWN, + K_NONE +} JuliaKind; + +typedef enum { + JULIA_MODULE_IMPORTED, + JULIA_MODULE_USED, + JULIA_MODULE_NAMESPACE, +} juliaModuleRole; + +typedef enum { + JULIA_UNKNOWN_IMPORTED, + JULIA_UNKNOWN_USED, +} juliaUnknownRole; + +/* +* using X X = (kind:module, role:used) +* +* using X: a, b X = (kind:module, role:namespace) +* a, b = (kind:unknown, role:used, scope:module:X) +* +* import X X = (kind:module, role:imported) +* +* import X.a, Y.b X, Y = (kind:module, role:namespace) +* a, b = (kind:unknown, role:imported, scope:module:X) +* +* import X: a, b Same as the above one +*/ +static roleDefinition JuliaModuleRoles [] = { + { true, "imported", "loaded by \"import\"" }, + { true, "used", "loaded by \"using\"" }, + { true, "namespace", "only some symbols in it are imported" }, +}; + +static roleDefinition JuliaUnknownRoles [] = { + { true, "imported", "loaded by \"import\"" }, + { true, "used", "loaded by \"using\""}, +}; + +static kindDefinition JuliaKinds [] = { + { true, 'c', "constant", "Constants" }, + { true, 'f', "function", "Functions" }, + { true, 'g', "field", "Fields" }, + { true, 'm', "macro", "Macros" }, + { true, 'n', "module", "Modules", + ATTACH_ROLES(JuliaModuleRoles) }, + { true, 's', "struct", "Structures" }, + { true, 't', "type", "Types" }, + { true, 'x', "unknown", "name defined in other modules", + 
.referenceOnly = true, ATTACH_ROLES(JuliaUnknownRoles) }, +}; + +typedef enum { + TOKEN_NONE=0, /* none */ + TOKEN_WHITESPACE, + TOKEN_PAREN_BLOCK, + TOKEN_BRACKET_BLOCK, + TOKEN_CURLY_BLOCK, + TOKEN_OPEN_BLOCK, + TOKEN_CLOSE_BLOCK, + TOKEN_TYPE_ANNOTATION, + TOKEN_TYPE_WHERE, + TOKEN_CONST, + TOKEN_STRING, /* = 10 */ + TOKEN_COMMAND, + TOKEN_MACROCALL, + TOKEN_IDENTIFIER, + TOKEN_MODULE, + TOKEN_MACRO, + TOKEN_FUNCTION, + TOKEN_STRUCT, + TOKEN_ENUM, + TOKEN_TYPE, + TOKEN_IMPORT, /* = 20 */ + TOKEN_USING, + TOKEN_EXPORT, + TOKEN_NEWLINE, + TOKEN_SEMICOLON, + TOKEN_COMPOSER_KWD, /* KEYWORD only */ + TOKEN_EOF, + TOKEN_COUNT +} tokenType; + +static const keywordTable JuliaKeywordTable [] = { + /* TODO: Sort by keys. */ + { "mutable", TOKEN_COMPOSER_KWD }, + { "primitive", TOKEN_COMPOSER_KWD }, + { "abstract", TOKEN_COMPOSER_KWD }, + + { "if", TOKEN_OPEN_BLOCK }, + { "for", TOKEN_OPEN_BLOCK }, + { "while", TOKEN_OPEN_BLOCK }, + { "try", TOKEN_OPEN_BLOCK }, + { "do", TOKEN_OPEN_BLOCK }, + { "begin", TOKEN_OPEN_BLOCK }, + { "let", TOKEN_OPEN_BLOCK }, + { "quote", TOKEN_OPEN_BLOCK }, + + { "module", TOKEN_MODULE }, + { "baremodule",TOKEN_MODULE }, + + { "using", TOKEN_USING }, + { "import", TOKEN_IMPORT }, + + { "export", TOKEN_EXPORT }, + { "const", TOKEN_CONST }, + { "macro", TOKEN_MACRO }, + { "function", TOKEN_FUNCTION }, + { "struct", TOKEN_STRUCT }, + { "type", TOKEN_TYPE }, + { "where", TOKEN_TYPE_WHERE }, + { "end", TOKEN_CLOSE_BLOCK }, +}; + +typedef struct { + /* Characters */ + int prev_c; + int cur_c; + int next_c; + + /* Tokens */ + bool first_token; + int cur_token; + vString* token_str; + unsigned long line; + MIOPos pos; +} lexerState; + +/* +* FUNCTION PROTOTYPES +*/ + +static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope); + +static void scanParenBlock (lexerState *lexer); + +/* +* FUNCTION DEFINITIONS +*/ + +static int endswith(const char* what, const char* withwhat) +{ + int l1 = strlen(what); + int l2 = strlen(withwhat); + 
if (l2 > l1) + { + return 0; + } + + return strcmp(withwhat, what + (l1 - l2)) == 0; +} + +/* Resets the scope string to the old length */ +static void resetScope (vString *scope, size_t old_len) +{ + vStringTruncate (scope, old_len); +} + +/* Adds a name to the end of the scope string */ +static void addToScope (vString *scope, vString *name) +{ + if (vStringLength(scope) > 0) + { + vStringPut(scope, '.'); + } + vStringCat(scope, name); +} + +/* Reads a character from the file */ +static void advanceChar (lexerState *lexer) +{ + lexer->prev_c = lexer->cur_c; + lexer->cur_c = lexer->next_c; + lexer->next_c = getcFromInputFile(); +} + +/* Reads N characters from the file */ +static void advanceNChar (lexerState *lexer, int n) +{ + while (n--) + { + advanceChar(lexer); + } +} + +/* Store the current character in lexerState::token_str if there is space + * (set by MAX_STRING_LENGTH), and then read the next character from the file */ +static void advanceAndStoreChar (lexerState *lexer) +{ + if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH) + { + vStringPut(lexer->token_str, (char) lexer->cur_c); + } + advanceChar(lexer); +} + +static bool isWhitespace (int c, bool newline) +{ + if (newline) + { + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; + } + return c == ' ' || c == '\t'; +} + +static bool isAscii (int c) +{ + return (c >= 0) && (c < 0x80); +} + +static bool isOperator (int c) +{ + if (c == '%' || c == '^' || c == '&' || c == '|' || + c == '*' || c == '-' || c == '+' || c == '~' || + c == '<' || c == '>' || c == ',' || c == '/' || + c == '?' || c == '=' || c == ':' ) + { + return true; + } + return false; +} + +/* This does not distinguish Unicode letters from operators... */ +static bool isIdentifierFirstCharacter (int c) +{ + return (bool) ((isAscii(c) && (isalpha (c) || c == '_')) || c >= 0xC0); +} + +/* This does not distinguish Unicode letters from operators... 
*/ +static bool isIdentifierCharacter (int c) +{ + return (bool) (isIdentifierFirstCharacter(c) || (isAscii(c) && (isdigit(c) || c == '!')) || c >= 0x80); +} + +static void skipWhitespace (lexerState *lexer, bool newline) +{ + while (isWhitespace(lexer->cur_c, newline)) + { + advanceChar(lexer); + } +} + +/* The transpose operator is only allowed after an identifier, a number, an expression inside parenthesis or an index */ +static bool isTranspose (int c) +{ + return (isIdentifierCharacter(c) || c == ')' || c == ']'); +} + + +/* + * Lexer functions + * */ + +/* Check that the current character sequence is a type declaration or inheritance */ +static bool isTypeDecl (lexerState *lexer) +{ + if ((lexer->prev_c != '.' && lexer->cur_c == '<' && lexer->next_c == ':') || + (lexer->prev_c != '.' && lexer->cur_c == '>' && lexer->next_c == ':') || + (lexer->cur_c == ':' && lexer->next_c == ':') ) + { + return true; + } + return false; +} + +/* Check if the current char is a new line */ +static bool isNewLine (lexerState *lexer) +{ + return (lexer->cur_c == '\n')? true: false; +} + +/* Check if the current char is a new line. 
+ * If it is, skip the newline and return true */ +static bool skipNewLine (lexerState *lexer) +{ + if (isNewLine(lexer)) + { + advanceChar(lexer); + return true; + } + return false; +} + +/* Skip a single comment or multiline comment + * A single line comment starts with # + * A multi-line comment is encapsulated in #=...=# and they are nesting + * */ +static void skipComment (lexerState *lexer) +{ + /* # */ + if (lexer->next_c != '=') + { + advanceNChar(lexer, 1); + while (lexer->cur_c != EOF && lexer->cur_c != '\n') + { + advanceChar(lexer); + } + } + /* block comment */ + else /* if (lexer->next_c == '=') */ + { + int level = 1; + advanceNChar(lexer, 2); + while (lexer->cur_c != EOF && level > 0) + { + if (lexer->cur_c == '=' && lexer->next_c == '#') + { + level--; + advanceNChar(lexer, 2); + } + else if (lexer->cur_c == '#' && lexer->next_c == '=') + { + level++; + advanceNChar(lexer, 2); + } + else + { + advanceChar(lexer); + } + } + } +} + +static void scanIdentifier (lexerState *lexer, bool clear) +{ + if (clear) + { + vStringClear(lexer->token_str); + } + + do + { + advanceAndStoreChar(lexer); + } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c)); +} + +/* Scan a quote-like expression. + * Allow for triple-character variand and interpolation with `$`. + * These last past the end of the line, so be careful + * not to store too much of them (see MAX_STRING_LENGTH). 
*/ +static void scanStringOrCommand (lexerState *lexer, int c) +{ + bool istriple = false; + + /* Pass the first "quote"-character */ + advanceAndStoreChar(lexer); + + /* Check for triple "quote"-character */ + if (lexer->cur_c == c && lexer->next_c == c) + { + istriple = true; + advanceAndStoreChar(lexer); + advanceAndStoreChar(lexer); + + /* Cancel up to 2 "quote"-characters after opening the triple */ + if (lexer->cur_c == c) + { + advanceAndStoreChar(lexer); + if (lexer->cur_c == c) + { + advanceAndStoreChar(lexer); + } + } + } + + while (lexer->cur_c != EOF && lexer->cur_c != c) + { + /* Check for interpolation before checking for end of "quote" */ + if (lexer->cur_c == '$' && lexer->next_c == '(') + { + advanceAndStoreChar(lexer); + scanParenBlock(lexer); + /* continue to avoid advance character again. Correct bug + * with "quote"-character just after closing parenthesis */ + continue; + } + + if (lexer->cur_c == '\\' && + (lexer->next_c == c || lexer->next_c == '\\')) + { + advanceAndStoreChar(lexer); + } + advanceAndStoreChar(lexer); + + /* Cancel up to 2 "quote"-characters if triple string */ + if (istriple && lexer->cur_c == c) + { + advanceAndStoreChar(lexer); + if (lexer->cur_c == c) + { + advanceAndStoreChar(lexer); + } + } + } + /* Pass the last "quote"-character */ + advanceAndStoreChar(lexer); +} + + +/* Scan commands surrounded by backticks, + * possibly triple backticks */ +static void scanCommand (lexerState *lexer) +{ + scanStringOrCommand(lexer, '`'); +} + +/* Double-quoted strings, + * possibly triple doublequotes */ +static void scanString (lexerState *lexer) +{ + scanStringOrCommand(lexer, '"'); +} + + +/* This deals with character literals: 'n', '\n', '\uFFFF'; + * and matrix transpose: A'. 
+ * We'll use this approximate regexp for the literals: + * \' [^'] \' or \' \\ [^']+ \' or \' \\ \' \' + * Either way, we'll treat this token as a string, so it gets preserved */ +static bool scanCharacterOrTranspose (lexerState *lexer) +{ + if (isTranspose(lexer->prev_c)) + { + /* deal with untranspose/transpose sequence */ + while (lexer->cur_c != EOF && lexer->cur_c == '\'') + { + advanceAndStoreChar(lexer); + } + return false; + } + + //vStringClear(lexer->token_str); + advanceAndStoreChar(lexer); + + if (lexer->cur_c == '\\') + { + advanceAndStoreChar(lexer); + /* The \' \\ \' \' (literally '\'') case */ + if (lexer->cur_c == '\'' && lexer->next_c == '\'') + { + advanceAndStoreChar(lexer); + advanceAndStoreChar(lexer); + } + /* The \' \\ [^']+ \' case */ + else + { + while (lexer->cur_c != EOF && lexer->cur_c != '\'') + { + advanceAndStoreChar(lexer); + } + } + } + /* The \' [^'] \' and \' \' \' cases */ + else if (lexer->next_c == '\'') + { + advanceAndStoreChar(lexer); + advanceAndStoreChar(lexer); + } + /* Otherwise it is malformed */ + return true; +} + +/* Parse a block with opening and closing character */ +static void scanBlock (lexerState *lexer, int open, int close, bool convert_newline) +{ + /* Assume the current char is `open` */ + int level = 1; + + /* Pass the first opening */ + advanceAndStoreChar(lexer); + + while (lexer->cur_c != EOF && level > 0) + { + /* Parse everything */ + if (lexer->cur_c == ' ' || lexer->cur_c == '\t') + { + skipWhitespace(lexer, false); + vStringPut(lexer->token_str, ' '); + } + if (lexer->cur_c == '#') + { + skipComment(lexer); + } + else if (lexer->cur_c == '\"') + { + scanString(lexer); + } + else if (lexer->cur_c == '\'') + { + scanCharacterOrTranspose(lexer); + } + + /* Parse opening/closing */ + if (lexer->cur_c == open) + { + level++; + } + else if (lexer->cur_c == close) + { + level--; + } + + if (convert_newline && skipNewLine(lexer)) + { + vStringPut(lexer->token_str, ' '); + } + else + { + 
advanceAndStoreChar(lexer); + } + + } + /* Lexer position is just after `close` */ +} + + +/* Parse a block inside parenthesis, for example a function argument list */ +static void scanParenBlock (lexerState *lexer) +{ + scanBlock(lexer, '(', ')', true); +} + +/* Indexing block with bracket. + * Some keywords have a special meaning in this environment: + * end, begin, for and if */ +static void scanIndexBlock (lexerState *lexer) +{ + scanBlock(lexer, '[', ']', false); + +} + +/* Parse a block inside curly brackets, for type parametrization */ +static void scanCurlyBlock (lexerState *lexer) +{ + scanBlock(lexer, '{', '}', true); +} + +/* Scan type annotation like + * `::Type`, `::Type{T}` + */ +static void scanTypeAnnotation (lexerState *lexer) +{ + /* assume that current char is '<', '>' or ':', followed by ':' */ + advanceAndStoreChar(lexer); + advanceAndStoreChar(lexer); + + skipWhitespace(lexer, true); + scanIdentifier(lexer, false); + if (lexer->cur_c == '{') + { + scanCurlyBlock(lexer); + } +} + +/* Scan type annotation like + * `where Int<:T<:Real`, `where S<:Array{Real}` or `where {S, T}` + */ +static void scanTypeWhere (lexerState *lexer) +{ + /* assume that current token is 'where' + * allow line continuation */ + vStringPut(lexer->token_str, ' '); + skipWhitespace(lexer, true); + + while (lexer->cur_c != EOF) + { + + if (lexer->cur_c == '{') + { + scanCurlyBlock(lexer); + } + else if (isIdentifierFirstCharacter(lexer->cur_c)) + { + scanIdentifier(lexer, false); + if (endswith(vStringValue(lexer->token_str), "where")) + { + /* allow line continuation */ + vStringPut(lexer->token_str, ' '); + skipWhitespace(lexer, true); + } + } + else if (isTypeDecl(lexer)) + { + scanTypeAnnotation(lexer); + //skipWhitespace(lexer, false); + } + else if (lexer->cur_c == '#') + { + skipComment(lexer); + /* allow line continuation */ + if (endswith(vStringValue(lexer->token_str), "where ")) + { + skipWhitespace(lexer, true); + } + } + else if (isWhitespace(lexer->cur_c, 
false)) + { + while (isWhitespace(lexer->cur_c, false)) + { + advanceChar(lexer); + } + /* Add a space, if it is not a trailing space */ + if (!(isNewLine(lexer))) + { + vStringPut(lexer->token_str, ' '); + } + } + else + { + break; + } + } +} + + +static int parseIdentifier (lexerState *lexer) +{ + langType julia = getInputLanguage (); + scanIdentifier(lexer, true); + + int k = lookupKeyword (vStringValue(lexer->token_str), julia); + /* First part of a composed identifier */ + if (k == TOKEN_COMPOSER_KWD) + { + skipWhitespace(lexer, false); + scanIdentifier(lexer, true); + k = lookupKeyword (vStringValue(lexer->token_str), julia); + } + + if ((k == TOKEN_OPEN_BLOCK) + || (k == TOKEN_MODULE) + || (k == TOKEN_IMPORT) + || (k == TOKEN_USING) + || (k == TOKEN_EXPORT) + || (k == TOKEN_CONST) + || (k == TOKEN_MACRO) + || (k == TOKEN_FUNCTION) + || (k == TOKEN_STRUCT) + || (k == TOKEN_TYPE) + || (k == TOKEN_TYPE_WHERE) + || (k == TOKEN_CLOSE_BLOCK)) + { + if (k == TOKEN_TYPE_WHERE) + { + scanTypeWhere(lexer); + } + return lexer->cur_token = k; + } + return lexer->cur_token = TOKEN_IDENTIFIER; +} + + +/* Advances the parser one token, optionally skipping whitespace + * (otherwise it is concatenated and returned as a single whitespace token). + * Whitespace is needed to properly render function signatures. Unrecognized + * token starts are stored literally, e.g. token may equal to a character '#'. 
*/ +static int advanceToken (lexerState *lexer, bool skip_whitespace, bool propagate_first) +{ + bool have_whitespace = false; + bool newline = false; + lexer->line = getInputLineNumber(); + lexer->pos = getInputFilePosition(); + + /* the next token is the first token of the line */ + if (!propagate_first) + { + if (lexer->cur_token == TOKEN_NEWLINE || + lexer->cur_token == TOKEN_SEMICOLON || + lexer->cur_token == TOKEN_NONE || + (lexer->first_token && lexer->cur_token == TOKEN_MACROCALL)) + { + lexer->first_token = true; + } + else + { + lexer->first_token = false; + } + } + + while (lexer->cur_c != EOF) + { + /* skip whitespaces but not newlines */ + if (isWhitespace(lexer->cur_c, newline)) + { + skipWhitespace(lexer, newline); + have_whitespace = true; + } + else if (lexer->cur_c == '#') + { + skipComment(lexer); + have_whitespace = true; + } + else + { + if (have_whitespace && !skip_whitespace) + { + return lexer->cur_token = TOKEN_WHITESPACE; + } + break; + } + } + lexer->line = getInputLineNumber(); + lexer->pos = getInputFilePosition(); + while (lexer->cur_c != EOF) + { + if (lexer->cur_c == '"') + { + vStringClear(lexer->token_str); + scanString(lexer); + return lexer->cur_token = TOKEN_STRING; + } + else if (lexer->cur_c == '\'') + { + vStringClear(lexer->token_str); + if (scanCharacterOrTranspose(lexer)) + { + return lexer->cur_token = TOKEN_STRING; + } + else + { + return lexer->cur_token = '\''; + } + } + else if (lexer->cur_c == '`') + { + vStringClear(lexer->token_str); + scanCommand(lexer); + return lexer->cur_token = TOKEN_COMMAND; + } + else if (isIdentifierFirstCharacter(lexer->cur_c)) + { + return parseIdentifier(lexer); + } + else if (lexer->cur_c == '@') + { + vStringClear(lexer->token_str); + advanceAndStoreChar(lexer); + do + { + advanceAndStoreChar(lexer); + } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c)); + return lexer->cur_token = TOKEN_MACROCALL; + } + else if (lexer->cur_c == '(') + { + 
vStringClear(lexer->token_str); + scanParenBlock(lexer); + return lexer->cur_token = TOKEN_PAREN_BLOCK; + } + else if (lexer->cur_c == '[') + { + vStringClear(lexer->token_str); + scanIndexBlock(lexer); + return lexer->cur_token = TOKEN_BRACKET_BLOCK; + } + else if (lexer->cur_c == '{') + { + vStringClear(lexer->token_str); + scanCurlyBlock(lexer); + return lexer->cur_token = TOKEN_CURLY_BLOCK; + } + else if (isTypeDecl(lexer)) + { + vStringClear(lexer->token_str); + scanTypeAnnotation(lexer); + return lexer->cur_token = TOKEN_TYPE_ANNOTATION; + } + else if (skipNewLine(lexer)) + { + /* allow line continuation */ + if (isOperator(lexer->cur_token)) + { + return lexer->cur_token; + } + return lexer->cur_token = TOKEN_NEWLINE; + } + else if (lexer->cur_c == ';') + { + advanceChar(lexer); + return lexer->cur_token = TOKEN_SEMICOLON; + } + else + { + int c = lexer->cur_c; + advanceChar(lexer); + return lexer->cur_token = c; + } + } + return lexer->cur_token = TOKEN_EOF; +} + +static void initLexer (lexerState *lexer) +{ + advanceNChar(lexer, 2); + lexer->token_str = vStringNew(); + lexer->first_token = true; + lexer->cur_token = TOKEN_NONE; + lexer->prev_c = '\0'; + + if (lexer->cur_c == '#' && lexer->next_c == '!') + { + skipComment(lexer); + } + advanceToken(lexer, true, false); +} + +static void deInitLexer (lexerState *lexer) +{ + vStringDelete(lexer->token_str); + lexer->token_str = NULL; +} + +#if 0 +static void debugLexer (lexerState *lexer) +{ + printf("Current lexer state: line %d, token (%lu), cur char `%c`, token str:\n\t`", lexer->line, lexer->cur_token, lexer->cur_c); + printf(vStringValue(lexer->token_str)); + printf("`\n"); +} +#endif + +static void addTag (vString* ident, const char* type, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind) +{ + if (kind == K_NONE) + { + return; + } + tagEntryInfo tag; + initTagEntry(&tag, vStringValue(ident), kind); + + tag.lineNumber = line; + tag.filePosition = pos; + 
tag.sourceFileName = getInputFileName(); + + tag.extensionFields.signature = arg_list; + /* tag.extensionFields.varType = type; */ /* Needs a workaround */ + if (parent_kind != K_NONE) + { + tag.extensionFields.scopeKindIndex = parent_kind; + tag.extensionFields.scopeName = vStringValue(scope); + } + makeTagEntry(&tag); +} + +static void addReferenceTag (vString* ident, int kind, int role, unsigned long line, MIOPos pos, vString* scope, int parent_kind) +{ + if (kind == K_NONE) + { + return; + } + tagEntryInfo tag; + initRefTagEntry(&tag, vStringValue(ident), kind, role); + tag.lineNumber = line; + tag.filePosition = pos; + if (parent_kind != K_NONE) + { + tag.extensionFields.scopeKindIndex = parent_kind; + tag.extensionFields.scopeName = vStringValue(scope); + } + makeTagEntry(&tag); +} + +/* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens. + * Keeps track of balanced ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */ +static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens) +{ + int block_level = 0; + + while (lexer->cur_token != TOKEN_EOF) + { + /* check if the keyword is reached, only if outside a block */ + if (block_level == 0) + { + int ii = 0; + for(ii = 0; ii < num_goal_tokens; ii++) + { + if (lexer->cur_token == goal_tokens[ii]) + { + break; + } + } + if (ii < num_goal_tokens) + { + /* parse the next token */ + advanceToken(lexer, true, false); + break; + } + } + + /* take into account nested blocks */ + switch (lexer->cur_token) + { + case TOKEN_OPEN_BLOCK: + block_level++; + break; + case TOKEN_CLOSE_BLOCK: + block_level--; + break; + default: + break; + } + + /* Has to be after the token switch to catch the case when we start with the initial level token */ + if (num_goal_tokens == 0 && block_level == 0) + { + break; + } + + advanceToken(lexer, true, false); + } +} + +/* Skip until the end of the block */ +static void skipUntilEnd (lexerState *lexer) +{ 
+ int goal_tokens[] = { TOKEN_CLOSE_BLOCK }; + + skipUntil(lexer, goal_tokens, 1); +} + +/* Skip a function body after assignment operator '=' + * Beware of continuation lines after operators + * */ +static void skipBody (lexerState *lexer) +{ + /* assume position just after '=' */ + while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_NEWLINE) + { + advanceToken(lexer, true, false); + + if (lexer->cur_token == TOKEN_OPEN_BLOCK) + { + /* pass the keyword */ + advanceToken(lexer, true, false); + skipUntilEnd(lexer); + /* the next token is already selected */ + } + } +} + +/* Short function format: + * ( [] ) [::] [] = [begin] [end] + * */ +static void parseShortFunction (lexerState *lexer, vString *scope, int parent_kind) +{ + /* assume the current char is just after identifier */ + vString *name; + vString *arg_list; + unsigned long line; + MIOPos pos; + + /* should be an open parenthesis after identifier + * with potentially parametric type */ + skipWhitespace(lexer, false); + if (lexer->cur_c == '{') + { + scanCurlyBlock(lexer); + skipWhitespace(lexer, false); + } + + if (lexer->cur_c != '(') + { + advanceToken(lexer, true, false); + return; + } + + name = vStringNewCopy(lexer->token_str); + line = lexer->line; + pos = lexer->pos; + + /* scan argument list */ + advanceToken(lexer, true, false); + arg_list = vStringNewCopy(lexer->token_str); + + /* scan potential type casting */ + advanceToken(lexer, true, false); + if (lexer->cur_token == TOKEN_TYPE_ANNOTATION) + { + vStringCat(arg_list, lexer->token_str); + advanceToken(lexer, true, false); + } + /* scan potential type union with 'where' */ + if (lexer->cur_token == TOKEN_TYPE_WHERE) + { + vStringPut(arg_list, ' '); + vStringCat(arg_list, lexer->token_str); + advanceToken(lexer, true, false); + } + + /* scan equal sign, ignore `==` and `=>` */ + if (!(lexer->cur_token == '=' && + lexer->cur_c != '=' && + lexer->cur_c != '>')) + { + vStringDelete(name); + vStringDelete(arg_list); + return; + } + + 
addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, scope, parent_kind); + + /* scan until end of function definition */ + skipBody(lexer); + + /* Should end on a new line, parse next token */ + advanceToken(lexer, true, false); + lexer->first_token = true; + + vStringDelete(name); + vStringDelete(arg_list); +} + +/* Function format: + * function ( [] ) [::] [] [] end + * */ +static void parseFunction (lexerState *lexer, vString *scope, int parent_kind) +{ + vString *name; + vString *arg_list; + vString *local_scope; + int local_parent_kind; + unsigned long line; + MIOPos pos; + + advanceToken(lexer, true, false); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + else if (lexer->cur_c == '.') + { + local_scope = vStringNewCopy(lexer->token_str); + local_parent_kind = K_MODULE; + advanceChar(lexer); + advanceToken(lexer, true, false); + } + else + { + local_scope = vStringNewCopy(scope); + local_parent_kind = parent_kind; + } + + /* Scan for parametric type constructor */ + skipWhitespace(lexer, false); + if (lexer->cur_c == '{') + { + scanCurlyBlock(lexer); + skipWhitespace(lexer, false); + } + + name = vStringNewCopy(lexer->token_str); + arg_list = vStringNew(); + line = lexer->line; + pos = lexer->pos; + + advanceToken(lexer, true, false); + if (lexer->cur_token == TOKEN_PAREN_BLOCK) + { + vStringCopy(arg_list, lexer->token_str); + + /* scan potential type casting */ + advanceToken(lexer, true, false); + if (lexer->cur_token == TOKEN_TYPE_ANNOTATION) + { + vStringCat(arg_list, lexer->token_str); + advanceToken(lexer, true, false); + } + /* scan potential type union with 'where' */ + if (lexer->cur_token == TOKEN_TYPE_WHERE) + { + vStringPut(arg_list, ' '); + vStringCat(arg_list, lexer->token_str); + advanceToken(lexer, true, false); + } + + addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, local_scope, local_parent_kind); + addToScope(scope, name); + parseExpr(lexer, true, K_FUNCTION, scope); + } + else if 
(lexer->cur_token == TOKEN_CLOSE_BLOCK) + { + /* Function without method */ + addTag(name, NULL, NULL, K_FUNCTION, line, pos, local_scope, local_parent_kind); + /* Go to the closing 'end' keyword */ + skipUntilEnd(lexer); + } + + vStringDelete(name); + vStringDelete(arg_list); + vStringDelete(local_scope); +} + +/* Macro format: + * "macro" () + */ +static void parseMacro (lexerState *lexer, vString *scope, int parent_kind) +{ + vString *name; + unsigned long line; + MIOPos pos; + + advanceToken(lexer, true, false); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + + name = vStringNewCopy(lexer->token_str); + line = lexer->line; + pos = lexer->pos; + + advanceToken(lexer, true, false); + if (lexer->cur_token == TOKEN_PAREN_BLOCK) + { + addTag(name, NULL, vStringValue(lexer->token_str), K_MACRO, line, pos, scope, parent_kind); + } + + skipUntilEnd(lexer); + vStringDelete(name); +} + +/* Const format: + * "const" + */ +static void parseConst (lexerState *lexer, vString *scope, int parent_kind) +{ + vString *name; + + advanceToken(lexer, true, false); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + + name = vStringNewCopy(lexer->token_str); + + advanceToken(lexer, true, false); + if (lexer->cur_token == TOKEN_TYPE_ANNOTATION) + { + addTag(name, "const", vStringValue(lexer->token_str), K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind); + advanceToken(lexer, true, false); + } + else + { + addTag(name, "const", NULL, K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind); + } + + vStringDelete(name); +} + +/* Type format: + * [ "abstract" | "primitive" ] "type" + */ +static void parseType (lexerState *lexer, vString *scope, int parent_kind) +{ + advanceToken(lexer, true, false); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + + addTag(lexer->token_str, NULL, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind); + + skipUntilEnd(lexer); +} + +/* Module format: + * [ "baremodule" | "module" ] + */ +static void 
parseModule (lexerState *lexer, vString *scope, int parent_kind) +{ + advanceToken(lexer, true, false); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + + addTag(lexer->token_str, NULL, NULL, K_MODULE, lexer->line, lexer->pos, scope, parent_kind); + addToScope(scope, lexer->token_str); + advanceToken(lexer, true, false); + parseExpr(lexer, true, K_MODULE, scope); +} + +/* + * Parse comma separated entity in import/using expressions. An entity could be + * in the form of "Module" or "Module.symbol". The lexer should be at the end + * of "Module", and this function will take it to the end of the entity + * (whitespaces also skipped). + */ +static void parseImportEntity (lexerState *lexer, vString *scope, int token_type, int parent_kind) +{ + if (lexer->cur_c == '.') + { + if (token_type == TOKEN_IMPORT) + { + vString *module_name = vStringNewCopy(lexer->token_str); + addReferenceTag(module_name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind); + advanceChar(lexer); + advanceToken(lexer, true, false); + addReferenceTag(lexer->token_str, K_UNKNOWN, JULIA_UNKNOWN_IMPORTED, lexer->line, lexer->pos, module_name, K_MODULE); + vStringDelete(module_name); + } + else /* if (token_type == TOKEN_USING) */ + { + /* using Module.symbol is invalid, so we advance the lexer but don't tag it. 
*/ + advanceChar(lexer); + advanceToken(lexer, true, false); + } + } + else + { + if (token_type == TOKEN_IMPORT) + { + addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_IMPORTED, lexer->line, lexer->pos, scope, parent_kind); + } + else /* if (token_type == TOKEN_USING) */ + { + addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_USED, lexer->line, lexer->pos, scope, parent_kind); + } + } +} + +/* Parse import/using expressions with a colon, like: */ +/* import Module: symbol1, symbol2 */ +/* using Module: symbol1, symbol2 */ +/* The lexer should be at the end of "Module", and this function will take it + * to the end of the token after this expression (whitespaces also skipped). */ +static void parseColonImportExpr (lexerState *lexer, vString *scope, int token_type, int parent_kind) +{ + int symbol_role; + if (token_type == TOKEN_IMPORT) + { + symbol_role = JULIA_UNKNOWN_IMPORTED; + } + else /* if (token_type == TOKEN_USING) */ + { + symbol_role = JULIA_UNKNOWN_USED; + } + vString *name = vStringNewCopy(lexer->token_str); + addReferenceTag(name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind); + advanceChar(lexer); + advanceToken(lexer, true, false); + if (lexer->cur_token == TOKEN_NEWLINE) + { + advanceToken(lexer, true, false); + } + while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL) + { + addReferenceTag(lexer->token_str, K_UNKNOWN, symbol_role, lexer->line, lexer->pos, name, K_MODULE); + if (lexer->cur_c == ',') + { + advanceChar(lexer); + advanceToken(lexer, true, false); + if (lexer->cur_token == TOKEN_NEWLINE) + { + advanceToken(lexer, true, false); + } + } + else + { + advanceToken(lexer, true, false); + } + } + vStringDelete(name); +} + +/* Import format: + * [ "import" | "using" ] [: ] + */ +static void parseImport (lexerState *lexer, vString *scope, int token_type, int parent_kind) +{ + /* capture the imported name */ + advanceToken(lexer, true, false); + /* import Mod1: 
symbol1, symbol2 */ + /* using Mod1: symbol1, symbol2 */ + if (lexer->cur_c == ':') + { + parseColonImportExpr(lexer, scope, token_type, parent_kind); + } + /* All other situations, like import/using Mod1, Mod2.symbol1, Mod3... */ + else + { + while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL) + { + parseImportEntity(lexer, scope, token_type, parent_kind); + if (lexer->cur_c == ',') + { + advanceChar(lexer); + advanceToken(lexer, true, false); + if (lexer->cur_token == TOKEN_NEWLINE) + { + advanceToken(lexer, true, false); + } + } + else + { + advanceToken(lexer, true, false); + } + } + } +} + +/* Structs format: + * "struct" [{}] [<:]; end + * */ +static void parseStruct (lexerState *lexer, vString *scope, int parent_kind) +{ + vString *name; + vString *field; + size_t old_scope_len; + unsigned long line; + MIOPos pos; + + advanceToken(lexer, true, false); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + + name = vStringNewCopy(lexer->token_str); + field = vStringNew(); + line = lexer->line; + pos = lexer->pos; + + /* scan parametrization */ + advanceToken(lexer, true, false); + if (lexer->cur_token == TOKEN_CURLY_BLOCK) + { + addTag(name, NULL, vStringValue(lexer->token_str), K_STRUCT, line, pos, scope, parent_kind); + advanceToken(lexer, true, false); + } + else + { + addTag(name, NULL, NULL, K_STRUCT, line, pos, scope, parent_kind); + } + addToScope(scope, name); + + /* skip inheritance */ + if (lexer->cur_token == TOKEN_TYPE_ANNOTATION) + { + advanceToken(lexer, true, false); + } + + /* keep the struct scope in memory to reset it after parsing constructors */ + old_scope_len = vStringLength(scope); + /* Parse fields and inner constructors */ + while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_CLOSE_BLOCK) + { + if (lexer->cur_token == TOKEN_IDENTIFIER && lexer->first_token) + { + if (strcmp(vStringValue(lexer->token_str), vStringValue(name)) == 0) + { + /* inner constructor */ + 
parseShortFunction(lexer, scope, K_STRUCT); + continue; + } + + vStringCopy(field, lexer->token_str); + + /* parse type annotation */ + advanceToken(lexer, true, false); + if (lexer->cur_token == TOKEN_TYPE_ANNOTATION) + { + addTag(field, NULL, vStringValue(lexer->token_str), K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT); + advanceToken(lexer, true, false); + } + else + { + addTag(field, NULL, NULL, K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT); + } + } + else if (lexer->cur_token == TOKEN_FUNCTION) + { + /* inner constructor */ + parseFunction(lexer, scope, K_STRUCT); + } + else + { + /* Get next token */ + advanceToken(lexer, true, false); + } + resetScope(scope, old_scope_len); + } + + vStringDelete(name); + vStringDelete(field); +} + + +static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope) +{ + int level = 1; + size_t old_scope_len; + vString *local_scope = NULL; + + while (lexer->cur_token != TOKEN_EOF) + { + old_scope_len = vStringLength(scope); + /* Advance token and update if this is a new line */ + while (lexer->cur_token == TOKEN_NEWLINE || + lexer->cur_token == TOKEN_SEMICOLON || + lexer->cur_token == TOKEN_NONE ) + { + advanceToken(lexer, true, false); + } + + /* Make sure every case advances the token + * otherwise we can be stuck in infinite loop */ + switch (lexer->cur_token) + { + case TOKEN_CONST: + parseConst(lexer, scope, kind); + break; + case TOKEN_FUNCTION: + parseFunction(lexer, scope, kind); + break; + case TOKEN_MACRO: + parseMacro(lexer, scope, kind); + break; + case TOKEN_MODULE: + parseModule(lexer, scope, kind); + break; + case TOKEN_STRUCT: + parseStruct(lexer, scope, kind); + break; + case TOKEN_TYPE: + parseType(lexer, scope, kind); + break; + case TOKEN_IMPORT: + parseImport(lexer, scope, TOKEN_IMPORT, kind); + break; + case TOKEN_USING: + parseImport(lexer, scope, TOKEN_USING, kind); break; /* same as TOKEN_IMPORT: parseImport already advanced the lexer; falling through into TOKEN_IDENTIFIER would skip the token that ended the list (e.g. a closing `end`) */ + case TOKEN_IDENTIFIER: + if (lexer->first_token && lexer->cur_c == '.') + { + if (local_scope == NULL) + { +
local_scope = vStringNew(); + } + vStringCopy(local_scope, lexer->token_str); + advanceChar(lexer); + // next token, but keep the first_token value + advanceToken(lexer, true, true); + skipWhitespace(lexer, false); + if (lexer->cur_c == '(') + { + parseShortFunction(lexer, local_scope, K_MODULE); + } + } + else + { + skipWhitespace(lexer, false); + if (lexer->first_token && (lexer->cur_c == '(' || lexer->cur_c == '{')) + { + parseShortFunction(lexer, scope, kind); + } + else + { + advanceToken(lexer, true, false); + } + } + break; + case TOKEN_OPEN_BLOCK: + level++; + advanceToken(lexer, true, false); + break; + case TOKEN_CLOSE_BLOCK: + level--; + advanceToken(lexer, true, false); + break; + default: + advanceToken(lexer, true, false); + break; + } + resetScope(scope, old_scope_len); + if (delim && level <= 0) + { + break; + } + } + vStringDelete(local_scope); +} + +static void findJuliaTags (void) +{ + lexerState lexer; + vString* scope = vStringNew(); + initLexer(&lexer); + + parseExpr(&lexer, false, K_NONE, scope); + vStringDelete(scope); + + deInitLexer(&lexer); +} + +extern parserDefinition* JuliaParser (void) +{ + static const char *const extensions [] = { "jl", NULL }; + parserDefinition* def = parserNew ("Julia"); + def->kindTable = JuliaKinds; + def->kindCount = ARRAY_SIZE (JuliaKinds); + def->extensions = extensions; + def->parser = findJuliaTags; + def->keywordTable = JuliaKeywordTable; + def->keywordCount = ARRAY_SIZE (JuliaKeywordTable); + return def; +} diff --git a/data/Makefile.am b/data/Makefile.am index ca12354ea4..8851afde7d 100644 --- a/data/Makefile.am +++ b/data/Makefile.am @@ -44,6 +44,7 @@ filetypes_dist = \ filedefs/filetypes.html \ filedefs/filetypes.java \ filedefs/filetypes.javascript \ + filedefs/filetypes.julia \ filedefs/filetypes.JSON.conf \ filedefs/filetypes.latex \ filedefs/filetypes.lisp \ diff --git a/data/filedefs/filetypes.common b/data/filedefs/filetypes.common index f0992e326e..a749bb2da1 100644 --- 
a/data/filedefs/filetypes.common +++ b/data/filedefs/filetypes.common @@ -132,11 +132,13 @@ comment_doc_keyword_error=comment_doc,italic number=0x007f00 number_1=number number_2=number_1 +number_3=0x808000 type=0x0000d0;;true;false class=type function=0x000080 parameter=function +annotation=0x8080ff;;true;false keyword=0x00007f;;true;false keyword_1=keyword @@ -163,6 +165,7 @@ preprocessor=0x007f7f regex=number_1 operator=0x301010 decorator=string_1,bold +macro=preprocessor,bold other=0x404080 tag=type diff --git a/data/filedefs/filetypes.julia b/data/filedefs/filetypes.julia new file mode 100644 index 0000000000..c834c80649 --- /dev/null +++ b/data/filedefs/filetypes.julia @@ -0,0 +1,81 @@ +# filetypes.julia +# +# For complete documentation of this file, please see Geany's main documentation +# +# Keywords from pygment lexer (http://pygments.org/) +# and from vim parser (https://github.com/JuliaEditorSupport/julia-vim/) +# + +[styling] +default=default +comment=comment +number=number_3 +keyword1=keyword_1 +keyword2=keyword_2 +keyword3=number_3 +keyword4=identifier_1 +char=string_1 +operator=operator +bracket=operator +identifier=identifier_1 +string=string_2 +symbol=string_1 +macro=macro +stringinterp=default +docstring=string_2 +stringliteral=string_2,bold +command=default,italic +commandliteral=default,italic,bold +typeoperator=annotation +typeannotation=keyword_2 +lexerror=error + +[keywords] +# all items must be in one line +# primary should contain at least the reserved keyword (for, if, begin, end, ...) 
+primary=baremodule begin break catch const continue do else elseif end export finally for function global if import let local macro module quote return struct try using while abstract mutable primitive type where in isa as +secondary=Main Base Core Any AbstractArray AbstractRange LinRange OrdinalRange AbstractUnitRange UnitRange StepRange StepRangeLen BitArray CartesianIndices DenseArray Array LinearIndices PermutedDimsArray SubArray AbstractChannel Channel AbstractChar Char AbstractDict Dict IdDict WeakKeyDict AbstractDisplay TextDisplay AbstractSet BitSet Set AbstractString String SubString SubstitutionString Cstring Cwstring Enum Exception ArgumentError AssertionError BoundsError CapturedException CompositeException DimensionMismatch DivideError DomainError EOFError ErrorException InexactError InterruptException InvalidStateException KeyError MethodError MissingException OutOfMemoryError OverflowError ProcessFailedException ReadOnlyMemoryError SegmentationFault StackOverflowError StringIndexError SystemError TaskFailedException TypeError UndefKeywordError UndefRefError UndefVarError ExponentialBackOff Expr GlobalRef HTML IO IOStream IndexStyle IndexCartesian IndexLinear LineNumberNode MIME Method MethodSummary Missing Module NamedTuple Nothing Number Complex Real AbstractFloat BigFloat Float16 Float32 Float64 AbstractIrrational Irrational Integer Bool Signed BigInt Int Int128 Int16 Int32 Int64 Int8 Unsigned UInt UInt128 UInt16 UInt32 UInt64 UInt8 Rational Pair QuoteNode RawFD Ref Ptr Regex RegexMatch RoundingMode Some Symbol Task Text Timer Tuple Type DataType Union UnionAll TypeVar UndefInitializer Val Vararg VecElement VersionNumber WeakRef AbstractVector DenseVector StridedVector AbstractMatrix DenseMatrix StridedMatrix AbstractVecOrMat DenseVecOrMat StridedVecOrMat +tertiary=true false missing Inf NaN pi stdin stdout stderr devnull nothing undef ARGS ENV ENDIAN_BOM LOAD_PATH VERSION PROGRAM_FILE DEPOT_PATH +functions= + +[lexer_properties] +# Fold multiline 
triple-doublequote strings, usually used to document a function or type above the definition. +fold.julia.docstring=1 + +# Set this property to 0 to disable syntax based folding. +fold.julia.syntax.based=1 + +# This option enables highlighting of the type identifier after `::`. +lexer.julia.highlight.typeannotation=0 + +# This option enables highlighting of syntax errors in character or number definitions. +lexer.julia.highlight.lexerror=0 + +[settings] +lexer_filetype=Julia +tag_parser=Julia + +# default extension used when saving files +extension=jl + +# MIME type +mime_type=text/x-julia + +# single comments, like # in this file +comment_single=# +# multiline comments +comment_open=#= +comment_close==# + +[indentation] +width=4 +# 0 is spaces, 1 is tabs, 2 is tab & spaces +type=0 + + +[build-menu] +# %f will be replaced by the complete filename +# %e will be replaced by the filename without extension +# (use only one of it at one time) +compiler= +run_cmd=julia "%f" diff --git a/data/filetype_extensions.conf b/data/filetype_extensions.conf index 965e5c10a8..50fbff4c4d 100644 --- a/data/filetype_extensions.conf +++ b/data/filetype_extensions.conf @@ -43,6 +43,7 @@ HTML=*.htm;*.html;*.shtml;*.hta;*.htd;*.htt;*.cfm;*.tpl; Java=*.java;*.jsp; Javascript=*.js; JSON=*.json; +Julia=*.jl; Kotlin=*.kt;*.kts; LaTeX=*.tex;*.sty;*.idx;*.ltx;*.latex;*.aux; Lisp=*.lisp; diff --git a/scintilla/Makefile.am b/scintilla/Makefile.am index be8ed0ad83..e59703f35e 100644 --- a/scintilla/Makefile.am +++ b/scintilla/Makefile.am @@ -25,6 +25,7 @@ lexers/LexForth.cxx \ lexers/LexFortran.cxx \ lexers/LexHTML.cxx \ lexers/LexHaskell.cxx \ +lexers/LexJulia.cxx \ lexers/LexLaTeX.cxx \ lexers/LexLisp.cxx \ lexers/LexLua.cxx \ diff --git a/scintilla/include/SciLexer.h b/scintilla/include/SciLexer.h index e58f6184cf..3086688319 100644 --- a/scintilla/include/SciLexer.h +++ b/scintilla/include/SciLexer.h @@ -144,6 +144,7 @@ #define SCLEX_DATAFLEX 129 #define SCLEX_HOLLYWOOD 130 #define SCLEX_RAKU
131 +#define SCLEX_JULIA 133 #define SCLEX_LPEG 999 #define SCLEX_AUTOMATIC 1000 #define SCE_P_DEFAULT 0 @@ -932,6 +933,28 @@ #define SCE_ERLANG_MODULES 23 #define SCE_ERLANG_MODULES_ATT 24 #define SCE_ERLANG_UNKNOWN 31 +#define SCE_JULIA_DEFAULT 0 +#define SCE_JULIA_COMMENT 1 +#define SCE_JULIA_NUMBER 2 +#define SCE_JULIA_KEYWORD1 3 +#define SCE_JULIA_KEYWORD2 4 +#define SCE_JULIA_KEYWORD3 5 +#define SCE_JULIA_CHAR 6 +#define SCE_JULIA_OPERATOR 7 +#define SCE_JULIA_BRACKET 8 +#define SCE_JULIA_IDENTIFIER 9 +#define SCE_JULIA_STRING 10 +#define SCE_JULIA_SYMBOL 11 +#define SCE_JULIA_MACRO 12 +#define SCE_JULIA_STRINGINTERP 13 +#define SCE_JULIA_DOCSTRING 14 +#define SCE_JULIA_STRINGLITERAL 15 +#define SCE_JULIA_COMMAND 16 +#define SCE_JULIA_COMMANDLITERAL 17 +#define SCE_JULIA_TYPEANNOT 18 +#define SCE_JULIA_LEXERROR 19 +#define SCE_JULIA_KEYWORD4 20 +#define SCE_JULIA_TYPEOPERATOR 21 #define SCE_MSSQL_DEFAULT 0 #define SCE_MSSQL_COMMENT 1 #define SCE_MSSQL_LINE_COMMENT 2 diff --git a/scintilla/include/Scintilla.iface b/scintilla/include/Scintilla.iface index 7d32ed46df..4b40127baa 100644 --- a/scintilla/include/Scintilla.iface +++ b/scintilla/include/Scintilla.iface @@ -3255,6 +3255,7 @@ val SCLEX_X12=128 val SCLEX_DATAFLEX=129 val SCLEX_HOLLYWOOD=130 val SCLEX_RAKU=131 +val SCLEX_JULIA=133 val SCLEX_LPEG=999 # When a lexer specifies its language as SCLEX_AUTOMATIC it receives a @@ -4162,6 +4163,30 @@ val SCE_ERLANG_MODULES_ATT=24 val SCE_ERLANG_UNKNOWN=31 # Lexical states for SCLEX_OCTAVE are identical to MatLab lex Octave=SCLEX_OCTAVE SCE_MATLAB_ +# Lexical states for SCLEX_JULIA +lex Julia=SCLEX_JULIA SCE_JULIA_ +val SCE_JULIA_DEFAULT=0 +val SCE_JULIA_COMMENT=1 +val SCE_JULIA_NUMBER=2 +val SCE_JULIA_KEYWORD1=3 +val SCE_JULIA_KEYWORD2=4 +val SCE_JULIA_KEYWORD3=5 +val SCE_JULIA_CHAR=6 +val SCE_JULIA_OPERATOR=7 +val SCE_JULIA_BRACKET=8 +val SCE_JULIA_IDENTIFIER=9 +val SCE_JULIA_STRING=10 +val SCE_JULIA_SYMBOL=11 +val SCE_JULIA_MACRO=12 +val 
SCE_JULIA_STRINGINTERP=13 +val SCE_JULIA_DOCSTRING=14 +val SCE_JULIA_STRINGLITERAL=15 +val SCE_JULIA_COMMAND=16 +val SCE_JULIA_COMMANDLITERAL=17 +val SCE_JULIA_TYPEANNOT=18 +val SCE_JULIA_LEXERROR=19 +val SCE_JULIA_KEYWORD4=20 +val SCE_JULIA_TYPEOPERATOR=21 # Lexical states for SCLEX_MSSQL lex MSSQL=SCLEX_MSSQL SCE_MSSQL_ val SCE_MSSQL_DEFAULT=0 diff --git a/scintilla/julia_lexilla_v5.patch b/scintilla/julia_lexilla_v5.patch new file mode 100644 index 0000000000..0b96180e0f --- /dev/null +++ b/scintilla/julia_lexilla_v5.patch @@ -0,0 +1,36 @@ +diff --git a/lexilla/lexers/LexJulia.cxx b/geany/scintilla/lexers/LexJulia.cxx +index 6730074..ccf947d 100644 +--- a/lexilla/lexers/LexJulia.cxx ++++ b/geany/scintilla/lexers/LexJulia.cxx +@@ -39,7 +39,8 @@ + #include "DefaultLexer.h" + + using namespace Scintilla; +-using namespace Lexilla; ++// Geany still uses Scintilla v3.5 ++//using namespace Lexilla; + + static const int MAX_JULIA_IDENT_CHARS = 1023; + +@@ -138,7 +139,9 @@ public: + delete this; + } + int SCI_METHOD Version() const override { +- return lvRelease5; ++ // Geany still uses Scintilla v3.5 ++ //return lvRelease5; ++ return lvIdentity; + } + const char * SCI_METHOD PropertyNames() override { + return osJulia.PropertyNames(); +@@ -163,7 +166,9 @@ public: + return 0; + } + +- static ILexer5 *LexerFactoryJulia() { ++ // Geany still uses Scintilla v3.5 ++ //static ILexer5 *LexerFactoryJulia() { ++ static ILexer *LexerFactoryJulia() { + return new LexerJulia(); + } + }; diff --git a/scintilla/lexers/LexJulia.cxx b/scintilla/lexers/LexJulia.cxx new file mode 100644 index 0000000000..ccf947dc4f --- /dev/null +++ b/scintilla/lexers/LexJulia.cxx @@ -0,0 +1,1269 @@ +// Scintilla source code edit control +// Encoding: UTF-8 +/** @file LexJulia.cxx + ** Lexer for Julia. 
+ ** Reusing code from LexMatlab, LexPython and LexRust + ** + ** Written by Bertrand Lacoste + ** + **/ +// Copyright 1998-2001 by Neil Hodgson +// The License.txt file describes the conditions under which this software may be distributed. + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#include "StringCopy.h" +#include "PropSetSimple.h" +#include "StringCopy.h" +#include "WordList.h" +#include "LexAccessor.h" +#include "Accessor.h" +#include "StyleContext.h" +#include "CharacterSet.h" +#include "CharacterCategory.h" +#include "LexerModule.h" +#include "OptionSet.h" +#include "DefaultLexer.h" + +using namespace Scintilla; +// Geany still uses Scintilla v3.5 +//using namespace Lexilla; + +static const int MAX_JULIA_IDENT_CHARS = 1023; + +// Options used for LexerJulia +struct OptionsJulia { + bool fold; + bool foldComment; + bool foldCompact; + bool foldDocstring; + bool foldSyntaxBased; + bool highlightTypeannotation; + bool highlightLexerror; + OptionsJulia() { + fold = true; + foldComment = true; + foldCompact = false; + foldDocstring = true; + foldSyntaxBased = true; + highlightTypeannotation = false; + highlightLexerror = false; + } +}; + +const char * const juliaWordLists[] = { + "Primary keywords and identifiers", + "Built in types", + "Other keywords", + "Built in functions", + 0, +}; + +struct OptionSetJulia : public OptionSet { + OptionSetJulia() { + DefineProperty("fold", &OptionsJulia::fold); + + DefineProperty("fold.compact", &OptionsJulia::foldCompact); + + DefineProperty("fold.comment", &OptionsJulia::foldComment); + + DefineProperty("fold.julia.docstring", &OptionsJulia::foldDocstring, + "Fold multiline triple-doublequote strings, usually used to document a function or type above the definition."); + + DefineProperty("fold.julia.syntax.based", &OptionsJulia::foldSyntaxBased, + "Set this property to 0 to disable syntax based 
folding."); + + DefineProperty("lexer.julia.highlight.typeannotation", &OptionsJulia::highlightTypeannotation, + "This option enables highlighting of the type identifier after `::`."); + + DefineProperty("lexer.julia.highlight.lexerror", &OptionsJulia::highlightLexerror, + "This option enables highlighting of syntax error int character or number definition."); + + DefineWordListSets(juliaWordLists); + } +}; + +LexicalClass juliaLexicalClasses[] = { + // Lexer Julia SCLEX_JULIA SCE_JULIA_: + 0, "SCE_JULIA_DEFAULT", "default", "White space", + 1, "SCE_JULIA_COMMENT", "comment", "Comment", + 2, "SCE_JULIA_NUMBER", "literal numeric", "Number", + 3, "SCE_JULIA_KEYWORD1", "keyword", "Reserved keywords", + 4, "SCE_JULIA_KEYWORD2", "identifier", "Builtin type names", + 5, "SCE_JULIA_KEYWORD3", "identifier", "Constants", + 6, "SCE_JULIA_CHAR", "literal string character", "Single quoted string", + 7, "SCE_JULIA_OPERATOR", "operator", "Operator", + 8, "SCE_JULIA_BRACKET", "bracket operator", "Bracket operator", + 9, "SCE_JULIA_IDENTIFIER", "identifier", "Identifier", + 10, "SCE_JULIA_STRING", "literal string", "Double quoted String", + 11, "SCE_JULIA_SYMBOL", "literal string symbol", "Symbol", + 12, "SCE_JULIA_MACRO", "macro preprocessor", "Macro", + 13, "SCE_JULIA_STRINGINTERP", "literal string interpolated", "String interpolation", + 14, "SCE_JULIA_DOCSTRING", "literal string documentation", "Docstring", + 15, "SCE_JULIA_STRINGLITERAL", "literal string", "String literal prefix", + 16, "SCE_JULIA_COMMAND", "literal string command", "Command", + 17, "SCE_JULIA_COMMANDLITERAL", "literal string command", "Command literal prefix", + 18, "SCE_JULIA_TYPEANNOT", "identifier type", "Type annotation identifier", + 19, "SCE_JULIA_LEXERROR", "lexer error", "Lexing error", + 20, "SCE_JULIA_KEYWORD4", "identifier", "Builtin function names", + 21, "SCE_JULIA_TYPEOPERATOR", "operator type", "Type annotation operator", +}; + +class LexerJulia : public DefaultLexer { + WordList keywords; + 
WordList identifiers2; + WordList identifiers3; + WordList identifiers4; + OptionsJulia options; + OptionSetJulia osJulia; +public: + explicit LexerJulia() : + DefaultLexer("julia", SCLEX_JULIA, juliaLexicalClasses, ELEMENTS(juliaLexicalClasses)) { + } + virtual ~LexerJulia() { + } + void SCI_METHOD Release() override { + delete this; + } + int SCI_METHOD Version() const override { + // Geany still uses Scintilla v3.5 + //return lvRelease5; + return lvIdentity; + } + const char * SCI_METHOD PropertyNames() override { + return osJulia.PropertyNames(); + } + int SCI_METHOD PropertyType(const char *name) override { + return osJulia.PropertyType(name); + } + const char * SCI_METHOD DescribeProperty(const char *name) override { + return osJulia.DescribeProperty(name); + } + Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; + const char * SCI_METHOD PropertyGet(const char *key) override { + return osJulia.PropertyGet(key); + } + const char * SCI_METHOD DescribeWordListSets() override { + return osJulia.DescribeWordListSets(); + } + Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; + void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; + void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; + void * SCI_METHOD PrivateCall(int, void *) override { + return 0; + } + + // Geany still uses Scintilla v3.5 + //static ILexer5 *LexerFactoryJulia() { + static ILexer *LexerFactoryJulia() { + return new LexerJulia(); + } +}; + +Sci_Position SCI_METHOD LexerJulia::PropertySet(const char *key, const char *val) { + if (osJulia.PropertySet(&options, key, val)) { + return 0; + } + return -1; +} + +Sci_Position SCI_METHOD LexerJulia::WordListSet(int n, const char *wl) { + WordList *wordListN = nullptr; + switch (n) { + case 0: + wordListN = &keywords; + break; + case 1: + wordListN = &identifiers2; + break; + case 2: + wordListN 
= &identifiers3; + break; + case 3: + wordListN = &identifiers4; + break; + } + Sci_Position firstModification = -1; + if (wordListN) { + WordList wlNew; + wlNew.Set(wl); + if (*wordListN != wlNew) { + wordListN->Set(wl); + firstModification = 0; + } + } + return firstModification; +} + +static inline bool IsJuliaOperator(int ch) { + if (ch == '%' || ch == '^' || ch == '&' || ch == '*' || + ch == '-' || ch == '+' || ch == '=' || ch == '|' || + ch == '<' || ch == '>' || ch == '/' || ch == '~' || + ch == '\\' ) { + return true; + } + return false; +} + +// The list contains non-ascii unary operators +static inline bool IsJuliaUnaryOperator (int ch) { + if (ch == 0x00ac || ch == 0x221a || ch == 0x221b || + ch == 0x221c || ch == 0x22c6 || ch == 0x00b1 || + ch == 0x2213 ) { + return true; + } + return false; +} + +static inline bool IsJuliaParen (int ch) { + if (ch == '(' || ch == ')' || ch == '{' || ch == '}' || + ch == '[' || ch == ']' ) { + return true; + } + return false; +} + +// Unicode parsing from Julia source code: +// https://github.com/JuliaLang/julia/blob/master/src/flisp/julia_extensions.c +// keep the same function name to be easy to find again +static int is_wc_cat_id_start(uint32_t wc) { + const CharacterCategory cat = CategoriseCharacter((int) wc); + + return (cat == ccLu || cat == ccLl || + cat == ccLt || cat == ccLm || + cat == ccLo || cat == ccNl || + cat == ccSc || // allow currency symbols + // other symbols, but not arrows or replacement characters + (cat == ccSo && !(wc >= 0x2190 && wc <= 0x21FF) && + wc != 0xfffc && wc != 0xfffd && + wc != 0x233f && // notslash + wc != 0x00a6) || // broken bar + + // math symbol (category Sm) whitelist + (wc >= 0x2140 && wc <= 0x2a1c && + ((wc >= 0x2140 && wc <= 0x2144) || // ⅀, ⅁, ⅂, ⅃, ⅄ + wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿ + wc == 0x22a4 || wc == 0x22a5 || // ⊤ ⊥ + + (wc >= 0x2202 && wc <= 0x2233 && + (wc == 0x2202 || wc == 0x2205 || wc == 0x2206 || // ∂, ∅, ∆ + wc == 0x2207 || wc == 
0x220e || wc == 0x220f || // ∇, ∎, ∏ + wc == 0x2210 || wc == 0x2211 || // ∐, ∑ + wc == 0x221e || wc == 0x221f || // ∞, ∟ + wc >= 0x222b)) || // ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳ + + (wc >= 0x22c0 && wc <= 0x22c3) || // N-ary big ops: ⋀, ⋁, ⋂, ⋃ + (wc >= 0x25F8 && wc <= 0x25ff) || // ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿ + + (wc >= 0x266f && + (wc == 0x266f || wc == 0x27d8 || wc == 0x27d9 || // ♯, ⟘, ⟙ + (wc >= 0x27c0 && wc <= 0x27c1) || // ⟀, ⟁ + (wc >= 0x29b0 && wc <= 0x29b4) || // ⦰, ⦱, ⦲, ⦳, ⦴ + (wc >= 0x2a00 && wc <= 0x2a06) || // ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆ + (wc >= 0x2a09 && wc <= 0x2a16) || // ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒, ⨓, ⨔, ⨕, ⨖ + wc == 0x2a1b || wc == 0x2a1c)))) || // ⨛, ⨜ + + (wc >= 0x1d6c1 && // variants of \nabla and \partial + (wc == 0x1d6c1 || wc == 0x1d6db || + wc == 0x1d6fb || wc == 0x1d715 || + wc == 0x1d735 || wc == 0x1d74f || + wc == 0x1d76f || wc == 0x1d789 || + wc == 0x1d7a9 || wc == 0x1d7c3)) || + + // super- and subscript +-=() + (wc >= 0x207a && wc <= 0x207e) || + (wc >= 0x208a && wc <= 0x208e) || + + // angle symbols + (wc >= 0x2220 && wc <= 0x2222) || // ∠, ∡, ∢ + (wc >= 0x299b && wc <= 0x29af) || // ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥, ⦦, ⦧, ⦨, ⦩, ⦪, ⦫, ⦬, ⦭, ⦮, ⦯ + + // Other_ID_Start + wc == 0x2118 || wc == 0x212E || // ℘, ℮ + (wc >= 0x309B && wc <= 0x309C) || // katakana-hiragana sound marks + + // bold-digits and double-struck digits + (wc >= 0x1D7CE && wc <= 0x1D7E1)); // 𝟎 through 𝟗 (inclusive), 𝟘 through 𝟡 (inclusive) +} + +static inline bool IsIdentifierFirstCharacter (int ch) { + if (IsASCII(ch)) { + return (bool) (isalpha(ch) || ch == '_'); + } + if (ch < 0xA1 || ch > 0x10ffff) { + return false; + } + + return is_wc_cat_id_start((uint32_t) ch); +} + +static inline bool IsIdentifierCharacter (int ch) { + if (IsASCII(ch)) { + return (bool) (isalnum(ch) || ch == '_' || ch == '!'); + } + if (ch < 0xA1 || ch > 0x10ffff) { + return false; + } + + if (is_wc_cat_id_start((uint32_t) ch)) { + return true; + } + + const CharacterCategory cat = CategoriseCharacter(ch); + 
+ if (cat == ccMn || cat == ccMc || + cat == ccNd || cat == ccPc || + cat == ccSk || cat == ccMe || + cat == ccNo || + // primes (single, double, triple, their reverses, and quadruple) + (ch >= 0x2032 && ch <= 0x2037) || (ch == 0x2057)) { + return true; + } + return false; +} + +// keep the same function name to be easy to find again +static const uint32_t opsuffs[] = { + 0x00b2, // ² + 0x00b3, // ³ + 0x00b9, // ¹ + 0x02b0, // ʰ + 0x02b2, // ʲ + 0x02b3, // ʳ + 0x02b7, // ʷ + 0x02b8, // ʸ + 0x02e1, // ˡ + 0x02e2, // ˢ + 0x02e3, // ˣ + 0x1d2c, // ᴬ + 0x1d2e, // ᴮ + 0x1d30, // ᴰ + 0x1d31, // ᴱ + 0x1d33, // ᴳ + 0x1d34, // ᴴ + 0x1d35, // ᴵ + 0x1d36, // ᴶ + 0x1d37, // ᴷ + 0x1d38, // ᴸ + 0x1d39, // ᴹ + 0x1d3a, // ᴺ + 0x1d3c, // ᴼ + 0x1d3e, // ᴾ + 0x1d3f, // ᴿ + 0x1d40, // ᵀ + 0x1d41, // ᵁ + 0x1d42, // ᵂ + 0x1d43, // ᵃ + 0x1d47, // ᵇ + 0x1d48, // ᵈ + 0x1d49, // ᵉ + 0x1d4d, // ᵍ + 0x1d4f, // ᵏ + 0x1d50, // ᵐ + 0x1d52, // ᵒ + 0x1d56, // ᵖ + 0x1d57, // ᵗ + 0x1d58, // ᵘ + 0x1d5b, // ᵛ + 0x1d5d, // ᵝ + 0x1d5e, // ᵞ + 0x1d5f, // ᵟ + 0x1d60, // ᵠ + 0x1d61, // ᵡ + 0x1d62, // ᵢ + 0x1d63, // ᵣ + 0x1d64, // ᵤ + 0x1d65, // ᵥ + 0x1d66, // ᵦ + 0x1d67, // ᵧ + 0x1d68, // ᵨ + 0x1d69, // ᵩ + 0x1d6a, // ᵪ + 0x1d9c, // ᶜ + 0x1da0, // ᶠ + 0x1da5, // ᶥ + 0x1da6, // ᶦ + 0x1dab, // ᶫ + 0x1db0, // ᶰ + 0x1db8, // ᶸ + 0x1dbb, // ᶻ + 0x1dbf, // ᶿ + 0x2032, // ′ + 0x2033, // ″ + 0x2034, // ‴ + 0x2035, // ‵ + 0x2036, // ‶ + 0x2037, // ‷ + 0x2057, // ⁗ + 0x2070, // ⁰ + 0x2071, // ⁱ + 0x2074, // ⁴ + 0x2075, // ⁵ + 0x2076, // ⁶ + 0x2077, // ⁷ + 0x2078, // ⁸ + 0x2079, // ⁹ + 0x207a, // ⁺ + 0x207b, // ⁻ + 0x207c, // ⁼ + 0x207d, // ⁽ + 0x207e, // ⁾ + 0x207f, // ⁿ + 0x2080, // ₀ + 0x2081, // ₁ + 0x2082, // ₂ + 0x2083, // ₃ + 0x2084, // ₄ + 0x2085, // ₅ + 0x2086, // ₆ + 0x2087, // ₇ + 0x2088, // ₈ + 0x2089, // ₉ + 0x208a, // ₊ + 0x208b, // ₋ + 0x208c, // ₌ + 0x208d, // ₍ + 0x208e, // ₎ + 0x2090, // ₐ + 0x2091, // ₑ + 0x2092, // ₒ + 0x2093, // ₓ + 0x2095, // ₕ + 0x2096, // ₖ + 0x2097, // ₗ + 0x2098, // ₘ + 
0x2099, // ₙ + 0x209a, // ₚ + 0x209b, // ₛ + 0x209c, // ₜ + 0x2c7c, // ⱼ + 0x2c7d, // ⱽ + 0xa71b, // ꜛ + 0xa71c, // ꜜ + 0xa71d // ꜝ +}; +static const size_t opsuffs_len = sizeof(opsuffs) / (sizeof(uint32_t)); + +// keep the same function name to be easy to find again +static bool jl_op_suffix_char(uint32_t wc) { + if (wc < 0xA1 || wc > 0x10ffff) { + return false; + } + const CharacterCategory cat = CategoriseCharacter((int) wc); + if (cat == ccMn || cat == ccMc || + cat == ccMe) { + return true; + } + + for (size_t i = 0; i < opsuffs_len; ++i) { + if (wc == opsuffs[i]) { + return true; + } + } + return false; +} + +// keep the same function name to be easy to find again +static bool never_id_char(uint32_t wc) { + const CharacterCategory cat = CategoriseCharacter((int) wc); + return ( + // spaces and control characters: + (cat >= ccZs && cat <= ccCs) || + + // ASCII and Latin1 non-connector punctuation + (wc < 0xff && + cat >= ccPd && cat <= ccPo) || + + wc == '`' || + + // mathematical brackets + (wc >= 0x27e6 && wc <= 0x27ef) || + // angle, corner, and lenticular brackets + (wc >= 0x3008 && wc <= 0x3011) || + // tortoise shell, square, and more lenticular brackets + (wc >= 0x3014 && wc <= 0x301b) || + // fullwidth parens + (wc == 0xff08 || wc == 0xff09) || + // fullwidth square brackets + (wc == 0xff3b || wc == 0xff3d)); +} + + +static bool IsOperatorFirstCharacter (int ch) { + if (IsASCII(ch)) { + if (IsJuliaOperator(ch) || + ch == '!' || ch == '?' || + ch == ':' || ch == ';' || + ch == ',' || ch == '.' ) { + return true; + }else { + return false; + } + } else if (is_wc_cat_id_start((uint32_t) ch)) { + return false; + } else if (IsJuliaUnaryOperator(ch) || + ! 
never_id_char((uint32_t) ch)) { + return true; + } + return false; +} + +static bool IsOperatorCharacter (int ch) { + if (IsOperatorFirstCharacter(ch) || + (!IsASCII(ch) && jl_op_suffix_char((uint32_t) ch)) ) { + return true; + } + return false; +} + +static bool CheckBoundsIndexing(char *str) { + if (strcmp("begin", str) == 0 || strcmp("end", str) == 0 ) { + return true; + } + return false; +} + +static int CheckKeywordFoldPoint(char *str) { + if (strcmp ("if", str) == 0 || + strcmp ("for", str) == 0 || + strcmp ("while", str) == 0 || + strcmp ("try", str) == 0 || + strcmp ("do", str) == 0 || + strcmp ("begin", str) == 0 || + strcmp ("let", str) == 0 || + strcmp ("baremodule", str) == 0 || + strcmp ("quote", str) == 0 || + strcmp ("module", str) == 0 || + strcmp ("struct", str) == 0 || + strcmp ("type", str) == 0 || + strcmp ("macro", str) == 0 || + strcmp ("function", str) == 0) { + return 1; + } + if (strcmp("end", str) == 0) { + return -1; + } + return 0; +} + +static bool IsNumberExpon(int ch, int base) { + if ((base == 10 && (ch == 'e' || ch == 'E' || ch == 'f')) || + (base == 16 && (ch == 'p' || ch == 'P'))) { + return true; + } + return false; +} + +/* Scans a sequence of digits, returning true if it found any. 
*/ +static bool ScanDigits(StyleContext& sc, int base, bool allow_sep) { + bool found = false; + for (;;) { + if (IsADigit(sc.chNext, base) || (allow_sep && sc.chNext == '_')) { + found = true; + sc.Forward(); + } else { + break; + } + } + return found; +} + +static inline bool ScanNHexas(StyleContext &sc, int max) { /* Steps past the current character, then consumes at most `max` hexadecimal digits. NOTE: returns true on ERROR (the first character after the step is not a hex digit) and false on success, the opposite sense of ScanDigits above. */ + int n = 0; + bool error = false; + + sc.Forward(); /* callers invoke this while sc.ch is the escape letter (x, u or U), so this moves onto the first candidate digit */ + if (!IsADigit(sc.ch, 16)) { + error = true; + } else { + while (IsADigit(sc.ch, 16) && n < max) { + sc.Forward(); + n++; + } + } + return error; +} + +static void resumeCharacter(StyleContext &sc, bool lexerror) { + bool error = false; + + // ''' case + if (sc.chPrev == '\'' && sc.ch == '\'' && sc.chNext == '\'') { + sc.Forward(); + sc.ForwardSetState(SCE_JULIA_DEFAULT); + return; + } else if (lexerror && sc.chPrev == '\'' && sc.ch == '\'') { + sc.ChangeState(SCE_JULIA_LEXERROR); + sc.ForwardSetState(SCE_JULIA_DEFAULT); + + // Escape characters + } else if (sc.ch == '\\') { + sc.Forward(); + if (sc.ch == '\'' || sc.ch == '\\' ) { + sc.Forward(); + } else if (sc.ch == 'n' || sc.ch == 't' || sc.ch == 'a' || + sc.ch == 'b' || sc.ch == 'e' || sc.ch == 'f' || + sc.ch == 'r' || sc.ch == 'v' ) { + sc.Forward(); + } else if (sc.ch == 'x') { + error |= ScanNHexas(sc, 2); + } else if (sc.ch == 'u') { + error |= ScanNHexas(sc, 4); + } else if (sc.ch == 'U') { + error |= ScanNHexas(sc, 8); + } else if (IsADigit(sc.ch, 8)) { + int n = 1; + int max = 3; + sc.Forward(); + while (IsADigit(sc.ch, 8) && n < max) { + sc.Forward(); + n++; + } + } + + if (lexerror) { + if (sc.ch != '\'') { + error = true; + while (sc.ch != '\'' && + sc.ch != '\r' && + sc.ch != '\n') { + sc.Forward(); + } + } + + if (error) { + sc.ChangeState(SCE_JULIA_LEXERROR); + sc.ForwardSetState(SCE_JULIA_DEFAULT); + } + } + } else if (lexerror) { + if (sc.ch < 0x20 || sc.ch > 0x10ffff) { + error = true; + } else { + // single character + sc.Forward(); + + if (sc.ch != '\'') { + error = true; + while (sc.ch != '\'' && + sc.ch != '\r' && + sc.ch !=
'\n') { + sc.Forward(); + } + } + } + + if (error) { + sc.ChangeState(SCE_JULIA_LEXERROR); + sc.ForwardSetState(SCE_JULIA_DEFAULT); + } + } + + // closing quote + if (sc.ch == '\'') { + if (sc.chNext == '\'') { + sc.Forward(); + } else { + sc.ForwardSetState(SCE_JULIA_DEFAULT); + } + } +} + +static inline bool IsACharacter(StyleContext &sc) { + return (sc.chPrev == '\'' && sc.chNext == '\''); +} + +static void ScanParenInterpolation(StyleContext &sc) { + // TODO: no syntax highlighting inside a string interpolation + + // Level of nested parenthesis + int interp_level = 0; + + // If true, it is inside a string and parenthesis are not counted. + bool allow_paren_string = false; + + + // check for end of states + for (; sc.More(); sc.Forward()) { + // TODO: check corner cases for nested string interpolation + // TODO: check corner cases with Command inside interpolation + + if ( sc.ch == '\"' && sc.chPrev != '\\') { + // Toggle the string environment (parenthesis are not counted inside a string) + allow_paren_string = !allow_paren_string; + } else if ( !allow_paren_string ) { + if ( sc.ch == '(' && !IsACharacter(sc) ) { + interp_level ++; + } else if ( sc.ch == ')' && !IsACharacter(sc) && interp_level > 0 ) { + interp_level --; + if (interp_level == 0) { + // Exit interpolation + return; + } + } + } + } +} +/* + * Start parsing a number, parse the base. 
+ */ +static void initNumber (StyleContext &sc, int &base, bool &with_dot) { + base = 10; + with_dot = false; + sc.SetState(SCE_JULIA_NUMBER); + if (sc.ch == '0') { + if (sc.chNext == 'x') { + sc.Forward(); + base = 16; + if (sc.chNext == '.') { + sc.Forward(); + with_dot = true; + } + } else if (sc.chNext == 'o') { + sc.Forward(); + base = 8; + } else if (sc.chNext == 'b') { + sc.Forward(); + base = 2; + } + } else if (sc.ch == '.') { + with_dot = true; + } +} + +/* + * Resume parsing a String or Command, bounded by the `quote` character (\" or \`) + * The `triple` argument specifies if it is a triple-quote String or Command. + * Interpolation is detected (with `$`), and parsed if `allow_interp` is true. + */ +static void resumeStringLike(StyleContext &sc, int quote, bool triple, bool allow_interp, bool full_highlight) { + int stylePrev = sc.state; + bool checkcurrent = false; + + // Escape characters + if (sc.ch == '\\') { + if (sc.chNext == quote || sc.chNext == '\\' || sc.chNext == '$') { + sc.Forward(); + } + } else if (allow_interp && sc.ch == '$') { + // If the interpolation is only of a variable, do not change state + if (sc.chNext == '(') { + if (full_highlight) { + sc.SetState(SCE_JULIA_STRINGINTERP); + } else { + sc.ForwardSetState(SCE_JULIA_STRINGINTERP); + } + ScanParenInterpolation(sc); + sc.ForwardSetState(stylePrev); + + checkcurrent = true; + + } else if (full_highlight && IsIdentifierFirstCharacter(sc.chNext)) { + sc.SetState(SCE_JULIA_STRINGINTERP); + sc.Forward(); + sc.Forward(); + for (; sc.More(); sc.Forward()) { + if (! 
IsIdentifierCharacter(sc.ch)) { + break; + } + } + sc.SetState(stylePrev); + + checkcurrent = true; + } + + if (checkcurrent) { + // Check that the current character is not a special char, + // otherwise it will be skipped + resumeStringLike(sc, quote, triple, allow_interp, full_highlight); + } + + } else if (sc.ch == quote) { + if (triple) { + if (sc.chNext == quote && sc.GetRelativeCharacter(2) == quote) { + // Move to the end of the triple quotes + Sci_PositionU nextIndex = sc.currentPos + 2; + while (nextIndex > sc.currentPos && sc.More()) { + sc.Forward(); + } + sc.ForwardSetState(SCE_JULIA_DEFAULT); + } + } else { + sc.ForwardSetState(SCE_JULIA_DEFAULT); + } + } +} + +static void resumeCommand(StyleContext &sc, bool triple, bool allow_interp) { + return resumeStringLike(sc, '`', triple, allow_interp, true); +} + +static void resumeString(StyleContext &sc, bool triple, bool allow_interp) { + return resumeStringLike(sc, '"', triple, allow_interp, true); +} + +static void resumeNumber (StyleContext &sc, int base, bool &with_dot, bool lexerror) { + if (IsNumberExpon(sc.ch, base)) { + if (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-') { + sc.Forward(); + // Capture all digits + ScanDigits(sc, 10, false); + sc.Forward(); + } + sc.SetState(SCE_JULIA_DEFAULT); + } else if (sc.ch == '.' && sc.chNext == '.') { + // Interval operator `..` + sc.SetState(SCE_JULIA_OPERATOR); + sc.Forward(); + sc.ForwardSetState(SCE_JULIA_DEFAULT); + } else if (sc.ch == '.' 
&& !with_dot) { + with_dot = true; + ScanDigits(sc, base, true); + } else if (IsADigit(sc.ch, base) || sc.ch == '_') { + ScanDigits(sc, base, true); + } else if (IsADigit(sc.ch) && !IsADigit(sc.ch, base)) { + if (lexerror) { + sc.ChangeState(SCE_JULIA_LEXERROR); + } + ScanDigits(sc, 10, false); + sc.ForwardSetState(SCE_JULIA_DEFAULT); + } else { + sc.SetState(SCE_JULIA_DEFAULT); + } +} + +static void resumeOperator (StyleContext &sc) { + if (sc.chNext == ':' && (sc.ch == ':' || sc.ch == '<' || + (sc.ch == '>' && (sc.chPrev != '-' && sc.chPrev != '=')))) { + // Case `:a=>:b` + sc.Forward(); + sc.ForwardSetState(SCE_JULIA_DEFAULT); + } else if (sc.ch == ':') { + // Case `foo(:baz,:baz)` or `:one+:two` + // Let the default case switch decide if it is a symbol + sc.SetState(SCE_JULIA_DEFAULT); + } else if (sc.ch == '\'') { + sc.SetState(SCE_JULIA_DEFAULT); + } else if ((sc.ch == '.' && sc.chPrev != '.') || IsIdentifierFirstCharacter(sc.ch) || + (! (sc.chPrev == '.' && IsOperatorFirstCharacter(sc.ch)) && + ! IsOperatorCharacter(sc.ch)) ) { + sc.SetState(SCE_JULIA_DEFAULT); + } +} + +void SCI_METHOD LexerJulia::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { + PropSetSimple props; + Accessor styler(pAccess, &props); + + Sci_Position pos = startPos; + styler.StartAt(pos); + styler.StartSegment(pos); + + // use the line state of each line to store block/multiline states + Sci_Position curLine = styler.GetLine(startPos); + // Default is false for everything and 0 counters. + int lineState = (curLine > 0) ? 
styler.GetLineState(curLine-1) : 0; + + bool transpose = (lineState >> 0) & 0x01; // 1 bit to know if ' is allowed to mean transpose + bool istripledocstring = (lineState >> 1) & 0x01; // 1 bit to know if we are in a triple doublequotes string + bool triple_backtick = (lineState >> 2) & 0x01; // 1 bit to know if we are in a triple backtick command + bool israwstring = (lineState >> 3) & 0x01; // 1 bit to know if we are in a raw string + int indexing_level = (int)((lineState >> 4) & 0x0F); // 4 bits of bracket nesting counter + int list_comprehension = (int)((lineState >> 8) & 0x0F); // 4 bits of parenthesis nesting counter + int commentDepth = (int)((lineState >> 12) & 0x0F); // 4 bits of nested comment counter + + // base for parsing number + int base = 10; + // number has a float dot ? + bool with_dot = false; + + StyleContext sc(startPos, length, initStyle, styler); + + for (; sc.More(); sc.Forward()) { + + //// check for end of states + switch (sc.state) { + case SCE_JULIA_BRACKET: + sc.SetState(SCE_JULIA_DEFAULT); + break; + case SCE_JULIA_OPERATOR: + resumeOperator(sc); + break; + case SCE_JULIA_TYPEOPERATOR: + sc.SetState(SCE_JULIA_DEFAULT); + break; + case SCE_JULIA_TYPEANNOT: + if (! IsIdentifierCharacter(sc.ch)) { + sc.SetState(SCE_JULIA_DEFAULT); + } + break; + case SCE_JULIA_IDENTIFIER: + // String literal + if (sc.ch == '\"') { + // If the string literal has a prefix, interpolation is disabled + israwstring = true; + sc.ChangeState(SCE_JULIA_STRINGLITERAL); + sc.SetState(SCE_JULIA_DEFAULT); + + } else if (sc.ch == '`') { + // If the string literal has a prefix, interpolation is disabled + israwstring = true; + sc.ChangeState(SCE_JULIA_COMMANDLITERAL); + sc.SetState(SCE_JULIA_DEFAULT); + + // Continue if the character is an identifier character + } else if (! 
IsIdentifierCharacter(sc.ch)) { + char s[MAX_JULIA_IDENT_CHARS + 1]; + sc.GetCurrent(s, sizeof(s)); + + // Treat the keywords differently if we are indexing or not + if ( indexing_level > 0 && CheckBoundsIndexing(s)) { + // Inside [], (), `begin` and `end` are numbers not block keywords + sc.ChangeState(SCE_JULIA_NUMBER); + transpose = false; + + } else { + if (keywords.InList(s)) { + sc.ChangeState(SCE_JULIA_KEYWORD1); + transpose = false; + } else if (identifiers2.InList(s)) { + sc.ChangeState(SCE_JULIA_KEYWORD2); + transpose = false; + } else if (identifiers3.InList(s)) { + sc.ChangeState(SCE_JULIA_KEYWORD3); + transpose = false; + } else if (identifiers4.InList(s)) { + sc.ChangeState(SCE_JULIA_KEYWORD4); + // These identifiers can be used for variable names also, + // so transpose is not forbidden. + //transpose = false; + } + } + sc.SetState(SCE_JULIA_DEFAULT); + + // TODO: recognize begin-end blocks inside list comprehension + // b = [(begin n%2; n*2 end) for n in 1:10] + // TODO: recognize better comprehension for-if to avoid problem with code-folding + // c = [(if isempty(a); missing else first(b) end) for (a, b) in zip(l1, l2)] + } + break; + case SCE_JULIA_NUMBER: + resumeNumber(sc, base, with_dot, options.highlightLexerror); + break; + case SCE_JULIA_CHAR: + resumeCharacter(sc, options.highlightLexerror); + break; + case SCE_JULIA_DOCSTRING: + resumeString(sc, true, !israwstring); + if (sc.state == SCE_JULIA_DEFAULT && israwstring) { + israwstring = false; + } + break; + case SCE_JULIA_STRING: + resumeString(sc, false, !israwstring); + if (sc.state == SCE_JULIA_DEFAULT && israwstring) { + israwstring = false; + } + break; + case SCE_JULIA_COMMAND: + resumeCommand(sc, triple_backtick, !israwstring); + // A prefixed command (e.g. raw`...`) sets israwstring in the IDENTIFIER case. + // Clear it once the command is closed, exactly as the string cases do, + // otherwise the next plain string/command is lexed without interpolation. + if (sc.state == SCE_JULIA_DEFAULT && israwstring) { + israwstring = false; + } + break; + case SCE_JULIA_MACRO: + if (IsASpace(sc.ch) || ! IsIdentifierCharacter(sc.ch)) { + sc.SetState(SCE_JULIA_DEFAULT); + } + break; + case SCE_JULIA_SYMBOL: + if (! 
IsIdentifierCharacter(sc.ch)) { + sc.SetState(SCE_JULIA_DEFAULT); + } + break; + case SCE_JULIA_COMMENT: + if( commentDepth > 0 ) { + // end or start of a nested a block comment + if ( sc.ch == '=' && sc.chNext == '#') { + commentDepth --; + sc.Forward(); + + if (commentDepth == 0) { + sc.ForwardSetState(SCE_JULIA_DEFAULT); + } + } else if( sc.ch == '#' && sc.chNext == '=') { + commentDepth ++; + sc.Forward(); + } + } else { + // single line comment + if (sc.atLineEnd || sc.ch == '\r' || sc.ch == '\n') { + sc.SetState(SCE_JULIA_DEFAULT); + transpose = false; + } + } + break; + } + + // check start of a new state + if (sc.state == SCE_JULIA_DEFAULT) { + if (sc.ch == '#') { + sc.SetState(SCE_JULIA_COMMENT); + // increment depth if we are a block comment + if(sc.chNext == '=') { + commentDepth ++; + sc.Forward(); + } + } else if (sc.ch == '!') { + sc.SetState(SCE_JULIA_OPERATOR); + } else if (sc.ch == '\'') { + if (transpose) { + sc.SetState(SCE_JULIA_OPERATOR); + } else { + sc.SetState(SCE_JULIA_CHAR); + } + } else if (sc.ch == '\"') { + istripledocstring = (sc.chNext == '\"' && sc.GetRelativeCharacter(2) == '\"'); + if (istripledocstring) { + sc.SetState(SCE_JULIA_DOCSTRING); + // Move to the end of the triple quotes + Sci_PositionU nextIndex = sc.currentPos + 2; + while (nextIndex > sc.currentPos && sc.More()) { + sc.Forward(); + } + } else { + sc.SetState(SCE_JULIA_STRING); + } + } else if (sc.ch == '`') { + triple_backtick = (sc.chNext == '`' && sc.GetRelativeCharacter(2) == '`'); + sc.SetState(SCE_JULIA_COMMAND); + if (triple_backtick) { + // Move to the end of the triple backticks + Sci_PositionU nextIndex = sc.currentPos + 2; + while (nextIndex > sc.currentPos && sc.More()) { + sc.Forward(); + } + } + } else if (IsADigit(sc.ch) || (sc.ch == '.' 
&& IsADigit(sc.chNext))) { + initNumber(sc, base, with_dot); + } else if (IsIdentifierFirstCharacter(sc.ch)) { + sc.SetState(SCE_JULIA_IDENTIFIER); + transpose = true; + } else if (sc.ch == '@') { + sc.SetState(SCE_JULIA_MACRO); + transpose = false; + + // Several parsing of operators, should keep the order of `if` blocks + } else if ((sc.ch == ':' || sc.ch == '<' || sc.ch == '>') && sc.chNext == ':') { + sc.SetState(SCE_JULIA_TYPEOPERATOR); + sc.Forward(); + // Highlight the next identifier, if option is set + if (options.highlightTypeannotation && + IsIdentifierFirstCharacter(sc.chNext)) { + sc.ForwardSetState(SCE_JULIA_TYPEANNOT); + } + } else if (sc.ch == ':') { + // TODO: improve detection of range + // should be solved with begin-end parsing + // `push!(arr, s1 :s2)` and `a[begin :end] + if (IsIdentifierFirstCharacter(sc.chNext) && + ! IsIdentifierCharacter(sc.chPrev) && + sc.chPrev != ')' && sc.chPrev != ']' ) { + sc.SetState(SCE_JULIA_SYMBOL); + } else { + sc.SetState(SCE_JULIA_OPERATOR); + } + } else if (IsJuliaParen(sc.ch)) { + if (sc.ch == '[') { + list_comprehension ++; + indexing_level ++; + } else if (sc.ch == ']' && (indexing_level > 0)) { + list_comprehension --; + indexing_level --; + } else if (sc.ch == '(') { + list_comprehension ++; + } else if (sc.ch == ')' && (list_comprehension > 0)) { + list_comprehension --; + } + + if (sc.ch == ')' || sc.ch == ']' || sc.ch == '}') { + transpose = true; + } else { + transpose = false; + } + sc.SetState(SCE_JULIA_BRACKET); + } else if (IsOperatorFirstCharacter(sc.ch)) { + transpose = false; + sc.SetState(SCE_JULIA_OPERATOR); + } else { + transpose = false; + } + } + + // update the line information (used for line-by-line lexing and folding) + if (sc.atLineEnd) { + // set the line state to the current state + curLine = styler.GetLine(sc.currentPos); + + lineState = ((transpose ? 1 : 0) << 0) | + ((istripledocstring ? 1 : 0) << 1) | + ((triple_backtick ? 1 : 0) << 2) | + ((israwstring ? 
1 : 0) << 3) | + ((indexing_level & 0x0F) << 4) | + ((list_comprehension & 0x0F) << 8) | + ((commentDepth & 0x0F) << 12); + styler.SetLineState(curLine, lineState); + } + } + sc.Complete(); +} + +void SCI_METHOD LexerJulia::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { + + if (!options.fold) + return; + + LexAccessor styler(pAccess); + + Sci_PositionU endPos = startPos + length; + int visibleChars = 0; + Sci_Position lineCurrent = styler.GetLine(startPos); + int levelCurrent = SC_FOLDLEVELBASE; + int lineState = 0; + if (lineCurrent > 0) { + levelCurrent = styler.LevelAt(lineCurrent-1) >> 16; + lineState = styler.GetLineState(lineCurrent-1); + } + + // level of nested brackets + int indexing_level = (int)((lineState >> 4) & 0x0F); // 4 bits of bracket nesting counter + // level of nested parenthesis or brackets + int list_comprehension = (int)((lineState >> 8) & 0x0F); // 4 bits of parenthesis nesting counter + //int commentDepth = (int)((lineState >> 12) & 0x0F); // 4 bits of nested comment counter + + Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1); + int levelNext = levelCurrent; + char chNext = styler[startPos]; + int stylePrev = styler.StyleAt(startPos - 1); + int styleNext = styler.StyleAt(startPos); + int style = initStyle; + char word[100]; + int wordlen = 0; + for (Sci_PositionU i = startPos; i < endPos; i++) { + char ch = chNext; + chNext = styler.SafeGetCharAt(i + 1); + style = styleNext; + styleNext = styler.StyleAt(i + 1); + bool atEOL = i == (lineStartNext-1); + + // a start/end of comment block + if (options.foldComment && style == SCE_JULIA_COMMENT) { + // start of block comment + if (ch == '#' && chNext == '=') { + levelNext ++; + } + // end of block comment + if (ch == '=' && chNext == '#' && levelNext > 0) { + levelNext --; + } + } + + // Syntax based folding, accounts for list comprehension + if (options.foldSyntaxBased) { + // list comprehension allow `for`, `if` and `begin` without `end` + 
if (style == SCE_JULIA_BRACKET) { + if (ch == '[') { + list_comprehension ++; + indexing_level ++; + levelNext ++; + } else if (ch == ']') { + list_comprehension --; + indexing_level --; + levelNext --; + } else if (ch == '(') { + list_comprehension ++; + levelNext ++; + } else if (ch == ')') { + list_comprehension --; + levelNext --; + } + // check non-negative + if (indexing_level < 0) { + indexing_level = 0; + } + if (list_comprehension < 0) { + list_comprehension = 0; + } + } + + // keyword + // Accumulate the characters of the current KEYWORD1 run in word[]; + // the run is classified as a fold point once the style changes. + if (style == SCE_JULIA_KEYWORD1) { + word[wordlen++] = static_cast<char>(ch); + if (wordlen == 100) { // prevent overflow + word[0] = '\0'; + wordlen = 1; + } + if (styleNext != SCE_JULIA_KEYWORD1) { + word[wordlen] = '\0'; + wordlen = 0; + if (list_comprehension <= 0 && indexing_level <= 0) { + levelNext += CheckKeywordFoldPoint(word); + } + } + } + } + + // Docstring + if (options.foldDocstring) { + if (stylePrev != SCE_JULIA_DOCSTRING && style == SCE_JULIA_DOCSTRING) { + levelNext ++; + } else if (style == SCE_JULIA_DOCSTRING && styleNext != SCE_JULIA_DOCSTRING) { + levelNext --; + } + } + + // check non-negative level + if (levelNext < 0) { + levelNext = 0; + } + + if (!IsASpace(ch)) { + visibleChars++; + } + stylePrev = style; + + if (atEOL || (i == endPos-1)) { + int levelUse = levelCurrent; + int lev = levelUse | levelNext << 16; + if (visibleChars == 0 && options.foldCompact) { + lev |= SC_FOLDLEVELWHITEFLAG; + } + if (levelUse < levelNext) { + lev |= SC_FOLDLEVELHEADERFLAG; + } + if (lev != styler.LevelAt(lineCurrent)) { + styler.SetLevel(lineCurrent, lev); + } + lineCurrent++; + lineStartNext = styler.LineStart(lineCurrent+1); + levelCurrent = levelNext; + // i is unsigned (Sci_PositionU), so the document length is converted before comparing + if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) { + // There is an empty line at end of file so give it same level and empty + styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG); + } + visibleChars = 0; + } + } +} + +LexerModule lmJulia(SCLEX_JULIA, 
LexerJulia::LexerFactoryJulia, "julia", juliaWordLists); diff --git a/scintilla/src/Catalogue.cxx b/scintilla/src/Catalogue.cxx index 428af4da7d..fc9a81aeec 100644 --- a/scintilla/src/Catalogue.cxx +++ b/scintilla/src/Catalogue.cxx @@ -80,6 +80,7 @@ int Scintilla_LinkLexers() { LINK_LEXER(lmFreeBasic); LINK_LEXER(lmHaskell); LINK_LEXER(lmHTML); + LINK_LEXER(lmJulia); LINK_LEXER(lmLatex); LINK_LEXER(lmLISP); LINK_LEXER(lmLua); diff --git a/src/filetypes.c b/src/filetypes.c index 3d2a1e3a33..5a50fd6891 100644 --- a/src/filetypes.c +++ b/src/filetypes.c @@ -187,6 +187,7 @@ static void init_builtin_filetypes(void) FT_INIT( GO, GO, "Go", NULL, SOURCE_FILE, COMPILED ); FT_INIT( ZEPHIR, ZEPHIR, "Zephir", NULL, SOURCE_FILE, COMPILED ); FT_INIT( SMALLTALK, NONE, "Smalltalk", NULL, SOURCE_FILE, SCRIPT ); + FT_INIT( JULIA, JULIA, "Julia", NULL, SOURCE_FILE, SCRIPT ); } diff --git a/src/filetypes.h b/src/filetypes.h index d7cfbd99ce..43fbeb596e 100644 --- a/src/filetypes.h +++ b/src/filetypes.h @@ -107,6 +107,7 @@ typedef enum GEANY_FILETYPES_ZEPHIR, GEANY_FILETYPES_BIBTEX, GEANY_FILETYPES_SMALLTALK, + GEANY_FILETYPES_JULIA, /* ^ append items here */ GEANY_MAX_BUILT_IN_FILETYPES /* Don't use this, use filetypes_array->len instead */ } diff --git a/src/highlighting.c b/src/highlighting.c index 938e5432cd..65f49fbfd8 100644 --- a/src/highlighting.c +++ b/src/highlighting.c @@ -1026,6 +1026,7 @@ void highlighting_init_styles(guint filetype_idx, GKeyFile *config, GKeyFile *co init_styleset_case(HTML); init_styleset_case(JAVA); init_styleset_case(JS); + init_styleset_case(JULIA); init_styleset_case(LATEX); init_styleset_case(LUA); init_styleset_case(MAKE); @@ -1115,6 +1116,7 @@ void highlighting_set_styles(ScintillaObject *sci, GeanyFiletype *ft) styleset_case(HTML); styleset_case(JAVA); styleset_case(JS); + styleset_case(JULIA); styleset_case(LATEX); styleset_case(LUA); styleset_case(MAKE); @@ -1514,6 +1516,13 @@ gboolean highlighting_is_string_style(gint lexer, gint style) return 
(style == SCE_MATLAB_STRING || style == SCE_MATLAB_DOUBLEQUOTESTRING); + case SCLEX_JULIA: + return (style == SCE_JULIA_CHAR || + style == SCE_JULIA_STRING || + style == SCE_JULIA_DOCSTRING || + style == SCE_JULIA_COMMAND || + style == SCE_JULIA_STRINGINTERP); + case SCLEX_XML: case SCLEX_HTML: case SCLEX_PHPSCRIPT: @@ -1718,6 +1727,9 @@ gboolean highlighting_is_comment_style(gint lexer, gint style) case SCLEX_OCTAVE: return (style == SCE_MATLAB_COMMENT); + case SCLEX_JULIA: + return (style == SCE_JULIA_COMMENT); + case SCLEX_LUA: return (style == SCE_LUA_COMMENT || style == SCE_LUA_COMMENTLINE || diff --git a/src/highlightingmappings.h b/src/highlightingmappings.h index d62b7e24b9..939808f9d0 100644 --- a/src/highlightingmappings.h +++ b/src/highlightingmappings.h @@ -883,6 +883,42 @@ static const HLKeyword highlighting_keywords_JS[] = }; #define highlighting_properties_JS highlighting_properties_C +/* Julia */ +#define highlighting_lexer_JULIA SCLEX_JULIA +static const HLStyle highlighting_styles_JULIA[] = +{ + { SCE_JULIA_DEFAULT, "default", FALSE }, + { SCE_JULIA_COMMENT, "comment", FALSE }, + { SCE_JULIA_NUMBER, "number", FALSE }, + { SCE_JULIA_KEYWORD1, "keyword1", FALSE }, + { SCE_JULIA_KEYWORD2, "keyword2", FALSE }, + { SCE_JULIA_KEYWORD3, "keyword3", FALSE }, + { SCE_JULIA_CHAR, "char", FALSE }, + { SCE_JULIA_OPERATOR, "operator", FALSE }, + { SCE_JULIA_BRACKET, "bracket", FALSE }, + { SCE_JULIA_IDENTIFIER, "identifier", FALSE }, + { SCE_JULIA_STRING, "string", FALSE }, + { SCE_JULIA_SYMBOL, "symbol", FALSE }, + { SCE_JULIA_MACRO, "macro", FALSE }, + { SCE_JULIA_STRINGINTERP, "stringinterp", FALSE }, + { SCE_JULIA_DOCSTRING, "docstring", FALSE }, + { SCE_JULIA_STRINGLITERAL, "stringliteral", FALSE }, + { SCE_JULIA_COMMAND, "command", FALSE }, + { SCE_JULIA_COMMANDLITERAL, "commandliteral", FALSE }, + { SCE_JULIA_TYPEANNOT, "typeannotation", FALSE }, + { SCE_JULIA_LEXERROR, "lexerror", FALSE }, + { SCE_JULIA_KEYWORD4, "keyword4", FALSE }, + { 
SCE_JULIA_TYPEOPERATOR, "typeoperator", FALSE }, +}; +static const HLKeyword highlighting_keywords_JULIA[] = +{ + { 0, "primary", FALSE }, + { 1, "secondary", FALSE }, + { 2, "tertiary", FALSE }, + { 3, "functions", FALSE } +}; +#define highlighting_properties_JULIA EMPTY_PROPERTIES + /* LaTeX */ #define highlighting_lexer_LATEX SCLEX_LATEX diff --git a/src/symbols.c b/src/symbols.c index 49a1d33d96..b22d4f6bcb 100644 --- a/src/symbols.c +++ b/src/symbols.c @@ -619,6 +619,20 @@ static void add_top_level_items(GeanyDocument *doc) NULL); break; } + case GEANY_FILETYPES_JULIA: + { + tag_list_add_groups(tag_store, + &(tv_iters.tag_variable), _("Constants"), ICON_VAR, + &(tv_iters.tag_namespace), _("Modules"), ICON_NAMESPACE, + &(tv_iters.tag_function), _("Functions"), ICON_METHOD, + &(tv_iters.tag_member), _("Fields"), ICON_MEMBER, + &(tv_iters.tag_macro), _("Macros"), ICON_MACRO, + &(tv_iters.tag_struct), _("Structures"), ICON_STRUCT, + &(tv_iters.tag_type), _("Types"), ICON_CLASS, + &(tv_iters.tag_externvar), _("Unknowns"), ICON_OTHER, + NULL); + break; + } case GEANY_FILETYPES_HTML: { tag_list_add_groups(tag_store, diff --git a/src/tagmanager/tm_parser.c b/src/tagmanager/tm_parser.c index bb699fc3c3..86fcf03423 100644 --- a/src/tagmanager/tm_parser.c +++ b/src/tagmanager/tm_parser.c @@ -528,6 +528,19 @@ static TMParserMapEntry map_POWERSHELL[] = { {'v', tm_tag_variable_t}, }; +static TMParserMapEntry map_JULIA[] = { + {'c', tm_tag_variable_t}, + {'f', tm_tag_function_t}, + {'g', tm_tag_member_t}, + {'m', tm_tag_macro_t}, + {'n', tm_tag_namespace_t}, + {'s', tm_tag_struct_t}, + {'t', tm_tag_typedef_t}, + /* defined as externvar to get those excluded as forward type in symbols.c:goto_tag() + * so we can jump to the real implementation (if known) instead of to the import statement */ + {'x', tm_tag_externvar_t}, +}; + typedef struct { @@ -591,6 +604,7 @@ static TMParserMap parser_map[] = { MAP_ENTRY(JSON), MAP_ENTRY(ZEPHIR), MAP_ENTRY(POWERSHELL), + MAP_ENTRY(JULIA), 
}; /* make sure the parser map is consistent and complete */ G_STATIC_ASSERT(G_N_ELEMENTS(parser_map) == TM_PARSER_COUNT); diff --git a/src/tagmanager/tm_parser.h b/src/tagmanager/tm_parser.h index b845b5c540..6e3adb41b6 100644 --- a/src/tagmanager/tm_parser.h +++ b/src/tagmanager/tm_parser.h @@ -109,6 +109,7 @@ enum TM_PARSER_JSON, TM_PARSER_ZEPHIR, TM_PARSER_POWERSHELL, + TM_PARSER_JULIA, TM_PARSER_BIBTEX, TM_PARSER_COUNT }; diff --git a/src/tagmanager/tm_parsers.h b/src/tagmanager/tm_parsers.h index 0b782082f4..3c552d8a00 100644 --- a/src/tagmanager/tm_parsers.h +++ b/src/tagmanager/tm_parsers.h @@ -65,6 +65,7 @@ JsonParser, \ ZephirParser, \ PowerShellParser, \ + JuliaParser, \ BibtexParser #endif From a8343e414ce24af8fd28c64749df7ab43ed1ede7 Mon Sep 17 00:00:00 2001 From: getzze Date: Fri, 25 Jun 2021 00:20:07 +0100 Subject: [PATCH 2/2] add ctags test file --- tests/ctags/julia-corner_cases.jl | 500 ++++++++++++++++++++++++++++++ 1 file changed, 500 insertions(+) create mode 100644 tests/ctags/julia-corner_cases.jl diff --git a/tests/ctags/julia-corner_cases.jl b/tests/ctags/julia-corner_cases.jl new file mode 100644 index 0000000000..a2b7c10e0e --- /dev/null +++ b/tests/ctags/julia-corner_cases.jl @@ -0,0 +1,500 @@ +#= Julia syntax highlighting test. + +Modified from https://github.com/JuliaEditorSupport/julia-syntax-test-cases + +This file derives from https://gist.github.com/Wilfred/f1aca44c61ed6e1df603 +whose author is [@Wilfred](https://github.com/Wilfred). @Wilfred has put it in +the public domain: + https://gist.github.com/Wilfred/f1aca44c61ed6e1df603#gistcomment-2948526 + +Changes from the original are governed by the license of the repository in +which this file is found. + +This file is designed to test various corner cases of Julia +syntax highlighting. +=# + +baremodule Mod1 + # Nothing here +end + +module Mod2 + # Here neither +end + +## Simple function definitions. +# Expected: `function` should be highlighted, as should `foo_bar!`. 
+function foo_bar!(x,y) + x + y + 1 +end + +# Expected: `foo_bar!` should be highlighted. +foo_bar!(x,y) = x + y + +# Expected: `foo` should be highlighted. +Baz.foo(x) = 1 + +# Expected: `foo` should be highlighted. +foo(x::(Int,)) = 1 + +# Expected: `foo` should be highlighted. +foo(x, y=length(x)) + +## Function definitions in namespaces. +# Expected: `bar` should be highlighted. +function Foo.bar(x, y) + x + 1 +end + +## Function definitions with type variables. +# Expected: `elsize` should be highlighted. +elsize(::AbstractArray{T}) where {T} = sizeof(T) + +function elsize(::AbstractArray{T}) where T + sizeof(T) +end + +## Nested brackets in function definitions. +# Expected: `cell` should be highlighted. +cell(dims::(Integer...)) = Array(Any, convert((Int...), dims)) + +# TODO: find an example with a nested type expression. + +## Macro usage +# Expected: `@hello_world!` should be highlighted. +@hello_world! foo + +# Expected: highlight `myfun` +@inline myfun() = println("myfun") + +## Builtin functions. +# Expected: `throw`, `error` and `super` should not be highlighted. There are +# too many built-in functions for this to be useful. +# https://github.com/JuliaLang/julia/commit/134867c69096fcb52afa5d5a7433892b5127e981 +# https://github.com/JuliaLang/julia/pull/7963#issuecomment-52586261 +throw(foo) +error("foo", bar, "baz") +super(Int) + +## Strings +# Expected: highlight the string. +x = "foo \"bar\" baz" + +# Expected: highlight the whole string. +x = "foo +bar" + +# Expected: highlight the whole triple-quoted string. +x = """hello "world" foobar""" +y = """foo\\""" +z = """bar\"""" +w = """"bar""" + +# Expected: highlight `$user` +x = "hello $user" + +# Expected: don't highlight `$user` +x = "hello \$user" + +# Expected: highlight `$val` +x = """(a="$val")""" + +# Expected: treat r as part of the string, so `r"a"` is highlighted. +x = r"0.1" + +# Expected: treat ismx as part of the string, so `r"a"ismx` is highlighted. 
+x = r"a"ismx + +# Expected: highlight `r"""a "b" c"""` +x = r"""a "b" c""" + +# Expected: treat v as part of the string, so `v"0.1"` is highlighted. +x = v"0.1" + +# Expected: treat b as part of the string, so `b"a"` is highlighted. +x = b"a" + +# Bonus points: +# Expected: highlight the interpolation brackets `$(` and `)` +x = "hello $(user * user)" + +# Bonus points: +# Expected: highlight regexp metacharacters `[` and `]` +x = r"[abc]" + +# Bonus points: +# Expected: highlight escape sequences `\xff` and `\u2200` +x = b"DATA\xff\u2200" + +# Bonus points: +# Expected: don't highlight `$user` +x = raw"hello $user" + +## Characters +# Expected: highlight the character. +x = 'a' +y = '\u0' +z = '\U10ffff' +w = '\x41' +a = ' ' +b = '"' +c = ''' +d = '\'' +e = '\\' + +# Expected: don't highlight, as ' is an operator here, not a character delimiter. +a = b' + c' +A''' + +# Bonus points: +# Expected: don't highlight the character +x = 'way too long so not a character' +x = '' + +## Comments +# Expected: highlight `# foo` +# foo + +# Expected: highlight `#= foo\n bar =#` +#= foo +bar =# + +# Expected: highlight `#= #= =# =#` (comments can nest). 
+#= #= =# =# + +# Expected: highlight `'` as adjoint operator +A#==#' +(A)#==#' +A[1]#==#' + +## Type declarations + +# Expected highlight `Foo` and `Bar` +mutable struct Foo + x::Bar +end + +# Expected highlight `Foo` and `Bar` +struct Foo + x::Bar +end + +# Expected: highlight `Foo` and `Bar` +abstract type Foo <: Bar end + +# Expected: don't highlight x or y +x <: y + +## Type annotations + +# Expected: highlight `FooBar` +f(x::FooBar) = x + +# Expected: highlight `Int8` +function foo() + local x::Int8 = 5 + x +end + +# Expected: highlight `Point` and `Real` as types +function norm(p::Point{<:Real}) + sqrt(p.x^2 + p.y^2) +end + +# Expected: highlight `g` as function and `Int8` as type +function g(x, y)::Int8 + return x * y +end + +# Expected: highlight `T` and `Number` +same_type_numeric(x::T, y::T) where {T <: Number} = true +same_type_numeric(x::T, y::T) where T = false + +## Parametric type declaration + +# Expected: highlight `Pointy` and `T` +abstract type Pointy{T} end + +# Expected: highlight `Point`, `Pointy` and `T` +struct Point{T} <: Pointy{T} + x::T + y::T +end + +## Variable declarations + +# Expected: highlight `x` and `y` +global x = "foo, bar = 2", y = 3 + +# Expected: highlight `x` and `y` +global x = foo(a, b), y = 3 + +# Expected: highlight `y` +const y = "hello world" + +# Expected: highlight `x` and `y` +function foo() + local x = f(1, 2), y = f(3, 4) + x + y +end + +# Expected: highlight `x` and `y` +let x = f(1, 2), y = f(3, 4) + x + y +end + +## Colons and end + +# Expected: highlight `:foo`, `:end` and `:function` +:foo +x = :foo +y = :function +z = :end + +# Expected: highlight index `[end]` differently to block delimiter `end` +if foo[end] +end + +# Expected: highlight as index `end` +foo[bar:end] + +# Expected: highlight as index `begin` +foo[begin:4] + +# Expected: don't highlight `:123` +x = :123 + +# Expected: don't highlight `:baz` +foo[bar:baz] + +# Expected: highlight `:baz` +foo[:baz] + +# Expected: highlight both `:baz` 
+foo(:baz,:baz) + +# Note that `: foo` is currently a valid quoted symbol, this will hopefully +# change in 0.4: https://github.com/JuliaLang/julia/issues/5997 + +# Expected: highlight `:foo` +[1 :foo] + +# Expected: highlight `:end` +[1 :end] + +# Expected: highlight `:two` +@eval :one+:two + +# Expected: don't highlight `:end` but `end` as index +[(1+1):end] + +# Expected: don't highlight `:end` but `end` as index +[a[1]:end] + +# Expected: don't highlight `:foo` +for x=1:foo + print(x) +end + +## Range detection + +# Bonus points: +# Expected: don't highlight `:s2` +push!(a, s1 :s2) + +# Bonus points: +# Expected: don't highlight `:end` +a[begin :end] + +## Expression evaluation + +# Expected: highlight `:` as operator +a = :(x = 2) + +# Expected: highlight `:call` and `:b` as symbols +# Debatable: highlight `:+` as operator +ex = Expr(:call, :+, a, :b) + +## Number highlighting + +# Expected: highlight all these as numbers +x = 123 +x = 1.1 +x = .5 +x = 0x123abcdef +x = 0o7 +x = 0b1011 +x = 2.5e-4 +x = 2.5E-4 +x = 1e+00 +x = 2.5f-4 +x = 0x.4p-1 +x = 1_000 + +# Expected: highlight these as numbers or built-ins +x = Inf +x = NaN + +# Expected: highlight `123`, not the letter +y = 123x +y = 123e + +# Expected: highlight `1e+1` and `1e-1` +1e+1+1e-1 + +# Expected: highlight `1.` and `.1` +1. 
+.1 +# Note that `1.+1` is currently ambiguous and gives an error + +# Bonus points: +# Expected: don't highlight `..` +x = 1..3 + +# Bonus points: +# Debatable: highlight the first two digits, not the following digits +# or show an error +x = 0o1291 +x = 0b1091 + +# Debatable: highlight `π` as a number or built-in +# (note that `πx` is a single symbol, not `π * x`) +x = π + +## List comprehension +# Expected: highlight `for` and `if` without the `end` keyword +[i for i in 1:5 if i%2==0] + +## Broadcasting +# Expected: highlight `.+` as operator +a.+1 + +## Command +# Expected: highlight "`echo 1`" +c = `echo 1` + +# Expected: highlight "```echo `hello 1` ```" +c = ```echo `hello 1` ``` + +# Expected: highlight "raw`echo $1`" +c = raw`echo $1` + +## Non-standard identifiers +# Bonus points: +# Expected: highlight `var"##"` as a function +function var"##"(x) + println(x) +end + +# Bonus points: +# Expected: highlight `var"%%"` as a function +var"%%"(x) = println(x) + +# Bonus points: +# Expected: highlight `$var` as string and `##""` as comment +"$var"##"" + +# Bonus points: +# Expected: highlight `$(var")(")` as string interpolation +"$(var")(")" + +# Bonus points: +# Expected: highlight `'` as adjoint operator +var"##mat"' + +## Code folding: for and if in list comprehension +# Expected: fold between function and last end +function test(x) + a = (if x; 0 else 1 end) + println(a) +end + +## Default struct attributes +Base.@kwdef struct Test + a::Float64 = 1E12 + b::Float64 = pi +end + +## Parametric constructors +struct OurRational{T<:Integer} <: Real + "Numerator" + num::T + + "Denominator" + den::T + + OurRational{Int8}(num::T, den::T) where T<:Integer = new(convert(Int8, num), convert(Int8, den)) + + """ + Parametric inner constructor + """ + function OurRational{T}(num::T, den::T) where T<:Integer + if num == 0 && den == 0 + error("invalid rational: 0//0") + end + # Bug with short function misidentification of == and => + length(num) == 0 + length(den) => 0 + + 
g = gcd(den, num) + num = div(num, g) + den = div(den, g) + new(num, den) + end + + test::T + + OurRational{Int64}(num::T, den::T, test::T) where T<:Integer = new(num, den, test) +end + +OurRational{Int16}(num::T, den::T) where T<:Integer = new(num, den) + +## Import modules +using Module1 +using Module2: func1, func2 +import Module3 +import Module4.func1, Module5, Module6.func2 +import Module7: func1, func2 + +module MyModule +using Module8, + Module9, + Module10 + +using Module11: func1, + func2, + func3 +end + +import Base.show, +# Base.print as pr, + Base.* + +#using Module12: func1, func2 as alias2, func3 +using Module13: func1, +# func2 as alias2, + func3 + +#import Module14 as Alias12 + +## Function scope +module MyModule +function func3(a::Int) + a +end + +function func4(a::Int) + function func5(b::Int) + b + end + func5(a) +end + +end + +## Docstring +""" +This is a test docstring +with multiple lines. + +""" +function test_docstring end