Add Raku (Perl 6) filetype support (lexer and ctags parser)

geany · Oct 22, 2023 · a9363bc · a9363bc
1 parent 0fd68e4
commit a9363bc
Show file tree

Hide file tree

Showing 21 changed files with 2,376 additions and 1 deletion.
diff --git a/ctags/Makefile.am b/ctags/Makefile.am
@@ -85,6 +85,7 @@ parsers = \
 	parsers/pascal.c \
 	parsers/perl.c \
 	parsers/perl.h \
+	parsers/perl6.c \
 	parsers/php.c \
 	parsers/powershell.c \
 	parsers/python.c \

diff --git a/ctags/parsers/perl6.c b/ctags/parsers/perl6.c
@@ -0,0 +1,338 @@
+/*
+ * perl6.c -- Perl6 parser.
+ * Author: Dmitri Tikhonov <dmitri@cpan.org>
+ *
+ * This is a very basic Perl 6 parser.  It does not know how to:
+ *   - skip POD;
+ *   - skip multiline comments;
+ *   - skip quoted strings;
+ *   - generate fully-qualified tags.
+ *
+ * This source code is released for free distribution under the terms of
+ * the GNU General Public License version 2 or (at your option) any later version.
+ */
+
+#include "general.h"    /* must always come first */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "selectors.h"
+#include "vstring.h"
+
+enum perl6Kind {
+    K_NONE = -1,
+    K_CLASS,
+    K_GRAMMAR,
+    K_METHOD,
+    K_MODULE,
+    K_PACKAGE,
+    K_ROLE,
+    K_RULE,
+    K_SUBMETHOD,
+    K_SUBROUTINE,
+    K_TOKEN,
+};
+
+static kindDefinition perl6Kinds[] = {
+    [K_CLASS]       = { true,  'c', "class",      "classes" },
+    [K_GRAMMAR]     = { true,  'g', "grammar",    "grammars" },
+    [K_METHOD]      = { true,  'm', "method",     "methods" },
+    [K_MODULE]      = { true,  'o', "module",     "modules" },
+    [K_PACKAGE]     = { true,  'p', "package",    "packages" },
+    [K_ROLE]        = { true,  'r', "role",       "roles" },
+    [K_RULE]        = { true,  'u', "rule",       "rules" },
+    [K_SUBMETHOD]   = { true,  'b', "submethod",  "submethods" },
+    [K_SUBROUTINE]  = { true,  's', "subroutine", "subroutines" },
+    [K_TOKEN]       = { true,  't', "token",      "tokens" },
+};
+
+enum token {
+    T_CLASS,
+    T_GRAMMAR,
+    T_METHOD,
+    T_MODULE,
+    T_MULTI,
+    T_MY,
+    T_OUR,
+    T_PACKAGE,
+    T_PROTO,
+    T_ROLE,
+    T_RULE,
+    T_SUB,
+    T_SUBMETHOD,
+    T_UNIT,
+    T_TOKEN,
+};
+
+static const enum perl6Kind token2kind[] = {
+    [T_CLASS]       = K_CLASS,
+    [T_GRAMMAR]     = K_GRAMMAR,
+    [T_METHOD]      = K_METHOD,
+    [T_MODULE]      = K_MODULE,
+    [T_MULTI]       = K_SUBROUTINE,
+    [T_MY]          = K_NONE,
+    [T_OUR]         = K_NONE,
+    [T_PACKAGE]     = K_PACKAGE,
+    [T_PROTO]       = K_NONE,
+    [T_ROLE]        = K_ROLE,
+    [T_RULE]        = K_RULE,
+    [T_SUB]         = K_SUBROUTINE,
+    [T_SUBMETHOD]   = K_SUBMETHOD,
+    [T_UNIT]        = K_NONE,
+    [T_TOKEN]       = K_TOKEN,
+};
+
+#define STRLEN(s) (sizeof(s) - 1)
+#define STREQN(s, token) (0 == strncmp(s, token, STRLEN(token)))
+
+static enum token
+matchToken (const char *s, int len)
+{
+    switch (len) {
+        case 2:
+            if (STREQN(s, "my"))                return T_MY;
+            break;
+        case 3:
+            switch (s[0]) {
+                case 'o':
+                    if (STREQN(s, "our"))       return T_OUR;
+                    break;
+                case 's':
+                    if (STREQN(s, "sub"))       return T_SUB;
+                    break;
+            }
+            break;
+        case 4:
+            switch (s[1]) {
+                case 'o':
+                    if (STREQN(s, "role"))      return T_ROLE;
+                    break;
+                case 'u':
+                    if (STREQN(s, "rule"))      return T_RULE;
+                    break;
+                case 'n':
+                    if (STREQN(s, "unit"))      return T_UNIT;
+                    break;
+            }
+            break;
+        case 5:
+            switch (s[0]) {
+                case 'c':
+                    if (STREQN(s, "class"))     return T_CLASS;
+                    break;
+                case 'm':
+                    if (STREQN(s, "multi"))     return T_MULTI;
+                    break;
+                case 'p':
+                    if (STREQN(s, "proto"))     return T_PROTO;
+                    break;
+                case 't':
+                    if (STREQN(s, "token"))     return T_TOKEN;
+                    break;
+            }
+            break;
+        case 6:
+            switch (s[1]) {
+                case 'e':
+                    if (STREQN(s, "method"))    return T_METHOD;
+                    break;
+                case 'o':
+                    if (STREQN(s, "module"))    return T_MODULE;
+                    break;
+            }
+            break;
+        case 7:
+            switch (s[0]) {
+                case 'g':
+                    if (STREQN(s, "grammar"))   return T_GRAMMAR;
+                    break;
+                case 'p':
+                    if (STREQN(s, "package"))   return T_PACKAGE;
+                    break;
+            }
+            break;
+        case 9:
+            if (STREQN(s, "submethod"))         return T_SUBMETHOD;
+            break;
+    }
+    return -1;
+}
+
+static const int validPerl6Identifier[0x100] = {
+/* r!perl -e "print qq([(int)'\$_'] = 1,\n)for a..z,A..Z,0..9,':','-','_'"|fmt
+ */
+    [(int)'a'] = 1, [(int)'b'] = 1, [(int)'c'] = 1, [(int)'d'] = 1,
+    [(int)'e'] = 1, [(int)'f'] = 1, [(int)'g'] = 1, [(int)'h'] = 1,
+    [(int)'i'] = 1, [(int)'j'] = 1, [(int)'k'] = 1, [(int)'l'] = 1,
+    [(int)'m'] = 1, [(int)'n'] = 1, [(int)'o'] = 1, [(int)'p'] = 1,
+    [(int)'q'] = 1, [(int)'r'] = 1, [(int)'s'] = 1, [(int)'t'] = 1,
+    [(int)'u'] = 1, [(int)'v'] = 1, [(int)'w'] = 1, [(int)'x'] = 1,
+    [(int)'y'] = 1, [(int)'z'] = 1, [(int)'A'] = 1, [(int)'B'] = 1,
+    [(int)'C'] = 1, [(int)'D'] = 1, [(int)'E'] = 1, [(int)'F'] = 1,
+    [(int)'G'] = 1, [(int)'H'] = 1, [(int)'I'] = 1, [(int)'J'] = 1,
+    [(int)'K'] = 1, [(int)'L'] = 1, [(int)'M'] = 1, [(int)'N'] = 1,
+    [(int)'O'] = 1, [(int)'P'] = 1, [(int)'Q'] = 1, [(int)'R'] = 1,
+    [(int)'S'] = 1, [(int)'T'] = 1, [(int)'U'] = 1, [(int)'V'] = 1,
+    [(int)'W'] = 1, [(int)'X'] = 1, [(int)'Y'] = 1, [(int)'Z'] = 1,
+    [(int)'0'] = 1, [(int)'1'] = 1, [(int)'2'] = 1, [(int)'3'] = 1,
+    [(int)'4'] = 1, [(int)'5'] = 1, [(int)'6'] = 1, [(int)'7'] = 1,
+    [(int)'8'] = 1, [(int)'9'] = 1, [(int)':'] = 1, [(int)'-'] = 1,
+    [(int)'_'] = 1,
+};
+
+static const int validMethodPrefix[0x100] = {
+    [(int)'!'] = 1, [(int)'^'] = 1,
+};
+
+static const int kindMayHaveMethodPrefix = (1 << K_SUBMETHOD) |
+                                           (1 << K_METHOD)    ;
+
+/* Trim identifier pointed to by ps, possibly advancing it, and return
+ * the length of the valid portion.  If the returned value is zero, the
+ * identifier is invalid.
+ */
+static int
+trimIdentifier (enum perl6Kind kind, const char **ps, int len)
+{
+    Assert(len > 0);
+    const char *const end = *ps + len;
+    const char *s = *ps;
+    /* Trim the front if possible: */
+    s += (kindMayHaveMethodPrefix & (1 << kind)) &&
+         validMethodPrefix[(int)*s];
+    /* Record the start of identifier: */
+    *ps = s;
+    /* Continuous string of valid characters: */
+    while (s < end && validPerl6Identifier[(int)*s])
+        ++s;
+    /* sub multi infix:<...>        -- we want the "infix" only */
+    while (s - *ps > 0 && ':' == s[-1])
+        --s;
+    /* It's ok if this is zero: */
+    return s - *ps;
+}
+
+struct p6Ctx {
+    enum token  tokens[128 /* unlikely to need more than this */];
+    unsigned int n_tokens;
+    vString    *name;
+    const char *line;      /* Saved from readLineFromInputFile() */
+};
+
+static void
+makeTag (struct p6Ctx *ctx, int kind, const char *name, int len)
+{
+    tagEntryInfo entry;
+    vStringNCopyS(ctx->name, name, len);
+    initTagEntry(&entry, vStringValue(ctx->name), kind);
+    makeTagEntry(&entry);
+}
+
+static void
+possiblyMakeTag (struct p6Ctx *ctx, const char *s, int len)
+{
+    Assert(ctx->n_tokens > 0);
+    enum perl6Kind kind = token2kind[ ctx->tokens[ctx->n_tokens - 1] ];
+    if (K_NONE != kind && perl6Kinds[kind].enabled
+                       && (len = trimIdentifier(kind, &s, len)) > 0)
+        makeTag(ctx, kind, s, len);
+}
+
+static void
+initP6Ctx (struct p6Ctx *ctx)
+{
+    ctx->n_tokens = 0;
+    ctx->name = vStringNew();
+    ctx->line = NULL;
+}
+
+static void
+deinitP6Ctx (struct p6Ctx *ctx)
+{
+    vStringDelete(ctx->name);
+}
+
+/* Read next contiguous sequence of non-whitespace characters, store
+ * the address in `ptok', and return its length.  Return value of zero
+ * means EOF.
+ *
+ * TODO: Currently, POD and multi-line comments are not handled.
+ */
+static int
+getNonSpaceStr (struct p6Ctx *ctx, const char **ptok)
+{
+    const char *s = ctx->line;
+    if (!s) {
+next_line:
+        s = (const char *) readLineFromInputFile();
+        if (!s)
+            return 0;                           /* EOF */
+    }
+    while (*s && isspace(*s))                   /* Skip whitespace */
+        ++s;
+    if ('#' == *s)
+        goto next_line;
+    int non_white_len = strcspn(s, ",; \t");
+    if (non_white_len) {
+        ctx->line = s + non_white_len;          /* Save state */
+        *ptok = s;
+        return non_white_len;
+    } else
+        goto next_line;
+}
+
+static void
+findPerl6Tags (void)
+{
+    struct p6Ctx ctx;
+
+#define RESET_TOKENS() do { ctx.n_tokens = 0; } while (0)
+
+#define PUSH_TOKEN(_t_) do {                                            \
+    if (ctx.n_tokens < ARRAY_SIZE(ctx.tokens)) {			\
+        ctx.tokens[ ctx.n_tokens ] = _t_;                               \
+        ++ctx.n_tokens;                                                 \
+    } else {                                                            \
+        Assert(!"Token stack overflown: this is quite odd");            \
+        RESET_TOKENS();                                                 \
+    }                                                                   \
+} while (0)
+
+    initP6Ctx(&ctx);
+
+    const char *s;
+    int len;
+
+    while ((len = getNonSpaceStr(&ctx, &s)) > 0) {
+        enum token token = matchToken(s, len);
+        if ((int) token >= 0) {
+            PUSH_TOKEN(token);
+        } else if (ctx.n_tokens > 0) {
+            possiblyMakeTag(&ctx, s, len);
+            RESET_TOKENS();
+        }
+    }
+
+    deinitP6Ctx(&ctx);
+}
+
+parserDefinition *
+Perl6Parser (void)
+{
+    static const char *const extensions[] = { "p6", "pm6", "pm", "pl6", NULL };
+    static selectLanguage selectors [] = { selectByPickingPerlVersion,
+					   NULL };
+    parserDefinition* def = parserNew("Perl6");
+    def->kindTable      = perl6Kinds;
+    def->kindCount  = ARRAY_SIZE(perl6Kinds);
+    def->extensions = extensions;
+    def->parser     = findPerl6Tags;
+    def->selectLanguage = selectors;
+    return def;
+}
diff --git a/data/Makefile.am b/data/Makefile.am
@@ -64,6 +64,7 @@ filetypes_dist = \
 	filedefs/filetypes.po \
 	filedefs/filetypes.powershell \
 	filedefs/filetypes.r \
+	filedefs/filetypes.raku \
 	filedefs/filetypes.restructuredtext \
 	filedefs/filetypes.ruby \
 	filedefs/filetypes.rust \