diff --git a/ctags/Makefile.am b/ctags/Makefile.am index fb1d1d5964..e33a50c756 100644 --- a/ctags/Makefile.am +++ b/ctags/Makefile.am @@ -72,7 +72,8 @@ parsers = \ parsers/lua.c \ parsers/make.c \ parsers/make.h \ - parsers/geany_markdown.c \ + parsers/markdown.c \ + parsers/markdown.h \ parsers/geany_matlab.c \ parsers/nsis.c \ parsers/objc.c \ diff --git a/ctags/parsers/geany_markdown.c b/ctags/parsers/geany_markdown.c deleted file mode 100644 index 9cc78eff35..0000000000 --- a/ctags/parsers/geany_markdown.c +++ /dev/null @@ -1,103 +0,0 @@ -/* -* -* Copyright (c) 2009, Jon Strait -* -* This source code is released for free distribution under the terms of the -* GNU General Public License. -* -* This module contains functions for generating tags for Markdown files. -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include -#include - -#include "parse.h" -#include "read.h" -#include "vstring.h" -#include "routines.h" -#include "entry.h" - -/* -* DATA DEFINITIONS -*/ - -static kindDefinition MarkdownKinds[] = { - { true, 'v', "variable", "sections" } -}; - -/* -* FUNCTION DEFINITIONS -*/ - -/* checks if str is all the same character */ -static bool issame(const char *str) -{ - char first = *str; - - while (*(++str)) - { - if (*str && *str != first) - return false; - } - return true; -} - -static void makeMarkdownTag (const vString* const name, bool name_before) -{ - tagEntryInfo e; - initTagEntry (&e, vStringValue(name), 0); - - if (name_before) - e.lineNumber--; /* we want the line before the underline chars */ - - makeTagEntry(&e); -} - - -static void findMarkdownTags (void) -{ - vString *name = vStringNew(); - const unsigned char *line; - - while ((line = readLineFromInputFile()) != NULL) - { - int name_len = vStringLength(name); - - /* underlines must be the same length or more */ - if (name_len > 0 && (line[0] == '=' || line[0] == '-') && issame((const char*) line)) - { - makeMarkdownTag(name, true); - } - else if (line[0] 
== '#') { - vStringClear(name); - vStringCatS(name, (const char *) line); - makeMarkdownTag(name, false); - } - else { - vStringClear (name); - if (! isspace(*line)) - vStringCatS(name, (const char*) line); - } - } - vStringDelete (name); -} - -extern parserDefinition* MarkdownParser (void) -{ - static const char *const patterns [] = { "*.md", NULL }; - static const char *const extensions [] = { "md", NULL }; - parserDefinition* const def = parserNew ("Markdown"); - - def->kindTable = MarkdownKinds; - def->kindCount = ARRAY_SIZE (MarkdownKinds); - def->patterns = patterns; - def->extensions = extensions; - def->parser = findMarkdownTags; - return def; -} - diff --git a/ctags/parsers/markdown.c b/ctags/parsers/markdown.c new file mode 100644 index 0000000000..3510d756d3 --- /dev/null +++ b/ctags/parsers/markdown.c @@ -0,0 +1,420 @@ +/* + * + * Copyright (c) 2007-2011, Nick Treleaven + * Copyright (c) 2012, Lex Trotman + * Copyright (c) 2021, Jiri Techet + * + * This source code is released for free distribution under the terms of the + * GNU General Public License version 2 or (at your option) any later version. + * + * This module contains functions for generating tags for markdown files. + * + * This parser was based on the asciidoc parser. 
+ * + * Extended syntax like footnotes is described in + * https://www.markdownguide.org/extended-syntax/ + */ + +/* + * INCLUDE FILES + */ +#include "general.h" /* must always come first */ + +#include <ctype.h> +#include <string.h> + +#include "debug.h" +#include "entry.h" +#include "parse.h" +#include "read.h" +#include "vstring.h" +#include "nestlevel.h" +#include "routines.h" +#include "promise.h" +#include "htable.h" + +#include "markdown.h" + +/* + * DATA DEFINITIONS + */ +typedef enum { + K_CHAPTER = 0, + K_SECTION, + K_SUBSECTION, + K_SUBSUBSECTION, + K_LEVEL4SECTION, + K_LEVEL5SECTION, + K_SECTION_COUNT, + K_FOOTNOTE = K_SECTION_COUNT, +} markdownKind; + +static kindDefinition MarkdownKinds[] = { + { true, 'c', "chapter", "chapters"}, + { true, 's', "section", "sections" }, + { true, 'S', "subsection", "level 2 sections" }, + { true, 't', "subsubsection", "level 3 sections" }, + { true, 'T', "l4subsection", "level 4 sections" }, + { true, 'u', "l5subsection", "level 5 sections" }, + { true, 'n', "footnote", "footnotes" }, +}; + +static fieldDefinition MarkdownFields [] = { + { + .enabled = false, + .name = "sectionMarker", + .description = "character used for declaring section(#, ##, =, or -)", + }, +}; + +typedef enum { + F_MARKER, +} markdownField; + +static NestingLevels *nestingLevels = NULL; + +/* +* FUNCTION DEFINITIONS +*/ + +static NestingLevel *getNestingLevel (const int kind, unsigned long adjustmentWhenPop) +{ + NestingLevel *nl; + tagEntryInfo *e; + unsigned long line = getInputLineNumber (); + + line = (line > adjustmentWhenPop)? 
(line - adjustmentWhenPop): 0; + + while (1) + { + nl = nestingLevelsGetCurrent (nestingLevels); + e = getEntryOfNestingLevel (nl); + if ((nl && (e == NULL)) || (e && (e->kindIndex >= kind))) + nestingLevelsPop (nestingLevels); + else + break; + } + return nl; +} + + +static int makeMarkdownTag (const vString* const name, const int kind, const bool twoLine) +{ + int r = CORK_NIL; + + if (vStringLength (name) > 0) + { + const NestingLevel *const nl = getNestingLevel (kind, twoLine? 2: 1); + tagEntryInfo *parent = getEntryOfNestingLevel (nl); + tagEntryInfo e; + + initTagEntry (&e, vStringValue (name), kind); + + if (twoLine) + { + /* we want the line before the '---' underline chars */ + const unsigned long line = getInputLineNumber (); + Assert (line > 0); + if (line > 0) + { + e.lineNumber--; + e.filePosition = getInputFilePositionForLine (line - 1); + } + } + + if (parent && (parent->kindIndex < kind)) + e.extensionFields.scopeIndex = nl->corkIndex; + + r = makeTagEntry (&e); + } + return r; +} + + +static int makeSectionMarkdownTag (const vString* const name, const int kind, const char *marker) +{ + int r = makeMarkdownTag (name, kind, marker[0] != '#'); + attachParserFieldToCorkEntry (r, MarkdownFields [F_MARKER].ftype, marker); + + nestingLevelsPush (nestingLevels, r); + return r; +} + + +static vString *getHeading (const int kind, const unsigned char *line, + const int lineLen, bool *delimited) +{ + int pos = 0; + int start = kind + 1; + int end = lineLen - 1; + vString *name = vStringNew (); + + Assert (kind >= 0 && kind < K_SECTION_COUNT); + Assert (lineLen > start); + + *delimited = false; + while (isspace (line[pos])) ++pos; + while (line[end] == line[pos] && end - 1 >= 0 && line[end - 1] != '\\') + { + --end; + *delimited = true; + } + while (isspace (line[start])) ++start; + while (isspace (line[end])) --end; + + if (start <= end) + vStringNCatS (name, (const char*)(&(line[start])), end - start + 1); + + return name; +} + + +static int getFirstCharPos 
(const unsigned char *line, int lineLen, bool *indented) +{ + int indent = 0; + int i; + for (i = 0; i < lineLen && isspace (line[i]); i++) + indent += line[i] == '\t' ? 4 : 1; + *indented = indent >= 4; + return i; +} + + +static void getFootnoteMaybe (const char *line) +{ + const char *start = strstr (line, "[^"); + const char *end = start? strstr(start + 2, "]:"): NULL; + + if (! (start && end)) + return; + if (! (end > (start + 2))) + return; + + vString * footnote = vStringNewNInit (start + 2, end - (start + 2)); + const NestingLevel *const nl = nestingLevelsGetCurrent (nestingLevels); + tagEntryInfo e; + + initTagEntry (&e, vStringValue (footnote), K_FOOTNOTE); + if (nl) + e.extensionFields.scopeIndex = nl->corkIndex; + makeTagEntry (&e); + + vStringDelete (footnote); +} + +static bool extractLanguageForCodeBlock (const char *langMarker, + vString *codeLang) +{ + subparser *s; + bool r = false; + + foreachSubparser (s, false) + { + markdownSubparser *m = (markdownSubparser *)s; + enterSubparser(s); + if (m->extractLanguageForCodeBlock) + r = m->extractLanguageForCodeBlock (m, langMarker, codeLang); + leaveSubparser(); + if (r) + break; + } + + return r; +} + +static void findMarkdownTags (void) +{ + vString *prevLine = vStringNew (); + vString *codeLang = vStringNew (); + const unsigned char *line; + char inCodeChar = 0; + long startSourceLineNumber = 0; + long startLineNumber = 0; + bool inPreambule = false; + bool inComment = false; + + subparser *sub = getSubparserRunningBaseparser(); + if (sub) + chooseExclusiveSubparser (sub, NULL); + + nestingLevels = nestingLevelsNew (0); + + while ((line = readLineFromInputFile ()) != NULL) + { + int lineLen = strlen ((const char*) line); + bool lineProcessed = false; + bool indented; + int pos = getFirstCharPos (line, lineLen, &indented); + const int lineNum = getInputLineNumber (); + + if (lineNum == 1 || inPreambule) + { + if (line[pos] == '-' && line[pos + 1] == '-' && line[pos + 2] == '-') + { + if (inPreambule) 
+ { + long endLineNumber = lineNum; + if (startLineNumber < endLineNumber) + makePromise ("FrontMatter", startLineNumber, 0, + endLineNumber, 0, startSourceLineNumber); + } + else + startSourceLineNumber = startLineNumber = lineNum; + inPreambule = !inPreambule; + } + } + + if (inPreambule) + continue; + + /* fenced code block */ + if (line[pos] == '`' || line[pos] == '~') + { + char c = line[pos]; + char otherC = c == '`' ? '~' : '`'; + int nSame; + for (nSame = 1; line[nSame] == line[pos]; ++nSame); + + if (inCodeChar != otherC && nSame >= 3) + { + inCodeChar = inCodeChar ? 0 : c; + if (inCodeChar == c && strstr ((const char *)(line + pos + nSame), "```") != NULL) + inCodeChar = 0; + else if (inCodeChar) + { + const char *langMarker = (const char *)(line + pos + nSame); + startLineNumber = startSourceLineNumber = lineNum + 1; + + vStringClear (codeLang); + if (! extractLanguageForCodeBlock (langMarker, codeLang)) + { + vStringCopyS (codeLang, langMarker); + vStringStripLeading (codeLang); + vStringStripTrailing (codeLang); + } + } + else + { + long endLineNumber = lineNum; + if (vStringLength (codeLang) > 0 + && startLineNumber < endLineNumber) + makePromise (vStringValue (codeLang), startLineNumber, 0, + endLineNumber, 0, startSourceLineNumber); + } + + lineProcessed = true; + } + } + /* XML comment start */ + else if (lineLen >= pos + 4 && line[pos] == '<' && line[pos + 1] == '!' 
&& + line[pos + 2] == '-' && line[pos + 3] == '-') + { + if (strstr ((const char *)(line + pos + 4), "-->") == NULL) + inComment = true; + lineProcessed = true; + } + /* XML comment end */ + else if (inComment && strstr ((const char *)(line + pos), "-->")) + { + inComment = false; + lineProcessed = true; + } + + /* code block or comment */ + if (inCodeChar || inComment) + lineProcessed = true; + + /* code block using indent */ + else if (indented) + lineProcessed = true; + + /* if it's a title underline, or a delimited block marking character */ + else if (line[pos] == '=' || line[pos] == '-' || line[pos] == '#' || line[pos] == '>') + { + int nSame; + for (nSame = 1; line[nSame] == line[pos]; ++nSame); + + /* quote */ + if (line[pos] == '>') + ; /* just to make sure lineProcessed = true so it won't be in a heading */ + /* is it a two line title */ + else if (line[pos] == '=' || line[pos] == '-') + { + char marker[2] = { line[pos], '\0' }; + int kind = line[pos] == '=' ? K_CHAPTER : K_SECTION; + bool whitespaceTerminated = true; + + for (int i = pos + nSame; i < lineLen; i++) + { + if (!isspace (line[i])) + { + whitespaceTerminated = false; + break; + } + } + + vStringStripLeading (prevLine); + vStringStripTrailing (prevLine); + if (whitespaceTerminated && vStringLength (prevLine) > 0) + makeSectionMarkdownTag (prevLine, kind, marker); + } + /* otherwise is it a one line title */ + else if (line[pos] == '#' && nSame <= K_SECTION_COUNT && isspace (line[nSame])) + { + int kind = nSame - 1; + bool delimited = false; + vString *name = getHeading (kind, line, lineLen, &delimited); + if (vStringLength (name) > 0) + makeSectionMarkdownTag (name, kind, delimited ? 
"##" : "#"); + vStringDelete (name); + } + + lineProcessed = true; + } + + vStringClear (prevLine); + if (!lineProcessed) + { + getFootnoteMaybe ((const char *)line); + vStringCatS (prevLine, (const char*) line); + } + } + vStringDelete (prevLine); + vStringDelete (codeLang); + { + unsigned int line = (unsigned int)getInputLineNumber (); + nestingLevelsFree (nestingLevels); + } +} + +extern parserDefinition* MarkdownParser (void) +{ + parserDefinition* const def = parserNew ("Markdown"); + static const char *const extensions [] = { "md", "markdown", NULL }; + + def->enabled = true; + def->extensions = extensions; + def->useCork = CORK_QUEUE; + def->kindTable = MarkdownKinds; + def->kindCount = ARRAY_SIZE (MarkdownKinds); + def->fieldTable = MarkdownFields; + def->fieldCount = ARRAY_SIZE (MarkdownFields); + def->defaultScopeSeparator = "\"\""; + def->parser = findMarkdownTags; + + /* + * This setting (useMemoryStreamInput) is for running + * Yaml parser from YamlFrontMatter as subparser. + * YamlFrontMatter is run from FrontMatter as a gust parser. + * FrontMatter is run from Markdown as a guest parser. + * This stacked structure hits the limitation of the main + * part: subparser's requirement for memory based input stream + * is not propagated to the main part. + * + * TODO: instead of setting useMemoryStreamInput here, we + * should remove the limitation. + */ + def->useMemoryStreamInput = true; + + return def; +} diff --git a/ctags/parsers/markdown.h b/ctags/parsers/markdown.h new file mode 100644 index 0000000000..2af2372aca --- /dev/null +++ b/ctags/parsers/markdown.h @@ -0,0 +1,29 @@ +/* +* Copyright (c) 2022, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. 
+* +* The interface for subparsers of Markdown +*/ +#ifndef CTAGS_PARSER_MARKDOWN_H +#define CTAGS_PARSER_MARKDOWN_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include "subparser.h" +#include "vstring.h" + +typedef struct sMarkdownSubparser markdownSubparser; + +struct sMarkdownSubparser { + subparser subparser; + bool (* extractLanguageForCodeBlock) (markdownSubparser *s, + const char *langMarker, + vString *langName); +}; + +#endif diff --git a/meson.build b/meson.build index bbbaa7b33c..40cb346e35 100644 --- a/meson.build +++ b/meson.build @@ -636,7 +636,6 @@ ctags = static_library('ctags', 'ctags/parsers/geany_fortran.c', 'ctags/parsers/geany_lcpp.c', 'ctags/parsers/geany_lcpp.h', - 'ctags/parsers/geany_markdown.c', 'ctags/parsers/geany_matlab.c', 'ctags/parsers/geany_tcl.c', 'ctags/parsers/geany_tex.c', @@ -653,6 +652,8 @@ ctags = static_library('ctags', 'ctags/parsers/lua.c', 'ctags/parsers/make.c', 'ctags/parsers/make.h', + 'ctags/parsers/markdown.c', + 'ctags/parsers/markdown.h', 'ctags/parsers/nsis.c', 'ctags/parsers/objc.c', 'ctags/parsers/pascal.c', diff --git a/src/tagmanager/tm_parser.c b/src/tagmanager/tm_parser.c index a305674e2d..3ebb7c24c1 100644 --- a/src/tagmanager/tm_parser.c +++ b/src/tagmanager/tm_parser.c @@ -694,10 +694,21 @@ static TMParserMapGroup group_NSIS[] = { }; static TMParserMapEntry map_MARKDOWN[] = { - {'v', tm_tag_variable_t}, + {'c', tm_tag_namespace_t}, //chapter + {'s', tm_tag_member_t}, //section + {'S', tm_tag_macro_t}, //subsection + {'t', tm_tag_variable_t}, //subsubsection + {'T', tm_tag_struct_t}, //l4subsection + {'u', tm_tag_union_t}, //l5subsection + {'n', tm_tag_undef_t}, //footnote }; static TMParserMapGroup group_MARKDOWN[] = { - {_("Variables"), TM_ICON_VAR, tm_tag_variable_t}, + {_("Chapters"), TM_ICON_NONE, tm_tag_namespace_t}, + {_("Sections"), TM_ICON_NONE, tm_tag_member_t}, + {_("Subsections"), TM_ICON_NONE, tm_tag_macro_t}, + {_("Subsubsections"), TM_ICON_NONE, 
tm_tag_variable_t}, + {_("Level 4 sections"), TM_ICON_NONE, tm_tag_struct_t}, + {_("Level 5 sections"), TM_ICON_NONE, tm_tag_union_t}, }; static TMParserMapEntry map_TXT2TAGS[] = { @@ -1435,6 +1446,7 @@ const gchar *tm_parser_scope_separator(TMParserType lang) case TM_PARSER_ZEPHIR: return "::"; + case TM_PARSER_MARKDOWN: case TM_PARSER_TXT2TAGS: return "\"\""; @@ -1455,10 +1467,11 @@ const gchar *tm_parser_scope_separator_printable(TMParserType lang) { switch (lang) { - case TM_PARSER_TXT2TAGS: case TM_PARSER_ASCIIDOC: case TM_PARSER_CONF: + case TM_PARSER_MARKDOWN: case TM_PARSER_REST: + case TM_PARSER_TXT2TAGS: return " > "; default: @@ -1485,6 +1498,7 @@ gboolean tm_parser_has_full_scope(TMParserType lang) case TM_PARSER_JAVASCRIPT: case TM_PARSER_JSON: case TM_PARSER_LUA: + case TM_PARSER_MARKDOWN: case TM_PARSER_PHP: case TM_PARSER_POWERSHELL: case TM_PARSER_PYTHON: diff --git a/tests/ctags/simple.md.tags b/tests/ctags/simple.md.tags index e7d7aa9962..839b3f6cb5 100644 --- a/tests/ctags/simple.md.tags +++ b/tests/ctags/simple.md.tags @@ -1,28 +1,28 @@ # format=tagmanager -# aÌ16384Ö0 -# g #Ì16384Ö0 -# h ##Ì16384Ö0 -## bÌ16384Ö0 -## i #Ì16384Ö0 -## j ##Ì16384Ö0 -## k ###Ì16384Ö0 -### cÌ16384Ö0 -### l #Ì16384Ö0 -### m ##Ì16384Ö0 -### n ###Ì16384Ö0 -### o ###Ì16384Ö0 -#### dÌ16384Ö0 -#### p #Ì16384Ö0 -#### q #####Ì16384Ö0 -##### eÌ16384Ö0 -##### r #Ì16384Ö0 -##### s ######Ì16384Ö0 -###### fÌ16384Ö0 -###### t #Ì16384Ö0 -###### u #######Ì16384Ö0 -AÌ16384Ö0 -BÌ16384Ö0 -CÌ16384Ö0 -DÌ16384Ö0 -EÌ16384Ö0 -FÌ16384Ö0 +AÌ256Ö0 +BÌ256Ö0 +CÌ256Ö0 +DÌ64ÎCÖ0 +EÌ64ÎCÖ0 +FÌ64ÎCÖ0 +aÌ256Ö0 +bÌ64ÎaÖ0 +cÌ65536Îa""bÖ0 +dÌ16384Îa""b""cÖ0 +eÌ2048Îa""b""c""dÖ0 +fÌ8192Îa""b""c""d""eÖ0 +gÌ256Ö0 +hÌ256Ö0 +iÌ64ÎhÖ0 +jÌ64ÎhÖ0 +kÌ64ÎhÖ0 +lÌ65536Îh""kÖ0 +mÌ65536Îh""kÖ0 +nÌ65536Îh""kÖ0 +oÌ65536Îh""kÖ0 +pÌ16384Îh""k""oÖ0 +qÌ16384Îh""k""oÖ0 +rÌ2048Îh""k""o""qÖ0 +sÌ2048Îh""k""o""qÖ0 +tÌ8192Îh""k""o""q""sÖ0 +uÌ8192Îh""k""o""q""sÖ0