From e1ae976a7284d59f24b387825e967a2e6794a330 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= <techet@gmail.com>
Date: Tue, 7 Dec 2021 18:34:57 +0100
Subject: [PATCH 1/4] Add ctags 'readtags' library to allow us parse ctags
 files

---
 ctags/Makefile.am            |    2 +
 ctags/libreadtags/readtags.c | 1310 ++++++++++++++++++++++++++++++++++
 ctags/libreadtags/readtags.h |  295 ++++++++
 meson.build                  |    4 +-
 4 files changed, 1610 insertions(+), 1 deletion(-)
 create mode 100644 ctags/libreadtags/readtags.c
 create mode 100644 ctags/libreadtags/readtags.h

diff --git a/ctags/Makefile.am b/ctags/Makefile.am
index fe980d27f1..e12bd1b3da 100644
--- a/ctags/Makefile.am
+++ b/ctags/Makefile.am
@@ -112,6 +112,8 @@ libctags_la_SOURCES = \
 	dsl/optscript.h \
 	dsl/es.c \
 	dsl/es.h \
+	libreadtags/readtags.c \
+	libreadtags/readtags.h \
 	main/args.c \
 	main/args_p.h \
 	main/colprint.c \
diff --git a/ctags/libreadtags/readtags.c b/ctags/libreadtags/readtags.c
new file mode 100644
index 0000000000..6f86ba27e1
--- /dev/null
+++ b/ctags/libreadtags/readtags.c
@@ -0,0 +1,1310 @@
+/*
+*   Copyright (c) 1996-2003, Darren Hiebert
+*
+*   This source code is released into the public domain.
+*
+*   This module contains functions for reading tag files.
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/types.h>  /* to declare off_t */
+
+#include "readtags.h"
+
+/*
+*   MACROS
+*/
+#define TAB '\t'
+
+
+/*
+*   DATA DECLARATIONS
+*/
+typedef struct {
+	size_t size;	/* bytes allocated for `buffer'; 0 until the first growString() */
+	char *buffer;	/* NUL-terminated contents */
+} vstring;
+
+/* State kept for one open tag file */
+struct sTagFile {
+		/* has the file been opened and this structure initialized? */
+	short initialized;
+		/* format of tag file */
+	short format;
+		/* how is the tag file sorted? */
+	tagSortType sortMethod;
+		/* pointer to file structure */
+	FILE* fp;
+		/* file position of first character of `line' */
+	off_t pos;
+		/* size of tag file in seekable positions */
+	off_t size;
+		/* last line read */
+	vstring line;
+		/* name of tag in last line read */
+	vstring name;
+		/* defines tag search state */
+	struct {
+				/* file position of last match for tag (file size if none matched) */
+			off_t pos;
+				/* name of tag last searched for */
+			char *name;
+				/* length of name for partial matches */
+			size_t nameLength;
+				/* performing partial match */
+			short partial;
+				/* ignoring case */
+			short ignorecase;
+	} search;
+		/* miscellaneous extension fields */
+	struct {
+				/* number of entries allocated for `list' */
+			unsigned short max;
+				/* list of key value pairs */
+			tagExtensionField *list;
+	} fields;
+		/* program information from the pseudo tags (strings freed at close) */
+	struct {
+			/* name of program author */
+		char *author;
+			/* name of program */
+		char *name;
+			/* URL of distribution */
+		char *url;
+			/* program version */
+		char *version;
+	} program;
+		/* 0 (initial state set by calloc), errno value,
+		 * or tagErrno typed value */
+	int err;
+};
+
+/*
+*   DATA DEFINITIONS
+*/
+static const char *const EmptyString = "";
+static const char *const PseudoTagPrefix = "!_";
+static const size_t PseudoTagPrefixLength = 2;
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/* Map a hexadecimal digit to its value (0-15); any other character gives 0 */
+static int xdigitValue (char digit)
+{
+	int value = 0;
+	if ('0' <= digit && digit <= '9')
+		value = digit - '0';
+	else if ('a' <= digit && digit <= 'f')
+		value = digit - 'a' + 10;
+	else if ('A' <= digit && digit <= 'F')
+		value = digit - 'A' + 10;
+	return value;
+}
+
+/*
+ * Reads the first character from the string, possibly un-escaping it, and
+ * advances *s to the start of the next character.  A backslash that does not
+ * start a known escape (including \xHH with HH >= 0x80) is returned as is.
+ */
+static int readTagCharacter (const char **s)
+{
+	int c = **(const unsigned char **)s;
+	(*s)++;
+	if (c == '\\')
+	{
+		switch (**s)
+		{
+			case 't': c = '\t'; (*s)++; break;
+			case 'r': c = '\r'; (*s)++; break;
+			case 'n': c = '\n'; (*s)++; break;
+			case '\\': c = '\\'; (*s)++; break;
+			/* Universal-CTags extensions */
+			case 'a': c = '\a'; (*s)++; break;
+			case 'b': c = '\b'; (*s)++; break;
+			case 'v': c = '\v'; (*s)++; break;
+			case 'f': c = '\f'; (*s)++; break;
+			case 'x':
+				/* <ctype.h> needs an unsigned char value: a negative char is undefined */
+				if (isxdigit ((unsigned char) (*s)[1]) && isxdigit ((unsigned char) (*s)[2]))
+				{
+					int val = (xdigitValue ((*s)[1]) << 4) | xdigitValue ((*s)[2]);
+					if (val < 0x80)
+					{
+						(*s) += 3;
+						c = val;
+					}
+				}
+				break;
+		}
+	}
+
+	return c;
+}
+
+/*
+ * Case-insensitive comparison of a plain string (s1) with an escaped tag
+ * name (s2).  Return 0 for match, < 0 for smaller, > 0 for bigger.
+ * Both sides are folded to uppercase (like for 'sort -f').  This makes a
+ * difference when one of the chars lies between upper and lower case,
+ * ie. one of the chars [ \ ] ^ _ ` for ascii. (The '_' in particular !)
+ */
+static int taguppercmp (const char *s1, const char *s2)
+{
+	for (;;)
+	{
+		const int plain = (unsigned char)*s1++;
+		const int unescaped = readTagCharacter (&s2);
+		const int diff = toupper (plain) - toupper (unescaped);
+
+		/* stop at the first difference or at the end of either string */
+		if (diff != 0  ||  plain == '\0'  ||  unescaped == '\0')
+			return diff;
+	}
+}
+
+/* Like taguppercmp(), but compares at most n characters (n must be > 0) */
+static int tagnuppercmp (const char *s1, const char *s2, size_t n)
+{
+	for (;;)
+	{
+		const int plain = (unsigned char)*s1++;
+		const int unescaped = readTagCharacter (&s2);
+		const int diff = toupper (plain) - toupper (unescaped);
+		/* --n is evaluated only while the characters still match */
+		if (diff != 0  ||  --n == 0  ||  plain == '\0'  ||  unescaped == '\0')
+			return diff;
+	}
+}
+
+/* Compare plain string s1 with the escaped tag name s2, byte by byte.
+ * Return 0 for match, < 0 for smaller, > 0 for bigger. */
+static int tagcmp (const char *s1, const char *s2)
+{
+	for (;;)
+	{
+		const int plain = (unsigned char)*s1++;
+		const int unescaped = readTagCharacter (&s2);
+		const int diff = plain - unescaped;
+		if (diff != 0  ||  plain == '\0'  ||  unescaped == '\0')
+			return diff;
+	}
+}
+
+/* Compare at most n characters of s1 with the escaped tag name s2.
+ * Bytes are compared as unsigned char, which is what readTagCharacter() yields. */
+static int tagncmp (const char *s1, const char *s2, size_t n)
+{
+	int result, c1, c2;
+	do
+	{
+		c1 = (unsigned char)*s1++;
+		c2 = readTagCharacter (&s2);
+		result = c1 - c2;
+	} while (result == 0  &&  --n > 0  &&  c1 != '\0'  &&  c2 != '\0');
+	return result;
+}
+
+/* Grow the buffer of s: allocate 128 bytes (holding an empty string) the
+ * first time, double the size afterwards.  On allocation failure s is left
+ * untouched, a message is printed with perror() and TagFailure is returned. */
+static tagResult growString (vstring *s)
+{
+	const int firstTime = (s->size == 0);
+	const size_t grownSize = firstTime ? 128 : 2 * s->size;
+	char *grown;
+
+	if (firstTime)
+		grown = (char*) malloc (grownSize);
+	else
+		grown = (char*) realloc (s->buffer, grownSize);
+
+	if (grown == NULL)
+	{
+		perror ("string too large");
+		return TagFailure;
+	}
+
+	/* a fresh buffer starts out as an empty string */
+	if (firstTime)
+		*grown = '\0';
+	s->buffer = grown;
+	s->size = grownSize;
+	return TagSuccess;
+}
+
+/* Copy the tag name, i.e. the text of the current line up to the first tab
+ * (or, failing that, the first newline, else the first carriage return, else
+ * the whole line), into file->name, growing that buffer as needed. */
+static tagResult copyName (tagFile *const file)
+{
+	static const char separators [] = { '\t', '\n', '\r' };
+	const char *const line = file->line.buffer;
+	const char *end = NULL;
+	size_t length;
+	size_t i;
+
+	for (i = 0  ;  end == NULL  &&  i < sizeof separators  ;  ++i)
+		end = strchr (line, separators [i]);
+	length = (end != NULL) ? (size_t) (end - line) : strlen (line);
+
+	while (length >= file->name.size)
+		if (growString (&file->name) != TagSuccess)
+			return TagFailure;
+
+	/* length never exceeds strlen (line), so only name bytes are copied */
+	memcpy (file->name.buffer, line, length);
+	file->name.buffer [length] = '\0';
+	return TagSuccess;
+}
+
+/* Read one raw line into file->line and copy its tag name to file->name.
+ * Return 1 on success, 0 on failure or EOF.
+ * On failure *err is set to an errno value; at EOF *err is set to 0.
+ */
+static int readTagLineRaw (tagFile *const file, int *err)
+{
+	int result = 1;
+	int reReadLine;
+
+	/*  If reading the line places any character other than a null or a
+	 *  newline at the last position fgets() can fill (two less than the
+	 *  buffer size), then we must resize the buffer and reattempt to read
+	 *  the line.
+	 */
+	do
+	{
+		char *const pLastChar = file->line.buffer + file->line.size - 2;
+		char *line;
+
+		file->pos = ftell (file->fp);
+		if (file->pos < 0)
+		{
+			*err = errno;
+			result = 0;
+			break;
+		}
+		reReadLine = 0;
+		*pLastChar = '\0';	/* sentinel: overwritten only if the line fills the buffer */
+		line = fgets (file->line.buffer, (int) file->line.size, file->fp);
+		if (line == NULL)
+		{
+			/* read error, or EOF (then *err stays 0) */
+			*err = 0;
+			if (! feof (file->fp))
+				*err = errno;
+			result = 0;
+		}
+		else if (*pLastChar != '\0'  &&
+					*pLastChar != '\n'  &&  *pLastChar != '\r')
+		{
+			/*  buffer overflow */
+			if (growString (&file->line) != TagSuccess)
+			{
+				*err = ENOMEM;
+				result = 0;
+			}
+			if (fseek (file->fp, file->pos, SEEK_SET) < 0)
+			{
+				*err = errno;
+				result = 0;
+			}
+			reReadLine = 1;
+		}
+		else
+		{
+			size_t i = strlen (file->line.buffer);	/* strip trailing CR / LF */
+			while (i > 0  &&
+				   (file->line.buffer [i - 1] == '\n' || file->line.buffer [i - 1] == '\r'))
+			{
+				file->line.buffer [i - 1] = '\0';
+				--i;
+			}
+		}
+	} while (reReadLine  &&  result);
+	if (result)
+	{
+		if (copyName (file) != TagSuccess)
+		{
+			*err = ENOMEM;
+			result = 0;
+		}
+	}
+	return result;
+}
+
+/* Read the next line whose tag name is not empty.
+ * Return 1 on success, 0 on failure or EOF; *err as for readTagLineRaw().
+ */
+static int readTagLine (tagFile *const file, int *err)
+{
+	for (;;)
+	{
+		if (! readTagLineRaw (file, err))
+			return 0;
+		if (*file->name.buffer != '\0')
+			return 1;
+	}
+}
+
+/* Double the extension field list.  Fails, leaving the list untouched, if
+ * the unsigned short capacity would wrap (a wrapped size would shrink it). */
+static tagResult growFields (tagFile *const file)
+{
+	const unsigned short newCount = (unsigned short) (2 * file->fields.max);
+	tagExtensionField *newFields = (newCount <= file->fields.max) ? NULL :
+		(tagExtensionField*) realloc (file->fields.list, newCount * sizeof (tagExtensionField));
+	if (newFields == NULL)
+	{
+		perror ("too many extension fields");
+		return TagFailure;
+	}
+	file->fields.list = newFields;
+	file->fields.max = newCount;
+	return TagSuccess;
+}
+
+static tagResult parseExtensionFields (tagFile *const file, tagEntry *const entry,
+									   char *const string, int *err)
+{
+	char *p = string;
+	char *tail = string + (string? strlen(string):0);	/* end of the field text */
+	size_t q_len;
+
+	while (p != NULL  &&  *p != '\0')
+	{
+		while (*p == TAB)
+			*p++ = '\0';	/* skip, and blank, field separators */
+		if (*p != '\0')
+		{
+			char *colon;
+			char *field = p;
+			p = strchr (p, TAB);
+			if (p != NULL)
+				*p++ = '\0';	/* terminate this field; p is the next one */
+			colon = strchr (field, ':');
+			if (colon == NULL)
+				entry->kind = field;	/* a field without "key:" is the kind */
+			else
+			{
+				const char *key = field;
+				char *q = colon + 1;
+				const char *value = q;
+				const int key_len = colon - key;
+				*colon = '\0';
+
+				q_len = tail - q;
+
+				/* Unescaping the value in place; it can only get shorter */
+				while (*q != '\0')
+				{
+					const char *next = q;
+					int ch = readTagCharacter (&next);
+					size_t skip = next - q;
+
+					*q = (char) ch;
+					q++;
+					q_len -= skip;
+					if (skip > 1)
+					{
+						/* + 1 is for moving the area including the last '\0'. */
+						memmove (q, next, q_len + 1);
+						if (p)
+							p -= skip - 1;	/* the following fields moved too */
+						if (tail != string)
+							tail -= skip - 1;
+					}
+				}
+
+				if (key_len == 4)
+				{
+					if (memcmp (key, "kind", 4) == 0)
+						entry->kind = value;
+					else if (memcmp (key, "file", 4) == 0)
+						entry->fileScope = 1;
+					else if (memcmp (key, "line", 4) == 0)
+					{
+						char *endptr = NULL;
+						long m = strtol (value, &endptr, 10);
+						if (*endptr != '\0' || m < 0)
+						{
+							*err = TagErrnoUnexpectedLineno;
+							return TagFailure;
+						}
+						entry->address.lineNumber = m;
+					}
+					else
+						goto normalField;
+				}
+				else
+				{
+				normalField:
+					if (entry->fields.count == file->fields.max)	/* list is full */
+					{
+						if (growFields (file) != TagSuccess)
+						{
+							*err = ENOMEM;
+							return TagFailure;
+						}
+					}
+					file->fields.list [entry->fields.count].key = key;
+					file->fields.list [entry->fields.count].value = value;
+					++entry->fields.count;
+				}
+			}
+		}
+	}
+	return TagSuccess;
+}
+
+static int isOdd (unsigned int i)	/* 1 if i is odd, 0 if it is even */
+{
+	return (i & 1u) != 0;
+}
+
+/* Count the consecutive backslashes found when scanning backwards from
+ * `from' (inclusive) down to `till' (exclusive): the scan stops at the first
+ * other character and never examines `till' itself. */
+static unsigned int countContinuousBackslashesBackward(const char *from,
+						     const char *till)
+{
+	const char *cursor = from;
+
+	while (cursor > till  &&  *cursor == '\\')
+		cursor--;
+
+	/* every position stepped over held a backslash */
+	return (unsigned int) (from - cursor);
+}
+
+static tagResult parseTagLine (tagFile *file, tagEntry *const entry, int *err)
+{
+	int i;
+	char *p = file->line.buffer;
+	size_t p_len = strlen (p);
+	char *tab = strchr (p, TAB);
+
+	memset(entry, 0, sizeof(*entry));	/* fields not found below stay 0 / NULL */
+
+	entry->name = p;
+	if (tab != NULL)
+	{
+		*tab = '\0';	/* terminate the name so that only it is unescaped */
+	}
+
+	/* When unescaping, the input string becomes shorter.
+	 * e.g. \t occupies two bytes on the tag file.
+	 * It is converted to 0x9 and occupies one byte.
+	 * memmove called here for shortening the line
+	 * buffer. */
+	while (*p != '\0')
+	{
+		const char *next = p;
+		int ch = readTagCharacter (&next);
+		size_t skip = next - p;
+
+		*p = (char) ch;
+		p++;
+		p_len -= skip;
+		if (skip > 1)
+		{
+			/* + 1 is for moving the area including the last '\0'. */
+			memmove (p, next, p_len + 1);
+			if (tab)
+				tab -= skip - 1;	/* the separator moved along with the rest */
+		}
+	}
+
+	if (tab != NULL)
+	{
+		p = tab + 1;
+		entry->file = p;
+		tab = strchr (p, TAB);
+		if (tab != NULL)
+		{
+			int fieldsPresent;
+			int combinedPattern;
+			*tab = '\0';	/* terminate the file name */
+			p = tab + 1;
+			if (*p == '/'  ||  *p == '?')
+			{
+				/* parse pattern */
+				int delimiter = *(unsigned char*) p;
+				entry->address.lineNumber = 0;
+				entry->address.pattern = p;
+				do
+				{
+					p = strchr (p + 1, delimiter);
+				} while (p != NULL
+					 &&  isOdd (countContinuousBackslashesBackward (p - 1,
+											entry->address.pattern)));
+
+				if (p == NULL)
+				{
+					/* TODO: invalid pattern */
+				}
+				else
+					++p;
+			}
+			else if (isdigit ((int) *(unsigned char*) p))
+			{
+				/* parse line number */
+				entry->address.pattern = p;
+				entry->address.lineNumber = atol (p);
+				while (isdigit ((int) *(unsigned char*) p))
+					++p;
+				if (p)	/* always true here: p was just dereferenced */
+				{
+					combinedPattern = (strncmp (p, ";/", 2) == 0) ||
+											(strncmp (p, ";?", 2) == 0);
+					if (combinedPattern)
+					{
+						++p;
+						/* parse pattern */
+						int delimiter = *(unsigned char*) p;
+						do
+						{
+							p = strchr (p + 1, delimiter);
+						} while (p != NULL
+							 &&  isOdd (countContinuousBackslashesBackward (p - 1,
+													entry->address.pattern)));
+
+						if (p == NULL)
+						{
+							/* TODO: invalid pattern */
+						}
+						else
+							++p;
+					}
+				}
+			}
+			else
+			{
+				/* TODO: invalid pattern */
+			}
+
+			if (p)
+			{
+				fieldsPresent = (strncmp (p, ";\"", 2) == 0);
+				*p = '\0';	/* terminate the address */
+				if (fieldsPresent)
+				{
+					if (parseExtensionFields (file, entry, p + 2, err) != TagSuccess)
+						return TagFailure;
+				}
+			}
+		}
+	}
+	if (entry->fields.count > 0)
+		entry->fields.list = file->fields.list;
+	for (i = entry->fields.count  ;  i < file->fields.max  ;  ++i)	/* clear unused slots */
+	{
+		file->fields.list [i].key = NULL;
+		file->fields.list [i].value = NULL;
+	}
+	return TagSuccess;
+}
+
+/* strdup() wrapper: NULL in gives NULL out; a failed copy is reported by perror */
+static char *duplicate (const char *str)
+{
+	char *copy;
+	if (str == NULL)
+		return NULL;
+	copy = strdup (str);
+	if (copy == NULL)
+		perror (NULL);
+	return copy;
+}
+
+static int isPseudoTagLine (const char *buffer)	/* does buffer start with "!_"? */
+{
+	return ! strncmp (buffer, PseudoTagPrefix, PseudoTagPrefixLength);
+}
+
+/* Read the pseudo tags ("!_TAG_..." lines) found at the current position of
+ * the stream, recording sort method, format and program information in both
+ * file and info, then seek back to the start of the first line that is not
+ * a pseudo tag.
+ * Returns TagSuccess, or TagFailure with info->status.error_number set to an
+ * errno value or a tagErrno value.  The program strings are allocated here
+ * and stored in file; info merely aliases them.
+ */
+static tagResult readPseudoTags (tagFile *const file, tagFileInfo *const info)
+{
+	fpos_t startOfLine;
+	int startOfLineValid = 0;
+	int err = 0;
+	const size_t prefixLength = strlen (PseudoTagPrefix);
+
+	info->file.format     = 1;
+	info->file.sort       = TAG_UNSORTED;
+	info->program.author  = NULL;
+	info->program.name    = NULL;
+	info->program.url     = NULL;
+	info->program.version = NULL;
+
+	while (1)
+	{
+		tagEntry entry;
+		const char *key, *value, *number;
+		char **program = NULL;
+
+		startOfLineValid = (fgetpos (file->fp, &startOfLine) == 0);
+		if (! startOfLineValid)
+		{
+			err = errno;
+			break;
+		}
+		if (! readTagLine (file, &err))
+			break;
+		if (!isPseudoTagLine (file->line.buffer))
+			break;
+		if (parseTagLine (file, &entry, &err) != TagSuccess)
+			break;
+
+		key = entry.name + prefixLength;
+		value = entry.file;
+		/* A pseudo tag line without a tab has no value at all (entry.file is
+		 * NULL); parse it like an empty one rather than pass NULL to strtol */
+		number = (value != NULL) ? value : EmptyString;
+
+		if (strcmp (key, "TAG_FILE_SORTED") == 0)
+		{
+			char *endptr = NULL;
+			long m = strtol (number, &endptr, 10);
+			if (*endptr != '\0' || m < 0 || m > 2)
+			{
+				err = TagErrnoUnexpectedSortedMethod;
+				break;
+			}
+			file->sortMethod = (tagSortType) m;
+		}
+		else if (strcmp (key, "TAG_FILE_FORMAT") == 0)
+		{
+			char *endptr = NULL;
+			long m = strtol (number, &endptr, 10);
+			if (*endptr != '\0' || m < 1 || m > 2)
+			{
+				err = TagErrnoUnexpectedFormat;
+				break;
+			}
+			file->format = (short) m;
+		}
+		else if (strcmp (key, "TAG_PROGRAM_AUTHOR") == 0)
+			program = &file->program.author;
+		else if (strcmp (key, "TAG_PROGRAM_NAME") == 0)
+			program = &file->program.name;
+		else if (strcmp (key, "TAG_PROGRAM_URL") == 0)
+			program = &file->program.url;
+		else if (strcmp (key, "TAG_PROGRAM_VERSION") == 0)
+			program = &file->program.version;
+
+		if (program != NULL)
+		{
+			/* Copy first, then release the string of an earlier occurrence
+			 * of the same pseudo tag: nothing leaks, and on failure file
+			 * and info still refer to the old, valid string. */
+			char *copy = duplicate (value);
+			if (value != NULL  &&  copy == NULL)
+			{
+				err = ENOMEM;
+				break;
+			}
+			free (*program);
+			*program = copy;
+		}
+
+		info->file.format     = file->format;
+		info->file.sort       = file->sortMethod;
+		info->program.author  = file->program.author;
+		info->program.name    = file->program.name;
+		info->program.url     = file->program.url;
+		info->program.version = file->program.version;
+	}
+
+	/* startOfLine is meaningful only if the last fgetpos() succeeded */
+	if (startOfLineValid  &&  fsetpos (file->fp, &startOfLine) != 0)
+		err = errno;
+
+	info->status.error_number = err;
+	return err ? TagFailure : TagSuccess;
+}
+
+static int doesFilePointPseudoTag (tagFile *const file, void *unused)	/* isAcceptable callback */
+{
+	return strncmp (file->name.buffer, PseudoTagPrefix, PseudoTagPrefixLength) == 0;
+}
+
+/* Rewind the stream, then leave it positioned at the start of the first
+ * line that is not a pseudo tag (at the point where the last read began if
+ * the file holds nothing else).  On failure the error is stored in file->err
+ * and TagFailure is returned. */
+static tagResult gotoFirstLogicalTag (tagFile *const file)
+{
+	fpos_t lineStart;
+	int searching = 1;
+
+	if (fseek(file->fp, 0L, SEEK_SET) == -1)
+		goto io_failure;
+
+	while (searching)
+	{
+		if (fgetpos (file->fp, &lineStart) < 0)
+			goto io_failure;
+		if (! readTagLine (file, &file->err))
+		{
+			if (file->err)
+				return TagFailure;
+			searching = 0;	/* plain EOF */
+		}
+		else if (!isPseudoTagLine (file->line.buffer))
+			searching = 0;
+	}
+	if (fsetpos (file->fp, &lineStart) < 0)
+		goto io_failure;
+	return TagSuccess;
+
+ io_failure:
+	file->err = errno;
+	return TagFailure;
+}
+
+/* Allocate a tagFile, open filePath and read its leading pseudo tags.
+ * On failure returns NULL with info->status.opened == 0 and
+ * info->status.error_number set; everything allocated here is released. */
+static tagFile *initialize (const char *const filePath, tagFileInfo *const info)
+{
+	tagFile *result = (tagFile*) calloc ((size_t) 1, sizeof (tagFile));
+
+	if (result == NULL)
+	{
+		info->status.opened = 0;
+		info->status.error_number = ENOMEM;
+		return NULL;
+	}
+
+	if (growString (&result->line) != TagSuccess)
+		goto mem_error;
+	if (growString (&result->name) != TagSuccess)
+		goto mem_error;
+	result->fields.max = 20;
+	result->fields.list = (tagExtensionField*) calloc (
+		result->fields.max, sizeof (tagExtensionField));
+	if (result->fields.list == NULL)
+		goto mem_error;
+	result->fp = fopen (filePath, "rb");
+	if (result->fp == NULL)
+		goto io_error;
+	/* measure the file: seek to its end, record the offset, rewind */
+	if (fseek (result->fp, 0, SEEK_END) == -1)
+		goto io_error;
+	result->size = ftell (result->fp);
+	if (result->size == -1)
+		goto io_error;
+	if (fseek(result->fp, 0L, SEEK_SET) == -1)
+		goto io_error;
+
+	if (readPseudoTags (result, info) == TagFailure)
+		goto file_error;
+
+	info->status.opened = 1;
+	result->initialized = 1;
+	return result;
+ mem_error:
+	info->status.error_number = ENOMEM;
+	goto file_error;
+ io_error:
+	info->status.error_number = errno;
+ file_error:
+	/* readPseudoTags() may have duplicated program strings before failing:
+	 * free them and clear the aliases it published through info */
+	free (result->program.author);
+	free (result->program.name);
+	free (result->program.url);
+	free (result->program.version);
+	info->program.author = info->program.name = NULL;
+	info->program.url = info->program.version = NULL;
+	free (result->line.buffer);
+	free (result->name.buffer);
+	free (result->fields.list);
+	if (result->fp)
+		fclose (result->fp);
+	free (result);
+	info->status.opened = 0;
+	return NULL;
+}
+
+/* Close the stream and release every buffer owned by file, then zero the
+ * structure and free it.  file must not be used afterwards.
+ */
+static void terminate (tagFile *const file)
+{
+	void *const owned [] = {
+		file->line.buffer, file->name.buffer, file->fields.list,
+		file->program.author, file->program.name, file->program.url,
+		file->program.version, file->search.name
+	};
+	size_t i;
+
+	fclose (file->fp);
+
+	/* free (NULL) is a no-op, so pointers that were never set need no
+	 * check of their own */
+	for (i = 0  ;  i < sizeof owned / sizeof owned [0]  ;  ++i)
+		free (owned [i]);
+
+	memset (file, 0, sizeof (tagFile));
+
+	free (file);
+}
+
+/* Read the next tag line and, unless entry is NULL, parse it into entry.
+ * Returns TagFailure for a NULL or uninitialized file, on a read error
+ * (file->err set) and at EOF. */
+static tagResult readNext (tagFile *const file, tagEntry *const entry)
+{
+	if (file == NULL)
+		return TagFailure;
+
+	if (! file->initialized)
+	{
+		file->err = TagErrnoInvalidArgument;
+		return TagFailure;
+	}
+
+	if (! readTagLine (file, &file->err))
+		return TagFailure;
+
+	if (entry == NULL)
+		return TagSuccess;	/* the caller only wanted to advance */
+
+	return parseTagLine (file, entry, &file->err);
+}
+
+/* Value of field `key' in entry: "kind" gives entry->kind, "file" the empty
+ * string, any other key the first non-NULL value listed for it, else NULL. */
+static const char *readFieldValue (const tagEntry *const entry, const char *const key)
+{
+	int i;
+	if (strcmp (key, "kind") == 0)
+		return entry->kind;
+	if (strcmp (key, "file") == 0)
+		return EmptyString;
+	for (i = 0  ;  i < entry->fields.count  ;  ++i)
+		if (strcmp (entry->fields.list [i].key, key) == 0  &&  entry->fields.list [i].value != NULL)
+			return entry->fields.list [i].value;
+	return NULL;
+}
+
+/* Seek to pos and read the first complete line found from there: unless
+ * pos is 0, the first line read is taken to be cut short and is skipped.
+ * Return 1 on success, 0 on failure (file->err set) or EOF. */
+static int readTagLineSeek (tagFile *const file, const off_t pos)
+{
+	int gotLine;
+
+	if (fseek (file->fp, pos, SEEK_SET) < 0)
+	{
+		file->err = errno;
+		return 0;
+	}
+
+	gotLine = readTagLine (file, &file->err);	/* probably a partial line */
+	if (gotLine  &&  pos > 0)
+		gotLine = readTagLine (file, &file->err);	/* the complete line */
+	return gotLine;
+}
+
+/* Compare the name searched for with the name of the line last read,
+ * honouring the partial-match and ignore-case settings of the search.
+ * Return 0 for match, < 0 if the searched name is smaller, > 0 if it is
+ * bigger. */
+static int nameComparison (tagFile *const file)
+{
+	const char *const wanted = file->search.name;
+	const char *const found = file->name.buffer;
+	const size_t prefix = file->search.nameLength;
+
+	/* case-insensitive comparisons */
+	if (file->search.ignorecase  &&  file->search.partial)
+		return tagnuppercmp (wanted, found, prefix);
+	if (file->search.ignorecase)
+		return taguppercmp (wanted, found);
+
+	/* case-sensitive comparisons */
+	if (file->search.partial)
+		return tagncmp (wanted, found, prefix);
+	return tagcmp (wanted, found);
+}
+
+/* Step back from the current line in jumps of JUMP_BACK bytes until the
+ * line read after a jump no longer matches (or the file start is reached).
+ * Returns TagFailure only when reading fails with file->err set. */
+static tagResult findFirstNonMatchBefore (tagFile *const file)
+{
+#define JUMP_BACK 512
+	const off_t start = file->pos;
+	off_t pos = start;
+	int keepGoing;
+	do
+	{
+		int more_lines;
+		pos = (pos < (off_t) JUMP_BACK) ? 0 : pos - JUMP_BACK;
+		more_lines = readTagLineSeek (file, pos);
+		if (! more_lines  &&  file->err)
+			return TagFailure;
+		keepGoing = (nameComparison (file) == 0)  &&  more_lines  &&  pos > 0  &&  pos < start;
+	} while (keepGoing);
+	return TagSuccess;
+}
+
+/* Back up to a non-matching line, then read forward to the first match */
+static tagResult findFirstMatchBefore (tagFile *const file)
+{
+	const off_t start = file->pos;
+	if (findFirstNonMatchBefore (file) != TagSuccess)
+		return TagFailure;
+	for (;;)
+	{
+		const int more_lines = readTagLine (file, &file->err);
+		if (! more_lines  &&  file->err)
+			return TagFailure;
+		if (nameComparison (file) == 0)
+			return TagSuccess;
+		if (! more_lines  ||  file->pos >= start)
+			return TagFailure;
+	}
+}
+
+static tagResult findBinary (tagFile *const file)
+{
+	tagResult result = TagFailure;
+	off_t lower_limit = 0;
+	off_t upper_limit = file->size;
+	off_t last_pos = 0;
+	off_t pos = upper_limit / 2;	/* start bisecting in the middle of the file */
+	while (result != TagSuccess)
+	{
+		if (! readTagLineSeek (file, pos))
+		{
+			if (file->err)
+				break;
+			/* in case we fell off end of file (no error, just EOF) */
+			result = findFirstMatchBefore (file);
+			break;
+		}
+		else if (pos == last_pos)
+		{
+			/* prevent infinite loop if we backed up to beginning of file */
+			break;
+		}
+		else
+		{
+			const int comp = nameComparison (file);
+			last_pos = pos;
+			if (comp < 0)
+			{
+				upper_limit = pos;	/* searched name is smaller: go on below pos */
+				pos = lower_limit + ((upper_limit - lower_limit) / 2);
+			}
+			else if (comp > 0)
+			{
+				lower_limit = pos;	/* searched name is bigger: go on above pos */
+				pos = lower_limit + ((upper_limit - lower_limit) / 2);
+			}
+			else if (pos == 0)
+				result = TagSuccess;	/* match on the first line read from offset 0 */
+			else
+			{
+				result = findFirstMatchBefore (file);	/* look for earlier matches */
+				if (result != TagSuccess && file->err)
+					break;
+			}
+		}
+	}
+	return result;
+}
+
+/* Read lines from the current position until isAcceptable (file, data)
+ * accepts one.  Returns TagFailure at EOF or on a read error (file->err set),
+ * and also for a NULL, uninitialized or already failed file. */
+static tagResult findSequentialFull (tagFile *const file,
+									 int (* isAcceptable) (tagFile *const, void *),
+									 void *data)
+{
+	if (file == NULL)
+		return TagFailure;
+
+	if (!file->initialized || file->err)
+	{
+		file->err = TagErrnoInvalidArgument;
+		return TagFailure;
+	}
+
+	while (readTagLine (file, &file->err))
+	{
+		if (isAcceptable (file, data))
+			return TagSuccess;
+	}
+	return TagFailure;
+}
+
+static int nameAcceptable (tagFile *const file, void *unused)	/* isAcceptable callback */
+{
+	return ! nameComparison (file);
+}
+
+static tagResult findSequential (tagFile *const file)	/* scan on for the searched name */
+{
+	return findSequentialFull (file, nameAcceptable, NULL);
+}
+
+/* Search the tag file for `name' according to `options' (TAG_PARTIALMATCH,
+ * TAG_IGNORECASE): a binary search when the file's sort method suits the
+ * requested case handling, a sequential one otherwise.  On a match the line
+ * is parsed into entry (unless entry is NULL).  The search state is kept in
+ * file->search; failures caused by an error also set file->err. */
+static tagResult find (tagFile *const file, tagEntry *const entry,
+					   const char *const name, const int options)
+{
+	tagResult result;
+	int canBisect;
+
+	free (file->search.name);	/* free (NULL) is harmless */
+	file->search.name = duplicate (name);
+	if (file->search.name == NULL)
+	{
+		file->err = ENOMEM;
+		return TagFailure;
+	}
+	file->search.nameLength = strlen (name);
+	file->search.partial = (options & TAG_PARTIALMATCH) != 0;
+	file->search.ignorecase = (options & TAG_IGNORECASE) != 0;
+
+	/* refresh the recorded file size, then rewind */
+	if (fseek (file->fp, 0, SEEK_END) < 0)
+		goto io_failure;
+	file->size = ftell (file->fp);
+	if (file->size == -1)
+		goto io_failure;
+	if (fseek(file->fp, 0L, SEEK_SET) == -1)
+		goto io_failure;
+
+	/* bisecting needs a file sorted the way the names are compared */
+	canBisect = file->search.ignorecase
+		? (file->sortMethod == TAG_FOLDSORTED)
+		: (file->sortMethod == TAG_SORTED);
+	result = canBisect ? findBinary (file) : findSequential (file);
+	if (result == TagFailure && file->err)
+		return TagFailure;
+
+	if (result != TagSuccess)
+	{
+		/* nothing matched */
+		file->search.pos = file->size;
+		return result;
+	}
+
+	file->search.pos = file->pos;
+	if (entry == NULL)
+		return TagSuccess;
+	return parseTagLine (file, entry, &file->err);
+
+ io_failure:
+	/* errno still holds the cause of the failed fseek() / ftell() */
+	file->err = errno;
+	return TagFailure;
+}
+
+/* Advance to the next line accepted by isAcceptable().  With `sorted' set
+ * only the very next line is considered; otherwise the rest of the file is
+ * scanned.  The accepted line is parsed into entry unless entry is NULL. */
+static tagResult findNextFull (tagFile *const file, tagEntry *const entry,
+							   int sorted,
+							   int (* isAcceptable) (tagFile *const, void *),
+							   void *data)
+{
+	if (! sorted)
+	{
+		if (findSequentialFull (file, isAcceptable, data) != TagSuccess)
+			return TagFailure;
+		return (entry != NULL)
+			? parseTagLine (file, entry, &file->err) : TagSuccess;
+	}
+
+	if (tagsNext (file, entry) != TagSuccess)
+		return TagFailure;
+	return isAcceptable (file, data) ? TagSuccess : TagFailure;
+}
+
+/* Continue the search for file->search.name after the line last read */
+static tagResult findNext (tagFile *const file, tagEntry *const entry)
+{
+	const int sorted = file->search.ignorecase
+		? (file->sortMethod == TAG_FOLDSORTED) : (file->sortMethod == TAG_SORTED);
+	return findNextFull (file, entry, sorted, nameAcceptable, NULL);
+}
+
+/* Find the next pseudo tag, optionally rewinding to the start of the file
+ * first.  Rejects a NULL, uninitialized or already failed file. */
+static tagResult findPseudoTag (tagFile *const file, int rewindBeforeFinding, tagEntry *const entry)
+{
+	int sorted;
+
+	if (file == NULL)
+		return TagFailure;
+	if (!file->initialized || file->err)
+	{
+		file->err = TagErrnoInvalidArgument;
+		return TagFailure;
+	}
+
+	if (rewindBeforeFinding  &&  fseek(file->fp, 0L, SEEK_SET) == -1)
+	{
+		file->err = errno;
+		return TagFailure;
+	}
+
+	/* either kind of sorted file counts as sorted here */
+	sorted = (file->sortMethod == TAG_SORTED || file->sortMethod == TAG_FOLDSORTED);
+	return findNextFull (file, entry, sorted, doesFilePointPseudoTag, NULL);
+}
+
+
+/*
+*  EXTERNAL INTERFACE
+*/
+
+extern tagFile *tagsOpen (const char *const filePath, tagFileInfo *const info)
+{
+	tagFileInfo infoDummy;	/* stands in for info when the caller passes NULL */
+	return initialize (filePath, info? info: &infoDummy);
+}
+
+/* Public: override the sort method recorded for file.  Anything but the
+ * three tagSortType values is rejected with TagErrnoUnexpectedSortedMethod;
+ * a NULL, uninitialized or already failed file is rejected as well. */
+extern tagResult tagsSetSortType (tagFile *const file, const tagSortType type)
+{
+	int known;
+	if (file == NULL)
+		return TagFailure;
+	if (!file->initialized || file->err)
+	{
+		file->err = TagErrnoInvalidArgument;
+		return TagFailure;
+	}
+
+	known = (type == TAG_UNSORTED  ||  type == TAG_SORTED  ||  type == TAG_FOLDSORTED);
+	if (! known)
+	{
+		file->err = TagErrnoUnexpectedSortedMethod;
+		return TagFailure;
+	}
+	file->sortMethod = type;
+	return TagSuccess;
+}
+
+/* Public: read the first tag that is not a pseudo tag into entry. */
+extern tagResult tagsFirst (tagFile *const file, tagEntry *const entry)
+{
+	if (file == NULL)
+		return TagFailure;
+	if (! file->initialized  ||  file->err)
+	{
+		/* unusable handle: not initialized, or an earlier call failed */
+		file->err = TagErrnoInvalidArgument;
+		return TagFailure;
+	}
+	return (gotoFirstLogicalTag (file) == TagSuccess)
+		? readNext (file, entry)
+		: TagFailure;
+}
+
+/* Public: read the next tag line into entry (entry may be NULL, which just
+ * advances the file). */
+extern tagResult tagsNext (tagFile *const file, tagEntry *const entry)
+{
+	if (file == NULL)
+		return TagFailure;
+	if (file->initialized  &&  ! file->err)
+		return readNext (file, entry);
+
+	/* unusable handle: not initialized, or an earlier call failed */
+	file->err = TagErrnoInvalidArgument;
+	return TagFailure;
+}
+
+/* Public: value of field `key' in entry; NULL if absent or entry is NULL */
+extern const char *tagsField (const tagEntry *const entry, const char *const key)
+{
+	if (entry == NULL)
+		return NULL;
+	return readFieldValue (entry, key);
+}
+
+/* Public: find the first tag matching name; options combine the
+ * TAG_PARTIALMATCH and TAG_IGNORECASE flags. */
+extern tagResult tagsFind (tagFile *const file, tagEntry *const entry,
+						   const char *const name, const int options)
+{
+	if (file == NULL)
+		return TagFailure;
+	if (file->initialized  &&  ! file->err)
+		return find (file, entry, name, options);
+
+	/* unusable handle: not initialized, or an earlier call failed */
+	file->err = TagErrnoInvalidArgument;
+	return TagFailure;
+}
+
+/* Public: find the next tag matching the name and options given to the
+ * preceding tagsFind() call. */
+extern tagResult tagsFindNext (tagFile *const file, tagEntry *const entry)
+{
+	if (file == NULL)
+		return TagFailure;
+	if (file->initialized  &&  ! file->err)
+		return findNext (file, entry);
+
+	/* unusable handle: not initialized, or an earlier call failed */
+	file->err = TagErrnoInvalidArgument;
+	return TagFailure;
+}
+
+extern tagResult tagsFirstPseudoTag (tagFile *const file, tagEntry *const entry)
+{
+	return findPseudoTag (file, 1, entry);	/* 1: rewind to the file start first */
+}
+
+extern tagResult tagsNextPseudoTag (tagFile *const file, tagEntry *const entry)
+{
+	return findPseudoTag (file, 0, entry);	/* 0: continue from the current position */
+}
+
+/* Public: close file and free it.  Fails for a NULL or uninitialized file. */
+extern tagResult tagsClose (tagFile *const file)
+{
+	if (file == NULL  ||  ! file->initialized)
+		return TagFailure;
+
+	/* terminate() frees file itself: it is dangling from here on */
+	terminate (file);
+	return TagSuccess;
+}
+
+/* Public: error recorded in file; a NULL file is itself an invalid argument */
+extern int tagsGetErrno (tagFile *const file)
+{
+	return (file != NULL)
+		? file->err : TagErrnoInvalidArgument;
+}
diff --git a/ctags/libreadtags/readtags.h b/ctags/libreadtags/readtags.h
new file mode 100644
index 0000000000..866f4b049c
--- /dev/null
+++ b/ctags/libreadtags/readtags.h
@@ -0,0 +1,295 @@
+/*
+*   Copyright (c) 1996-2003, Darren Hiebert
+*
+*   This source code is released for the public domain.
+*
+*   This file defines the public interface for looking up tag entries in tag
+*   files.
+*
+*   The functions defined in this interface are intended to provide tag file
+*   support to a software tool. The tag lookups provided are fast
+*   enough to permit opening a sorted tag file, searching for a matching tag,
+*   then closing the tag file each time a tag is looked up (search times are
+*   on the order of hundredths of a second, even for huge tag files). This is
+*   the recommended use of this library for most tool applications. Adhering
+*   to this approach permits a user to regenerate a tag file at will without
+*   the tool needing to detect and resynchronize with changes to the tag file.
+*   Even for an unsorted 24MB tag file, tag searches take about one second.
+*/
+#ifndef READTAGS_H
+#define READTAGS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+*  MACROS
+*/
+
+/* Options for tagsSetSortType() */
+typedef enum {
+	TAG_UNSORTED, TAG_SORTED, TAG_FOLDSORTED
+} tagSortType ;
+
+/* For source code level compatibility, sortType is defined here.
+*  Define TAG_NO_COMPAT_SORT_TYPE if you want to avoid namespace pollution.
+*/
+#ifndef TAG_NO_COMPAT_SORT_TYPE
+#define sortType tagSortType
+#endif
+
+/* Options for tagsFind() */
+#define TAG_FULLMATCH     0x0
+#define TAG_PARTIALMATCH  0x1
+
+#define TAG_OBSERVECASE   0x0
+#define TAG_IGNORECASE    0x2
+
+/*
+*  DATA DECLARATIONS
+*/
+
+typedef enum { TagFailure = 0, TagSuccess = 1 } tagResult;
+
+typedef enum {
+	TagErrnoUnexpectedSortedMethod = -1, /* Unexpected sorted method */
+	TagErrnoUnexpectedFormat       = -2, /* Unexpected format number */
+	TagErrnoUnexpectedLineno       = -3, /* Unexpected value for line: field
+										  * (Zero or a positive integer is expected.) */
+	TagErrnoInvalidArgument        = -4, /* Unexpected argument passed to the API
+										  * function */
+} tagErrno;
+
+struct sTagFile;
+
+typedef struct sTagFile tagFile;
+
+/* This structure contains information about the tag file. */
+typedef struct {
+
+	struct {
+			/* was the tag file successfully opened? */
+		int opened;
+
+			/* errno value or tagErrno typed value
+			   when 'opened' is false */
+		int error_number;
+	} status;
+
+		/* information about the structure of the tag file */
+	struct {
+				/* format of tag file (1 = original, 2 = extended) */
+			short format;
+
+				/* how is the tag file sorted? */
+			tagSortType sort;
+	} file;
+
+
+		/* information about the program which created this tag file */
+	struct {
+			/* name of author of generating program (may be null) */
+		const char *author;
+
+			/* name of program (may be null) */
+		const char *name;
+
+			/* URL of distribution (may be null) */
+		const char *url;
+
+			/* program version (may be null) */
+		const char *version;
+	} program;
+
+} tagFileInfo;
+
+/* This structure contains information about an extension field for a tag.
+ * These exist at the end of the tag in the form "key:value".
+ */
+typedef struct {
+
+		/* the key of the extension field */
+	const char *key;
+
+		/* the value of the extension field (may be an empty string) */
+	const char *value;
+
+} tagExtensionField;
+
+/* This structure contains information about a specific tag. */
+typedef struct {
+
+		/* name of tag */
+	const char *name;
+
+		/* path of source file containing definition of tag.
+		   For a broken tags file, this can be NULL. */
+	const char *file;
+
+		/* address for locating tag in source file */
+	struct {
+			/* pattern for locating source line
+			 * (may be NULL if not present) */
+		const char *pattern;
+
+			/* line number in source file of tag definition
+			 * (may be zero if not known) */
+		unsigned long lineNumber;
+	} address;
+
+		/* kind of tag (may be a name, a character, or NULL if not known) */
+	const char *kind;
+
+		/* is tag of file-limited scope? */
+	short fileScope;
+
+		/* miscellaneous extension fields */
+	struct {
+			/* number of entries in `list' */
+		unsigned short count;
+
+			/* list of key value pairs */
+		tagExtensionField *list;
+	} fields;
+
+} tagEntry;
+
+
+/*
+*  FUNCTION PROTOTYPES
+*/
+
+/*
+*  This function must be called before calling other functions in this
+*  library. It is passed the path to the tag file to read and a (possibly
+*  null) pointer to a structure which, if not null, will be populated with
+*  information about the tag file. If successful, the function will return a
+*  handle which must be supplied to other calls to read information from the
+*  tag file, and info.status.opened will be set to true.
+*  If unsuccessful, the function will return NULL, and
+*  info.status.opened will be set to false and
+*  info.status.error_number will be set to either the errno value
+*  representing the system error preventing the tag file from being
+*  successfully opened, or the tagErrno typed value representing the
+*  library level error. The error_number will be ENOMEM if the memory
+*  allocation for the handle fails.
+*/
+extern tagFile *tagsOpen (const char *const filePath, tagFileInfo *const info);
+
+/*
+*  This function allows the client to override the normal automatic detection
+*  of how a tag file is sorted. Permissible values for `type' are
+*  TAG_UNSORTED, TAG_SORTED, TAG_FOLDSORTED. Tag files in the new extended
+*  format contain a key indicating whether or not they are sorted. However,
+*  tag files in the original format do not contain such a key even when
+*  sorted, preventing this library from taking advantage of fast binary
+*  lookups. If the client knows that such an unmarked tag file is indeed
+*  sorted (or not), it can override the automatic detection. Note that
+*  incorrect lookup results will result if a tag file is marked as sorted when
+*  it actually is not. The function will return TagSuccess if called on an
+*  open tag file or TagFailure if not.
+*/
+extern tagResult tagsSetSortType (tagFile *const file, const tagSortType type);
+
+/*
+*  Reads the first tag in the file, if any. It is passed the handle to an
+*  opened tag file and a (possibly null) pointer to a structure which, if not
+*  null, will be populated with information about the first tag file entry.
+*  The function will return TagSuccess if a tag entry is found, or
+*  TagFailure if not (i.e. it reached end of file).
+*/
+extern tagResult tagsFirst (tagFile *const file, tagEntry *const entry);
+
+/*
+*  Step to the next tag in the file, if any. It is passed the handle to an
+*  opened tag file and a (possibly null) pointer to a structure which, if not
+*  null, will be populated with information about the next tag file entry. The
+*  function will return TagSuccess if another tag entry is found, or TagFailure
+*  if not (i.e. it reached end of file). It will always read the first tag in
+*  the file immediately after calling tagsOpen().
+*/
+extern tagResult tagsNext (tagFile *const file, tagEntry *const entry);
+
+/*
+*  Retrieve the value associated with the extension field for a specified key.
+*  It is passed a pointer to a structure already populated with values by a
+*  previous call to tagsNext(), tagsFind(), or tagsFindNext(), and a string
+*  containing the key of the desired extension field. If no such field of the
+*  specified key exists, the function will return null.
+*/
+extern const char *tagsField (const tagEntry *const entry, const char *const key);
+
+/*
+*  Find the first tag matching `name'. The structure pointed to by `entry'
+*  will be populated with information about the tag file entry. If a tag file
+*  is sorted using the C locale, a binary search algorithm is used to search
+*  the tag file, resulting in very fast tag lookups, even in huge tag files.
+*  Various options controlling the matches can be combined by bit-wise or-ing
+*  certain values together. The available values are:
+*
+*    TAG_PARTIALMATCH
+*        Tags whose leading characters match `name' will qualify.
+*
+*    TAG_FULLMATCH
+*        Only tags whose full lengths match `name' will qualify.
+*
+*    TAG_IGNORECASE
+*        Matching will be performed in a case-insensitive manner. Note that
+*        this disables binary searches of the tag file.
+*
+*    TAG_OBSERVECASE
+*        Matching will be performed in a case-sensitive manner. Note that
+*        this enables binary searches of the tag file.
+*
+*  The function will return TagSuccess if a tag matching the name is found, or
+*  TagFailure if not.
+*/
+extern tagResult tagsFind (tagFile *const file, tagEntry *const entry, const char *const name, const int options);
+
+/*
+*  Find the next tag matching the name and options supplied to the most recent
+*  call to tagsFind() for the same tag file. The structure pointed to by
+*  `entry' will be populated with information about the tag file entry. The
+*  function will return TagSuccess if another tag matching the name is found,
+*  or TagFailure if not.
+*/
+extern tagResult tagsFindNext (tagFile *const file, tagEntry *const entry);
+
+/*
+*  Does the same as tagsFirst(), but is specialized to pseudo tags.
+*  If tagFileInfo doesn't contain the pseudo tags you are interested in, read
+*  them sequentially with this function and tagsNextPseudoTag().
+*/
+extern tagResult tagsFirstPseudoTag (tagFile *const file, tagEntry *const entry);
+
+/*
+*  Does the same as tagsNext(), but is specialized to pseudo tags. Use with
+*  tagsFirstPseudoTag().
+*/
+extern tagResult tagsNextPseudoTag (tagFile *const file, tagEntry *const entry);
+
+/*
+*  Call tagsClose() at completion of reading the tag file, which will
+*  close the file and free any internal memory allocated. The function will
+*  return TagFailure if no file is currently open, TagSuccess otherwise.
+*/
+extern tagResult tagsClose (tagFile *const file);
+
+/*
+*  Get the error status set in the last API call.
+*  Many of the API functions return TagFailure because (1) no tag is
+*  found, or (2) an error occurs. tagsGetErrno() is for distinguishing
+*  (1) from (2). This function will return 0 for (1), and the errno value
+*  representing the system error or the tagErrno value for (2).
+*
+*  This function does not deal with the results of tagsOpen(),
+*  tagsClose(), and tagsField().
+*/
+extern int tagsGetErrno (tagFile *const file);
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif
diff --git a/meson.build b/meson.build
index ec3e509af9..955dc0c3df 100644
--- a/meson.build
+++ b/meson.build
@@ -232,7 +232,7 @@ sci_cflags += [ '-std=c++17', '-Wno-non-virtual-dtor', '-DNDEBUG', '-DSCI_LEXER'
 
 iregex = include_directories('ctags/gnu_regex')
 ifnmatch = include_directories('ctags/fnmatch')
-ictags = include_directories('ctags/main', 'ctags/parsers', 'ctags/dsl')
+ictags = include_directories('ctags/main', 'ctags/parsers', 'ctags/dsl', 'ctags/libreadtags')
 itagmanager = include_directories('src/tagmanager')
 iscintilla = include_directories('scintilla/include', 'scintilla/lexilla/include')
 igeany = include_directories('src')
@@ -472,6 +472,8 @@ ctags = static_library('ctags',
 	'ctags/dsl/es.h',
 	'ctags/dsl/optscript.c',
 	'ctags/dsl/optscript.h',
+	'ctags/libreadtags/readtags.c',
+	'ctags/libreadtags/readtags.h',
 	'ctags/main/args.c',
 	'ctags/main/args_p.h',
 	'ctags/main/colprint.c',

From d364c30e7973ceaf387f8c64211d3980a462f90b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= <techet@gmail.com>
Date: Thu, 23 Dec 2021 17:53:50 +0100
Subject: [PATCH 2/4] Fix compilation when TM_DEBUG is defined

This function doesn't get normally compiled because TM_DEBUG is undefined
and its declaration in header differs from its implementation.
---
 src/tagmanager/tm_tag.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tagmanager/tm_tag.h b/src/tagmanager/tm_tag.h
index 27a78aa5b1..d9813ca0df 100644
--- a/src/tagmanager/tm_tag.h
+++ b/src/tagmanager/tm_tag.h
@@ -154,7 +154,7 @@ gboolean tm_tag_is_anon(const TMTag *tag);
 
 const char *tm_tag_type_name(const TMTag *tag);
 
-TMTagType tm_tag_name_type(const char* tag_name);
+TMTagType tm_tag_name_type(const char* tag_name, TMParserType lang);
 
 void tm_tag_print(TMTag *tag, FILE *fp);
 

From 3a0230e0285fe65d00c758b0f2747ac29f269b6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= <techet@gmail.com>
Date: Wed, 8 Dec 2021 00:39:09 +0100
Subject: [PATCH 3/4] Use the readtags library to parse ctags files

Use the official 'readtags' library from ctags to parse files in the
ctags file format. This allows us not to worry about parsing itself
and just extract the information from ctags files we need.

In addition, this patch also reads some extra ctags fields we are interested
in. It also detects scope key based on the kind used by the given language
(not the hard-coded values that are valid for C/C++ only).

Finally, this patch also detects the "language" field that specifies
for which language the given tag is in the case that the tags file
contains tags from multiple languages.

When loading language this way, we have to postpone the update of
scope ('s' ctags field) until we get the language field because its key
is a kind name that is specific to the used language.

Note that even after this change tags files still have to be named so
that they contain the language in the file's extension because this is
needed for other tag file formats supported by Geany. It will, however,
be overridden by the "language" field.

Co-authored-by: Colomban Wendling <lists.ban@herbesfolles.org>
---
 doc/geany.txt                   |   9 +-
 src/tagmanager/Makefile.am      |   1 +
 src/tagmanager/tm_source_file.c | 209 ++++++++++++++++++--------------
 3 files changed, 121 insertions(+), 98 deletions(-)

diff --git a/doc/geany.txt b/doc/geany.txt
index b17aaf7d33..a772c51271 100644
--- a/doc/geany.txt
+++ b/doc/geany.txt
@@ -1689,12 +1689,13 @@ Global tags files can have three different formats:
 * Pipe-separated format
 * CTags format
 
-The first line of global tags files should be a comment, introduced
-by ``#`` followed by a space and a string like ``format=pipe``,
-``format=ctags`` or ``format=tagmanager`` respectively, these are
+For the tagmanager or pipe-separated format, the first line of global tag files
+should be a comment, introduced by ``#`` followed by a space and a string like
+``format=tagmanager`` or ``format=pipe``, respectively; these are
 case-sensitive.  This helps Geany to read the file properly. If this
 line is missing, Geany tries to auto-detect the used format but this
-might fail.
+might fail.  Tag files using the CTags format should be left unmodified in the
+form generated by the ctags command-line tool.
 
 
 The Tagmanager format is a bit more complex and is used for files
diff --git a/src/tagmanager/Makefile.am b/src/tagmanager/Makefile.am
index 319befa58f..e04969e13e 100644
--- a/src/tagmanager/Makefile.am
+++ b/src/tagmanager/Makefile.am
@@ -1,6 +1,7 @@
 AM_CPPFLAGS = \
 	-I$(srcdir) \
 	-I$(top_srcdir)/ctags/main \
+	-I$(top_srcdir)/ctags/libreadtags \
 	-I$(top_srcdir)/ctags/dsl \
 	-DGEANY_PRIVATE \
 	-DG_LOG_DOMAIN=\"Tagmanager\"
diff --git a/src/tagmanager/tm_source_file.c b/src/tagmanager/tm_source_file.c
index 4d29c66735..10d1662031 100644
--- a/src/tagmanager/tm_source_file.c
+++ b/src/tagmanager/tm_source_file.c
@@ -29,6 +29,8 @@
 # include <windows.h> /* for GetFullPathName */
 #endif
 
+#include "readtags.h"
+
 #include "tm_source_file.h"
 #include "tm_tag.h"
 #include "tm_parser.h"
@@ -303,125 +305,133 @@ static gboolean init_tag_from_file_alt(TMTag *tag, TMSourceFile *file, FILE *fp)
 	return TRUE;
 }
 
-/*
- CTags tag file format (https://ctags.sourceforge.net/FORMAT)
-*/
-static gboolean init_tag_from_file_ctags(TMTag *tag, TMSourceFile *file, FILE *fp, TMParserType lang)
+
+static void read_ctags_file(const gchar *tags_file, TMParserType lang, GPtrArray *file_tags)
 {
-	gchar buf[BUFSIZ];
-	gchar *p, *tab;
+	tagEntry entry;
+	tagFile *f = tagsOpen(tags_file, NULL);
+	const gchar *lang_kinds = tm_ctags_get_lang_kinds(lang);
+	GArray *unknown_fields = g_array_sized_new(FALSE, FALSE, sizeof(guint), 10);
 
-	tag->refcount = 1;
-	tag->type = tm_tag_function_t; /* default type is function if no kind is specified */
-	do
+	while (tagsNext(f, &entry))
 	{
-		if ((NULL == fgets(buf, BUFSIZ, fp)) || ('\0' == *buf))
-			return FALSE;
-	}
-	while (strncmp(buf, "!_TAG_", 6) == 0); /* skip !_TAG_ lines */
+		TMTagType type;
+		TMTag *tag;
 
-	p = buf;
+		if (!entry.kind)
+			continue;
 
-	/* tag name */
-	if (! (tab = strchr(p, '\t')) || p == tab)
-		return FALSE;
-	tag->name = g_strndup(p, (gsize)(tab - p));
-	p = tab + 1;
+		if (entry.kind[0] && entry.kind[1])
+			type = tm_parser_get_tag_type(tm_ctags_get_kind_from_name(entry.kind, lang), lang);  /* 'K' field */
+		else
+			type = tm_parser_get_tag_type(*entry.kind, lang);  /* 'k' field */
 
-	if (tm_parser_is_anon_name(lang, tag->name))
-		tag->flags |= tm_tag_flag_anon_t;
+		if (type == tm_tag_undef_t)
+			continue;
 
-	/* tagfile, unused */
-	if (! (tab = strchr(p, '\t')))
-	{
-		g_free(tag->name);
-		tag->name = NULL;
-		return FALSE;
-	}
-	p = tab + 1;
-	/* Ex command, unused */
-	if (*p == '/' || *p == '?')
-	{
-		gchar c = *p;
-		for (++p; *p && *p != c; p++)
-		{
-			if (*p == '\\' && p[1])
-				p++;
-		}
-	}
-	else /* assume a line */
-		tag->line = atol(p);
-	tab = strstr(p, ";\"");
-	/* read extension fields */
-	if (tab)
-	{
-		p = tab + 2;
-		while (*p && *p != '\n' && *p != '\r')
+		tag = tm_tag_new();
+		tag->refcount = 1;
+		tag->name = g_strdup(entry.name);
+		tag->type = type;
+		tag->lang = lang;
+		tag->local = entry.fileScope;  /* 'f' field */
+		tag->line = entry.address.lineNumber;  /* 'n' field */
+		tag->file = NULL;
+
+		if (tm_parser_is_anon_name(lang, tag->name))
+			tag->flags |= tm_tag_flag_anon_t;
+
+		for (guint i = 0; i < entry.fields.count; i++)
 		{
-			gchar *end;
-			const gchar *key, *value = NULL;
-
-			/* skip leading tabulations */
-			while (*p && *p == '\t') p++;
-			/* find the separator (:) and end (\t) */
-			key = end = p;
-			while (*end && *end != '\t' && *end != '\n' && *end != '\r')
+			const gchar *key = entry.fields.list[i].key;
+			const gchar *value = entry.fields.list[i].value;
+
+			if (strcmp(key, "scope") == 0)  /* 'sZ' field */
 			{
-				if (*end == ':' && ! value)
+				/* scope:class:A::B::C */
+				const gchar *val = strchr(value, ':');
+				if (val && *(++val))
 				{
-					*end = 0; /* terminate the key */
-					value = end + 1;
+					g_free(tag->scope);
+					tag->scope = g_strdup(val);
 				}
-				end++;
 			}
-			/* move p paste the so we won't stop parsing by setting *end=0 below */
-			p = *end ? end + 1 : end;
-			*end = 0; /* terminate the value (or key if no value) */
-
-			if (! value || 0 == strcmp(key, "kind")) /* tag kind */
+			else if (strcmp(key, "signature") == 0)  /* 'S' field */
 			{
-				const gchar *kind = value ? value : key;
-
-				if (kind[0] && kind[1])
-					tag->kind_letter = tm_ctags_get_kind_from_name(kind, lang);
-				else
-					tag->kind_letter = *kind;
-				tag->type = tm_parser_get_tag_type(tag->kind_letter, lang);
+				g_free(tag->arglist);
+				tag->arglist = g_strdup(value);
 			}
-			else if (0 == strcmp(key, "inherits")) /* comma-separated list of classes this class inherits from */
+			else if (strcmp(key, "inherits") == 0)  /* 'i' field */
 			{
 				g_free(tag->inheritance);
 				tag->inheritance = g_strdup(value);
 			}
-			else if (0 == strcmp(key, "implementation")) /* implementation limit */
+			else if (strcmp(key, "typeref") == 0)  /* 't' field */
+			{
+				/* typeref:typename:int */
+				const gchar *val = strchr(value, ':');
+				if (val && *(++val) &&
+					(g_str_has_prefix(value, "typename:") || g_str_has_prefix(value, "unknown:")))
+				{
+					/* "unknown:" above is used by the php parser, all other parsers use "typename:" */
+					g_free(tag->var_type);
+					tag->var_type = g_strdup(val);
+				}
+			}
+			else if (strcmp(key, "extras") == 0)  /* 'E' field */
+			{
+				/* extras may contain multiple values such as extras:fileScope,anonymous */
+				if (strstr(value, "anonymous"))
+					tag->flags |= tm_tag_flag_anon_t;
+			}
+			else if (strcmp(key, "implementation") == 0)  /* 'm' field */
 				tag->impl = tm_source_file_get_tag_impl(value);
-			else if (0 == strcmp(key, "line")) /* line */
-				tag->line = atol(value);
-			else if (0 == strcmp(key, "access")) /* access */
+			else if (strcmp(key, "access") == 0)  /* 'a' field */
 				tag->access = tm_source_file_get_tag_access(value);
-			else if (0 == strcmp(key, "class") ||
-					 0 == strcmp(key, "enum") ||
-					 0 == strcmp(key, "function") ||
-					 0 == strcmp(key, "struct") ||
-					 0 == strcmp(key, "union")) /* Name of the class/enum/function/struct/union in which this tag is a member */
+			else if (strcmp(key, "language") == 0)  /* 'l' field */
 			{
-				g_free(tag->scope);
-				tag->scope = g_strdup(value);
+				TMParserType tag_lang = tm_ctags_get_named_lang(value);
+				if (tag_lang >= 0)
+					tag->lang = tag_lang;
 			}
-			else if (0 == strcmp(key, "file")) /* static (local) tag */
-				tag->local = TRUE;
-			else if (0 == strcmp(key, "signature")) /* arglist */
+			else
+				g_array_append_val(unknown_fields, i);
+		}
+
+		if (!tag->scope)
+		{
+			/* search for scope introduced by scope kind name only after going
+			 * through all extension fields and having tag->lang updated when
+			 * "language" field is present */
+			for (guint i = 0; !tag->scope && i < unknown_fields->len; i++)
 			{
-				g_free(tag->arglist);
-				tag->arglist = g_strdup(value);
+				const guint idx = g_array_index(unknown_fields, guint, i);
+				const gchar *key = entry.fields.list[idx].key;
+				const gchar *value = entry.fields.list[idx].value;
+
+				for (gint j = 0; lang_kinds[j]; j++)
+				{
+					const gchar kind = lang_kinds[j];
+					const gchar *name = tm_ctags_get_kind_name(kind, tag->lang);
+					if (strcmp(key, name) == 0)
+					{
+						/* 's' field - scope without the 'scope:' prefix */
+						tag->scope = g_strdup(value);
+						break;
+					}
+				}
 			}
 		}
+
+		unknown_fields->len = 0;
+		g_ptr_array_add(file_tags, tag);
 	}
 
-	tag->file = file;
-	return TRUE;
+	g_array_unref(unknown_fields);
+	tagsClose(f);
 }
 
+
 static TMTag *new_tag_from_tags_file(TMSourceFile *file, FILE *fp, TMParserType mode, TMFileFormat format)
 {
 	TMTag *tag = tm_tag_new();
@@ -436,7 +446,7 @@ static TMTag *new_tag_from_tags_file(TMSourceFile *file, FILE *fp, TMParserType
 			result = init_tag_from_file_alt(tag, file, fp);
 			break;
 		case TM_FILE_FORMAT_CTAGS:
-			result = init_tag_from_file_ctags(tag, file, fp, mode);
+			g_warn_if_reached();  /* this should never be reached; ctags files are handled separately */
 			break;
 	}
 
@@ -508,7 +518,10 @@ GPtrArray *tm_source_file_read_tags_file(const gchar *tags_file, TMParserType mo
 		else if (buf[0] == '#' && strstr((gchar*) buf, "format=tagmanager") != NULL)
 			format = TM_FILE_FORMAT_TAGMANAGER;
 		else if (buf[0] == '#' && strstr((gchar*) buf, "format=ctags") != NULL)
+		{
 			format = TM_FILE_FORMAT_CTAGS;
+			g_warning("# format=ctags directive is no longer supported; please remove it from %s", tags_file);
+		}
 		else if (strncmp((gchar*) buf, "!_TAG_", 6) == 0)
 			format = TM_FILE_FORMAT_CTAGS;
 		else
@@ -533,9 +546,17 @@ GPtrArray *tm_source_file_read_tags_file(const gchar *tags_file, TMParserType mo
 	}
 
 	file_tags = g_ptr_array_new();
-	while (NULL != (tag = new_tag_from_tags_file(NULL, fp, mode, format)))
-		g_ptr_array_add(file_tags, tag);
-	fclose(fp);
+	if (format == TM_FILE_FORMAT_CTAGS)
+	{
+		fclose(fp);  /* the readtags library opens the file by itself */
+		read_ctags_file(tags_file, mode, file_tags);
+	}
+	else
+	{
+		while (NULL != (tag = new_tag_from_tags_file(NULL, fp, mode, format)))
+			g_ptr_array_add(file_tags, tag);
+		fclose(fp);
+	}
 
 	return file_tags;
 }

From 3f6f10c627ab33cdc04dfbee6c889768b784ec99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= <techet@gmail.com>
Date: Fri, 13 Oct 2023 00:17:31 +0200
Subject: [PATCH 4/4] Update geany.txt with ctags usage for generating tag
 files

Co-authored-by: Colomban Wendling <lists.ban@herbesfolles.org>
---
 doc/geany.txt | 87 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 61 insertions(+), 26 deletions(-)

diff --git a/doc/geany.txt b/doc/geany.txt
index a772c51271..78df0ba255 100644
--- a/doc/geany.txt
+++ b/doc/geany.txt
@@ -1685,25 +1685,32 @@ Global tags file format
 
 Global tags files can have three different formats:
 
-* Tagmanager format
-* Pipe-separated format
 * CTags format
+* Pipe-separated format
+* Tagmanager format
 
-For the tagmanager or pipe-separated format, the first line of global tag files
-should be a comment, introduced by ``#`` followed by a space and a string like
-``format=tagmanager`` or ``format=pipe``, respectively; these are
-case-sensitive.  This helps Geany to read the file properly. If this
-line is missing, Geany tries to auto-detect the used format but this
-might fail.  Tag files using the CTags format should be left unmodified in the
+Tag files using the CTags format should be left unmodified in the
 form generated by the ctags command-line tool.
 
+For the pipe-separated or tagmanager format, the first line of global tag files
+should be a comment, introduced by ``#`` followed by a space and
+``format=pipe`` or ``format=tagmanager``, respectively; these are
+case-sensitive.  This helps Geany to read the file properly. If this
+line is missing, Geany tries to auto-detect the format used but this
+might fail.
+
+
+CTags format
+************
+This is the recommended tags file format, generated by the ctags command-line
+tool from the universal-ctags project (https://github.com/universal-ctags/ctags).
+This format is compatible with the format historically used by Vi.
+
+The format is described at https://ctags.sourceforge.net/FORMAT, but
+for the full list of existing extensions please refer to universal-ctags.
+However, note that Geany may actually only honor a subset of the
+existing extensions.
 
-The Tagmanager format is a bit more complex and is used for files
-created by the ``geany -g`` command. There is one symbol per line.
-Different symbol attributes like the return value or the argument list
-are separated with different characters indicating the type of the
-following argument.  This is the more complete and recommended tags file
-format.
 
 Pipe-separated format
 *********************
@@ -1726,19 +1733,47 @@ You can easily write your own global tags files using this format.
 Just save them in your tags directory, as described earlier in the
 section `Global tags files`_.
 
-CTags format
-************
-This is the format that ctags generates, and that is used by Vim.
-This format is compatible with the format historically used by Vi.
 
-The format is described at https://ctags.sourceforge.net/FORMAT, but
-for the full list of existing extensions please refer to ctags.
-However, note that Geany may actually only honor a subset of the
-existing extensions.
+Tagmanager format
+*****************
+The Tagmanager format is a bit more complex and is used for files
+created by the ``geany -g`` command. There is one symbol per line.
+Different symbol attributes like the return value or the argument list
+are separated with different characters indicating the type of the
+following argument.
+
 
 Generating a global tags file
 `````````````````````````````
 
+Generating tags files using ctags
+*********************************
+This is currently the recommended way of generating tags files. Unlike the
+methods below which use the Geany binary for their generation, this method
+should produce tags files which are compatible across Geany releases, starting
+from Geany 2.0.
+
+Geany supports loading tag files generated using the ``ctags`` command-line
+tool from the universal-ctags project (https://github.com/universal-ctags/ctags).
+Even though Geany should work with any ctags file, it is recommended to use
+certain fields to give Geany some additional information. The recommended fields
+are ``EfiklsZSt``, so to generate symbols for all sources in the my_project
+directory one can use::
+
+    ctags -n --fields=EfiklsZSt -R -o my_project.c.tags my_project
+
+Additional options may be given to the ``ctags`` tool, for instance, to restrict
+the generated tags file to some languages only, use certain tag kinds, etc.
+
+Note that when the ``l`` field (specifying the programming language) is enabled,
+the language of all symbols is set based on the value of this field instead of
+the language specified in the extension of the tags file.  You however still
+have to name the file according to the same rules regardless of whether the
+``l`` field is used or not.
+
+
+Generating tags files using Geany
+*********************************
 You can generate your own global tags files by parsing a list of
 source files. The command is::
 
@@ -1760,8 +1795,8 @@ Example for the wxD library for the D programming language::
     geany -g wxd.d.tags /home/username/wxd/wx/*.d
 
 
-Generating C/C++ tags files
-***************************
+Generating C/C++ tags files using Geany
+***************************************
 You may need to first setup the `C ignore.tags`_ file.
 
 For C/C++ tags files gcc is required by default, so that header files
@@ -1779,8 +1814,8 @@ You can adapt this command to use CFLAGS and header files appropriate
 for whichever libraries you want.
 
 
-Generating tags files on Windows
-********************************
+Generating tags files on Windows using Geany
+********************************************
 This works basically the same as on other platforms::
 
     "c:\program files\geany\bin\geany" -g c:\mytags.php.tags c:\code\somefile.php