From be44028bc158c96ff9974ecb1a10b2de982d22fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ji=C5=99=C3=AD=20Techet?= <techet@gmail.com>
Date: Tue, 26 Apr 2022 14:31:32 +0200
Subject: [PATCH] Add script checking our unit test coverage of the mapped
 kinds

For each of the filetypes, the script uses universal-ctags binary to
generate a tags file for all the unit tests for the given filetype,
collects the kinds contained in the tags file and compares them to those
mapped inside tm_parser.c (except the ignored ones mapped to
tm_tag_undef_t). It then prints those kinds that are mapped in
tm_parser.c but not present in the ctags output.

The script runs a universal-ctags binary that must be present in
PATH and the script assumes it's executed from the root directory
containing Geany sources (by './scripts/test-units.py'). Since Geany
now uses most of the upstream parsers, the output of universal-ctags should
correspond to the output of Geany. Note that we cannot use our own
tags files for this test as they don't contain the original kind but rather
the kind mapped to Geany's internal representation (tm_tag_..._t).
---
 scripts/test-units.py | 121 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100755 scripts/test-units.py

diff --git a/scripts/test-units.py b/scripts/test-units.py
new file mode 100755
index 0000000000..5c550e21d7
--- /dev/null
+++ b/scripts/test-units.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+
+import os
+
+LANGS = {  # parser name (the <NAME> of map_<NAME> in tm_parser.c) -> shell glob(s) of its unit tests, relative to tests/ctags
+	'C': '*.c *.h',
+	'CPP': '*.cpp *.hpp',
+	'JAVA': '*.java',
+	'MAKEFILE': '*.mak',
+	'PASCAL': '*.pas',
+	'PERL': '*.pl *.pm',
+	'PHP': '*.php',
+	'PYTHON': '*.py *.pyx',
+	'LATEX': '*.tex',
+	'ASM': '*.asm',
+	'CONF': '*.conf',
+	'SQL': '*.sql',
+# skipped: not in ctags
+#	'DOCBOOK': '*.docbook',
+	'ERLANG': '*.erl',
+	'CSS': '*.css',
+	'RUBY': '*.rb',
+	'TCL': '*.tcl',
+	'SH': '*.sh *.ksh',
+	'D': '*.d',
+# skipped: like F77 - once the new parser is merged, keep FORTRAN only
+#	'FORTRAN': '',
+	'GDSCRIPT': '*.gd',
+	'DIFF': '*.diff',
+	'VHDL': '*.vhd',
+	'LUA': '*.lua',
+	'JAVASCRIPT': '*.js',
+	'HASKELL': '*.hs',
+	'CSHARP': '*.cs',
+	'FREEBASIC': '*.bas',
+	'HAXE': '*.hx',
+	'REST': '*.rst',
+	'HTML': '*.html',
+	'F77': '*.f90 *.f *.for *.f95',
+# skipped: like C
+#	'CUDA': '',
+	'MATLAB': '*.m',
+# skipped: not in ctags
+#	'VALA': '*.vala',
+	'ACTIONSCRIPT': 'actionscript/*.as',
+	'NSIS': '*.nsi',
+	'MARKDOWN': '*.md',
+	'TXT2TAGS': '*.t2t',
+	'ABC': '*.abc',
+	'VERILOG': '*.v',
+# skipped: missing unit tests
+#	'R': '',
+	'COBOL': 'cobol/*.cbl',
+	'OBJC': '*.mm',
+	'ASCIIDOC': '*.asciidoc',
+	'ABAQUS': '*.inp',
+	'RUST': '*.rs',
+	'GO': '*.go',
+	'JSON': '*.json',
+# skipped: like PHP
+#	'ZEPHIR': '*.zep',
+	'POWERSHELL': '*.ps1',
+	'JULIA': '*.jl',
+# skipped: missing unit tests
+#	'BIBTEX': '',
+# skipped: "virtual" parser used by C/C++
+#	'CPREPROCESSOR': '',
+	'CLOJURE': '*.clj',
+	'OCAML': '*.ml',
+	'LISP': '*.lisp',
+	'TYPESCRIPT': '*.ts',
+	'ADA': '*.ads *.adb',
+	'RAKU': '*.raku',
+	'BATCH': '*.bat',
+}
+
+TAGFILE='test_units.tags'  # ctags output file; written to and read from the directory the script is run from
+
+# collect the kind letter of every tag entry found in TAGFILE
+def get_used_kinds():
+	kinds = set()
+	with open(TAGFILE, encoding="cp1252") as tags:
+		for entry in tags:
+			# the kind letter is the first character after the ;"<TAB> separator
+			fields = entry.split(";\"\t")
+			if len(fields) > 1:
+				kinds.add(fields[1][0])
+	return kinds
+
+# collect the kinds of the given parser that tm_parser.c maps to anything but tm_tag_undef_t
+def get_mapped_kinds(lang):
+	decl = 'static TMParserMapEntry map_' + lang
+	kinds = set()
+	in_map = False
+	with open("src/tagmanager/tm_parser.c") as src:
+		for row in src:
+			if decl in row or (lang == 'COMMON_C' and '#define COMMON_C' in row):
+				in_map = True
+			elif in_map:
+				# any line shorter than 10 characters is taken as the end of the map
+				if len(row) < 10:
+					break
+				if row.startswith("\t{'") and 'tm_tag_undef_t' not in row:
+					kinds.add(row[3])
+	return kinds
+
+# get all kinds mapped in tm_parser.c but not present in the ctags output for the unit tests of lang
+def get_diff(lang):
+	if os.path.exists(TAGFILE):
+		os.remove(TAGFILE)  # if ctags fails, raise on open instead of reading the previous language's tags
+	os.system('cd tests/ctags && ctags -o ../../' + TAGFILE + ' --kinds-all=* ' + LANGS[lang])
+	mapped = get_mapped_kinds(lang)
+	if lang in ('C', 'CPP'):
+		mapped |= get_mapped_kinds('COMMON_C')  # C and C++ also share the kinds of the COMMON_C macro
+	return mapped - get_used_kinds()
+
+
+for lang in LANGS:  # print, per language, the mapped kinds missing from the ctags output
+	missing = get_diff(lang)
+	if missing:
+		print(lang + ': ' + str(sorted(missing)))