From 17251f6faba4c70cf4f6241721c4bcb25dad9988 Mon Sep 17 00:00:00 2001 From: Daniel Swanson Date: Fri, 1 Mar 2024 14:31:54 -0500 Subject: [PATCH] add <f/> as dictionary symbol for reading separator for lsx pgen --- configure.ac | 2 +- lttoolbox/compiler.cc | 6 ++++++ lttoolbox/compiler.h | 2 ++ lttoolbox/dix.dtd | 3 +++ lttoolbox/transducer.h | 1 + 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 1a51dd15..1e83f7ec 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.52) m4_define([PKG_VERSION_MAJOR], [3]) m4_define([PKG_VERSION_MINOR], [7]) -m4_define([PKG_VERSION_PATCH], [7]) +m4_define([PKG_VERSION_PATCH], [8]) # Bump if the ABI (not API) changed in a backwards-incompatible manner m4_define([PKG_VERSION_ABI], [3]) diff --git a/lttoolbox/compiler.cc b/lttoolbox/compiler.cc index 753382a9..0d0ce3e9 100644 --- a/lttoolbox/compiler.cc +++ b/lttoolbox/compiler.cc @@ -385,6 +385,10 @@ Compiler::readString(std::vector &result, UStringView name) result.push_back(word_boundary); } } + else if (is_separable && name == COMPILER_LSX_FORM_SEP_ELEM) { + requireEmptyError(name); + result.push_back(reading_boundary); + } else { std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); @@ -966,11 +970,13 @@ Compiler::procNode() alphabet.includeSymbol(Transducer::LSX_BOUNDARY_SYMBOL); alphabet.includeSymbol(Transducer::LSX_BOUNDARY_SPACE_SYMBOL); alphabet.includeSymbol(Transducer::LSX_BOUNDARY_NO_SPACE_SYMBOL); + alphabet.includeSymbol(Transducer::READING_SEPARATOR_SYMBOL); any_tag = alphabet(Transducer::ANY_TAG_SYMBOL); any_char = alphabet(Transducer::ANY_CHAR_SYMBOL); word_boundary = alphabet(Transducer::LSX_BOUNDARY_SYMBOL); word_boundary_s = alphabet(Transducer::LSX_BOUNDARY_SPACE_SYMBOL); word_boundary_ns = alphabet(Transducer::LSX_BOUNDARY_NO_SPACE_SYMBOL); + reading_boundary = alphabet(Transducer::READING_SEPARATOR_SYMBOL); } } else if(name == COMPILER_ALPHABET_ELEM) diff --git a/lttoolbox/compiler.h 
b/lttoolbox/compiler.h index 9b8d42f8..e9d65e96 100644 --- a/lttoolbox/compiler.h +++ b/lttoolbox/compiler.h @@ -176,6 +176,7 @@ class Compiler int32_t word_boundary = 0; int32_t word_boundary_s = 0; int32_t word_boundary_ns = 0; + int32_t reading_boundary = 0; /** * Method to parse an XML Node @@ -354,6 +355,7 @@ class Compiler static constexpr UStringView COMPILER_LSX_WB_ELEM = u"d"; static constexpr UStringView COMPILER_LSX_CHAR_ELEM = u"w"; static constexpr UStringView COMPILER_LSX_TAG_ELEM = u"t"; + static constexpr UStringView COMPILER_LSX_FORM_SEP_ELEM = u"f"; static constexpr UStringView COMPILER_LSX_SPACE_ATTR = u"space"; static constexpr UStringView COMPILER_LSX_SPACE_YES_VAL = u"yes"; static constexpr UStringView COMPILER_LSX_SPACE_NO_VAL = u"no"; diff --git a/lttoolbox/dix.dtd b/lttoolbox/dix.dtd index b17b52ed..6d491fe1 100644 --- a/lttoolbox/dix.dtd +++ b/lttoolbox/dix.dtd @@ -171,3 +171,6 @@ + + + diff --git a/lttoolbox/transducer.h b/lttoolbox/transducer.h index 814fe2e6..98391fe3 100644 --- a/lttoolbox/transducer.h +++ b/lttoolbox/transducer.h @@ -99,6 +99,7 @@ class Transducer static constexpr UStringView LSX_BOUNDARY_SYMBOL = u"<$>"; static constexpr UStringView LSX_BOUNDARY_SPACE_SYMBOL = u"<$_>"; static constexpr UStringView LSX_BOUNDARY_NO_SPACE_SYMBOL= u"<$->"; + static constexpr UStringView READING_SEPARATOR_SYMBOL = u""; static constexpr UStringView COMPOUND_ONLY_L_SYMBOL = u""; static constexpr UStringView COMPOUND_R_SYMBOL = u"";