From e1fbc0f2ed848d7fa00e2c24190bb5cd37ad36d9 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 10 Dec 2023 07:05:05 -0600 Subject: [PATCH] expansion(kRSGeneric): Fix double apostrophe (non-Chinese simplified radical) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit via https://www.unicode.org/reports/tr38/#kRSUnicode: > The standard radical-stroke count for this ideograph in the form “radical.additional strokes.” The radical is indicated by a number in the range 1–214, followed by an optional single apostrophe (U+0027 ' apostrophe) or double apostrophe ('') suffix. A single apostrophe after the radical indicates a Chinese simplified version of the given radical. Two apostrophes after the radical indicates a non-Chinese simplified version of the given radical. The “additional strokes” value is the residual stroke-count, the count of all strokes remaining after eliminating all strokes associated with the radical. --- src/unihan_etl/expansion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/unihan_etl/expansion.py b/src/unihan_etl/expansion.py index a6827f76..ebc03665 100644 --- a/src/unihan_etl/expansion.py +++ b/src/unihan_etl/expansion.py @@ -578,7 +578,7 @@ def _expand_kRSGeneric(value: t.List[str]) -> t.List[kRSGenericDict]: pattern = re.compile( r""" (?P[1-9][0-9]{0,2}) - (?P\'?)\. + (?P\'{0,2})\. (?P-?[0-9]{1,2}) """, re.X,