Skip to content

Commit

Permalink
prettify kRSUnicode expansion regex
Browse files (browse the repository at this point in the history)
  • Loading branch information
tony committed May 28, 2017
1 parent fa7aad1 commit 72c0312
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions unihan_etl/expansion.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,12 +258,18 @@ def expand_kSBGY(value):


def expand_kRSUnicode(value):
    """Expand kRSUnicode entries into radical/stroke dictionaries, in place.

    Each entry is expected to look like ``<radical>['].<strokes>``, for
    example ``"120'.3"``: a radical number of one to three digits with no
    leading zero, an optional apostrophe, a literal dot, and a stroke count
    of one or two digits that may be negative.

    :param value: list of kRSUnicode strings; every element is replaced by
        its expanded dictionary.
    :returns: the same list object, each item now a dict with the keys
        ``radical`` (int), ``strokes`` (int) and ``simplified`` (bool, True
        when the apostrophe is present).
    :raises AttributeError: if an entry does not start with the expected
        format, because ``pattern.match`` then returns ``None``.
    """
    # Verbose mode lets each component of the entry sit on its own line;
    # the named groups replace positional indexing into a re.split() result.
    pattern = re.compile(r"""
        (?P<radical>[1-9][0-9]{0,2})
        (?P<simplified>\'?)\.
        (?P<strokes>-?[0-9]{1,2})
    """, re.X)

    for i, v in enumerate(value):
        m = pattern.match(v).groupdict()
        value[i] = {
            "radical": int(m['radical']),
            "strokes": int(m['strokes']),
            "simplified": m['simplified'] == "'",
        }
    return value

Expand Down

0 comments on commit 72c0312

Please sign in to comment.