Skip to content

Commit

Permalink
use regex for kCheungBauer match
Browse files Browse the repository at this point in the history
  • Loading branch information
tony committed May 26, 2017
1 parent 9c9a4b1 commit 565b7a1
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions unihan_etl/expansion.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,14 @@ def expand_kXHC1983(value):

def expand_kCheungBauer(value):
    """Expand kCheungBauer entries into dictionaries, in place.

    Each entry is a string of the form ``RRR/SS;CANGJIE;readings``: a
    three-digit radical number, a two-digit stroke count, an optional run
    of uppercase letters (the cangjie code, which may be empty) and one or
    more comma-separated readings.

    :param value: list of raw entry strings; every item is replaced by its
        expanded dict.
    :returns: the same list object, with each item of the form
        ``{"radical": int, "strokes": int, "cangjie": str or None,
        "readings": list of str}``.
    :raises ValueError: if an entry does not contain the expected pattern.
    """
    # Compiled once, outside the loop. ``search`` (rather than ``match``)
    # keeps the "pattern may start anywhere in the string" behaviour that
    # the previous ``re.split`` call had.
    pattern = re.compile(
        r'(?P<radical>[0-9]{3})\/(?P<strokes>[0-9]{2});'
        r'(?P<cangjie>[A-Z]*);'
        r'(?P<readings>[a-z1-6\[\]\/,]+)'
    )
    for i, v in enumerate(value):
        found = pattern.search(v)
        if found is None:
            # Fail with the offending entry instead of an opaque IndexError.
            raise ValueError('Malformed kCheungBauer entry: %r' % (v,))
        value[i] = {
            "radical": int(found.group('radical')),
            "strokes": int(found.group('strokes')),
            # An empty cangjie field is normalised to None.
            "cangjie": found.group('cangjie') or None,
            "readings": found.group('readings').split(','),
        }
    return value

Expand Down

0 comments on commit 565b7a1

Please sign in to comment.