Skip to content

Commit

Permalink
add kHanyuPinlu regex match, with zhon regex
Browse files Browse the repository at this point in the history
  • Loading branch information
tony committed May 27, 2017
1 parent 90b49a5 commit 59b321c
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions unihan_etl/expansion.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
unicode_literals, with_statement)

import re
import zhon.pinyin

from unihan_etl.constants import SPACE_DELIMITED_FIELDS

Expand Down Expand Up @@ -204,9 +205,12 @@ def expand_kFenn(value):

def expand_kHanyuPinlu(value):
    """Expand ``phonetic(frequency)`` strings into dicts, in place.

    Each item of *value* is expected to hold a phonetic reading followed
    by an integer in parentheses, e.g. ``"a(32)"``. Every item is
    replaced by ``{"phonetic": <reading>, "frequency": <int>}``.

    :param value: list of ``phonetic(frequency)`` strings; mutated in place.
    :returns: the same list object, now holding one dict per item.
    :raises IndexError: if an item contains no ``phonetic(frequency)``
        match (the split then yields a single-element list).
    """
    # The pattern does not depend on the item, so build it once per call
    # instead of once per iteration. The character class accepts plain
    # a-z plus every character in zhon.pinyin.lowercase.
    pattern = re.compile(
        r'([a-z{}]+)\(([0-9]+)\)'.format(zhon.pinyin.lowercase)
    )
    for i, v in enumerate(value):
        # Splitting on a pattern with two capture groups yields
        # [prefix, phonetic, frequency, suffix, ...]; only the first
        # match is used.
        vre = pattern.split(v)
        value[i] = {
            "phonetic": vre[1],
            "frequency": int(vre[2])
        }
    return value

Expand Down

0 comments on commit 59b321c

Please sign in to comment.