Skip to content

Commit

Permalink
Add some examples in Chinese
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Aug 30, 2021
1 parent d9aa14b commit a86fcf3
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 2 deletions.
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ setup-virtualenv:
# Setup prerequisites
#
# Installs the packages listed in requirements.txt using $(pip), then
# downloads the spaCy models `en`, `de`, `en_core_web_md`,
# `de_core_news_md` and `zh_core_web_md` using $(python).
# Runs after `setup-virtualenv`. Declared phony because it names a
# command, not a file this rule produces.
.PHONY: setup
setup: setup-virtualenv
	$(pip) install --requirement=requirements.txt
	$(python) -m spacy download en
	$(python) -m spacy download de
	$(python) -m spacy download en_core_web_md
	$(python) -m spacy download de_core_news_md
	$(python) -m spacy download zh_core_web_md

# Setup Flair
setup-flair: setup-virtualenv
Expand Down
4 changes: 4 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ Usage
python wq.py Berliner Tagestemperatur um 11:00 Uhr
python wq.py Berliner Tagestemperaturen um 11:00 Uhr

# Chinese
python wq.py 成都的雨
python wq.py 南昌的温度


.. _Flair: https://pypi.org/project/flair/
.. _spaCy: https://pypi.org/project/spacy/
Expand Down
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
numpy==1.21.2
spacy<3
spacy-langdetect

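# pkuseg is installed from a pinned GitHub commit archive (see the URL below)
# rather than from a package index. Background:
# - https://github.com/lancopku/pkuseg-python/issues/148
# - https://github.com/explosion/spaCy/discussions/7370#discussioncomment-455375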
https://github.com/lancopku/pkuseg-python/archive/d581c95.zip
28 changes: 28 additions & 0 deletions test_wq.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,31 @@ def test_german_ozon():
def test_german_particulates():
result = analyze_spacy("Feinstaub in Stuttgart am 17.09.2020")
assert result == Result(where="Stuttgart", when="am 17.09.2020", what="Feinstaub")


def test_chinese_rain():
    # Query: "Rain in Chengdu" (Chinese rendering obtained via DeepL).
    # The query carries no time expression; the expected `when` is 'now'.
    outcome = analyze_spacy("成都的雨")
    expected = Result(where='成都', when='now', what='雨')
    assert outcome == expected


def test_chinese_temperature_now():
    # Query: "Temperature in Nanchang" (Chinese rendering obtained via DeepL).
    # The query carries no time expression; the expected `when` is 'now'.
    outcome = analyze_spacy("南昌的温度")
    expected = Result(where='南昌', when='now', what='温度')
    assert outcome == expected


def test_chinese_temperature_tomorrow():
    # Query: "Temperature in Nanchang tomorrow" (Chinese rendering obtained via DeepL).
    # The expected `when` is the Chinese word for "tomorrow", taken verbatim.
    outcome = analyze_spacy("南昌明天的温度")
    expected = Result(where='南昌', when='明天', what='温度')
    assert outcome == expected


def test_chinese_temperature_on_date():
    # Query: "Temperature in Nanchang on 2020-09-17" (Chinese rendering obtained via DeepL).
    # The expected `when` is the full date expression exactly as written in the query.
    outcome = analyze_spacy("2020年9月17日南昌市的温度")
    expected = Result(where='南昌市', when='2020年9月17日', what='温度')
    assert outcome == expected
15 changes: 15 additions & 0 deletions wq.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ def detect_language(text: str):
doc = nlp(text)
language = doc._.language["language"]

# Correct language misdetections.
if language == "ko":
language = "zh-cn"

# Map language to model.
if language == "zh-cn":
language = "zh_core_web_md"

return language


Expand Down Expand Up @@ -199,6 +207,13 @@ def improve_with_heuristics(nlp, expression, sentence):
except IndexError:
pass

# "Temperature in Nanchang on 2020-09-17" in Chinese: "2020年9月17日南昌市的温度"
if what in when:
for noun in dh.find_tokens("NOUN"):
if noun.lemma_ in when:
continue
what = noun.lemma_

result = Result(where=where, when=when, what=what)
return result

Expand Down

0 comments on commit a86fcf3

Please sign in to comment.