Skip to content

Commit

Permalink
Improve language detection
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Aug 30, 2021
1 parent d661712 commit 57ac944
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
2 changes: 1 addition & 1 deletion test_wq.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_english_forecast_rain():


def test_english_snowdepth():
    """Check that an English snow-depth query is parsed into the expected Result."""
    # The sentence is capitalised; the lowercase variant is the pre-commit
    # input that this commit replaced.
    result = analyze_spacy("Snow depth on Zugspitze")
    assert result == Result(where="Zugspitze", when="now", what="Snow Depth")


Expand Down
27 changes: 19 additions & 8 deletions wq.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dataclasses import dataclass

import spacy
from spacy_langdetect import LanguageDetector

from util import DocHelper

Expand All @@ -21,28 +22,38 @@ class Result:
what: str


# Cache of already-loaded spaCy pipelines, keyed by model name.
use_models = {}

# English pipeline used by the language detector; populated on first use.
english_model = None


def load_model(name):
    """Return the spaCy pipeline for *name*, loading it at most once.

    The first request for a given name calls ``spacy.load(name)`` and stores
    the result in the module-level ``use_models`` cache; later requests return
    the cached object.

    :param name: Model name, passed unchanged to ``spacy.load``.
    :return: The pipeline object returned by ``spacy.load`` for *name*.
    """
    # No ``global`` statement is needed: the dict is mutated, never rebound.
    if name not in use_models:
        use_models[name] = spacy.load(name)
    return use_models[name]


def detect_language(text: str):
import spacy
from spacy_langdetect import LanguageDetector

# Short-circuit misdetections.
# Why? "snow depth on Zugspitze" is sometimes detected as German.
if "snow" in text.lower():
return "en"

global english_model

if english_model is None:
english_model = spacy.load("en")

nlp = english_model

# Detect language.
nlp = load_model("en")
try:
nlp.add_pipe(LanguageDetector(), name="language_detector", last=True)
except:
Expand Down

0 comments on commit 57ac944

Please sign in to comment.