Skip to content

Commit

Permalink
added class variable script and custom pydantic validator to LookupDa…
Browse files Browse the repository at this point in the history
…ta class. tested in test_lookup_data::test_lookup_data_script_raises()
  • Loading branch information
iwpnd committed Mar 2, 2020
1 parent 394f3f1 commit 213d8ae
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 0 deletions.
28 changes: 28 additions & 0 deletions flashgeotext/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
from loguru import logger
from pydantic import BaseModel
from pydantic import StrictStr
from pydantic import validator

from flashgeotext.settings import DEMODATA_CITIES
from flashgeotext.settings import DEMODATA_COUNTRIES
from flashgeotext.settings import SCRIPTS


class LookupDuplicateError(Exception):
Expand Down Expand Up @@ -82,17 +84,34 @@ def __repr__(self):
class LookupData(BaseModel, object):
"""Data that is supposed to be looked up in a text
Setting a script here adds the characters of that script (see resources/scripts.json)
to the default set of non_word_boundaries, which is:
>> {'k', '6', 's', 'M', 'i', 'S', 'm', 'E', 'r', 'W', 'v', 'l',
'R', 'f', 'e', 'X', '7', '3', 'q', 'w', '0', 'x', 'V', 'C', 'n',
'I', '4', 'D', 'z', 'G', 'L', '2', 'T', 'U', '_', 'B', 't', 'Q',
'd', '9', 'h', 'o', 'c', 'u', 'P', 'K', 'Y', 'p', 'A', 'J', 'O',
'N', 'H', 'j', 'a', 'Z', '5', '1', 'b', 'y', 'F', '8', 'g'}
Args:
name (pydantic.StrictStr): Human readable name as string describing the data.
data (dict): dictionary containing data to lookup and their synonyms
script (pydantic.StrictStr): name of the script whose characters are added to non_word_boundaries
Attributes:
name (pydantic.StrictStr): Human readable name as string describing the data.
data (dict): dictionary containing data to lookup and their synonyms
script (pydantic.StrictStr): name of the script whose characters are added to non_word_boundaries
"""

name: StrictStr
data: dict
script: StrictStr = "default"

@validator("script")
def script_must_be_in_scripts(cls, value):
    """Accept the given script name only if it is contained in SCRIPTS.

    Args:
        value: the script name passed for the ``script`` field.

    Returns:
        The unchanged script name when it is found in SCRIPTS.

    Raises:
        ValueError: if the script name is not found in SCRIPTS.
    """
    # Guard clause: a known script passes through untouched.
    if value in SCRIPTS:
        return value

    raise ValueError("must be supported script")

def validate(self) -> dict:
"""Validate if data attribute has appropiate structure.
Expand Down Expand Up @@ -163,6 +182,15 @@ def add(self, lookup: LookupData, update: bool = False) -> None:
else:
self.pool[lookup.name] = KeywordProcessor(case_sensitive=True)
self.pool[lookup.name].add_keywords_from_dict(lookup.data)

# if there is a script specified, then update non word boundaries with
# characters from script

if lookup.script != "default":
self.pool[lookup.name].non_word_boundaries.update(
SCRIPTS[lookup.script]["chars"]
)

logger.debug(f"{lookup.name} added to pool")

def remove(self, lookup_to_remove: str) -> None:
Expand Down
30 changes: 30 additions & 0 deletions tests/unit/test_lookup_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from contextlib import nullcontext

import pytest
from pydantic import ValidationError

Expand Down Expand Up @@ -36,6 +38,34 @@ def test_lookup_data_raises(id, name, data, expectation):
assert isinstance(lookup, LookupData)


@pytest.mark.parametrize(
    "id, name, data, script, expectation",
    [
        # 1: "german" is expected to be rejected by the script validator.
        (
            1,
            "cities",
            {"Нижневартовск": ["Нижневартовск"]},
            "german",
            pytest.raises(ValidationError),
        ),
        # 2: passing "default" explicitly is also expected to be rejected.
        (
            2,
            "cities",
            {"Нижневартовск": ["Нижневартовск"]},
            "default",
            pytest.raises(ValidationError),
        ),
        # 3: "cyrillic" is expected to be accepted without raising.
        (3, "cities", {"Нижневартовск": ["Нижневартовск"]}, "cyrillic", nullcontext()),
    ],
)
def test_lookup_data_script_raises(id, name, data, script, expectation):
    """Check that LookupData accepts or rejects the ``script`` argument.

    Each case supplies a context manager as ``expectation``: either
    ``pytest.raises(ValidationError)`` when construction must fail, or
    ``nullcontext()`` when construction must succeed.
    """
    with expectation:
        lookup = LookupData(name=name, data=data, script=script)

        # Only reached when construction did not raise (the nullcontext case).
        assert isinstance(lookup, LookupData)


@pytest.mark.parametrize(
"id, name, data, error_count",
[
Expand Down

0 comments on commit 213d8ae

Please sign in to comment.