Skip to content

Commit

Permalink
refactor: lookup validation
Browse files Browse the repository at this point in the history
  • Loading branch information
iwpnd committed Jan 25, 2023
1 parent 95aff8f commit 561f287
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 41 deletions.
63 changes: 38 additions & 25 deletions flashgeotext/lookup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import sys
from typing import Dict, List

from flashtext import KeywordProcessor
from loguru import logger
Expand Down Expand Up @@ -39,6 +40,12 @@ def __init__(self, message: str):
self.message = message


class ValidationResult(BaseModel):
    """Outcome of validating lookup data.

    Holds a human-readable status message, the total number of errors
    found, and the error messages grouped by the name they were
    recorded under.
    """

    # Summary message; this default describes a run that found no errors.
    status: str = "No errors detected"
    # Total number of individual error messages recorded.
    error_count: int = 0
    # Maps a name to the list of error messages recorded for it.
    # NOTE(review): a literal `{}` default is only safe if BaseModel copies
    # field defaults per instance (pydantic's BaseModel does) - confirm that
    # BaseModel here is pydantic's, as its import is not visible in this hunk.
    errors: Dict[str, List[str]] = {}


class LookupValidation:
"""Data validation container object
Expand All @@ -55,18 +62,31 @@ class LookupValidation:
}
"""

def __init__(
self,
status: str = "No errors detected",
error_count: int = 0,
errors: dict = {},
):
self.status = status
self.error_count = error_count
self.errors = {}
data: ValidationResult

def __init__(self):
self.data = ValidationResult()

def has_error(self, name: str) -> bool:
return name in self.data.errors

def get_error_count(self) -> int:
return self.data.error_count

def __repr__(self):
return f"<LookupValidation: {self.__dict__}>"
def set_error_status(self) -> None:
self.data.status = f"Found {self.data.error_count} errors"

def add_error(self, name: str, error: str) -> None:
if not self.has_error(name):
self.data.errors[name] = [error]
self.data.error_count = self.data.error_count + 1
return

self.data.errors[name].append(error)
self.data.error_count = self.data.error_count + 1

def to_dict(self):
return self.data.dict()


class LookupData(BaseModel, object):
Expand Down Expand Up @@ -112,23 +132,16 @@ def validate(self) -> dict:

for key, value in self.data.items():
if not isinstance(value, list):
validation.errors[key] = [f"data[{key}] is not a list of synonyms"]
validation.error_count = validation.error_count + 1

if key not in value:
if key in validation.errors:
validation.errors[key] = validation.errors[key] + [
f"{key} missing in list of synonyms"
]
else:
validation.errors[key] = [f"{key} missing in list of synonyms"]
validation.add_error(key, f"{key} is not a list of synonyms")
validation.add_error(key, f"{key} missing in list of synonyms")

validation.error_count = validation.error_count + 1
if isinstance(value, list) and key not in value:
validation.add_error(key, f"{key} missing in list of synonyms")

if validation.error_count > 0:
validation.status = f"Found {validation.error_count} errors"
if validation.get_error_count() > 0:
validation.set_error_status()

return validation.__dict__
return validation.to_dict()


class LookupDataPool:
Expand Down
2 changes: 1 addition & 1 deletion flashgeotext/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
DEMODATA_DIR = os.path.join(PARENT_DIRECTORY, "resources")
DEMODATA_CITIES = DEMODATA_DIR + "/cities.json"
DEMODATA_COUNTRIES = DEMODATA_DIR + "/countries.json"
LOGLEVEL = "WARNING" if not os.getenv("LOGURU_LEVEL") else os.getenv("LOGURU_LEVEL")
LOGLEVEL: str = os.getenv("LOGURU_LEVEL") or "WARNING"

with open(DEMODATA_DIR + "/scripts.json", "r", encoding="utf-8") as f:
SCRIPTS = json.loads(f.read())
35 changes: 20 additions & 15 deletions tests/unit/test_lookup_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest
from pydantic import ValidationError

from flashgeotext.lookup import LookupData, LookupValidation, load_data_from_file
from flashgeotext.lookup import LookupData, load_data_from_file
from flashgeotext.settings import DEMODATA_CITIES, DEMODATA_COUNTRIES


Expand Down Expand Up @@ -66,16 +66,15 @@ def test_lookup_data_script_raises(id, name, data, script, expectation):
@pytest.mark.parametrize(
"id, name, data, error_count",
[
(1, "cities", {"Berlin": "Berlin"}, 1),
(1, "cities", {"Berlin": "Berlin"}, 2),
(2, "cities", {"Berlin": ["Dickes B"]}, 1),
(2, "cities", {"Berlin": "Hamburg"}, 2),
(3, "cities", {"Berlin": "Hamburg"}, 2),
],
)
def test_lookup_data_validate(id, name, data, error_count):
lookup = LookupData(name=name, data=data)

validation = lookup.validate()

assert validation["error_count"] == error_count


Expand All @@ -92,18 +91,24 @@ def test_lookup_data_demo_data(id, name, demodata):
validation = lookup.validate()

assert validation["error_count"] == 0
assert isinstance(validation, dict)


def test_lookup_data_validate_repr():
lookup = LookupData(name="countries", data=load_data_from_file(DEMODATA_COUNTRIES))

assert isinstance(lookup.validate(), dict)
def test_lookup_data_invalid():
districts = {
"Friedrichshain-Kreuzberg": ["Friedrichshain", "Kreuzberg", "Fhain"],
"Steglitz-Zehlendorf": 1,
"Tempelhof-Schoeneberg": ["Tempelhof-Schoeneberg", "THF"],
}
lookup = LookupData(name="district", data=districts)

validation = lookup.validate()

def test_lookupvalidation_repr():
validation = LookupValidation()

assert (
repr(validation)
== "<LookupValidation: {'status': 'No errors detected', 'error_count': 0, 'errors': {}}>"
)
assert validation["error_count"] == 3
assert validation["errors"]["Friedrichshain-Kreuzberg"] == [
"Friedrichshain-Kreuzberg missing in list of synonyms"
]
assert validation["errors"]["Steglitz-Zehlendorf"] == [
"Steglitz-Zehlendorf is not a list of synonyms",
"Steglitz-Zehlendorf missing in list of synonyms",
]

0 comments on commit 561f287

Please sign in to comment.