Skip to content

Commit

Permalink
cache, remove old references, drop py 3.8
Browse files Browse the repository at this point in the history
  • Loading branch information
pudo committed Sep 29, 2023
1 parent 009819b commit 367711d
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 6 deletions.
1 change: 0 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ jobs:
strategy:
matrix:
python:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
Expand Down
2 changes: 2 additions & 0 deletions fingerprints/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from fingerprints.fingerprint import fingerprint
from fingerprints.cleanup import clean_entity_prefix
from fingerprints.cleanup import clean_brackets
from fingerprints.cleanup import clean_name_light
from fingerprints.cleanup import clean_name_ascii
from fingerprints.types import remove_types, replace_types
Expand All @@ -10,6 +11,7 @@
"fingerprint",
"generate",
"clean_entity_prefix",
"clean_brackets",
"clean_name_light",
"clean_name_ascii",
"remove_types",
Expand Down
10 changes: 9 additions & 1 deletion fingerprints/cleanup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import logging
from typing import Optional
from functools import lru_cache
from normality import collapse_spaces, ascii_text, category_replace

from fingerprints.constants import WS, BRACKETED
Expand Down Expand Up @@ -58,15 +59,22 @@ def clean_brackets(text: str) -> str:
return BRACKETED.sub(WS, text)


@lru_cache(maxsize=2000)
def clean_name_ascii(text: Optional[str]) -> Optional[str]:
"""Super-hardcore string scrubbing."""
# transliterate to ascii
text = ascii_text(text)
if text is None:
return None
return clean_name_light(text)
# replace punctuation and symbols
text = CHARACTERS_REMOVE_RE.sub("", text)
text = text.lower()
cleaned = category_replace(text)
cleaned = collapse_spaces(cleaned)
return cleaned


@lru_cache(maxsize=2000)
def clean_name_light(text: str) -> Optional[str]:
"""Clean up a name for comparison, but don't convert to ASCII/Latin."""
# replace punctuation and symbols
Expand Down
2 changes: 0 additions & 2 deletions fingerprints/types/check.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import os
import yaml
from typing import Set
from normality import slugify

from fingerprints.types.common import TYPES_PATH, TypesList

Expand Down
6 changes: 4 additions & 2 deletions fingerprints/types/replacer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from fingerprints.types.data import TYPES
from fingerprints.constants import WS
from fingerprints.cleanup import clean_strict
from fingerprints.cleanup import clean_name_ascii

log = logging.getLogger(__name__)
NormFunc = Callable[[Optional[str]], Optional[str]]
Expand Down Expand Up @@ -61,7 +61,9 @@ def normalize_replacements(norm_func: NormFunc) -> Dict[str, str]:


@lru_cache(maxsize=None)
def get_replacer(clean: NormFunc = clean_strict, remove: bool = False) -> ReplaceFunc:
def get_replacer(
clean: NormFunc = clean_name_ascii, remove: bool = False
) -> ReplaceFunc:
replacements = normalize_replacements(clean)
return Replacer(replacements, remove=remove)

Expand Down

0 comments on commit 367711d

Please sign in to comment.