diff --git a/CHANGELOG.md b/CHANGELOG.md index 37a1172ae9..03080c415f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +## 0.18.14-dev0 + +### Enhancements + +### Features + +### Fixes + +- **Change short text language detection log to debug** Short, ASCII-only text (fewer than five words) that defaults to English is now logged at debug level instead of warning level, which reduces log spam. + ## 0.18.13 ### Enhancements @@ -6,7 +16,7 @@ ### Fixes -- **Parse a wider variety of date formats in email headers** The `partition_email` function is now more robust to non-standard date formats, including ISO-8601 dates with "Z" suffixes. This prevents `ValueError` exceptions when partitioning emails with these date formats. +- **Parse a wider variety of date formats in email headers** The `partition_email` function is now more robust to non-standard date formats, including ISO-8601 dates with "Z" suffixes. This prevents `ValueError` exceptions when partitioning emails with these date formats. ## 0.18.12 diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 7774420d99..4df44ced66 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.18.13" # pragma: no cover +__version__ = "0.18.14-dev0" # pragma: no cover diff --git a/unstructured/partition/common/lang.py b/unstructured/partition/common/lang.py index 2f966725ed..31fee82877 100644 --- a/unstructured/partition/common/lang.py +++ b/unstructured/partition/common/lang.py @@ -403,7 +403,7 @@ def detect_languages( # If text contains special characters (like ñ, å, or Korean/Mandarin/etc.) it will NOT default # to English. It will default to English if text is only ascii characters and is short. if re.match(r"^[\x00-\x7F]+$", text) and len(text.split()) < 5: - logger.warning(f'short text: "{text}". Defaulting to English.') + logger.debug(f'short text: "{text}". Defaulting to English.') return ["eng"] # set seed for deterministic langdetect outputs