From c9db88712a7d7d309df836ec26838fedc868ab37 Mon Sep 17 00:00:00 2001 From: Brian Greunke Date: Fri, 5 Sep 2025 18:44:02 -0500 Subject: [PATCH 1/4] fix: text extras import errors --- dreadnode/scorers/classification.py | 4 +- dreadnode/scorers/pii.py | 12 +++-- dreadnode/scorers/readability.py | 6 +-- dreadnode/scorers/sentiment.py | 5 +- dreadnode/scorers/similarity.py | 69 +++++++++++++++++----------- dreadnode/transforms/ascii_art.py | 6 +-- dreadnode/transforms/perturbation.py | 8 ++-- poetry.lock | 50 +++++++++----------- pyproject.toml | 19 +++++--- 9 files changed, 94 insertions(+), 85 deletions(-) diff --git a/dreadnode/scorers/classification.py b/dreadnode/scorers/classification.py index 4c2b6577..ca572fdf 100644 --- a/dreadnode/scorers/classification.py +++ b/dreadnode/scorers/classification.py @@ -1,7 +1,5 @@ import typing as t -from transformers import pipeline - from dreadnode.meta import Config from dreadnode.metric import Metric from dreadnode.scorers import Scorer @@ -37,7 +35,7 @@ def zero_shot_classification( ) try: - pipeline("zero-shot-classification", model=model_name) + from transformers import pipeline # type: ignore[import-not-found] except ImportError: warn_at_user_stacklevel(transformers_error_msg, UserWarning) diff --git a/dreadnode/scorers/pii.py b/dreadnode/scorers/pii.py index ba1c9c35..e929465f 100644 --- a/dreadnode/scorers/pii.py +++ b/dreadnode/scorers/pii.py @@ -1,15 +1,14 @@ import re import typing as t -from presidio_analyzer import AnalyzerEngine -from presidio_analyzer.nlp_engine import NlpEngineProvider - from dreadnode.metric import Metric from dreadnode.scorers import Scorer from dreadnode.scorers.contains import contains from dreadnode.util import warn_at_user_stacklevel if t.TYPE_CHECKING: + from presidio_analyzer import AnalyzerEngine # type: ignore[import-not-found] + from dreadnode.types import JsonDict @@ -66,6 +65,9 @@ def _get_presidio_analyzer() -> "AnalyzerEngine": """Lazily initializes and returns a singleton Presidio AnalyzerEngine instance.""" global g_analyzer_engine # noqa: PLW0603 + from presidio_analyzer import AnalyzerEngine # type: ignore[import-not-found] + from presidio_analyzer.nlp_engine import NlpEngineProvider # type: ignore[import-not-found] + if g_analyzer_engine is None: provider = NlpEngineProvider( nlp_configuration={ @@ -107,8 +109,8 @@ def detect_pii_with_presidio( ) try: - _get_presidio_analyzer() - except (ImportError, OSError): + import presidio_analyzer # type: ignore[import-not-found,unused-ignore] + except ImportError: warn_at_user_stacklevel(presidio_import_error_msg, UserWarning) def disabled_evaluate(_: t.Any) -> Metric: diff --git a/dreadnode/scorers/readability.py b/dreadnode/scorers/readability.py index 259f5feb..0c5845d8 100644 --- a/dreadnode/scorers/readability.py +++ b/dreadnode/scorers/readability.py @@ -1,7 +1,5 @@ import typing as t -import textstat # type: ignore[import-untyped] - from dreadnode.metric import Metric from dreadnode.scorers.base import Scorer from dreadnode.util import warn_at_user_stacklevel @@ -29,8 +27,8 @@ def readability( ) try: - textstat.flesch_kincaid_grade("test") - except (ImportError, AttributeError): + import textstat # type: ignore[import-not-found] + except ImportError: warn_at_user_stacklevel(textstat_import_error_msg, UserWarning) def disabled_evaluate(_: t.Any) -> Metric: diff --git a/dreadnode/scorers/sentiment.py b/dreadnode/scorers/sentiment.py index d9151da8..0389cc8c 100644 --- a/dreadnode/scorers/sentiment.py +++ b/dreadnode/scorers/sentiment.py @@ -2,7 +2,6 
@@ import typing as t import httpx -from textblob import TextBlob # type: ignore[import-untyped] from dreadnode.meta import Config from dreadnode.metric import Metric @@ -33,8 +32,8 @@ def sentiment( textblob_import_error_msg = "TextBlob dependency is not installed. Install with: pip install textblob && python -m textblob.download_corpora" try: - TextBlob("test").sentiment # noqa: B018 - except (ImportError, AttributeError): + from textblob import TextBlob # type: ignore[import-not-found] + except ImportError: warn_at_user_stacklevel(textblob_import_error_msg, UserWarning) def disabled_evaluate(_: t.Any) -> Metric: diff --git a/dreadnode/scorers/similarity.py b/dreadnode/scorers/similarity.py index de199751..918ace7d 100644 --- a/dreadnode/scorers/similarity.py +++ b/dreadnode/scorers/similarity.py @@ -1,23 +1,17 @@ import typing as t from difflib import SequenceMatcher -import litellm -import nltk # type: ignore[import-untyped] -from nltk.tokenize import word_tokenize # type: ignore[import-untyped] -from nltk.translate.bleu_score import sentence_bleu # type: ignore[import-untyped] -from rapidfuzz import distance, fuzz, utils -from sentence_transformers import SentenceTransformer, util -from sklearn.feature_extraction.text import TfidfVectorizer # type: ignore[import-untyped] -from sklearn.metrics.pairwise import ( # type: ignore # noqa: PGH003 - cosine_similarity as sklearn_cosine_similarity, -) - from dreadnode.meta import Config from dreadnode.metric import Metric from dreadnode.scorers.base import Scorer from dreadnode.scorers.util import cosine_similarity from dreadnode.util import warn_at_user_stacklevel +if t.TYPE_CHECKING: + from sentence_transformers import ( # type: ignore[import-not-found] + SentenceTransformer, + ) + def similarity( reference: str, @@ -94,12 +88,9 @@ def similarity_with_rapidfuzz( score_cutoff: Optional score cutoff below which to return 0.0. name: Name of the scorer. """ - rapidfuzz_import_error_msg = ( - "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz" - ) - + rapidfuzz_import_error_msg = "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz or dreadnode[text]" try: - fuzz.ratio("test", "test") + from rapidfuzz import fuzz, utils # type: ignore[import-not-found] except ImportError: warn_at_user_stacklevel(rapidfuzz_import_error_msg, UserWarning) @@ -191,11 +182,11 @@ def string_distance( normalize: Normalize distances and convert to similarity scores. name: Name of the scorer. """ - rapidfuzz_import_error_msg = ( - "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz" - ) + rapidfuzz_import_error_msg = "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz or dreadnode[text]" try: + from rapidfuzz import distance # type: ignore[import-not-found] + distance.Levenshtein.distance("test", "test") except ImportError: warn_at_user_stacklevel(rapidfuzz_import_error_msg, UserWarning) @@ -260,12 +251,15 @@ def similarity_with_tf_idf(reference: str, *, name: str = "similarity") -> "Scor reference: The reference text (e.g., expected output). name: Name of the scorer. """ - sklearn_import_error_msg = ( - "scikit-learn dependency is not installed. Please install it with: pip install scikit-learn" - ) + sklearn_import_error_msg = "scikit-learn dependency is not installed. 
Please install it with: pip install scikit-learn or dreadnode[text]" try: - TfidfVectorizer() + from sklearn.feature_extraction.text import ( # type: ignore[import-not-found] + TfidfVectorizer, + ) + from sklearn.metrics.pairwise import ( # type: ignore[import-not-found] + cosine_similarity as sklearn_cosine_similarity, + ) except ImportError: warn_at_user_stacklevel(sklearn_import_error_msg, UserWarning) @@ -275,6 +269,7 @@ def disabled_evaluate(_: t.Any) -> Metric: return Scorer(disabled_evaluate, name=name) vectorizer = TfidfVectorizer(stop_words="english") + a = 1 def evaluate(data: t.Any, *, reference: str = reference) -> Metric: candidate_text = str(data) @@ -309,10 +304,13 @@ def similarity_with_sentence_transformers( model_name: The name of the sentence-transformer model to use. name: Name of the scorer. """ - sentence_transformers_error_msg = "Sentence transformers dependency is not installed. Please install it with: pip install sentence-transformers" + sentence_transformers_error_msg = "Sentence transformers dependency is not installed. Please install it with: pip install sentence-transformers or dreadnode[training]" try: - SentenceTransformer(model_name) + from sentence_transformers import ( # type: ignore[import-not-found] + SentenceTransformer, + util, + ) except ImportError: warn_at_user_stacklevel(sentence_transformers_error_msg, UserWarning) @@ -370,6 +368,16 @@ def similarity_with_litellm( or self-hosted models. name: Name of the scorer. """ + litellm_import_error_msg = "litellm dependency is not installed. Please install it with: pip install litellm or dreadnode[text]" + try: + import litellm + except ImportError: + warn_at_user_stacklevel(litellm_import_error_msg, UserWarning) + + def disabled_evaluate(_: t.Any) -> Metric: + return Metric(value=0.0, attributes={"error": litellm_import_error_msg}) + + return Scorer(disabled_evaluate, name=name) async def evaluate( data: t.Any, @@ -426,11 +434,16 @@ def bleu( nltk_import_error_msg = "NLTK dependency is not installed. Install with: pip install nltk && python -m nltk.downloader punkt" try: - # Check for the 'punkt' tokenizer data + import nltk # type: ignore[import-not-found] + from nltk.tokenize import ( # type: ignore[import-not-found] + word_tokenize, + ) + from nltk.translate.bleu_score import ( # type: ignore[import-not-found] + sentence_bleu, + ) + try: nltk.data.find("tokenizers/punkt") - word_tokenize("test") - sentence_bleu([["test"]], ["test"]) except LookupError as e: nltk_import_error_msg = ( "NLTK 'punkt' tokenizer not found. Please run: python -m nltk.downloader punkt" diff --git a/dreadnode/transforms/ascii_art.py b/dreadnode/transforms/ascii_art.py index 0c62af7e..fdc79e95 100644 --- a/dreadnode/transforms/ascii_art.py +++ b/dreadnode/transforms/ascii_art.py @@ -1,5 +1,3 @@ -from art import text2art # type: ignore[import-untyped] - from dreadnode.meta import Config from dreadnode.transforms.base import Transform @@ -8,8 +6,8 @@ def ascii_art(font: str = "rand", *, name: str = "ascii_art") -> Transform[str, """Converts text into ASCII art using the 'art' library.""" try: - text2art("test") # Test if art is working - except (ImportError, AttributeError): + from art import text2art # type: ignore[import-not-found] + except ImportError: raise ImportError( "ASCII art dependency is not installed. 
Install with: pip install art" ) from ImportError("art library not available") diff --git a/dreadnode/transforms/perturbation.py b/dreadnode/transforms/perturbation.py index cf9bf01f..43e4477f 100644 --- a/dreadnode/transforms/perturbation.py +++ b/dreadnode/transforms/perturbation.py @@ -3,8 +3,6 @@ import typing as t import unicodedata -from confusables import confusable_characters # type: ignore[import-untyped] - from dreadnode.meta import Config from dreadnode.transforms.base import Transform @@ -226,8 +224,10 @@ def unicode_confusable( """ try: - confusable_characters("a") - except (ImportError, AttributeError): + from confusables import ( # type: ignore[import-not-found] + confusable_characters, + ) + except ImportError: raise ImportError( "Confusables dependency is not installed. Install with: pip install confusables" ) from ImportError("confusables library not available") diff --git a/poetry.lock b/poetry.lock index 0a331377..7bbbf5ef 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "aiobotocore" @@ -285,7 +285,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -810,15 +810,14 @@ files = [ [[package]] name = "datasets" -version = "3.6.0" +version = "4.0.0" description = "HuggingFace community-driven open-source library of datasets" -optional = true +optional = false python-versions = ">=3.9.0" groups = ["main"] -markers = "extra == \"dev\"" files = [ - {file = "datasets-3.6.0-py3-none-any.whl", hash = "sha256:25000c4a2c0873a710df127d08a202a06eab7bf42441a6bc278b499c2f72cd1b"}, - {file = "datasets-3.6.0.tar.gz", hash = "sha256:1b2bf43b19776e2787e181cfd329cb0ca1a358ea014780c3581e0f276375e041"}, + {file = "datasets-4.0.0-py3-none-any.whl", hash = "sha256:7ef95e62025fd122882dbce6cb904c8cd3fbc829de6669a5eb939c77d50e203d"}, + {file = "datasets-4.0.0.tar.gz", hash = "sha256:9657e7140a9050db13443ba21cb5de185af8af944479b00e7ff1e00a61c8dbf1"}, ] [package.dependencies] @@ -837,18 +836,17 @@ tqdm = ">=4.66.3" xxhash = "*" [package.extras] -audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0)"] +audio = ["soundfile (>=0.12.1)", "torch (>=2.7.0)", "torchcodec (>=0.4.0)"] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyav", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch", "torch 
(>=2.0.0)", "torchdata", "torchvision", "transformers", "transformers (>=4.42.0)", "zstandard"] -docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] +dev = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "numba (>=0.56.4)", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\" and sys_platform != \"win32\"", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0) ; python_version < \"3.10\" and sys_platform != \"win32\"", "tiktoken", "torch", "torch (>=2.0.0)", "torchcodec (>=0.4.0) ; sys_platform != \"win32\"", "torchdata", "transformers", "transformers (>=4.42.0)", "zstandard"] +docs = ["tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] pdfs = ["pdfplumber (>=0.11.4)"] quality = ["ruff (>=0.3.0)"] -s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.6.0)"] tensorflow-gpu = ["tensorflow (>=2.6.0)"] -tests = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyav", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch (>=2.0.0)", "torchdata", "torchvision", "transformers (>=4.42.0)", "zstandard"] -tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyav", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "torchdata", "torchvision", "transformers (>=4.42.0)", "zstandard"] +tests = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "numba (>=0.56.4)", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\" and sys_platform != \"win32\"", "tensorflow (>=2.6.0) ; python_version < \"3.10\" and sys_platform != \"win32\"", "tiktoken", "torch (>=2.0.0)", "torchcodec (>=0.4.0) ; sys_platform != \"win32\"", "torchdata", "transformers (>=4.42.0)", "zstandard"] +tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", 
"elasticsearch (>=7.17.12,<8.0.0)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "numba (>=0.56.4)", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "torchcodec (>=0.4.0) ; sys_platform != \"win32\"", "torchdata", "transformers (>=4.42.0)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=9.4.0)"] @@ -904,10 +902,9 @@ files = [ name = "dill" version = "0.3.8" description = "serialize all of Python" -optional = true +optional = false python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"dev\"" files = [ {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, @@ -1623,7 +1620,7 @@ description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "ipython-8.37.0-py3-none-any.whl", hash = "sha256:ed87326596b878932dbcb171e3e698845434d8c61b8d8cd474bf663041a9dcf2"}, {file = "ipython-8.37.0.tar.gz", hash = "sha256:ca815841e1a41a1e6b73a0b08f3038af9b2252564d01fc405356d34033012216"}, @@ -2714,10 +2711,9 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} name = "multiprocess" version = "0.70.16" description = "better multiprocessing and multithreading in Python" -optional = true +optional = false python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"dev\"" files = [ {file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"}, {file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"}, @@ -2877,7 +2873,7 @@ description = "Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"}, {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"}, @@ -2960,7 +2956,7 @@ description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, @@ -4144,10 +4140,9 @@ tests = ["pytest"] name = "pyarrow" version = "19.0.1" description = "Python library for Apache Arrow" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"dev\"" files = [ {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = 
"sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69"}, {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec"}, @@ -5481,7 +5476,7 @@ description = "Fundamental algorithms for scientific computing in Python" optional = true python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"training\" or extra == \"text\")" +markers = "python_version < \"3.11\" and (extra == \"training\" or extra == \"text\")" files = [ {file = "scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c"}, {file = "scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253"}, @@ -6376,7 +6371,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -7029,10 +7024,9 @@ files = [ name = "xxhash" version = "3.5.0" description = "Python binding for xxHash" -optional = true +optional = false python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"dev\"" files = [ {file = "xxhash-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ece616532c499ee9afbb83078b1b952beffef121d989841f7f4b3dc5ac0fd212"}, {file = "xxhash-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3171f693dbc2cef6477054a665dc255d996646b4023fe56cb4db80e26f4cc520"}, @@ -7299,7 +7293,7 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_it type = ["pytest-mypy"] [extras] -dev = ["datasets", "ipykernel", "markdown", "markdownify", "mkdocstrings-python", "mypy", "pandas-stubs", "pre-commit", "pyarrow", "pytest", "pytest-asyncio", "ruff", "typer", "types-protobuf", "types-requests"] +dev = ["ipykernel", "markdown", "markdownify", "mkdocstrings-python", "mypy", "pandas-stubs", "pre-commit", "pyarrow", "pytest", "pytest-asyncio", "ruff", "typer", "types-protobuf", "types-requests"] multimodal = ["moviepy", "pillow", "soundfile"] text = ["art", "confusables", "nltk", "presidio-analyzer", "rapidfuzz", "scikit-learn", "textblob", "textstat"] training = ["sentence-transformers", "transformers"] @@ -7307,4 +7301,4 @@ training = ["sentence-transformers", "transformers"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "c2495ca89db7e42c85802bf951614fb8cf1648363ce9f94edd71616ef6e242d7" +content-hash = "a72bf575d623bcfc530e9e24552b1244e4bc60fa881f151bd97bcc87d2048a1c" diff --git a/pyproject.toml b/pyproject.toml index 68b4dd56..a5606893 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,10 +25,14 @@ dependencies = [ "mkdocstrings-python[dev] (>=1.18.2,<2.0.0)", "markdownify[dev] (>=1.2.0,<2.0.0)", "networkx>=3.3,<4.0.0", + "datasets (>=4.0.0,<5.0.0)", ] [project.optional-dependencies] -training = ["transformers>=4.41.0,<5.0.0", "sentence-transformers>=5.1.0,<6.0.0",] +training = [ + "transformers>=4.41.0,<5.0.0", + "sentence-transformers>=5.1.0,<6.0.0", +] multimodal = [ "pillow>=11.2.1,<12.0.0", @@ -59,7 +63,6 @@ dev = [ "pandas-stubs>=2.2.3.250308", "types-requests>=2.32.0.20250306", 
"typer>=0.15.2,<1.0.0", - "datasets>=3.5.0,<4.0.0", "pyarrow>=19.0.1,<20.0.0", "markdown>=3.8.2,<4.0.0", "markdownify>=1.1.0,<2.0.0", @@ -105,6 +108,10 @@ strict = true python_version = "3.10" exclude = "tests" +[[tool.mypy.overrides]] +module = ["dreadnode.scorers.*"] +disable_error_code = ["unused-ignore", "import-untyped"] + [tool.ty.environment] python-version = "3.10" @@ -150,10 +157,10 @@ ignore = [ "FIX002", # contains todo, consider fixing "COM812", # disabled for formatting "ISC001", # disabled for formatting - "PLC0415", # import should be at top-level (lazy imports) - "FBT001", # boolean positional argument (legitimate cases) - "FURB122", # use f.writelines (minor optimization) - "F401", # ignore imported but unused + "PLC0415", # import should be at top-level (lazy imports) + "FBT001", # boolean positional argument (legitimate cases) + "FURB122", # use f.writelines (minor optimization) + "F401", # ignore imported but unused ] [tool.ruff.format] From cb168b079fe49eaace81cf0be5b45eb08de12749 Mon Sep 17 00:00:00 2001 From: Brian Greunke Date: Fri, 5 Sep 2025 20:37:57 -0500 Subject: [PATCH 2/4] fix: suppress certain mypy errors in transforms --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a5606893..457935bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,7 +109,7 @@ python_version = "3.10" exclude = "tests" [[tool.mypy.overrides]] -module = ["dreadnode.scorers.*"] +module = ["dreadnode.scorers.*", "dreadnode.transforms.*"] disable_error_code = ["unused-ignore", "import-untyped"] [tool.ty.environment] From fdf268ae27939d98b859883e7d4bcacd5d27c3f1 Mon Sep 17 00:00:00 2001 From: Brian Greunke Date: Fri, 5 Sep 2025 20:47:15 -0500 Subject: [PATCH 3/4] chore: removed test code case --- dreadnode/scorers/similarity.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dreadnode/scorers/similarity.py b/dreadnode/scorers/similarity.py index 918ace7d..08866e69 100644 --- a/dreadnode/scorers/similarity.py +++ b/dreadnode/scorers/similarity.py @@ -269,7 +269,6 @@ def disabled_evaluate(_: t.Any) -> Metric: return Scorer(disabled_evaluate, name=name) vectorizer = TfidfVectorizer(stop_words="english") - a = 1 def evaluate(data: t.Any, *, reference: str = reference) -> Metric: candidate_text = str(data) From 5bc23553929205518baa216a7a51eec4d2a8055b Mon Sep 17 00:00:00 2001 From: Brian Greunke Date: Fri, 5 Sep 2025 21:56:46 -0500 Subject: [PATCH 4/4] refactor: dry import error messages --- dreadnode/scorers/classification.py | 6 ++---- dreadnode/scorers/pii.py | 9 +++------ dreadnode/scorers/sentiment.py | 4 ++-- dreadnode/scorers/similarity.py | 14 +++++++------- dreadnode/util.py | 7 +++++++ 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/dreadnode/scorers/classification.py b/dreadnode/scorers/classification.py index ca572fdf..3989f63d 100644 --- a/dreadnode/scorers/classification.py +++ b/dreadnode/scorers/classification.py @@ -3,7 +3,7 @@ from dreadnode.meta import Config from dreadnode.metric import Metric from dreadnode.scorers import Scorer -from dreadnode.util import clean_str, warn_at_user_stacklevel +from dreadnode.util import clean_str, generate_import_error_msg, warn_at_user_stacklevel # Global cache for pipelines g_transformer_pipeline_cache: dict[str, t.Any] = {} @@ -30,9 +30,7 @@ def zero_shot_classification( model_name: The name of the zero-shot model from Hugging Face Hub. name: Name of the scorer. """ - transformers_error_msg = ( - "Transformers dependency is not installed. 
Install with: pip install transformers" - ) + transformers_error_msg = generate_import_error_msg("transformers", "training") try: from transformers import pipeline # type: ignore[import-not-found] diff --git a/dreadnode/scorers/pii.py b/dreadnode/scorers/pii.py index e929465f..56f13dc7 100644 --- a/dreadnode/scorers/pii.py +++ b/dreadnode/scorers/pii.py @@ -4,7 +4,7 @@ from dreadnode.metric import Metric from dreadnode.scorers import Scorer from dreadnode.scorers.contains import contains -from dreadnode.util import warn_at_user_stacklevel +from dreadnode.util import generate_import_error_msg, warn_at_user_stacklevel if t.TYPE_CHECKING: from presidio_analyzer import AnalyzerEngine # type: ignore[import-not-found] @@ -103,13 +103,10 @@ def detect_pii_with_presidio( invert: Invert the score (1.0 for no PII, 0.0 for PII detected). name: Name of the scorer. """ - presidio_import_error_msg = ( - "Presidio dependencies are not installed. " - "Install with: pip install presidio-analyzer presidio-anonymizer 'spacy[en_core_web_lg]'" - ) + presidio_import_error_msg = generate_import_error_msg("presidio-analyzer", "text") try: - import presidio_analyzer # type: ignore[import-not-found,unused-ignore] + import presidio_analyzer # type: ignore[import-not-found] except ImportError: warn_at_user_stacklevel(presidio_import_error_msg, UserWarning) diff --git a/dreadnode/scorers/sentiment.py b/dreadnode/scorers/sentiment.py index 0389cc8c..0907ecb6 100644 --- a/dreadnode/scorers/sentiment.py +++ b/dreadnode/scorers/sentiment.py @@ -6,7 +6,7 @@ from dreadnode.meta import Config from dreadnode.metric import Metric from dreadnode.scorers.base import Scorer -from dreadnode.util import warn_at_user_stacklevel +from dreadnode.util import generate_import_error_msg, warn_at_user_stacklevel Sentiment = t.Literal["positive", "negative", "neutral"] @@ -29,7 +29,7 @@ def sentiment( target: The desired sentiment to score against. name: Name of the scorer. """ - textblob_import_error_msg = "TextBlob dependency is not installed. Install with: pip install textblob && python -m textblob.download_corpora" + textblob_import_error_msg = generate_import_error_msg("textblob", "text") try: from textblob import TextBlob # type: ignore[import-not-found] diff --git a/dreadnode/scorers/similarity.py b/dreadnode/scorers/similarity.py index 08866e69..28200b34 100644 --- a/dreadnode/scorers/similarity.py +++ b/dreadnode/scorers/similarity.py @@ -5,7 +5,7 @@ from dreadnode.metric import Metric from dreadnode.scorers.base import Scorer from dreadnode.scorers.util import cosine_similarity -from dreadnode.util import warn_at_user_stacklevel +from dreadnode.util import generate_import_error_msg, warn_at_user_stacklevel if t.TYPE_CHECKING: from sentence_transformers import ( # type: ignore[import-not-found] @@ -88,7 +88,7 @@ def similarity_with_rapidfuzz( score_cutoff: Optional score cutoff below which to return 0.0. name: Name of the scorer. """ - rapidfuzz_import_error_msg = "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz or dreadnode[text]" + rapidfuzz_import_error_msg = generate_import_error_msg("rapidfuzz", "text") try: from rapidfuzz import fuzz, utils # type: ignore[import-not-found] except ImportError: @@ -182,7 +182,7 @@ def string_distance( normalize: Normalize distances and convert to similarity scores. name: Name of the scorer. """ - rapidfuzz_import_error_msg = "RapidFuzz dependency is not installed. 
Please install it with: pip install rapidfuzz or dreadnode[text]" + rapidfuzz_import_error_msg = generate_import_error_msg("rapidfuzz", "text") try: from rapidfuzz import distance # type: ignore[import-not-found] @@ -251,7 +251,7 @@ def similarity_with_tf_idf(reference: str, *, name: str = "similarity") -> "Scor reference: The reference text (e.g., expected output). name: Name of the scorer. """ - sklearn_import_error_msg = "scikit-learn dependency is not installed. Please install it with: pip install scikit-learn or dreadnode[text]" + sklearn_import_error_msg = generate_import_error_msg("scikit-learn", "text") try: from sklearn.feature_extraction.text import ( # type: ignore[import-not-found] @@ -303,7 +303,7 @@ def similarity_with_sentence_transformers( model_name: The name of the sentence-transformer model to use. name: Name of the scorer. """ - sentence_transformers_error_msg = "Sentence transformers dependency is not installed. Please install it with: pip install sentence-transformers or dreadnode[training]" + sentence_transformers_error_msg = generate_import_error_msg("sentence-transformers", "training") try: from sentence_transformers import ( # type: ignore[import-not-found] @@ -367,7 +367,7 @@ def similarity_with_litellm( or self-hosted models. name: Name of the scorer. """ - litellm_import_error_msg = "litellm dependency is not installed. Please install it with: pip install litellm or dreadnode[text]" + litellm_import_error_msg = generate_import_error_msg("litellm", "text") try: import litellm except ImportError: @@ -430,7 +430,7 @@ def bleu( weights: Weights for unigram, bigram, etc. Must sum to 1. name: Name of the scorer. """ - nltk_import_error_msg = "NLTK dependency is not installed. Install with: pip install nltk && python -m nltk.downloader punkt" + nltk_import_error_msg = generate_import_error_msg("nltk", "text") try: import nltk # type: ignore[import-not-found] diff --git a/dreadnode/util.py b/dreadnode/util.py index ee569c39..234c3e50 100644 --- a/dreadnode/util.py +++ b/dreadnode/util.py @@ -154,6 +154,13 @@ def format_dict(data: dict[str, t.Any], max_length: int = 80) -> str: return f"{{{formatted}}}" +def generate_import_error_msg(package_name: str, extras_name: str) -> str: + return ( + f"Missing required package '{package_name}'. " + f"Please install it with: pip install {package_name} or dreadnode[{extras_name}]" + ) + + # Types
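Reviewer's note on the pattern, since it is easy to lose across the hunks: after this series, each scorer factory builds its install-hint message up front, imports the optional dependency lazily inside a try/except ImportError, and returns a disabled scorer that records the failure instead of raising. Below is a minimal, self-contained sketch of that shape. The Metric and Scorer dataclasses, the warnings.warn call, and the fuzz.ratio scoring body are simplified stand-ins, not the actual dreadnode types or API; only the try/except import shape and the generate_import_error_msg helper mirror the diffs above.

import typing as t
import warnings
from dataclasses import dataclass, field


@dataclass
class Metric:
    """Simplified stand-in for dreadnode.metric.Metric."""

    value: float
    attributes: dict[str, t.Any] = field(default_factory=dict)


@dataclass
class Scorer:
    """Simplified stand-in for dreadnode.scorers.base.Scorer."""

    evaluate: t.Callable[[t.Any], Metric]
    name: str


def generate_import_error_msg(package_name: str, extras_name: str) -> str:
    # Mirrors the helper PATCH 4/4 adds to dreadnode/util.py.
    return (
        f"Missing required package '{package_name}'. "
        f"Install it with 'pip install {package_name}' or 'pip install \"dreadnode[{extras_name}]\"'."
    )


def similarity_with_rapidfuzz(reference: str, *, name: str = "similarity") -> Scorer:
    error_msg = generate_import_error_msg("rapidfuzz", "text")

    try:
        # Lazy import: a base install stays usable without the 'text' extras.
        from rapidfuzz import fuzz  # type: ignore[import-not-found]
    except ImportError:
        warnings.warn(error_msg, UserWarning, stacklevel=2)

        def disabled_evaluate(_: t.Any) -> Metric:
            # Degrade gracefully: score 0.0 and record why in the attributes.
            return Metric(value=0.0, attributes={"error": error_msg})

        return Scorer(disabled_evaluate, name=name)

    def evaluate(data: t.Any) -> Metric:
        # fuzz.ratio returns 0-100; normalize to the 0.0-1.0 range.
        return Metric(value=fuzz.ratio(reference, str(data)) / 100.0)

    return Scorer(evaluate, name=name)

The key design choice is that a missing extra degrades a single scorer rather than breaking import of the whole package, which is exactly what the removed module-level imports used to do.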
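And a quick usage sketch of the PATCH 4/4 helper, with the messages it produces shown as comments (assuming the helper exactly as added to dreadnode/util.py above):

from dreadnode.util import generate_import_error_msg

print(generate_import_error_msg("rapidfuzz", "text"))
# Missing required package 'rapidfuzz'. Install it with 'pip install rapidfuzz' or 'pip install "dreadnode[text]"'.

print(generate_import_error_msg("sentence-transformers", "training"))
# Missing required package 'sentence-transformers'. Install it with 'pip install sentence-transformers' or 'pip install "dreadnode[training]"'.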