From c9db88712a7d7d309df836ec26838fedc868ab37 Mon Sep 17 00:00:00 2001 From: Brian Greunke Date: Fri, 5 Sep 2025 18:44:02 -0500 Subject: [PATCH 1/4] fix: text extras import errors --- dreadnode/scorers/classification.py | 4 +- dreadnode/scorers/pii.py | 12 +++-- dreadnode/scorers/readability.py | 6 +-- dreadnode/scorers/sentiment.py | 5 +- dreadnode/scorers/similarity.py | 69 +++++++++++++++++----------- dreadnode/transforms/ascii_art.py | 6 +-- dreadnode/transforms/perturbation.py | 8 ++-- poetry.lock | 50 +++++++++----------- pyproject.toml | 19 +++++--- 9 files changed, 94 insertions(+), 85 deletions(-) diff --git a/dreadnode/scorers/classification.py b/dreadnode/scorers/classification.py index 4c2b6577..ca572fdf 100644 --- a/dreadnode/scorers/classification.py +++ b/dreadnode/scorers/classification.py @@ -1,7 +1,5 @@ import typing as t -from transformers import pipeline - from dreadnode.meta import Config from dreadnode.metric import Metric from dreadnode.scorers import Scorer @@ -37,7 +35,7 @@ def zero_shot_classification( ) try: - pipeline("zero-shot-classification", model=model_name) + from transformers import pipeline # type: ignore[import-not-found] except ImportError: warn_at_user_stacklevel(transformers_error_msg, UserWarning) diff --git a/dreadnode/scorers/pii.py b/dreadnode/scorers/pii.py index ba1c9c35..e929465f 100644 --- a/dreadnode/scorers/pii.py +++ b/dreadnode/scorers/pii.py @@ -1,15 +1,14 @@ import re import typing as t -from presidio_analyzer import AnalyzerEngine -from presidio_analyzer.nlp_engine import NlpEngineProvider - from dreadnode.metric import Metric from dreadnode.scorers import Scorer from dreadnode.scorers.contains import contains from dreadnode.util import warn_at_user_stacklevel if t.TYPE_CHECKING: + from presidio_analyzer import AnalyzerEngine # type: ignore[import-not-found] + from dreadnode.types import JsonDict @@ -66,6 +65,9 @@ def _get_presidio_analyzer() -> "AnalyzerEngine": """Lazily initializes and returns a singleton Presidio AnalyzerEngine instance.""" global g_analyzer_engine # noqa: PLW0603 + from presidio_analyzer import AnalyzerEngine # type: ignore[import-not-found] + from presidio_analyzer.nlp_engine import NlpEngineProvider # type: ignore[import-not-found] + if g_analyzer_engine is None: provider = NlpEngineProvider( nlp_configuration={ @@ -107,8 +109,8 @@ def detect_pii_with_presidio( ) try: - _get_presidio_analyzer() - except (ImportError, OSError): + import presidio_analyzer # type: ignore[import-not-found,unused-ignore] + except ImportError: warn_at_user_stacklevel(presidio_import_error_msg, UserWarning) def disabled_evaluate(_: t.Any) -> Metric: diff --git a/dreadnode/scorers/readability.py b/dreadnode/scorers/readability.py index 259f5feb..0c5845d8 100644 --- a/dreadnode/scorers/readability.py +++ b/dreadnode/scorers/readability.py @@ -1,7 +1,5 @@ import typing as t -import textstat # type: ignore[import-untyped] - from dreadnode.metric import Metric from dreadnode.scorers.base import Scorer from dreadnode.util import warn_at_user_stacklevel @@ -29,8 +27,8 @@ def readability( ) try: - textstat.flesch_kincaid_grade("test") - except (ImportError, AttributeError): + import textstat # type: ignore[import-not-found] + except ImportError: warn_at_user_stacklevel(textstat_import_error_msg, UserWarning) def disabled_evaluate(_: t.Any) -> Metric: diff --git a/dreadnode/scorers/sentiment.py b/dreadnode/scorers/sentiment.py index d9151da8..0389cc8c 100644 --- a/dreadnode/scorers/sentiment.py +++ b/dreadnode/scorers/sentiment.py @@ -2,7 +2,6 
@@ import typing as t import httpx -from textblob import TextBlob # type: ignore[import-untyped] from dreadnode.meta import Config from dreadnode.metric import Metric @@ -33,8 +32,8 @@ def sentiment( textblob_import_error_msg = "TextBlob dependency is not installed. Install with: pip install textblob && python -m textblob.download_corpora" try: - TextBlob("test").sentiment # noqa: B018 - except (ImportError, AttributeError): + from textblob import TextBlob # type: ignore[import-not-found] + except ImportError: warn_at_user_stacklevel(textblob_import_error_msg, UserWarning) def disabled_evaluate(_: t.Any) -> Metric: diff --git a/dreadnode/scorers/similarity.py b/dreadnode/scorers/similarity.py index de199751..918ace7d 100644 --- a/dreadnode/scorers/similarity.py +++ b/dreadnode/scorers/similarity.py @@ -1,23 +1,17 @@ import typing as t from difflib import SequenceMatcher -import litellm -import nltk # type: ignore[import-untyped] -from nltk.tokenize import word_tokenize # type: ignore[import-untyped] -from nltk.translate.bleu_score import sentence_bleu # type: ignore[import-untyped] -from rapidfuzz import distance, fuzz, utils -from sentence_transformers import SentenceTransformer, util -from sklearn.feature_extraction.text import TfidfVectorizer # type: ignore[import-untyped] -from sklearn.metrics.pairwise import ( # type: ignore # noqa: PGH003 - cosine_similarity as sklearn_cosine_similarity, -) - from dreadnode.meta import Config from dreadnode.metric import Metric from dreadnode.scorers.base import Scorer from dreadnode.scorers.util import cosine_similarity from dreadnode.util import warn_at_user_stacklevel +if t.TYPE_CHECKING: + from sentence_transformers import ( # type: ignore[import-not-found] + SentenceTransformer, + ) + def similarity( reference: str, @@ -94,12 +88,9 @@ def similarity_with_rapidfuzz( score_cutoff: Optional score cutoff below which to return 0.0. name: Name of the scorer. """ - rapidfuzz_import_error_msg = ( - "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz" - ) - + rapidfuzz_import_error_msg = "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz or dreadnode[text]" try: - fuzz.ratio("test", "test") + from rapidfuzz import fuzz, utils # type: ignore[import-not-found] except ImportError: warn_at_user_stacklevel(rapidfuzz_import_error_msg, UserWarning) @@ -191,11 +182,11 @@ def string_distance( normalize: Normalize distances and convert to similarity scores. name: Name of the scorer. """ - rapidfuzz_import_error_msg = ( - "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz" - ) + rapidfuzz_import_error_msg = "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz or dreadnode[text]" try: + from rapidfuzz import distance # type: ignore[import-not-found] + distance.Levenshtein.distance("test", "test") except ImportError: warn_at_user_stacklevel(rapidfuzz_import_error_msg, UserWarning) @@ -260,12 +251,15 @@ def similarity_with_tf_idf(reference: str, *, name: str = "similarity") -> "Scor reference: The reference text (e.g., expected output). name: Name of the scorer. """ - sklearn_import_error_msg = ( - "scikit-learn dependency is not installed. Please install it with: pip install scikit-learn" - ) + sklearn_import_error_msg = "scikit-learn dependency is not installed. 
Please install it with: pip install scikit-learn or dreadnode[text]" try: - TfidfVectorizer() + from sklearn.feature_extraction.text import ( # type: ignore[import-not-found] + TfidfVectorizer, + ) + from sklearn.metrics.pairwise import ( # type: ignore[import-not-found] + cosine_similarity as sklearn_cosine_similarity, + ) except ImportError: warn_at_user_stacklevel(sklearn_import_error_msg, UserWarning) @@ -275,6 +269,7 @@ def disabled_evaluate(_: t.Any) -> Metric: return Scorer(disabled_evaluate, name=name) vectorizer = TfidfVectorizer(stop_words="english") + a = 1 def evaluate(data: t.Any, *, reference: str = reference) -> Metric: candidate_text = str(data) @@ -309,10 +304,13 @@ def similarity_with_sentence_transformers( model_name: The name of the sentence-transformer model to use. name: Name of the scorer. """ - sentence_transformers_error_msg = "Sentence transformers dependency is not installed. Please install it with: pip install sentence-transformers" + sentence_transformers_error_msg = "Sentence transformers dependency is not installed. Please install it with: pip install sentence-transformers or dreadnode[training]" try: - SentenceTransformer(model_name) + from sentence_transformers import ( # type: ignore[import-not-found] + SentenceTransformer, + util, + ) except ImportError: warn_at_user_stacklevel(sentence_transformers_error_msg, UserWarning) @@ -370,6 +368,16 @@ def similarity_with_litellm( or self-hosted models. name: Name of the scorer. """ + litellm_import_error_msg = "litellm dependency is not installed. Please install it with: pip install litellm or dreadnode[text]" + try: + import litellm + except ImportError: + warn_at_user_stacklevel(litellm_import_error_msg, UserWarning) + + def disabled_evaluate(_: t.Any) -> Metric: + return Metric(value=0.0, attributes={"error": litellm_import_error_msg}) + + return Scorer(disabled_evaluate, name=name) async def evaluate( data: t.Any, @@ -426,11 +434,16 @@ def bleu( nltk_import_error_msg = "NLTK dependency is not installed. Install with: pip install nltk && python -m nltk.downloader punkt" try: - # Check for the 'punkt' tokenizer data + import nltk # type: ignore[import-not-found] + from nltk.tokenize import ( # type: ignore[import-not-found] + word_tokenize, + ) + from nltk.translate.bleu_score import ( # type: ignore[import-not-found] + sentence_bleu, + ) + try: nltk.data.find("tokenizers/punkt") - word_tokenize("test") - sentence_bleu([["test"]], ["test"]) except LookupError as e: nltk_import_error_msg = ( "NLTK 'punkt' tokenizer not found. Please run: python -m nltk.downloader punkt" diff --git a/dreadnode/transforms/ascii_art.py b/dreadnode/transforms/ascii_art.py index 0c62af7e..fdc79e95 100644 --- a/dreadnode/transforms/ascii_art.py +++ b/dreadnode/transforms/ascii_art.py @@ -1,5 +1,3 @@ -from art import text2art # type: ignore[import-untyped] - from dreadnode.meta import Config from dreadnode.transforms.base import Transform @@ -8,8 +6,8 @@ def ascii_art(font: str = "rand", *, name: str = "ascii_art") -> Transform[str, """Converts text into ASCII art using the 'art' library.""" try: - text2art("test") # Test if art is working - except (ImportError, AttributeError): + from art import text2art # type: ignore[import-not-found] + except ImportError: raise ImportError( "ASCII art dependency is not installed. 
Install with: pip install art" ) from ImportError("art library not available") diff --git a/dreadnode/transforms/perturbation.py b/dreadnode/transforms/perturbation.py index cf9bf01f..43e4477f 100644 --- a/dreadnode/transforms/perturbation.py +++ b/dreadnode/transforms/perturbation.py @@ -3,8 +3,6 @@ import typing as t import unicodedata -from confusables import confusable_characters # type: ignore[import-untyped] - from dreadnode.meta import Config from dreadnode.transforms.base import Transform @@ -226,8 +224,10 @@ def unicode_confusable( """ try: - confusable_characters("a") - except (ImportError, AttributeError): + from confusables import ( # type: ignore[import-not-found] + confusable_characters, + ) + except ImportError: raise ImportError( "Confusables dependency is not installed. Install with: pip install confusables" ) from ImportError("confusables library not available") diff --git a/poetry.lock b/poetry.lock index 0a331377..7bbbf5ef 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "aiobotocore" @@ -285,7 +285,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -810,15 +810,14 @@ files = [ [[package]] name = "datasets" -version = "3.6.0" +version = "4.0.0" description = "HuggingFace community-driven open-source library of datasets" -optional = true +optional = false python-versions = ">=3.9.0" groups = ["main"] -markers = "extra == \"dev\"" files = [ - {file = "datasets-3.6.0-py3-none-any.whl", hash = "sha256:25000c4a2c0873a710df127d08a202a06eab7bf42441a6bc278b499c2f72cd1b"}, - {file = "datasets-3.6.0.tar.gz", hash = "sha256:1b2bf43b19776e2787e181cfd329cb0ca1a358ea014780c3581e0f276375e041"}, + {file = "datasets-4.0.0-py3-none-any.whl", hash = "sha256:7ef95e62025fd122882dbce6cb904c8cd3fbc829de6669a5eb939c77d50e203d"}, + {file = "datasets-4.0.0.tar.gz", hash = "sha256:9657e7140a9050db13443ba21cb5de185af8af944479b00e7ff1e00a61c8dbf1"}, ] [package.dependencies] @@ -837,18 +836,17 @@ tqdm = ">=4.66.3" xxhash = "*" [package.extras] -audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0)"] +audio = ["soundfile (>=0.12.1)", "torch (>=2.7.0)", "torchcodec (>=0.4.0)"] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyav", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch", "torch 
(>=2.0.0)", "torchdata", "torchvision", "transformers", "transformers (>=4.42.0)", "zstandard"] -docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] +dev = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "numba (>=0.56.4)", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\" and sys_platform != \"win32\"", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0) ; python_version < \"3.10\" and sys_platform != \"win32\"", "tiktoken", "torch", "torch (>=2.0.0)", "torchcodec (>=0.4.0) ; sys_platform != \"win32\"", "torchdata", "transformers", "transformers (>=4.42.0)", "zstandard"] +docs = ["tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] pdfs = ["pdfplumber (>=0.11.4)"] quality = ["ruff (>=0.3.0)"] -s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.6.0)"] tensorflow-gpu = ["tensorflow (>=2.6.0)"] -tests = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyav", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch (>=2.0.0)", "torchdata", "torchvision", "transformers (>=4.42.0)", "zstandard"] -tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyav", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "torchdata", "torchvision", "transformers (>=4.42.0)", "zstandard"] +tests = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "numba (>=0.56.4)", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\" and sys_platform != \"win32\"", "tensorflow (>=2.6.0) ; python_version < \"3.10\" and sys_platform != \"win32\"", "tiktoken", "torch (>=2.0.0)", "torchcodec (>=0.4.0) ; sys_platform != \"win32\"", "torchdata", "transformers (>=4.42.0)", "zstandard"] +tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", 
"elasticsearch (>=7.17.12,<8.0.0)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "numba (>=0.56.4)", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "torchcodec (>=0.4.0) ; sys_platform != \"win32\"", "torchdata", "transformers (>=4.42.0)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=9.4.0)"] @@ -904,10 +902,9 @@ files = [ name = "dill" version = "0.3.8" description = "serialize all of Python" -optional = true +optional = false python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"dev\"" files = [ {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, @@ -1623,7 +1620,7 @@ description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "ipython-8.37.0-py3-none-any.whl", hash = "sha256:ed87326596b878932dbcb171e3e698845434d8c61b8d8cd474bf663041a9dcf2"}, {file = "ipython-8.37.0.tar.gz", hash = "sha256:ca815841e1a41a1e6b73a0b08f3038af9b2252564d01fc405356d34033012216"}, @@ -2714,10 +2711,9 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} name = "multiprocess" version = "0.70.16" description = "better multiprocessing and multithreading in Python" -optional = true +optional = false python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"dev\"" files = [ {file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"}, {file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"}, @@ -2877,7 +2873,7 @@ description = "Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"}, {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"}, @@ -2960,7 +2956,7 @@ description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, @@ -4144,10 +4140,9 @@ tests = ["pytest"] name = "pyarrow" version = "19.0.1" description = "Python library for Apache Arrow" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"dev\"" files = [ {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = 
"sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69"}, {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec"}, @@ -5481,7 +5476,7 @@ description = "Fundamental algorithms for scientific computing in Python" optional = true python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"training\" or extra == \"text\")" +markers = "python_version < \"3.11\" and (extra == \"training\" or extra == \"text\")" files = [ {file = "scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c"}, {file = "scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253"}, @@ -6376,7 +6371,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -7029,10 +7024,9 @@ files = [ name = "xxhash" version = "3.5.0" description = "Python binding for xxHash" -optional = true +optional = false python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"dev\"" files = [ {file = "xxhash-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ece616532c499ee9afbb83078b1b952beffef121d989841f7f4b3dc5ac0fd212"}, {file = "xxhash-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3171f693dbc2cef6477054a665dc255d996646b4023fe56cb4db80e26f4cc520"}, @@ -7299,7 +7293,7 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_it type = ["pytest-mypy"] [extras] -dev = ["datasets", "ipykernel", "markdown", "markdownify", "mkdocstrings-python", "mypy", "pandas-stubs", "pre-commit", "pyarrow", "pytest", "pytest-asyncio", "ruff", "typer", "types-protobuf", "types-requests"] +dev = ["ipykernel", "markdown", "markdownify", "mkdocstrings-python", "mypy", "pandas-stubs", "pre-commit", "pyarrow", "pytest", "pytest-asyncio", "ruff", "typer", "types-protobuf", "types-requests"] multimodal = ["moviepy", "pillow", "soundfile"] text = ["art", "confusables", "nltk", "presidio-analyzer", "rapidfuzz", "scikit-learn", "textblob", "textstat"] training = ["sentence-transformers", "transformers"] @@ -7307,4 +7301,4 @@ training = ["sentence-transformers", "transformers"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "c2495ca89db7e42c85802bf951614fb8cf1648363ce9f94edd71616ef6e242d7" +content-hash = "a72bf575d623bcfc530e9e24552b1244e4bc60fa881f151bd97bcc87d2048a1c" diff --git a/pyproject.toml b/pyproject.toml index 68b4dd56..a5606893 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,10 +25,14 @@ dependencies = [ "mkdocstrings-python[dev] (>=1.18.2,<2.0.0)", "markdownify[dev] (>=1.2.0,<2.0.0)", "networkx>=3.3,<4.0.0", + "datasets (>=4.0.0,<5.0.0)", ] [project.optional-dependencies] -training = ["transformers>=4.41.0,<5.0.0", "sentence-transformers>=5.1.0,<6.0.0",] +training = [ + "transformers>=4.41.0,<5.0.0", + "sentence-transformers>=5.1.0,<6.0.0", +] multimodal = [ "pillow>=11.2.1,<12.0.0", @@ -59,7 +63,6 @@ dev = [ "pandas-stubs>=2.2.3.250308", "types-requests>=2.32.0.20250306", 
"typer>=0.15.2,<1.0.0", - "datasets>=3.5.0,<4.0.0", "pyarrow>=19.0.1,<20.0.0", "markdown>=3.8.2,<4.0.0", "markdownify>=1.1.0,<2.0.0", @@ -105,6 +108,10 @@ strict = true python_version = "3.10" exclude = "tests" +[[tool.mypy.overrides]] +module = ["dreadnode.scorers.*"] +disable_error_code = ["unused-ignore", "import-untyped"] + [tool.ty.environment] python-version = "3.10" @@ -150,10 +157,10 @@ ignore = [ "FIX002", # contains todo, consider fixing "COM812", # disabled for formatting "ISC001", # disabled for formatting - "PLC0415", # import should be at top-level (lazy imports) - "FBT001", # boolean positional argument (legitimate cases) - "FURB122", # use f.writelines (minor optimization) - "F401", # ignore imported but unused + "PLC0415", # import should be at top-level (lazy imports) + "FBT001", # boolean positional argument (legitimate cases) + "FURB122", # use f.writelines (minor optimization) + "F401", # ignore imported but unused ] [tool.ruff.format] From cb168b079fe49eaace81cf0be5b45eb08de12749 Mon Sep 17 00:00:00 2001 From: Brian Greunke Date: Fri, 5 Sep 2025 20:37:57 -0500 Subject: [PATCH 2/4] fix: suppress certain mypy errors in transforms --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a5606893..457935bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,7 +109,7 @@ python_version = "3.10" exclude = "tests" [[tool.mypy.overrides]] -module = ["dreadnode.scorers.*"] +module = ["dreadnode.scorers.*", "dreadnode.transforms.*"] disable_error_code = ["unused-ignore", "import-untyped"] [tool.ty.environment] From fdf268ae27939d98b859883e7d4bcacd5d27c3f1 Mon Sep 17 00:00:00 2001 From: Brian Greunke Date: Fri, 5 Sep 2025 20:47:15 -0500 Subject: [PATCH 3/4] chore: removed test code case --- dreadnode/scorers/similarity.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dreadnode/scorers/similarity.py b/dreadnode/scorers/similarity.py index 918ace7d..08866e69 100644 --- a/dreadnode/scorers/similarity.py +++ b/dreadnode/scorers/similarity.py @@ -269,7 +269,6 @@ def disabled_evaluate(_: t.Any) -> Metric: return Scorer(disabled_evaluate, name=name) vectorizer = TfidfVectorizer(stop_words="english") - a = 1 def evaluate(data: t.Any, *, reference: str = reference) -> Metric: candidate_text = str(data) From 5bc23553929205518baa216a7a51eec4d2a8055b Mon Sep 17 00:00:00 2001 From: Brian Greunke Date: Fri, 5 Sep 2025 21:56:46 -0500 Subject: [PATCH 4/4] refactor: dry import error messages --- dreadnode/scorers/classification.py | 6 ++---- dreadnode/scorers/pii.py | 9 +++------ dreadnode/scorers/sentiment.py | 4 ++-- dreadnode/scorers/similarity.py | 14 +++++++------- dreadnode/util.py | 7 +++++++ 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/dreadnode/scorers/classification.py b/dreadnode/scorers/classification.py index ca572fdf..3989f63d 100644 --- a/dreadnode/scorers/classification.py +++ b/dreadnode/scorers/classification.py @@ -3,7 +3,7 @@ from dreadnode.meta import Config from dreadnode.metric import Metric from dreadnode.scorers import Scorer -from dreadnode.util import clean_str, warn_at_user_stacklevel +from dreadnode.util import clean_str, generate_import_error_msg, warn_at_user_stacklevel # Global cache for pipelines g_transformer_pipeline_cache: dict[str, t.Any] = {} @@ -30,9 +30,7 @@ def zero_shot_classification( model_name: The name of the zero-shot model from Hugging Face Hub. name: Name of the scorer. """ - transformers_error_msg = ( - "Transformers dependency is not installed. 
Install with: pip install transformers" - ) + transformers_error_msg = generate_import_error_msg("transformers", "training") try: from transformers import pipeline # type: ignore[import-not-found] diff --git a/dreadnode/scorers/pii.py b/dreadnode/scorers/pii.py index e929465f..56f13dc7 100644 --- a/dreadnode/scorers/pii.py +++ b/dreadnode/scorers/pii.py @@ -4,7 +4,7 @@ from dreadnode.metric import Metric from dreadnode.scorers import Scorer from dreadnode.scorers.contains import contains -from dreadnode.util import warn_at_user_stacklevel +from dreadnode.util import generate_import_error_msg, warn_at_user_stacklevel if t.TYPE_CHECKING: from presidio_analyzer import AnalyzerEngine # type: ignore[import-not-found] @@ -103,13 +103,10 @@ def detect_pii_with_presidio( invert: Invert the score (1.0 for no PII, 0.0 for PII detected). name: Name of the scorer. """ - presidio_import_error_msg = ( - "Presidio dependencies are not installed. " - "Install with: pip install presidio-analyzer presidio-anonymizer 'spacy[en_core_web_lg]'" - ) + presidio_import_error_msg = generate_import_error_msg("presidio-analyzer", "text") try: - import presidio_analyzer # type: ignore[import-not-found,unused-ignore] + import presidio_analyzer # type: ignore[import-not-found] except ImportError: warn_at_user_stacklevel(presidio_import_error_msg, UserWarning) diff --git a/dreadnode/scorers/sentiment.py b/dreadnode/scorers/sentiment.py index 0389cc8c..0907ecb6 100644 --- a/dreadnode/scorers/sentiment.py +++ b/dreadnode/scorers/sentiment.py @@ -6,7 +6,7 @@ from dreadnode.meta import Config from dreadnode.metric import Metric from dreadnode.scorers.base import Scorer -from dreadnode.util import warn_at_user_stacklevel +from dreadnode.util import generate_import_error_msg, warn_at_user_stacklevel Sentiment = t.Literal["positive", "negative", "neutral"] @@ -29,7 +29,7 @@ def sentiment( target: The desired sentiment to score against. name: Name of the scorer. """ - textblob_import_error_msg = "TextBlob dependency is not installed. Install with: pip install textblob && python -m textblob.download_corpora" + textblob_import_error_msg = generate_import_error_msg("textblob", "text") try: from textblob import TextBlob # type: ignore[import-not-found] diff --git a/dreadnode/scorers/similarity.py b/dreadnode/scorers/similarity.py index 08866e69..28200b34 100644 --- a/dreadnode/scorers/similarity.py +++ b/dreadnode/scorers/similarity.py @@ -5,7 +5,7 @@ from dreadnode.metric import Metric from dreadnode.scorers.base import Scorer from dreadnode.scorers.util import cosine_similarity -from dreadnode.util import warn_at_user_stacklevel +from dreadnode.util import generate_import_error_msg, warn_at_user_stacklevel if t.TYPE_CHECKING: from sentence_transformers import ( # type: ignore[import-not-found] @@ -88,7 +88,7 @@ def similarity_with_rapidfuzz( score_cutoff: Optional score cutoff below which to return 0.0. name: Name of the scorer. """ - rapidfuzz_import_error_msg = "RapidFuzz dependency is not installed. Please install it with: pip install rapidfuzz or dreadnode[text]" + rapidfuzz_import_error_msg = generate_import_error_msg("rapidfuzz", "text") try: from rapidfuzz import fuzz, utils # type: ignore[import-not-found] except ImportError: @@ -182,7 +182,7 @@ def string_distance( normalize: Normalize distances and convert to similarity scores. name: Name of the scorer. """ - rapidfuzz_import_error_msg = "RapidFuzz dependency is not installed. 
Please install it with: pip install rapidfuzz or dreadnode[text]" + rapidfuzz_import_error_msg = generate_import_error_msg("rapidfuzz", "text") try: from rapidfuzz import distance # type: ignore[import-not-found] @@ -251,7 +251,7 @@ def similarity_with_tf_idf(reference: str, *, name: str = "similarity") -> "Scor reference: The reference text (e.g., expected output). name: Name of the scorer. """ - sklearn_import_error_msg = "scikit-learn dependency is not installed. Please install it with: pip install scikit-learn or dreadnode[text]" + sklearn_import_error_msg = generate_import_error_msg("scikit-learn", "text") try: from sklearn.feature_extraction.text import ( # type: ignore[import-not-found] @@ -303,7 +303,7 @@ def similarity_with_sentence_transformers( model_name: The name of the sentence-transformer model to use. name: Name of the scorer. """ - sentence_transformers_error_msg = "Sentence transformers dependency is not installed. Please install it with: pip install sentence-transformers or dreadnode[training]" + sentence_transformers_error_msg = generate_import_error_msg("sentence-transformers", "training") try: from sentence_transformers import ( # type: ignore[import-not-found] @@ -367,7 +367,7 @@ def similarity_with_litellm( or self-hosted models. name: Name of the scorer. """ - litellm_import_error_msg = "litellm dependency is not installed. Please install it with: pip install litellm or dreadnode[text]" + litellm_import_error_msg = generate_import_error_msg("litellm", "text") try: import litellm except ImportError: @@ -430,7 +430,7 @@ def bleu( weights: Weights for unigram, bigram, etc. Must sum to 1. name: Name of the scorer. """ - nltk_import_error_msg = "NLTK dependency is not installed. Install with: pip install nltk && python -m nltk.downloader punkt" + nltk_import_error_msg = generate_import_error_msg("nltk", "text") try: import nltk # type: ignore[import-not-found] diff --git a/dreadnode/util.py b/dreadnode/util.py index ee569c39..234c3e50 100644 --- a/dreadnode/util.py +++ b/dreadnode/util.py @@ -154,6 +154,13 @@ def format_dict(data: dict[str, t.Any], max_length: int = 80) -> str: return f"{{{formatted}}}" +def generate_import_error_msg(package_name: str, extras_name: str) -> str: + return ( + f"Missing required package '{package_name}'. " + f"Please install it with: pip install {package_name} or dreadnode[{extras_name}]" + ) + + # Types
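Reviewer's note on the pattern, since it is easy to lose across the hunks: after this series, each scorer factory builds its install-hint message up front, imports the optional dependency lazily inside a try/except ImportError, and returns a disabled scorer that records the failure instead of raising. Below is a minimal, self-contained sketch of that shape. The Metric and Scorer dataclasses, the warnings.warn call, and the fuzz.ratio scoring body are simplified stand-ins, not the actual dreadnode types or API; only the try/except import shape and the generate_import_error_msg helper mirror the diffs above.

import typing as t
import warnings
from dataclasses import dataclass, field


@dataclass
class Metric:
    """Simplified stand-in for dreadnode.metric.Metric."""

    value: float
    attributes: dict[str, t.Any] = field(default_factory=dict)


@dataclass
class Scorer:
    """Simplified stand-in for dreadnode.scorers.base.Scorer."""

    evaluate: t.Callable[[t.Any], Metric]
    name: str


def generate_import_error_msg(package_name: str, extras_name: str) -> str:
    # Mirrors the helper PATCH 4/4 adds to dreadnode/util.py.
    return (
        f"Missing required package '{package_name}'. "
        f"Install it with 'pip install {package_name}' or 'pip install \"dreadnode[{extras_name}]\"'."
    )


def similarity_with_rapidfuzz(reference: str, *, name: str = "similarity") -> Scorer:
    error_msg = generate_import_error_msg("rapidfuzz", "text")

    try:
        # Lazy import: a base install stays usable without the 'text' extras.
        from rapidfuzz import fuzz  # type: ignore[import-not-found]
    except ImportError:
        warnings.warn(error_msg, UserWarning, stacklevel=2)

        def disabled_evaluate(_: t.Any) -> Metric:
            # Degrade gracefully: score 0.0 and record why in the attributes.
            return Metric(value=0.0, attributes={"error": error_msg})

        return Scorer(disabled_evaluate, name=name)

    def evaluate(data: t.Any) -> Metric:
        # fuzz.ratio returns 0-100; normalize to the 0.0-1.0 range.
        return Metric(value=fuzz.ratio(reference, str(data)) / 100.0)

    return Scorer(evaluate, name=name)

The key design choice is that a missing extra degrades a single scorer rather than breaking import of the whole package, which is exactly what the removed module-level imports used to do.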
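And a quick usage sketch of the PATCH 4/4 helper, with the messages it produces shown as comments (assuming the helper exactly as added to dreadnode/util.py above):

from dreadnode.util import generate_import_error_msg

print(generate_import_error_msg("rapidfuzz", "text"))
# Missing required package 'rapidfuzz'. Install it with 'pip install rapidfuzz' or 'pip install "dreadnode[text]"'.

print(generate_import_error_msg("sentence-transformers", "training"))
# Missing required package 'sentence-transformers'. Install it with 'pip install sentence-transformers' or 'pip install "dreadnode[training]"'.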