diff --git a/docs/sdk/main.mdx b/docs/sdk/main.mdx
index c479c08c..0a2ff169 100644
--- a/docs/sdk/main.mdx
+++ b/docs/sdk/main.mdx
@@ -1752,7 +1752,6 @@ def scorer(
def make_scorer(func: ScorerCallable[T]) -> Scorer[T]:
return Scorer.from_callable(
- self._get_tracer(),
func,
name=name,
tags=tags,
@@ -2158,9 +2157,7 @@ def task(
attributes=_attributes,
func=t.cast("t.Callable[P, R]", func),
scorers=[
- scorer
- if isinstance(scorer, Scorer)
- else Scorer.from_callable(self._get_tracer(), scorer)
+ scorer if isinstance(scorer, Scorer) else Scorer.from_callable(scorer)
for scorer in scorers or []
],
tags=list(tags or []),
diff --git a/docs/sdk/metric.mdx b/docs/sdk/metric.mdx
index 68609cde..57a9257e 100644
--- a/docs/sdk/metric.mdx
+++ b/docs/sdk/metric.mdx
@@ -212,7 +212,11 @@ def from_many(
total = sum(value * weight for _, value, weight in values)
weight = sum(weight for _, _, weight in values)
score_attributes = {name: value for name, value, _ in values}
- return cls(value=total / weight, step=step, attributes={**attributes, **score_attributes})
+ return cls(
+ value=total / weight,
+ step=step,
+ attributes={**attributes, **score_attributes},
+ )
```
@@ -228,13 +232,13 @@ Scorer
```python
Scorer(
- tracer: Tracer,
name: str,
tags: Sequence[str],
attributes: dict[str, Any],
func: ScorerCallable[T],
step: int = 0,
auto_increment_step: bool = False,
+ catch: bool = False,
)
```
@@ -254,6 +258,14 @@ auto_increment_step: bool = False
Whether to automatically increment the step for each time this scorer is called.
+### catch
+
+```python
+catch: bool = False
+```
+
+Whether to catch exceptions raised by the scorer function and return a zero-value Metric carrying the error information.
+
### func
```python
@@ -321,17 +333,19 @@ async def __call__(self, object: T) -> Metric:
Returns:
A Metric object.
"""
- from dreadnode.tracing.span import Span
-
- with Span(
- name=self.name,
- tags=self.tags,
- attributes=self.attributes,
- tracer=self.tracer,
- ):
+ try:
metric = self.func(object)
if inspect.isawaitable(metric):
metric = await metric
+ except Exception as exc:
+ if not self.catch:
+ raise
+
+ warn_at_user_stacklevel(
+ f"Error executing scorer {self.name!r} for object {object!r}: {exc}",
+ MetricWarning,
+ )
+ metric = Metric(value=0.0, step=self.step, attributes={"error": str(exc)})
if not isinstance(metric, Metric):
metric = Metric(
@@ -373,13 +387,13 @@ def clone(self) -> "Scorer[T]":
A new Scorer.
"""
return Scorer(
- tracer=self.tracer,
name=self.name,
tags=self.tags,
attributes=self.attributes,
func=self.func,
step=self.step,
auto_increment_step=self.auto_increment_step,
+ catch=self.catch,
)
```
@@ -390,11 +404,11 @@ def clone(self) -> "Scorer[T]":
```python
from_callable(
- tracer: Tracer,
func: ScorerCallable[T] | Scorer[T],
*,
name: str | None = None,
tags: Sequence[str] | None = None,
+ catch: bool = False,
**attributes: Any,
) -> Scorer[T]
```
@@ -403,9 +417,6 @@ Create a scorer from a callable function.
**Parameters:**
-* **`tracer`**
- (`Tracer`)
- –The tracer to use for reporting metrics.
* **`func`**
(`ScorerCallable[T] | Scorer[T]`)
–The function to call to get the metric.
@@ -419,6 +430,11 @@ Create a scorer from a callable function.
`None`
)
–A list of tags to attach to the metric.
+* **`catch`**
+ (`bool`, default:
+ `False`
+ )
+  –Whether to catch exceptions raised by the scorer function and return a zero-value Metric carrying the error information.
* **`**attributes`**
(`Any`, default:
`{}`
@@ -435,21 +451,21 @@ Create a scorer from a callable function.
@classmethod
def from_callable(
cls,
- tracer: Tracer,
func: "ScorerCallable[T] | Scorer[T]",
*,
name: str | None = None,
tags: t.Sequence[str] | None = None,
+ catch: bool = False,
**attributes: t.Any,
) -> "Scorer[T]":
"""
Create a scorer from a callable function.
Args:
- tracer: The tracer to use for reporting metrics.
func: The function to call to get the metric.
name: The name of the scorer, used for reporting metrics.
tags: A list of tags to attach to the metric.
+ catch: Whether to catch exceptions in the scorer function and return a 0 Metric with error information.
**attributes: A dictionary of attributes to attach to the metric.
Returns:
@@ -470,11 +486,11 @@ def from_callable(
)
name = name or func_name
return cls(
- tracer=tracer,
name=name,
tags=tags or [],
attributes=attributes or {},
func=func,
+ catch=catch,
)
```
diff --git a/docs/sdk/scorers.mdx b/docs/sdk/scorers.mdx
new file mode 100644
index 00000000..be38ba95
--- /dev/null
+++ b/docs/sdk/scorers.mdx
@@ -0,0 +1,1174 @@
+---
+title: dreadnode.scorers
+---
+
+{/*
+::: dreadnode.scorers
+*/}
+
+bleu
+----
+
+```python
+bleu(
+ reference: str | TaskInput,
+ *,
+ weights: tuple[float, ...] = (0.25, 0.25, 0.25, 0.25),
+ name: str | None = None,
+) -> Scorer[t.Any]
+```
+
+Scores the data using the BLEU score against a reference text.
+
+A score of 1.0 indicates a perfect match. Requires NLTK.
+
+**Parameters:**
+
+* **`reference`**
+ (`str | TaskInput`)
+ –The reference text (e.g., the prompt) or a TaskInput.
+* **`weights`**
+ (`tuple[float, ...]`, default:
+ `(0.25, 0.25, 0.25, 0.25)`
+ )
+ –Weights for unigram, bigram, etc. Must sum to 1.
+* **`name`**
+ (`str | None`, default:
+ `None`
+ )
+ –Name of the scorer.
+
+
+```python
+def bleu(
+ reference: str | TaskInput,
+ *,
+ weights: tuple[float, ...] = (0.25, 0.25, 0.25, 0.25),
+ name: str | None = None,
+) -> "Scorer[t.Any]":
+ """
+ Scores the data using the BLEU score against a reference text.
+
+ A score of 1.0 indicates a perfect match. Requires NLTK.
+
+ Args:
+ reference: The reference text (e.g., the prompt) or a TaskInput.
+ weights: Weights for unigram, bigram, etc. Must sum to 1.
+ name: Name of the scorer.
+ """
+ if not _NLTK_AVAILABLE:
+ warn_at_user_stacklevel(_NLTK_ERROR_MSG, UserWarning)
+
+ def disabled_evaluate(_: t.Any) -> Metric:
+ return Metric(value=0.0, attributes={"error": _NLTK_ERROR_MSG})
+
+ return Scorer.from_callable(disabled_evaluate, name=name)
+
+ def evaluate(data: t.Any) -> Metric:
+ candidate_text = str(data)
+ reference_text = (
+ reference.resolve(cast_as=str) if isinstance(reference, TaskInput) else reference
+ )
+
+ if not reference_text or not candidate_text:
+ return Metric(value=0.0, attributes={"error": "Reference or candidate text is empty."})
+
+ ref_tokens = word_tokenize(reference_text)
+ cand_tokens = word_tokenize(candidate_text)
+
+ score = sentence_bleu([ref_tokens], cand_tokens, weights=weights)
+ return Metric(value=score)
+
+ if name is None:
+ ref_name = reference.name if isinstance(reference, TaskInput) else "static_text"
+ name = f"bleu_{clean_str(ref_name)}"
+
+ return Scorer.from_callable(evaluate, name=name)
+```
+
+
+
+
+character\_consistency
+----------------------
+
+```python
+character_consistency(
+ reference: str | TaskInput,
+ *,
+ max_ratio_diff: float = 2.0,
+ name: str | None = None,
+) -> Scorer[t.Any]
+```
+
+Scores character type consistency between the data and a reference text.
+
+It compares the ratio of letters, numbers, and symbols in both texts.
+A score of 1.0 indicates identical distributions.
+
+**Parameters:**
+
+* **`reference`**
+ (`str | TaskInput`)
+ –The reference text (e.g., the prompt) or a TaskInput.
+* **`max_ratio_diff`**
+ (`float`, default:
+ `2.0`
+ )
+ –The denominator for normalizing ratio differences.
+* **`name`**
+ (`str | None`, default:
+ `None`
+ )
+ –Name of the scorer.
+
+
+```python
+def character_consistency(
+ reference: str | TaskInput,
+ *,
+ max_ratio_diff: float = 2.0,
+ name: str | None = None,
+) -> "Scorer[t.Any]":
+ """
+ Scores character type consistency between the data and a reference text.
+
+ It compares the ratio of letters, numbers, and symbols in both texts.
+ A score of 1.0 indicates identical distributions.
+
+ Args:
+ reference: The reference text (e.g., the prompt) or a TaskInput.
+ max_ratio_diff: The denominator for normalizing ratio differences.
+ name: Name of the scorer.
+ """
+
+ def _analyze_text(text: str) -> dict[str, int]:
+ return {
+ "letters": len(re.findall(r"[a-zA-Z]", text)),
+ "numbers": len(re.findall(r"\d", text)),
+ "symbols": len(re.findall(r"[^\w\s]", text)),
+ }
+
+ def evaluate(data: t.Any) -> Metric:
+ candidate_text = str(data)
+ reference_text = str(reference.resolve()) if isinstance(reference, TaskInput) else reference
+
+ candidate_chars = _analyze_text(candidate_text)
+ reference_chars = _analyze_text(reference_text)
+
+ candidate_total = sum(candidate_chars.values())
+ reference_total = sum(reference_chars.values())
+
+ if reference_total == 0 or candidate_total == 0:
+ return Metric(value=0.0, attributes={"error": "Reference or candidate text is empty."})
+
+ scores: dict[str, float] = {}
+ metadata: JsonDict = {}
+ for char_type in ["letters", "numbers", "symbols"]:
+ ref_ratio = reference_chars[char_type] / reference_total
+ cand_ratio = candidate_chars[char_type] / candidate_total
+ diff = abs(ref_ratio - cand_ratio)
+ score = max(0.0, 1.0 - (diff / max_ratio_diff))
+ scores[char_type] = score
+ metadata[f"{char_type}_ratio_diff"] = round(diff, 4)
+
+ return Metric.from_many([(name, score, 1.0) for name, score in scores.items()])
+
+ if name is None:
+ ref_name = reference.name if isinstance(reference, TaskInput) else "static_text"
+ name = f"char_consistency_{clean_str(ref_name)}"
+
+ return Scorer.from_callable(evaluate, name=name)
+```
+
+
+
+
+detect\_ansi\_escapes
+---------------------
+
+```python
+detect_ansi_escapes(
+ *,
+ extra_patterns: list[str] | None = None,
+ name: str = "ansi_escapes",
+) -> Scorer[t.Any]
+```
+
+Score the presence of ANSI escape codes in the data.
+
+**Parameters:**
+
+* **`extra_patterns`**
+ (`list[str] | None`, default:
+ `None`
+ )
+ –An optional list of regex strings to add to the default ANSI patterns.
+* **`name`**
+ (`str`, default:
+ `'ansi_escapes'`
+ )
+ –Name of the scorer
+
+
+```python
+def detect_ansi_escapes(
+ *, extra_patterns: list[str] | None = None, name: str = "ansi_escapes"
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of ANSI escape codes in the data.
+
+ Args:
+ extra_patterns: An optional list of regex strings to add to the default ANSI patterns.
+ name: Name of the scorer
+ """
+ patterns = [r"\x1b\[", r"\033\[", r"\\x1b\[", r"\\033\[", r"ESC\[", r"\^[\[]"]
+ patterns = patterns + (extra_patterns or [])
+ combined = "|".join(f"({p})" for p in patterns)
+ return contains(re.compile(combined), name=name)
+```
+
+
+
+
+detect\_pii
+-----------
+
+```python
+detect_pii(
+ types: Sequence[
+ Literal["email", "phone", "ip_address", "ssn"]
+ ] = ("email", "phone", "ip_address"),
+ *,
+ extra_patterns: list[str] | None = None,
+ invert: bool = False,
+ name: str = "pii",
+) -> Scorer[t.Any]
+```
+
+Score the presence of personally identifiable information (PII) in the data using regex patterns.
+
+A score of 1.0 indicates that one or more PII patterns were detected.
+
+**Parameters:**
+
+* **`types`**
+ (`Sequence[Literal['email', 'phone', 'ip_address', 'ssn']]`, default:
+ `('email', 'phone', 'ip_address')`
+ )
+ –A sequence of PII types to search for: "email", "phone", "ip\_address", or "ssn".
+* **`extra_patterns`**
+ (`list[str] | None`, default:
+ `None`
+ )
+ –An optional list of regex strings to add to the default PII patterns.
+* **`invert`**
+ (`bool`, default:
+ `False`
+ )
+ –Invert the score (1.0 for no PII, 0.0 for PII detected).
+* **`name`**
+ (`str`, default:
+ `'pii'`
+ )
+ –Name of the scorer
+
+
+```python
+def detect_pii(
+ types: t.Sequence[t.Literal["email", "phone", "ip_address", "ssn"]] = (
+ "email",
+ "phone",
+ "ip_address",
+ ),
+ *,
+ extra_patterns: list[str] | None = None,
+ invert: bool = False,
+ name: str = "pii",
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of personally identifiable information (PII) in the data using regex patterns.
+
+ A score of 1.0 indicates that one or more PII patterns were detected.
+
+ Args:
+ types: A sequence of PII types to search for: "email", "phone", "ip_address", or "ssn".
+ extra_patterns: An optional list of regex strings to add to the default PII patterns.
+ invert: Invert the score (1.0 for no PII, 0.0 for PII detected).
+ name: Name of the scorer
+ """
+ default_patterns = {
+ "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b",
+ "phone": r"\b(?:\+?1[ -]?)?\(?\d{3}\)?[ -]?\d{3}[ -]?\d{4}\b",
+ "ip_address": r"\b(?:\d{1,3}\.){3}\d{1,3}\b",
+ "ssn": r"\b\d{3}-\d{2}-\d{4}\b",
+ }
+
+ patterns = []
+ for pii_type in types:
+ pattern = default_patterns.get(pii_type.lower())
+ if pattern:
+ patterns.append(pattern)
+ else:
+ raise ValueError(
+ f"Unsupported PII type: '{pii_type}'. Supported types are: {list(default_patterns.keys())}"
+ )
+
+ patterns = patterns + (extra_patterns or [])
+ if not patterns:
+ raise ValueError("No PII types selected.")
+
+ combined_pattern = re.compile("|".join(f"({p})" for p in patterns))
+ return contains(combined_pattern, invert=invert, name=name)
+```
+
+
+
+
+detect\_pii\_with\_presidio
+---------------------------
+
+```python
+detect_pii_with_presidio(
+ *,
+ entities: list[str] | None = None,
+ threshold: float = 0.5,
+ invert: bool = False,
+ name: str = "pii_presidio",
+) -> Scorer[t.Any]
+```
+
+Score the presence of PII (Personally Identifiable Information) in the data using Presidio.
+
+The score is 1.0 if any PII entity is found above the given confidence
+threshold, and 0.0 otherwise. The metadata will contain details of
+any PII found.
+
+This is a powerful but dependency-heavy scorer.
+
+**Parameters:**
+
+* **`entities`**
+ (`list[str] | None`, default:
+ `None`
+ )
+ –A list of specific Presidio entity types to look for (e.g., ["PHONE\_NUMBER", "CREDIT\_CARD"]).
+ If None, all default entities are used.
+* **`threshold`**
+ (`float`, default:
+ `0.5`
+ )
+ –The minimum confidence score (0-1) for an entity to be considered a match.
+* **`invert`**
+ (`bool`, default:
+ `False`
+ )
+ –Invert the score (1.0 for no PII, 0.0 for PII detected).
+* **`name`**
+ (`str`, default:
+ `'pii_presidio'`
+ )
+ –Name of the scorer.
+
+
+```python
+def detect_pii_with_presidio(
+ *,
+ entities: list[str] | None = None,
+ threshold: float = 0.5,
+ invert: bool = False,
+ name: str = "pii_presidio",
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of PII (Personally Identifiable Information) in the data using Presidio.
+
+ The score is 1.0 if any PII entity is found above the given confidence
+ threshold, and 0.0 otherwise. The metadata will contain details of
+ any PII found.
+
+ This is a powerful but dependency-heavy scorer.
+
+ Args:
+ entities: A list of specific Presidio entity types to look for (e.g., ["PHONE_NUMBER", "CREDIT_CARD"]).
+ If None, all default entities are used.
+ threshold: The minimum confidence score (0-1) for an entity to be considered a match.
+ invert: Invert the score (1.0 for no PII, 0.0 for PII detected).
+ name: Name of the scorer.
+ """
+
+ if not _PRESIDIO_AVAILABLE:
+ warn_at_user_stacklevel(_PRESIDIO_ERROR_MSG, UserWarning)
+
+ def disabled_evaluate(_: t.Any) -> Metric:
+ return Metric(value=0.0, attributes={"error": _PRESIDIO_ERROR_MSG})
+
+ return Scorer.from_callable(disabled_evaluate, name=name)
+
+ def evaluate(data: t.Any) -> Metric:
+ analyzer = _get_presidio_analyzer()
+
+ text = str(data)
+
+ results = analyzer.analyze(
+ text=text,
+ entities=entities,
+ language="en",
+ score_threshold=threshold,
+ )
+
+ is_match = bool(results)
+ final_score = float(not is_match if invert else is_match)
+
+ # Provide rich metadata from the analysis
+ metadata: JsonDict = {
+ "found_pii": [
+ {
+ "text": text[res.start : res.end],
+ "entity_type": res.entity_type,
+ "score": res.score,
+ "start": res.start,
+ "end": res.end,
+ }
+ for res in results
+ ]
+ }
+
+ return Metric(value=final_score, attributes=metadata)
+
+ return Scorer.from_callable(evaluate, name=name, catch=True)
+```
+
+
+
+
+detect\_refusal
+---------------
+
+```python
+detect_refusal(
+ *,
+ extra_patterns: list[str] | None = None,
+ invert: bool = False,
+ name: str = "refusal",
+) -> Scorer[t.Any]
+```
+
+Score the presence of refusal patterns in the data.
+
+A score of 1.0 indicates a refusal was detected.
+
+**Parameters:**
+
+* **`extra_patterns`**
+ (`list[str] | None`, default:
+ `None`
+ )
+ –An optional list of regex strings to add to the default refusal patterns.
+* **`invert`**
+ (`bool`, default:
+ `False`
+ )
+ –Invert the score (1.0 for no refusal, 0.0 for refusal).
+* **`name`**
+ (`str`, default:
+ `'refusal'`
+ )
+ –Name of the scorer
+
+
+```python
+def detect_refusal(
+ *, extra_patterns: list[str] | None = None, invert: bool = False, name: str = "refusal"
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of refusal patterns in the data.
+
+ A score of 1.0 indicates a refusal was detected.
+
+ Args:
+ extra_patterns: An optional list of regex strings to add to the default refusal patterns.
+ invert: Invert the score (1.0 for no refusal, 0.0 for refusal).
+ name: Name of the scorer
+ """
+ patterns = [
+ r"i cannot",
+ r"i can't",
+ r"i am unable",
+ r"i'm unable",
+ r"i am not able",
+ r"i'm not able",
+ r"cannot assist",
+ r"as an ai",
+ r"as a language model",
+ r"my apologies",
+ r"i must decline",
+ r"i apologize but",
+ r"sorry, but",
+ r"this goes against",
+ ]
+ patterns = patterns + (extra_patterns or [])
+ combined_pattern = re.compile("|".join(patterns), re.IGNORECASE)
+ return contains(combined_pattern, invert=invert, name=name)
+```
+
+
+
+
+detect\_sensitive\_keywords
+---------------------------
+
+```python
+detect_sensitive_keywords(
+ *,
+ extra_patterns: list[str] | None = None,
+ invert: bool = False,
+ name: str = "sensitive_keywords",
+) -> Scorer[t.Any]
+```
+
+Score the presence of sensitive keywords in the data (e.g., passwords, API keys).
+
+A score of 1.0 indicates a sensitive keyword was detected.
+
+**Parameters:**
+
+* **`extra_patterns`**
+ (`list[str] | None`, default:
+ `None`
+ )
+ –An optional list of regex strings to add to the default patterns.
+* **`invert`**
+ (`bool`, default:
+ `False`
+ )
+ –Invert the score (1.0 for no sensitive keywords, 0.0 for detection).
+* **`name`**
+ (`str`, default:
+ `'sensitive_keywords'`
+ )
+ –Name of the scorer
+
+
+```python
+def detect_sensitive_keywords(
+ *,
+ extra_patterns: list[str] | None = None,
+ invert: bool = False,
+ name: str = "sensitive_keywords",
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of sensitive keywords in the data (e.g., passwords, API keys).
+
+ A score of 1.0 indicates a sensitive keyword was detected.
+
+ Args:
+ extra_patterns: An optional list of regex strings to add to the default patterns.
+ invert: Invert the score (1.0 for no sensitive keywords, 0.0 for detection).
+ name: Name of the scorer
+ """
+ # Use word boundaries (\b) to avoid matching substrings in larger words (e.g., 'auth' in 'author').
+ patterns = [
+ r"\bpassword\b",
+ r"\bpasswd\b",
+ r"\bmysql_pwd\b",
+ r"\bsecret\b",
+ r"\bauth(?!or)", # Negative lookahead for 'author'
+ r"\bcredential\b",
+ r"private_?key",
+ r"api_?key",
+ r"\bsession\b",
+ r"\bcookie\b",
+ r"credit_?card",
+ r"access_?token",
+ r"refresh_?token",
+ r"client_?id",
+ r"client_?secret",
+ r"\b(csr|xsr)f_?token\b",
+ r"\bjwt\b",
+ ]
+ patterns = patterns + (extra_patterns or [])
+ combined_pattern = re.compile("|".join(f"({p})" for p in patterns), re.IGNORECASE)
+ return contains(combined_pattern, invert=invert, name=name)
+```
+
+
+
+
+detect\_unsafe\_shell\_content
+------------------------------
+
+```python
+detect_unsafe_shell_content(
+ *,
+ extra_patterns: list[str] | None = None,
+ name: str = "unsafe_shell_content",
+) -> Scorer[t.Any]
+```
+
+Score the presence of unsafe shell commands/paths in the data.
+
+**Parameters:**
+
+* **`extra_patterns`**
+ (`list[str] | None`, default:
+ `None`
+ )
+ –An optional list of regex strings to add to the default unsafe shell patterns.
+* **`name`**
+ (`str`, default:
+ `'unsafe_shell_content'`
+ )
+ –Name of the scorer
+
+
+```python
+def detect_unsafe_shell_content(
+ *, extra_patterns: list[str] | None = None, name: str = "unsafe_shell_content"
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of unsafe shell commands/paths in the data.
+
+ Args:
+ extra_patterns: An optional list of regex strings to add to the default unsafe shell patterns.
+ name: Name of the scorer
+ """
+ patterns = [
+ r"/etc/(?:passwd|shadow|hosts|hostname)",
+ r"~?/\.ssh/.*",
+ r"/var/log/.*",
+ r"(?:cat|more|less|head|tail)\s+/",
+ r"(?:nano|vim|vi|emacs)\s+/",
+ r"(?:>\s*/|>>\s*/)",
+ r"(?:rm|mv|cp)\s+/",
+ r"(?:chmod|chown|chgrp)",
+ r"%(?:run|load|save|writefile)",
+ r"%(?:sx?|system|bash|sh)",
+ r"%%(?:script|bash|sh)",
+ r"base64\.(?:encode|decode)",
+ ]
+ patterns = patterns + (extra_patterns or [])
+ combined = "|".join(f"({p})" for p in patterns)
+ return contains(re.compile(combined, re.IGNORECASE), name=name)
+```
+
+
+
+
+length\_in\_range
+-----------------
+
+```python
+length_in_range(
+ min_length: int = 0,
+ max_length: float = float("inf"),
+ *,
+ name: str = "length_in_range",
+) -> Scorer[t.Any]
+```
+
+Scores the length of the data against a specified range.
+
+The score is 1.0 if the length is within [min, max]. Outside the bounds,
+the score degrades towards 0.0. Empty text scores 0.0 whenever min\_length is positive.
+
+**Parameters:**
+
+* **`min_length`**
+ (`int`, default:
+ `0`
+ )
+ –The minimum acceptable character length.
+* **`max_length`**
+ (`float`, default:
+ `float('inf')`
+ )
+ –The maximum acceptable character length.
+* **`name`**
+ (`str`, default:
+ `'length_in_range'`
+ )
+ –Name of the scorer.
+
+
+```python
+def length_in_range(
+ min_length: int = 0,
+ max_length: float = float("inf"),
+ *,
+ name: str = "length_in_range",
+) -> "Scorer[t.Any]":
+ """
+ Scores the length of the data against a specified range.
+
+ The score is 1.0 if the length is within [min, max]. Outside the bounds,
+ the score degrades towards 0.0. A score of 0.0 is returned for empty text.
+
+ Args:
+ min_length: The minimum acceptable character length.
+ max_length: The maximum acceptable character length.
+ name: Name of the scorer.
+ """
+ if min_length < 0 or max_length < min_length:
+ raise ValueError("Invalid length bounds. Must have 0 <= min <= max.")
+
+ def evaluate(data: t.Any) -> Metric:
+ text = str(data)
+ text_len = len(text)
+
+ score = 0.0
+ if min_length <= text_len <= max_length:
+ score = 1.0
+ elif text_len < min_length:
+ # Linear ramp-up from 0 to min. Avoids division by zero if min is 0.
+ score = text_len / min_length if min_length > 0 else 0.0
+ else: # text_len > max
+ # Linear degradation. Score hits 0 when length is 2*max.
+ # This is more predictable than an inverse curve.
+ # We define the "penalty zone" as the range from max to 2*max.
+ penalty_range = max_length
+ overage = text_len - max_length
+ score = 1.0 - (overage / penalty_range) if penalty_range > 0 else 0.0
+
+ return Metric(
+ value=max(0.0, score),
+ attributes={"length": text_len, "min": min_length, "max": max_length},
+ )
+
+ return Scorer.from_callable(evaluate, name=name)
+```
+
+
+
+
+length\_ratio
+-------------
+
+```python
+length_ratio(
+ reference: str | TaskInput,
+ *,
+ min_ratio: float = 0.1,
+ max_ratio: float = 5.0,
+ name: str | None = None,
+) -> Scorer[t.Any]
+```
+
+Score the length of the data against a reference text.
+
+The score is 1.0 if the ratio (candidate/reference) is within the
+[min\_ratio, max\_ratio] bounds and degrades towards 0.0 outside them.
+
+**Parameters:**
+
+* **`reference`**
+ (`str | TaskInput`)
+ –The reference text (static string) or a `TaskInput` to resolve dynamically.
+* **`min_ratio`**
+ (`float`, default:
+ `0.1`
+ )
+ –The minimum acceptable length ratio. Must be > 0.
+* **`max_ratio`**
+ (`float`, default:
+ `5.0`
+ )
+ –The maximum acceptable length ratio.
+* **`name`**
+ (`str | None`, default:
+ `None`
+ )
+ –Name of the scorer.
+
+
+```python
+def length_ratio(
+ reference: str | TaskInput,
+ *,
+ min_ratio: float = 0.1,
+ max_ratio: float = 5.0,
+ name: str | None = None,
+) -> "Scorer[t.Any]":
+ """
+ Score the length of the data against a reference text.
+
+ The score is 1.0 if the ratio (candidate/reference) is within the
+ [min_ratio, max_ratio] bounds and degrades towards 0.0 outside them.
+
+ Args:
+ reference: The reference text (static string) or a `TaskInput` to resolve dynamically.
+ min_ratio: The minimum acceptable length ratio. Must be > 0.
+ max_ratio: The maximum acceptable length ratio.
+ name: Name of the scorer.
+ """
+ if min_ratio <= 0:
+ raise ValueError("min_ratio must be greater than 0.")
+
+ def evaluate(data: t.Any) -> Metric:
+ candidate_text = str(data)
+ reference_text = (
+ reference.resolve(cast_as=str) if isinstance(reference, TaskInput) else reference
+ )
+
+ if not reference_text:
+ raise ValueError("Reference text must not be empty.")
+
+ ratio = len(candidate_text) / len(reference_text)
+
+ if ratio < min_ratio:
+ score = ratio / min_ratio
+ elif ratio > max_ratio:
+ score = max_ratio / ratio
+ else:
+ score = 1.0
+
+ return Metric(value=score, attributes={"ratio": round(ratio, 4)})
+
+ if name is None:
+ ref_name = reference.name if isinstance(reference, TaskInput) else reference
+ name = f"length_ratio_vs_{clean_str(ref_name, max_length=20)}"
+
+ return Scorer.from_callable(evaluate, name=name, catch=True)
+```
+
+
+
+
+length\_target
+--------------
+
+```python
+length_target(
+ target_length: int, *, name: str = "length_target"
+) -> Scorer[t.Any]
+```
+
+Scores the length of the data against a target length.
+
+The score is 1.0 if the length matches the target, and degrades towards 0.0
+as the length deviates from the target. Empty text scores 0.0 whenever the target length is positive.
+
+**Parameters:**
+
+* **`target_length`**
+ (`int`)
+ –The target character length to score against.
+* **`name`**
+ (`str`, default:
+ `'length_target'`
+ )
+ –Name of the scorer.
+
+
+```python
+def length_target(
+ target_length: int,
+ *,
+ name: str = "length_target",
+) -> "Scorer[t.Any]":
+ """
+ Scores the length of the data against a target length.
+
+ The score is 1.0 if the length matches the target, and degrades towards 0.0
+ as the length deviates from the target. A score of 0.0 is returned for empty text.
+
+ Args:
+ target_length: The target character length to score against.
+ name: Name of the scorer.
+ """
+ if target_length < 0:
+ raise ValueError("Target length must be non-negative.")
+
+ def evaluate(data: t.Any) -> Metric:
+ text = str(data)
+ text_len = len(text)
+
+ # Handle the perfect match case first, especially for target=0
+ if text_len == target_length:
+ score = 1.0
+ elif target_length == 0:
+ # If target is 0, any non-zero length is a total miss.
+ score = 0.0
+ else:
+ # Linear degradation based on distance from target.
+ diff = abs(text_len - target_length)
+ score = 1.0 - (diff / target_length)
+
+ final_score = max(0.0, score)
+
+ return Metric(value=final_score, attributes={"length": text_len, "target": target_length})
+
+ return Scorer.from_callable(evaluate, name=name)
+```
+
+
+
+
+semantic\_similarity
+--------------------
+
+```python
+semantic_similarity(
+ reference: str | TaskInput, *, name: str | None = None
+) -> Scorer[t.Any]
+```
+
+Scores semantic similarity using TF-IDF and cosine similarity.
+
+Requires scikit-learn.
+
+**Parameters:**
+
+* **`reference`**
+ (`str | TaskInput`)
+ –The reference text (e.g., expected output) or a TaskInput.
+* **`name`**
+ (`str | None`, default:
+ `None`
+ )
+ –Name of the scorer.
+
+
+```python
+def semantic_similarity(
+ reference: str | TaskInput,
+ *,
+ name: str | None = None,
+) -> "Scorer[t.Any]":
+ """
+ Scores semantic similarity using TF-IDF and cosine similarity.
+
+ Requires scikit-learn.
+
+ Args:
+ reference: The reference text (e.g., expected output) or a TaskInput.
+ name: Name of the scorer.
+ """
+ if not _SKLEARN_AVAILABLE:
+ warn_at_user_stacklevel(_SKLEARN_ERROR_MSG, UserWarning)
+
+ def disabled_evaluate(_: t.Any) -> Metric:
+ return Metric(value=0.0, attributes={"error": _SKLEARN_ERROR_MSG})
+
+ return Scorer.from_callable(disabled_evaluate, name=name)
+
+ vectorizer = TfidfVectorizer(stop_words="english")
+
+ def evaluate(data: t.Any) -> Metric:
+ candidate_text = str(data)
+ reference_text = (
+ reference.resolve(cast_as=str) if isinstance(reference, TaskInput) else reference
+ )
+ tfidf_matrix = vectorizer.fit_transform([candidate_text, reference_text])
+ sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
+ return Metric(value=float(sim))
+
+ if name is None:
+ ref_name = reference.name if isinstance(reference, TaskInput) else "static_text"
+ name = f"semantic_sim_to_{clean_str(ref_name)}"
+
+ return Scorer.from_callable(evaluate, name=name, catch=True)
+```
+
+
+
+
+sentiment\_with\_perspective
+----------------------------
+
+```python
+sentiment_with_perspective(
+ *,
+ api_key: str | None = None,
+ attribute: PerspectiveAttribute = "TOXICITY",
+ name: str | None = None,
+) -> Scorer[t.Any]
+```
+
+Score the sentiment of the text using the Perspective API.
+
+Returns a float score between 0.0 and 1.0 indicating the level of the attribute in the text.
+
+**Parameters:**
+
+* **`api_key`**
+ (`str | None`, default:
+ `None`
+ )
+ –Your Perspective API key, or set in the PERSPECTIVE\_API\_KEY environment variable.
+* **`attribute`**
+ (`PerspectiveAttribute`, default:
+ `'TOXICITY'`
+ )
+ –The attribute to analyze (e.g., TOXICITY, SEVERE\_TOXICITY).
+* **`name`**
+ (`str | None`, default:
+ `None`
+ )
+ –Name of the scorer.
+
+
+```python
+def sentiment_with_perspective(
+ *,
+ api_key: str | None = None,
+ attribute: PerspectiveAttribute = "TOXICITY",
+ name: str | None = None,
+) -> Scorer[t.Any]:
+ """
+ Score the sentiment of the text using the Perspective API.
+
+ Returns a float score between 0.0 and 1.0 indicating the level of the attribute in the text.
+
+ Args:
+ api_key: Your Perspective API key, or set in the PERSPECTIVE_API_KEY environment variable.
+ attribute: The attribute to analyze (e.g., TOXICITY, SEVERE_TOXICITY).
+ name: Name of the scorer.
+ """
+
+ api_key = api_key or os.getenv("PERSPECTIVE_API_KEY")
+ if not api_key:
+ raise ValueError(
+ "API key must be provided or set in the PERSPECTIVE_API_KEY environment variable."
+ )
+
+ async def evaluate(data: t.Any) -> float:
+ async with httpx.AsyncClient() as client:
+ response = await client.post(
+ "https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze",
+ params={"key": api_key},
+ json={
+ "comment": {"text": str(data)},
+ "languages": ["en"],
+ "requestedAttributes": {attribute: {}},
+ "doNotStore": True,
+ },
+ timeout=10,
+ )
+ response.raise_for_status()
+ result = await response.json()
+ return float(result["attributeScores"][attribute]["summaryScore"]["value"])
+
+ if name is None:
+ name = f"perspective_{attribute.lower()}"
+
+ return Scorer.from_callable(evaluate, name=name, catch=True)
+```
+
+
+
+
+wrap\_chat
+----------
+
+```python
+wrap_chat(
+ inner_scorer: Scorer[Any],
+ *,
+ filter: ChatFilterMode | ChatFilterFunction = "last",
+ name: str | None = None,
+) -> Scorer[Chat]
+```
+
+Wraps a text-based scorer to work on a `rigging.Chat` object.
+
+This function acts as an adapter. It extracts and filters messages from a
+`Chat` object, converts them to a single string, and then passes that
+string to the `inner_scorer` for evaluation.
+
+**Parameters:**
+
+* **`inner_scorer`**
+ (`Scorer[Any]`)
+ –The text-based Scorer instance to wrap (e.g., one from `contains` or `similarity_to`).
+* **`filter`**
+ (`ChatFilterMode | ChatFilterFunction`, default:
+ `'last'`
+ )
+ –The strategy for filtering which messages to include:
+ - "all": Use all messages in the chat.
+ - "last": Use only the last message.
+ - "first": Use only the first message.
+ - "user": Use only user messages.
+ - "assistant": Use only assistant messages.
+ - "last\_user": Use only the last user message.
+ - "last\_assistant": Use only the last assistant message.
+ - A callable that takes a list of `Message` objects and returns a filtered list.
+* **`name`**
+ (`str | None`, default:
+ `None`
+ )
+ –An optional name for the new, wrapped scorer. If None, a descriptive name is generated.
+
+**Returns:**
+
+* `Scorer[Chat]`
+ –A new Scorer that takes a `Chat` object as input.
+
+
+```python
+def wrap_chat(
+ inner_scorer: Scorer[t.Any],
+ *,
+ filter: ChatFilterMode | ChatFilterFunction = "last",
+ name: str | None = None,
+) -> "Scorer[Chat]":
+ """
+ Wraps a text-based scorer to work on a `rigging.Chat` object.
+
+ This function acts as an adapter. It extracts and filters messages from a
+ `Chat` object, converts them to a single string, and then passes that
+ string to the `inner_scorer` for evaluation.
+
+ Args:
+ inner_scorer: The text-based Scorer instance to wrap (e.g., one from `contains` or `similarity_to`).
+ filter: The strategy for filtering which messages to include:
+ - "all": Use all messages in the chat.
+ - "last": Use only the last message.
+ - "first": Use only the first message.
+ - "user": Use only user messages.
+ - "assistant": Use only assistant messages.
+ - "last_user": Use only the last user message.
+ - "last_assistant": Use only the last assistant message.
+ - A callable that takes a list of `Message` objects and returns a filtered list.
+ name: An optional name for the new, wrapped scorer. If None, a descriptive name is generated.
+
+ Returns:
+ A new Scorer that takes a `Chat` object as input.
+ """
+
+ async def evaluate(chat: "Chat") -> Metric:
+ from rigging.chat import Chat
+
+ # Fall through to the inner scorer if chat is not a Chat instance
+ if not isinstance(chat, Chat):
+ return await inner_scorer(chat)
+
+ messages = chat.all
+ if callable(filter):
+ messages = filter(messages)
+ elif filter == "last":
+ messages = messages[-1:] if messages else []
+ elif filter == "first":
+ messages = messages[:1] if messages else []
+ elif filter == "user":
+ messages = [m for m in messages if m.role == "user"]
+ elif filter == "assistant":
+ messages = [m for m in messages if m.role == "assistant"]
+ elif filter == "last_user":
+ user_messages = [m for m in messages if m.role == "user"]
+ messages = user_messages[-1:] if user_messages else []
+ elif filter == "last_assistant":
+ assistant_messages = [m for m in messages if m.role == "assistant"]
+ messages = assistant_messages[-1:] if assistant_messages else []
+
+ all_text = "\n".join(msg.content for msg in messages if msg.content is not None)
+ return await inner_scorer(all_text)
+
+ if name is None:
+ name = f"chat_{inner_scorer.name}"
+
+ return Scorer.from_callable(evaluate, name=name)
+```
+
+
+
\ No newline at end of file
diff --git a/docs/sdk/task.mdx b/docs/sdk/task.mdx
index 4de2c1fb..25a1f40e 100644
--- a/docs/sdk/task.mdx
+++ b/docs/sdk/task.mdx
@@ -334,7 +334,10 @@ async def run(self, *args: P.args, **kwargs: P.kwargs) -> TaskSpan[R]:
input_object_hashes: list[str] = [
span.log_input(
- name, value, label=f"{self.label}.input.{name}", attributes={"auto": True}
+ name,
+ value,
+ label=f"{self.label}.input.{name}",
+ attributes={"auto": True},
)
for name, value in inputs_to_log.items()
]
@@ -372,7 +375,10 @@ async def run(self, *args: P.args, **kwargs: P.kwargs) -> TaskSpan[R]:
)
):
output_object_hash = span.log_output(
- "output", output, label=f"{self.label}.output", attributes={"auto": True}
+ "output",
+ output,
+ label=f"{self.label}.output",
+ attributes={"auto": True},
)
# Link the output to the inputs
@@ -857,7 +863,7 @@ def with_(
else task.log_execution_metrics
)
- new_scorers = [Scorer.from_callable(self.tracer, scorer) for scorer in (scorers or [])]
+ new_scorers = [Scorer.from_callable(scorer) for scorer in (scorers or [])]
new_tags = list(tags or [])
if append:
@@ -873,6 +879,134 @@ def with_(
```
+
+
+TaskInput
+---------
+
+```python
+TaskInput(
+ name: str,
+ *,
+ process: Callable[[Any], Any] | None = None,
+)
+```
+
+A placeholder to dynamically retrieve an input from the active TaskSpan.
+
+**Parameters:**
+
+* **`name`**
+ (`str`)
+ –The name of the input to retrieve, as logged via `task.log_input(name=...)`.
+* **`process`**
+ (`Callable[[Any], Any] | None`, default:
+ `None`
+ )
+ –An optional function to process the input value before returning it.
+ This can be used to transform or extract from the raw input value.
+
+
+```python
+def __init__(self, name: str, *, process: t.Callable[[t.Any], t.Any] | None = None) -> None:
+ """
+ Args:
+ name: The name of the input to retrieve, as logged via `task.log_input(name=...)`.
+ process: An optional function to process the input value before returning it.
+ This can be used to transform or extract from the raw input value.
+ """
+ self.name = name
+ self.process = process
+```
+
+
+
+
+### resolve
+
+```python
+resolve(*, cast_as: None = None) -> t.Any
+```
+
+```python
+resolve(*, cast_as: type[CastT]) -> CastT
+```
+
+```python
+resolve(*, cast_as: type[CastT] | None = None) -> t.Any
+```
+
+Resolve the input from the current TaskSpan.
+
+**Parameters:**
+
+* **`cast_as`**
+ (`type[CastT] | None`, default:
+ `None`
+ )
+ –Optionally cast the resolved value to a specific type.
+
+**Returns:**
+
+* `Any`
+ –The value of the input from the current TaskSpan.
+
+
+```python
+def resolve(self, *, cast_as: type[CastT] | None = None) -> t.Any: # noqa: PLR0911
+ """
+ Resolve the input from the current TaskSpan.
+
+ Args:
+ cast_as: Optionally cast the resolved value to a specific type.
+
+ Returns:
+ The value of the input from the current TaskSpan.
+ """
+ from dreadnode.tracing.span import current_task_span
+
+ if (task := current_task_span.get()) is None:
+ warn_at_user_stacklevel(
+ "TaskInput.resolve() called outside of an active TaskSpan context. "
+ "This will raise an error in future versions.",
+ TaskInputWarning,
+ )
+ return None
+
+ try:
+ task_input = task.inputs[self.name].value
+ except KeyError:
+ warn_at_user_stacklevel(
+ f"Input '{self.name}' not found in the active TaskSpan. "
+ f"Available inputs are: {list(task.inputs.keys())}",
+ TaskInputWarning,
+ )
+ return None
+
+ try:
+ if self.process is not None:
+ return self.process(task_input)
+ except Exception as e: # noqa: BLE001
+ warn_at_user_stacklevel(
+ f"Error processing TaskInput '{self.name}': {e}",
+ TaskInputWarning,
+ )
+ return task_input
+
+ if cast_as is not None:
+ try:
+ return cast_as(task_input) # type: ignore [call-arg]
+ except Exception as e: # noqa: BLE001
+ warn_at_user_stacklevel(
+ f"Error casting TaskInput '{self.name}' to {cast_as.__name__}: {e}",
+ TaskInputWarning,
+ )
+ return task_input
+
+ return task_input
+```
+
+
TaskSpanList
diff --git a/dreadnode/__init__.py b/dreadnode/__init__.py
index 0542eea6..634237ad 100644
--- a/dreadnode/__init__.py
+++ b/dreadnode/__init__.py
@@ -1,9 +1,9 @@
-from dreadnode import convert, data_types
+from dreadnode import convert, data_types, scorers
from dreadnode.data_types import Audio, Code, Image, Markdown, Object3D, Table, Text, Video
from dreadnode.main import DEFAULT_INSTANCE, Dreadnode
from dreadnode.metric import Metric, MetricDict, Scorer
from dreadnode.object import Object
-from dreadnode.task import Task
+from dreadnode.task import Task, TaskInput
from dreadnode.tracing.span import RunSpan, Span, TaskSpan
from dreadnode.version import VERSION
@@ -50,6 +50,7 @@
"Span",
"Table",
"Task",
+ "TaskInput",
"TaskSpan",
"Text",
"Video",
@@ -71,6 +72,7 @@
"push_update",
"run",
"scorer",
+ "scorers",
"shutdown",
"span",
"tag",
diff --git a/dreadnode/data_types/image.py b/dreadnode/data_types/image.py
index 0a4c7a22..014d7b2f 100644
--- a/dreadnode/data_types/image.py
+++ b/dreadnode/data_types/image.py
@@ -8,9 +8,9 @@
from dreadnode.data_types.base import DataType
try:
- from PIL import Image as PILImage
+ from PIL import Image as PILImage # type: ignore[import-not-found,unused-ignore]
except ImportError:
- PILImage = None # type: ignore[assignment]
+ PILImage = None # type: ignore[assignment,unused-ignore]
ImageDataType = t.Any | np.ndarray[t.Any, t.Any]
ImageDataOrPathType = str | Path | bytes | ImageDataType
diff --git a/dreadnode/integrations/transformers.py b/dreadnode/integrations/transformers.py
index 697efcf9..cf1d0cea 100644
--- a/dreadnode/integrations/transformers.py
+++ b/dreadnode/integrations/transformers.py
@@ -12,8 +12,14 @@
import typing as t
-from transformers.trainer_callback import TrainerCallback, TrainerControl, TrainerState
-from transformers.training_args import TrainingArguments
+from transformers.trainer_callback import ( # type: ignore[import-not-found,unused-ignore]
+ TrainerCallback,
+ TrainerControl,
+ TrainerState,
+)
+from transformers.training_args import ( # type: ignore[import-not-found,unused-ignore]
+ TrainingArguments,
+)
import dreadnode as dn
@@ -40,7 +46,7 @@ def _clean_keys(data: dict[str, t.Any]) -> dict[str, t.Any]:
return cleaned
-class DreadnodeCallback(TrainerCallback):
+class DreadnodeCallback(TrainerCallback): # type: ignore[misc,unused-ignore]
"""
An implementation of the `TrainerCallback` interface for Dreadnode.
diff --git a/dreadnode/main.py b/dreadnode/main.py
index 6fee3c8e..321ba373 100644
--- a/dreadnode/main.py
+++ b/dreadnode/main.py
@@ -634,9 +634,7 @@ def make_task(
attributes=_attributes,
func=t.cast("t.Callable[P, R]", func),
scorers=[
- scorer
- if isinstance(scorer, Scorer)
- else Scorer.from_callable(self._get_tracer(), scorer)
+ scorer if isinstance(scorer, Scorer) else Scorer.from_callable(scorer)
for scorer in scorers or []
],
tags=list(tags or []),
@@ -726,7 +724,6 @@ async def my_task(x: int) -> int:
def make_scorer(func: ScorerCallable[T]) -> Scorer[T]:
return Scorer.from_callable(
- self._get_tracer(),
func,
name=name,
tags=tags,
diff --git a/dreadnode/metric.py b/dreadnode/metric.py
index 244191cc..ad0850c2 100644
--- a/dreadnode/metric.py
+++ b/dreadnode/metric.py
@@ -6,7 +6,6 @@
import typing_extensions as te
from logfire._internal.stack_info import warn_at_user_stacklevel
from logfire._internal.utils import safe_repr
-from opentelemetry.trace import Tracer
from dreadnode.types import JsonDict, JsonValue
@@ -73,7 +72,11 @@ def from_many(
total = sum(value * weight for _, value, weight in values)
weight = sum(weight for _, _, weight in values)
score_attributes = {name: value for name, value, _ in values}
- return cls(value=total / weight, step=step, attributes={**attributes, **score_attributes})
+ return cls(
+ value=total / weight,
+ step=step,
+ attributes={**attributes, **score_attributes},
+ )
def apply_mode(self, mode: MetricAggMode, others: "list[Metric]") -> "Metric":
"""
@@ -124,8 +127,6 @@ def apply_mode(self, mode: MetricAggMode, others: "list[Metric]") -> "Metric":
@dataclass
class Scorer(t.Generic[T]):
- tracer: Tracer
-
name: str
"The name of the scorer, used for reporting metrics."
tags: t.Sequence[str]
@@ -138,25 +139,27 @@ class Scorer(t.Generic[T]):
"The step value to attach to metrics produced by this Scorer."
auto_increment_step: bool = False
"Whether to automatically increment the step for each time this scorer is called."
+ catch: bool = False
+ "Whether to catch exceptions in the scorer function and return a 0 Metric with error information."
@classmethod
def from_callable(
cls,
- tracer: Tracer,
func: "ScorerCallable[T] | Scorer[T]",
*,
name: str | None = None,
tags: t.Sequence[str] | None = None,
+ catch: bool = False,
**attributes: t.Any,
) -> "Scorer[T]":
"""
Create a scorer from a callable function.
Args:
- tracer: The tracer to use for reporting metrics.
func: The function to call to get the metric.
name: The name of the scorer, used for reporting metrics.
tags: A list of tags to attach to the metric.
+ catch: Whether to catch exceptions in the scorer function and return a 0 Metric with error information.
**attributes: A dictionary of attributes to attach to the metric.
Returns:
@@ -177,11 +180,11 @@ def from_callable(
)
name = name or func_name
return cls(
- tracer=tracer,
name=name,
tags=tags or [],
attributes=attributes or {},
func=func,
+ catch=catch,
)
def __post_init__(self) -> None:
@@ -196,13 +199,13 @@ def clone(self) -> "Scorer[T]":
A new Scorer.
"""
return Scorer(
- tracer=self.tracer,
name=self.name,
tags=self.tags,
attributes=self.attributes,
func=self.func,
step=self.step,
auto_increment_step=self.auto_increment_step,
+ catch=self.catch,
)
async def __call__(self, object: T) -> Metric:
@@ -217,17 +220,19 @@ async def __call__(self, object: T) -> Metric:
Returns:
A Metric object.
"""
- from dreadnode.tracing.span import Span
-
- with Span(
- name=self.name,
- tags=self.tags,
- attributes=self.attributes,
- tracer=self.tracer,
- ):
+ try:
metric = self.func(object)
if inspect.isawaitable(metric):
metric = await metric
+ except Exception as exc:
+ if not self.catch:
+ raise
+
+ warn_at_user_stacklevel(
+ f"Error executing scorer {self.name!r} for object {object!r}: {exc}",
+ MetricWarning,
+ )
+ metric = Metric(value=0.0, step=self.step, attributes={"error": str(exc)})
if not isinstance(metric, Metric):
metric = Metric(
diff --git a/dreadnode/object.py b/dreadnode/object.py
index 28dbe589..45c285e9 100644
--- a/dreadnode/object.py
+++ b/dreadnode/object.py
@@ -1,6 +1,8 @@
import typing as t
from dataclasses import dataclass
+from pydantic import BaseModel, Field
+
from dreadnode.types import AnyDict
@@ -12,21 +14,35 @@ class ObjectRef:
attributes: AnyDict | None
-@dataclass
-class ObjectUri:
+class ObjectUri(BaseModel):
hash: str
schema_hash: str
uri: str
size: int
type: t.Literal["uri"] = "uri"
+ # During execution, we might want to dynamically pull a value
+    # in its unserialized form, so we store it here.
+ runtime_value: t.Any | None = Field(None, init=False, repr=False, exclude=True)
-@dataclass
-class ObjectVal:
+ @property
+ def value(self) -> t.Any:
+ return self.runtime_value or self.uri
+
+
+class ObjectVal(BaseModel):
hash: str
schema_hash: str
- value: t.Any
+ value_: t.Any = Field(alias="value")
type: t.Literal["val"] = "val"
+ # During execution, we might want to dynamically pull a value
+    # in its unserialized form, so we store it here.
+ runtime_value: t.Any | None = Field(None, init=False, repr=False, exclude=True)
+
+ @property
+ def value(self) -> t.Any:
+ return self.runtime_value or self.value_
+
Object = ObjectUri | ObjectVal
diff --git a/dreadnode/scorers/__init__.py b/dreadnode/scorers/__init__.py
new file mode 100644
index 00000000..1568858e
--- /dev/null
+++ b/dreadnode/scorers/__init__.py
@@ -0,0 +1,35 @@
+from dreadnode.scorers.consistency import character_consistency
+from dreadnode.scorers.contains import (
+ contains,
+ detect_ansi_escapes,
+ detect_refusal,
+ detect_sensitive_keywords,
+ detect_unsafe_shell_content,
+)
+from dreadnode.scorers.length import length_in_range, length_ratio, length_target
+from dreadnode.scorers.pii import detect_pii, detect_pii_with_presidio
+from dreadnode.scorers.readability import readability
+from dreadnode.scorers.rigging import wrap_chat
+from dreadnode.scorers.sentiment import sentiment, sentiment_with_perspective
+from dreadnode.scorers.similarity import bleu, semantic_similarity, similarity
+
+__all__ = [
+ "bleu",
+ "character_consistency",
+ "contains",
+ "detect_ansi_escapes",
+ "detect_pii",
+ "detect_pii_with_presidio",
+ "detect_refusal",
+ "detect_sensitive_keywords",
+ "detect_unsafe_shell_content",
+ "length_in_range",
+ "length_ratio",
+ "length_target",
+ "readability",
+ "semantic_similarity",
+ "sentiment",
+ "sentiment_with_perspective",
+ "similarity",
+ "wrap_chat",
+]
diff --git a/dreadnode/scorers/consistency.py b/dreadnode/scorers/consistency.py
new file mode 100644
index 00000000..8c47ba25
--- /dev/null
+++ b/dreadnode/scorers/consistency.py
@@ -0,0 +1,66 @@
+import re
+import typing as t
+
+from dreadnode.metric import Metric, Scorer
+from dreadnode.task import TaskInput
+from dreadnode.util import clean_str
+
+if t.TYPE_CHECKING:
+ from dreadnode.types import JsonDict
+
+
+def character_consistency(
+ reference: str | TaskInput,
+ *,
+ max_ratio_diff: float = 2.0,
+ name: str | None = None,
+) -> "Scorer[t.Any]":
+ """
+ Scores character type consistency between the data and a reference text.
+
+ It compares the ratio of letters, numbers, and symbols in both texts.
+ A score of 1.0 indicates identical distributions.
+
+ Args:
+ reference: The reference text (e.g., the prompt) or a TaskInput.
+ max_ratio_diff: The denominator for normalizing ratio differences.
+ name: Name of the scorer.
+ """
+
+ def _analyze_text(text: str) -> dict[str, int]:
+ return {
+ "letters": len(re.findall(r"[a-zA-Z]", text)),
+ "numbers": len(re.findall(r"\d", text)),
+ "symbols": len(re.findall(r"[^\w\s]", text)),
+ }
+
+ def evaluate(data: t.Any) -> Metric:
+ candidate_text = str(data)
+ reference_text = str(reference.resolve()) if isinstance(reference, TaskInput) else reference
+
+ candidate_chars = _analyze_text(candidate_text)
+ reference_chars = _analyze_text(reference_text)
+
+ candidate_total = sum(candidate_chars.values())
+ reference_total = sum(reference_chars.values())
+
+ if reference_total == 0 or candidate_total == 0:
+ return Metric(value=0.0, attributes={"error": "Reference or candidate text is empty."})
+
+ scores: dict[str, float] = {}
+ metadata: JsonDict = {}
+ for char_type in ["letters", "numbers", "symbols"]:
+ ref_ratio = reference_chars[char_type] / reference_total
+ cand_ratio = candidate_chars[char_type] / candidate_total
+ diff = abs(ref_ratio - cand_ratio)
+ score = max(0.0, 1.0 - (diff / max_ratio_diff))
+ scores[char_type] = score
+ metadata[f"{char_type}_ratio_diff"] = round(diff, 4)
+
+ return Metric.from_many([(name, score, 1.0) for name, score in scores.items()])
+
+ if name is None:
+ ref_name = reference.name if isinstance(reference, TaskInput) else "static_text"
+ name = f"char_consistency_{clean_str(ref_name)}"
+
+ return Scorer.from_callable(evaluate, name=name)
diff --git a/dreadnode/scorers/contains.py b/dreadnode/scorers/contains.py
new file mode 100644
index 00000000..a0d70f30
--- /dev/null
+++ b/dreadnode/scorers/contains.py
@@ -0,0 +1,185 @@
+import re
+import typing as t
+
+from dreadnode.metric import Metric, Scorer
+from dreadnode.task import TaskInput
+from dreadnode.util import clean_str
+
+
+def contains(
+ pattern: str | re.Pattern[str] | TaskInput,
+ *,
+ name: str | None = None,
+ case_sensitive: bool = False,
+ invert: bool = False,
+ exact: bool = False,
+ regex: bool = False,
+) -> "Scorer[t.Any]":
+ """
+ Score based on whether the data contains a specific string or regex pattern.
+
+ Args:
+ pattern: String to search for or compiled regex pattern
+ name: Name of the scorer
+ invert: Invert the match (i.e., return True if pattern is NOT found)
+ case_sensitive: Case sensitive matching
+ regex: Treat string pattern as regex (will be compiled)
+ exact: Exact string matching instead of contains
+ """
+
+ def evaluate(data: t.Any) -> Metric:
+ _pattern = pattern.resolve(cast_as=str) if isinstance(pattern, TaskInput) else pattern
+ text = str(data)
+ contains = False
+
+ metadata: dict[str, t.Any] = {}
+ if isinstance(_pattern, re.Pattern) or regex:
+ if isinstance(_pattern, str):
+ flags = 0 if case_sensitive else re.IGNORECASE
+ compiled = re.compile(_pattern, flags)
+ else:
+ compiled = _pattern
+
+ matches = compiled.findall(text)
+ contains = bool(matches)
+ metadata["matches"] = list(set(matches))
+ elif exact:
+ contains = text == _pattern if case_sensitive else text.lower() == _pattern.lower()
+ elif case_sensitive:
+ contains = _pattern in text
+ else:
+ contains = _pattern.lower() in text.lower()
+
+ return Metric(value=float(not contains if invert else contains), attributes=metadata)
+
+ if name is None:
+ ref_name = (
+ pattern.name
+ if isinstance(pattern, TaskInput)
+ else pattern
+ if isinstance(pattern, str)
+ else pattern.pattern
+ )
+ name = f"contains_{clean_str(ref_name, max_length=20)}"
+
+ return Scorer.from_callable(evaluate, name=name)
+
+
+def detect_refusal(
+ *, extra_patterns: list[str] | None = None, invert: bool = False, name: str = "refusal"
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of refusal patterns in the data.
+
+ A score of 1.0 indicates a refusal was detected.
+
+ Args:
+ extra_patterns: An optional list of regex strings to add to the default refusal patterns.
+ invert: Invert the score (1.0 for no refusal, 0.0 for refusal).
+ name: Name of the scorer
+ """
+ patterns = [
+ r"i cannot",
+ r"i can't",
+ r"i am unable",
+ r"i'm unable",
+ r"i am not able",
+ r"i'm not able",
+ r"cannot assist",
+ r"as an ai",
+ r"as a language model",
+ r"my apologies",
+ r"i must decline",
+ r"i apologize but",
+ r"sorry, but",
+ r"this goes against",
+ ]
+ patterns = patterns + (extra_patterns or [])
+ combined_pattern = re.compile("|".join(patterns), re.IGNORECASE)
+ return contains(combined_pattern, invert=invert, name=name)
+
+
+def detect_ansi_escapes(
+ *, extra_patterns: list[str] | None = None, name: str = "ansi_escapes"
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of ANSI escape codes in the data.
+
+ Args:
+ extra_patterns: An optional list of regex strings to add to the default ANSI patterns.
+ name: Name of the scorer
+ """
+ patterns = [r"\x1b\[", r"\033\[", r"\\x1b\[", r"\\033\[", r"ESC\[", r"\^[\[]"]
+ patterns = patterns + (extra_patterns or [])
+ combined = "|".join(f"({p})" for p in patterns)
+ return contains(re.compile(combined), name=name)
+
+
+def detect_unsafe_shell_content(
+ *, extra_patterns: list[str] | None = None, name: str = "unsafe_shell_content"
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of unsafe shell commands/paths in the data.
+
+ Args:
+ extra_patterns: An optional list of regex strings to add to the default unsafe shell patterns.
+ name: Name of the scorer
+ """
+ patterns = [
+ r"/etc/(?:passwd|shadow|hosts|hostname)",
+ r"~?/\.ssh/.*",
+ r"/var/log/.*",
+ r"(?:cat|more|less|head|tail)\s+/",
+ r"(?:nano|vim|vi|emacs)\s+/",
+ r"(?:>\s*/|>>\s*/)",
+ r"(?:rm|mv|cp)\s+/",
+ r"(?:chmod|chown|chgrp)",
+ r"%(?:run|load|save|writefile)",
+ r"%(?:sx?|system|bash|sh)",
+ r"%%(?:script|bash|sh)",
+ r"base64\.(?:encode|decode)",
+ ]
+ patterns = patterns + (extra_patterns or [])
+ combined = "|".join(f"({p})" for p in patterns)
+ return contains(re.compile(combined, re.IGNORECASE), name=name)
+
+
+def detect_sensitive_keywords(
+ *,
+ extra_patterns: list[str] | None = None,
+ invert: bool = False,
+ name: str = "sensitive_keywords",
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of sensitive keywords in the data (e.g., passwords, API keys).
+
+ A score of 1.0 indicates a sensitive keyword was detected.
+
+ Args:
+ extra_patterns: An optional list of regex strings to add to the default patterns.
+ invert: Invert the score (1.0 for no sensitive keywords, 0.0 for detection).
+ name: Name of the scorer
+ """
+ # Use word boundaries (\b) to avoid matching substrings in larger words (e.g., 'auth' in 'author').
+ patterns = [
+ r"\bpassword\b",
+ r"\bpasswd\b",
+ r"\bmysql_pwd\b",
+ r"\bsecret\b",
+ r"\bauth(?!or)", # Negative lookahead for 'author'
+ r"\bcredential\b",
+ r"private_?key",
+ r"api_?key",
+ r"\bsession\b",
+ r"\bcookie\b",
+ r"credit_?card",
+ r"access_?token",
+ r"refresh_?token",
+ r"client_?id",
+ r"client_?secret",
+ r"\b(csr|xsr)f_?token\b",
+ r"\bjwt\b",
+ ]
+ patterns = patterns + (extra_patterns or [])
+ combined_pattern = re.compile("|".join(f"({p})" for p in patterns), re.IGNORECASE)
+ return contains(combined_pattern, invert=invert, name=name)
diff --git a/dreadnode/scorers/length.py b/dreadnode/scorers/length.py
new file mode 100644
index 00000000..d9f1f39c
--- /dev/null
+++ b/dreadnode/scorers/length.py
@@ -0,0 +1,140 @@
+import typing as t
+
+from dreadnode.metric import Metric, Scorer
+from dreadnode.task import TaskInput
+from dreadnode.util import clean_str
+
+
+def length_ratio(
+ reference: str | TaskInput,
+ *,
+ min_ratio: float = 0.1,
+ max_ratio: float = 5.0,
+ name: str | None = None,
+) -> "Scorer[t.Any]":
+ """
+ Score the length of the data against a reference text.
+
+ The score is 1.0 if the ratio (candidate/reference) is within the
+ [min_ratio, max_ratio] bounds and degrades towards 0.0 outside them.
+
+ Args:
+ reference: The reference text (static string) or a `TaskInput` to resolve dynamically.
+ min_ratio: The minimum acceptable length ratio. Must be > 0.
+ max_ratio: The maximum acceptable length ratio.
+ name: Name of the scorer.
+ """
+ if min_ratio <= 0:
+ raise ValueError("min_ratio must be greater than 0.")
+
+ def evaluate(data: t.Any) -> Metric:
+ candidate_text = str(data)
+ reference_text = (
+ reference.resolve(cast_as=str) if isinstance(reference, TaskInput) else reference
+ )
+
+ if not reference_text:
+ raise ValueError("Reference text must not be empty.")
+
+ ratio = len(candidate_text) / len(reference_text)
+
+ if ratio < min_ratio:
+ score = ratio / min_ratio
+ elif ratio > max_ratio:
+ score = max_ratio / ratio
+ else:
+ score = 1.0
+
+ return Metric(value=score, attributes={"ratio": round(ratio, 4)})
+
+ if name is None:
+ ref_name = reference.name if isinstance(reference, TaskInput) else reference
+ name = f"length_ratio_vs_{clean_str(ref_name, max_length=20)}"
+
+ return Scorer.from_callable(evaluate, name=name, catch=True)
+
+
+def length_in_range(
+ min_length: int = 0,
+ max_length: float = float("inf"),
+ *,
+ name: str = "length_in_range",
+) -> "Scorer[t.Any]":
+ """
+ Scores the length of the data against a specified range.
+
+ The score is 1.0 if the length is within [min, max]. Outside the bounds,
+ the score degrades towards 0.0. A score of 0.0 is returned for empty text.
+
+ Args:
+ min_length: The minimum acceptable character length.
+ max_length: The maximum acceptable character length.
+ name: Name of the scorer.
+ """
+ if min_length < 0 or max_length < min_length:
+ raise ValueError("Invalid length bounds. Must have 0 <= min <= max.")
+
+ def evaluate(data: t.Any) -> Metric:
+ text = str(data)
+ text_len = len(text)
+
+ score = 0.0
+ if min_length <= text_len <= max_length:
+ score = 1.0
+ elif text_len < min_length:
+ # Linear ramp-up from 0 to min. Avoids division by zero if min is 0.
+ score = text_len / min_length if min_length > 0 else 0.0
+ else: # text_len > max
+ # Linear degradation. Score hits 0 when length is 2*max.
+ # This is more predictable than an inverse curve.
+ # We define the "penalty zone" as the range from max to 2*max.
+ penalty_range = max_length
+ overage = text_len - max_length
+ score = 1.0 - (overage / penalty_range) if penalty_range > 0 else 0.0
+
+ return Metric(
+ value=max(0.0, score),
+ attributes={"length": text_len, "min": min_length, "max": max_length},
+ )
+
+ return Scorer.from_callable(evaluate, name=name)
+
+
+def length_target(
+ target_length: int,
+ *,
+ name: str = "length_target",
+) -> "Scorer[t.Any]":
+ """
+ Scores the length of the data against a target length.
+
+ The score is 1.0 if the length matches the target, and degrades towards 0.0
+ as the length deviates from the target. A score of 0.0 is returned for empty text.
+
+ Args:
+ target_length: The target character length to score against.
+ name: Name of the scorer.
+ """
+ if target_length < 0:
+ raise ValueError("Target length must be non-negative.")
+
+ def evaluate(data: t.Any) -> Metric:
+ text = str(data)
+ text_len = len(text)
+
+ # Handle the perfect match case first, especially for target=0
+ if text_len == target_length:
+ score = 1.0
+ elif target_length == 0:
+ # If target is 0, any non-zero length is a total miss.
+ score = 0.0
+ else:
+ # Linear degradation based on distance from target.
+ diff = abs(text_len - target_length)
+ score = 1.0 - (diff / target_length)
+
+ final_score = max(0.0, score)
+
+ return Metric(value=final_score, attributes={"length": text_len, "target": target_length})
+
+ return Scorer.from_callable(evaluate, name=name)
diff --git a/dreadnode/scorers/pii.py b/dreadnode/scorers/pii.py
new file mode 100644
index 00000000..cf084100
--- /dev/null
+++ b/dreadnode/scorers/pii.py
@@ -0,0 +1,158 @@
+import re
+import typing as t
+
+from dreadnode.metric import Metric, Scorer
+from dreadnode.scorers.contains import contains
+from dreadnode.util import warn_at_user_stacklevel
+
+if t.TYPE_CHECKING:
+ from dreadnode.types import JsonDict
+
+_PRESIDIO_AVAILABLE = False
+_PRESIDIO_ERROR_MSG = (
+ "Presidio dependencies are not installed. "
+ "Please install them with: pip install presidio-analyzer presidio-anonymizer 'spacy[en_core_web_lg]'"
+)
+
+try:
+ from presidio_analyzer import AnalyzerEngine # type: ignore[import-not-found,unused-ignore]
+ from presidio_analyzer.nlp_engine import ( # type: ignore[import-not-found,unused-ignore]
+ NlpEngineProvider,
+ )
+
+ _PRESIDIO_AVAILABLE = True
+except ImportError:
+ pass
+
+
+def detect_pii(
+ types: t.Sequence[t.Literal["email", "phone", "ip_address", "ssn"]] = (
+ "email",
+ "phone",
+ "ip_address",
+ ),
+ *,
+ extra_patterns: list[str] | None = None,
+ invert: bool = False,
+ name: str = "pii",
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of personally identifiable information (PII) in the data using regex patterns.
+
+ A score of 1.0 indicates that one or more PII patterns were detected.
+
+ Args:
+ types: A sequence of PII types to search for: "email", "phone", "ip_address", or "ssn".
+ extra_patterns: An optional list of regex strings to add to the default PII patterns.
+ invert: Invert the score (1.0 for no PII, 0.0 for PII detected).
+ name: Name of the scorer
+ """
+ default_patterns = {
+ "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b",
+ "phone": r"\b(?:\+?1[ -]?)?\(?\d{3}\)?[ -]?\d{3}[ -]?\d{4}\b",
+ "ip_address": r"\b(?:\d{1,3}\.){3}\d{1,3}\b",
+ "ssn": r"\b\d{3}-\d{2}-\d{4}\b",
+ }
+
+ patterns = []
+ for pii_type in types:
+ pattern = default_patterns.get(pii_type.lower())
+ if pattern:
+ patterns.append(pattern)
+ else:
+ raise ValueError(
+ f"Unsupported PII type: '{pii_type}'. Supported types are: {list(default_patterns.keys())}"
+ )
+
+ patterns = patterns + (extra_patterns or [])
+ if not patterns:
+ raise ValueError("No PII types selected.")
+
+ combined_pattern = re.compile("|".join(f"({p})" for p in patterns))
+ return contains(combined_pattern, invert=invert, name=name)
+
+
+# A global analyzer instance to avoid reloading the model on every call
+g_analyzer_engine: t.Optional["AnalyzerEngine"] = None
+
+
+def _get_presidio_analyzer() -> "AnalyzerEngine":
+ """Lazily initializes and returns a singleton Presidio AnalyzerEngine instance."""
+ global g_analyzer_engine # noqa: PLW0603
+
+ if g_analyzer_engine is None:
+ provider = NlpEngineProvider(
+ nlp_configuration={
+ "nlp_engine_name": "spacy",
+ "models": [{"lang_code": "en", "model_name": "en_core_web_lg"}],
+ }
+ )
+ g_analyzer_engine = AnalyzerEngine(nlp_engine=provider.create_engine())
+
+ return g_analyzer_engine
+
+
+def detect_pii_with_presidio(
+ *,
+ entities: list[str] | None = None,
+ threshold: float = 0.5,
+ invert: bool = False,
+ name: str = "pii_presidio",
+) -> "Scorer[t.Any]":
+ """
+ Score the presence of PII (Personally Identifiable Information) in the data using Presidio.
+
+ The score is 1.0 if any PII entity is found above the given confidence
+ threshold, and 0.0 otherwise. The metadata will contain details of
+ any PII found.
+
+ This is a powerful but dependency-heavy scorer.
+
+ Args:
+ entities: A list of specific Presidio entity types to look for (e.g., ["PHONE_NUMBER", "CREDIT_CARD"]).
+ If None, all default entities are used.
+ threshold: The minimum confidence score (0-1) for an entity to be considered a match.
+ invert: Invert the score (1.0 for no PII, 0.0 for PII detected).
+ name: Name of the scorer.
+ """
+
+ if not _PRESIDIO_AVAILABLE:
+ warn_at_user_stacklevel(_PRESIDIO_ERROR_MSG, UserWarning)
+
+ def disabled_evaluate(_: t.Any) -> Metric:
+ return Metric(value=0.0, attributes={"error": _PRESIDIO_ERROR_MSG})
+
+ return Scorer.from_callable(disabled_evaluate, name=name)
+
+ def evaluate(data: t.Any) -> Metric:
+ analyzer = _get_presidio_analyzer()
+
+ text = str(data)
+
+ results = analyzer.analyze(
+ text=text,
+ entities=entities,
+ language="en",
+ score_threshold=threshold,
+ )
+
+ is_match = bool(results)
+ final_score = float(not is_match if invert else is_match)
+
+ # Provide rich metadata from the analysis
+ metadata: JsonDict = {
+ "found_pii": [
+ {
+ "text": text[res.start : res.end],
+ "entity_type": res.entity_type,
+ "score": res.score,
+ "start": res.start,
+ "end": res.end,
+ }
+ for res in results
+ ]
+ }
+
+ return Metric(value=final_score, attributes=metadata)
+
+ return Scorer.from_callable(evaluate, name=name, catch=True)
diff --git a/dreadnode/scorers/readability.py b/dreadnode/scorers/readability.py
new file mode 100644
index 00000000..956d90ef
--- /dev/null
+++ b/dreadnode/scorers/readability.py
@@ -0,0 +1,60 @@
+import typing as t
+
+from dreadnode.metric import Metric, Scorer
+from dreadnode.util import warn_at_user_stacklevel
+
+_TEXTSTAT_AVAILABLE = False
+_TEXTSTAT_ERROR_MSG = (
+ "textstat dependency is not installed. Please install it with: pip install textstat"
+)
+
+try:
+ import textstat # type: ignore[import-not-found,unused-ignore,import-untyped]
+
+ _TEXTSTAT_AVAILABLE = True
+except ImportError:
+ pass
+
+
+def readability(
+ target_grade: float = 8.0,
+ name: str = "readability",
+) -> "Scorer[t.Any]":
+ """
+ Score the readability of the text against a target grade level.
+
+ The score is 1.0 if the calculated grade level matches the target_grade,
+ and it degrades towards 0.0 as the distance from the target increases.
+
+ Args:
+ target_grade: The ideal reading grade level (e.g., 8.0 for 8th grade).
+            Scoring uses the Flesch-Kincaid grade level metric.
+ name: Name of the scorer.
+ """
+ if not _TEXTSTAT_AVAILABLE:
+ warn_at_user_stacklevel(_TEXTSTAT_ERROR_MSG, UserWarning)
+
+ def disabled_evaluate(_: t.Any) -> Metric:
+ return Metric(value=0.0, attributes={"error": _TEXTSTAT_ERROR_MSG})
+
+ return Scorer.from_callable(disabled_evaluate, name=name)
+
+ def evaluate(data: t.Any) -> Metric:
+ text = str(data)
+ if not text.strip():
+ return Metric(value=0.0, attributes={"error": "Input text is empty."})
+
+ # The Flesch-Kincaid grade level calculation
+ grade_level = textstat.flesch_kincaid_grade(text)
+
+ # Score is inversely related to the absolute difference from the target.
+ # We normalize by a factor (e.g., 10) to control how quickly the score drops off.
+ # A difference of 10 grades or more results in a score of 0.
+ diff = abs(grade_level - target_grade)
+ score = max(0.0, 1.0 - (diff / 10.0))
+
+ return Metric(
+ value=score, attributes={"calculated_grade": grade_level, "target_grade": target_grade}
+ )
+
+ return Scorer.from_callable(evaluate, name=name)
diff --git a/dreadnode/scorers/rigging.py b/dreadnode/scorers/rigging.py
new file mode 100644
index 00000000..dc9a6851
--- /dev/null
+++ b/dreadnode/scorers/rigging.py
@@ -0,0 +1,76 @@
+import typing as t
+
+from dreadnode.metric import Metric, Scorer
+
+if t.TYPE_CHECKING:
+ from rigging.chat import Chat
+ from rigging.message import Message
+
+ChatFilterMode = t.Literal[
+ "all", "last", "first", "user", "assistant", "last_user", "last_assistant"
+]
+ChatFilterFunction = t.Callable[["list[Message]"], list["Message"]]
+
+
def wrap_chat(
    inner_scorer: Scorer[t.Any],
    *,
    filter: ChatFilterMode | ChatFilterFunction = "last",
    name: str | None = None,
) -> "Scorer[Chat]":
    """
    Adapt a text-based scorer so it can evaluate a `rigging.Chat` object.

    Messages are extracted from the `Chat`, filtered according to `filter`,
    joined into a single newline-separated string, and handed to the
    `inner_scorer` for evaluation.

    Args:
        inner_scorer: The text-based Scorer instance to wrap (e.g., one from `contains` or `similarity_to`).
        filter: The strategy for filtering which messages to include:
            - "all": Use all messages in the chat.
            - "last": Use only the last message.
            - "first": Use only the first message.
            - "user": Use only user messages.
            - "assistant": Use only assistant messages.
            - "last_user": Use only the last user message.
            - "last_assistant": Use only the last assistant message.
            - A callable that takes a list of `Message` objects and returns a filtered list.
        name: An optional name for the new, wrapped scorer. If None, a descriptive name is generated.

    Returns:
        A new Scorer that takes a `Chat` object as input.
    """

    def _select(messages: "list[Message]") -> "list[Message]":
        # Apply the configured filter strategy to the raw message list.
        if callable(filter):
            return filter(messages)
        if filter == "last":
            return messages[-1:]
        if filter == "first":
            return messages[:1]
        if filter in ("user", "assistant"):
            return [m for m in messages if m.role == filter]
        if filter in ("last_user", "last_assistant"):
            role = filter.removeprefix("last_")
            matching = [m for m in messages if m.role == role]
            return matching[-1:]
        return messages  # "all"

    async def evaluate(chat: "Chat") -> Metric:
        from rigging.chat import Chat

        # Non-Chat inputs fall through directly to the wrapped scorer.
        if not isinstance(chat, Chat):
            return await inner_scorer(chat)

        selected = _select(chat.all)
        joined = "\n".join(m.content for m in selected if m.content is not None)
        return await inner_scorer(joined)

    if name is None:
        name = f"chat_{inner_scorer.name}"

    return Scorer.from_callable(evaluate, name=name)
diff --git a/dreadnode/scorers/sentiment.py b/dreadnode/scorers/sentiment.py
new file mode 100644
index 00000000..41966a46
--- /dev/null
+++ b/dreadnode/scorers/sentiment.py
@@ -0,0 +1,117 @@
+import os
+import typing as t
+
+import httpx
+
+from dreadnode.metric import Metric, Scorer
+from dreadnode.util import warn_at_user_stacklevel
+
+_TEXTBLOB_AVAILABLE = False
+_TEXTBLOB_ERROR_MSG = "textblob dependency is not installed. Please run: pip install textblob && python -m textblob.download_corpora"
+
+try:
+ from textblob import TextBlob # type: ignore[import-not-found,unused-ignore,import-untyped]
+
+ _TEXTBLOB_AVAILABLE = True
+except ImportError:
+ pass
+
+
def sentiment(
    target: t.Literal["positive", "negative", "neutral"] = "neutral",
    name: str = "score_sentiment",
) -> "Scorer[t.Any]":
    """
    Score how closely the text's sentiment matches the desired target.

    The score indicates how well the text's sentiment matches the target.
    - For "positive", score is 0-1 (0=negative, 1=very positive).
    - For "negative", score is 0-1 (0=positive, 1=very negative).
    - For "neutral", score is 0-1 (1=perfectly neutral, 0=very polarized).

    Args:
        target: The desired sentiment to score against.
        name: Name of the scorer.
    """
    if not _TEXTBLOB_AVAILABLE:
        warn_at_user_stacklevel(_TEXTBLOB_ERROR_MSG, UserWarning)

        # Fallback scorer: report the missing dependency instead of raising.
        def disabled_evaluate(_: t.Any) -> Metric:
            return Metric(value=0.0, attributes={"error": _TEXTBLOB_ERROR_MSG})

        return Scorer.from_callable(disabled_evaluate, name=name)

    def evaluate(data: t.Any) -> Metric:
        text = str(data)
        if not text.strip():
            return Metric(value=0.0, attributes={"error": "Input text is empty."})

        # TextBlob polarity ranges from -1.0 (very negative) to 1.0 (very positive).
        polarity = TextBlob(text).sentiment.polarity

        if target == "positive":
            # Clamp negative polarity to 0; positive polarity maps through.
            score = max(0.0, polarity)
        elif target == "negative":
            # Mirror image: the more negative the polarity, the higher the score.
            score = max(0.0, -polarity)
        else:
            # "neutral": penalize absolute distance from zero polarity.
            score = 1.0 - abs(polarity)

        return Metric(value=score, attributes={"polarity": polarity, "target": target})

    return Scorer.from_callable(evaluate, name=name)
+
+
+PerspectiveAttribute = t.Literal[
+ "TOXICITY", "SEVERE_TOXICITY", "IDENTITY_ATTACK", "INSULT", "PROFANITY", "THREAT"
+]
+
+
def sentiment_with_perspective(
    *,
    api_key: str | None = None,
    attribute: PerspectiveAttribute = "TOXICITY",
    name: str | None = None,
) -> Scorer[t.Any]:
    """
    Score the sentiment of the text using the Perspective API.

    Returns a float score between 0.0 and 1.0 indicating the level of the attribute in the text.

    Args:
        api_key: Your Perspective API key, or set in the PERSPECTIVE_API_KEY environment variable.
        attribute: The attribute to analyze (e.g., TOXICITY, SEVERE_TOXICITY).
        name: Name of the scorer.

    Raises:
        ValueError: If no API key is provided or found in the environment.
    """

    api_key = api_key or os.getenv("PERSPECTIVE_API_KEY")
    if not api_key:
        raise ValueError(
            "API key must be provided or set in the PERSPECTIVE_API_KEY environment variable."
        )

    async def evaluate(data: t.Any) -> float:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                "https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze",
                params={"key": api_key},
                json={
                    "comment": {"text": str(data)},
                    "languages": ["en"],
                    "requestedAttributes": {attribute: {}},
                    "doNotStore": True,
                },
                timeout=10,
            )
            response.raise_for_status()
            # Fix: httpx.Response.json() is synchronous — `await response.json()`
            # raised `TypeError: object dict can't be used in 'await' expression`.
            result = response.json()
            return float(result["attributeScores"][attribute]["summaryScore"]["value"])

    if name is None:
        name = f"perspective_{attribute.lower()}"

    # catch=True: network/API failures surface as a 0-valued Metric with
    # error details rather than propagating an exception.
    return Scorer.from_callable(evaluate, name=name, catch=True)
diff --git a/dreadnode/scorers/similarity.py b/dreadnode/scorers/similarity.py
new file mode 100644
index 00000000..7903b688
--- /dev/null
+++ b/dreadnode/scorers/similarity.py
@@ -0,0 +1,180 @@
+import typing as t
+from difflib import SequenceMatcher
+
+from dreadnode.metric import Metric, Scorer
+from dreadnode.task import TaskInput
+from dreadnode.util import clean_str, warn_at_user_stacklevel
+
+_NLTK_AVAILABLE = False
+_NLTK_ERROR_MSG = "nltk dependency is not installed. Please run: pip install nltk && python -m nltk.downloader punkt"
+
+try:
+ import nltk # type: ignore[import-not-found,unused-ignore]
+ from nltk.tokenize import word_tokenize # type: ignore[import-not-found,unused-ignore]
+ from nltk.translate.bleu_score import ( # type: ignore[import-not-found,unused-ignore]
+ sentence_bleu,
+ )
+
+ # Check for the 'punkt' tokenizer data
+ try:
+ nltk.data.find("tokenizers/punkt")
+ except LookupError as e:
+ _NLTK_ERROR_MSG = (
+ "NLTK 'punkt' tokenizer not found. Please run: python -m nltk.downloader punkt"
+ )
+ raise ImportError(_NLTK_ERROR_MSG) from e
+
+ _NLTK_AVAILABLE = True
+except ImportError:
+ pass
+
+_SKLEARN_AVAILABLE = False
+_SKLEARN_ERROR_MSG = (
+ "scikit-learn dependency is not installed. Please install it with: pip install scikit-learn"
+)
+
+try:
+ from sklearn.feature_extraction.text import ( # type: ignore[import-not-found,unused-ignore]
+ TfidfVectorizer,
+ )
+ from sklearn.metrics.pairwise import ( # type: ignore[import-not-found,unused-ignore]
+ cosine_similarity,
+ )
+
+ _SKLEARN_AVAILABLE = True
+except ImportError:
+ pass
+
+
def similarity(
    reference: str | TaskInput,
    *,
    method: t.Literal["ratio", "quick_ratio", "real_quick_ratio"] = "ratio",
    case_sensitive: bool = False,
    name: str | None = None,
) -> "Scorer[t.Any]":
    """
    Score how similar the data is to a reference text using sequence matching.

    Produces a value between 0.0 (completely different) and 1.0 (identical),
    computed with `difflib.SequenceMatcher`.

    Args:
        reference: The reference text (static string) or a `TaskInput` to resolve dynamically.
        method: The similarity comparison method to use.
        case_sensitive: Perform a case-sensitive comparison.
        name: Name of the scorer.
    """

    def evaluate(data: t.Any) -> Metric:
        candidate = str(data)
        if isinstance(reference, TaskInput):
            expected = reference.resolve(cast_as=str)
        else:
            expected = reference

        if not case_sensitive:
            candidate = candidate.lower()
            expected = expected.lower()

        matcher = SequenceMatcher(a=expected, b=candidate)

        # Pick the requested ratio variant; "ratio" is the (exact) default.
        compute = {
            "quick_ratio": matcher.quick_ratio,
            "real_quick_ratio": matcher.real_quick_ratio,
        }.get(method, matcher.ratio)

        return Metric(value=compute(), attributes={"method": method})

    if name is None:
        ref_name = reference.name if isinstance(reference, TaskInput) else reference
        name = f"similarity_to_{clean_str(ref_name, max_length=20)}"

    return Scorer.from_callable(evaluate, name=name, catch=True)
+
+
def semantic_similarity(
    reference: str | TaskInput,
    *,
    name: str | None = None,
) -> "Scorer[t.Any]":
    """
    Scores semantic similarity using TF-IDF and cosine similarity.

    Requires scikit-learn.

    Args:
        reference: The reference text (e.g., expected output) or a TaskInput.
        name: Name of the scorer.
    """
    # Fix: resolve the scorer name BEFORE the availability check, so the
    # disabled fallback scorer is registered under the same derived name
    # instead of `name=None`.
    if name is None:
        ref_name = reference.name if isinstance(reference, TaskInput) else "static_text"
        name = f"semantic_sim_to_{clean_str(ref_name)}"

    if not _SKLEARN_AVAILABLE:
        warn_at_user_stacklevel(_SKLEARN_ERROR_MSG, UserWarning)

        def disabled_evaluate(_: t.Any) -> Metric:
            return Metric(value=0.0, attributes={"error": _SKLEARN_ERROR_MSG})

        return Scorer.from_callable(disabled_evaluate, name=name)

    vectorizer = TfidfVectorizer(stop_words="english")

    def evaluate(data: t.Any) -> Metric:
        candidate_text = str(data)
        reference_text = (
            reference.resolve(cast_as=str) if isinstance(reference, TaskInput) else reference
        )
        # Fit on both texts so they share a vocabulary, then compare the
        # two resulting TF-IDF row vectors.
        tfidf_matrix = vectorizer.fit_transform([candidate_text, reference_text])
        sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
        return Metric(value=float(sim))

    return Scorer.from_callable(evaluate, name=name, catch=True)
+
+
def bleu(
    reference: str | TaskInput,
    *,
    weights: tuple[float, ...] = (0.25, 0.25, 0.25, 0.25),
    name: str | None = None,
) -> "Scorer[t.Any]":
    """
    Scores the data using the BLEU score against a reference text.

    A score of 1.0 indicates a perfect match. Requires NLTK.

    Args:
        reference: The reference text (e.g., the prompt) or a TaskInput.
        weights: Weights for unigram, bigram, etc. Must sum to 1.
        name: Name of the scorer.
    """
    # Fix: resolve the scorer name BEFORE the availability check, so the
    # disabled fallback scorer is registered under the same derived name
    # instead of `name=None`.
    if name is None:
        ref_name = reference.name if isinstance(reference, TaskInput) else "static_text"
        name = f"bleu_{clean_str(ref_name)}"

    if not _NLTK_AVAILABLE:
        warn_at_user_stacklevel(_NLTK_ERROR_MSG, UserWarning)

        def disabled_evaluate(_: t.Any) -> Metric:
            return Metric(value=0.0, attributes={"error": _NLTK_ERROR_MSG})

        return Scorer.from_callable(disabled_evaluate, name=name)

    def evaluate(data: t.Any) -> Metric:
        candidate_text = str(data)
        reference_text = (
            reference.resolve(cast_as=str) if isinstance(reference, TaskInput) else reference
        )

        if not reference_text or not candidate_text:
            return Metric(value=0.0, attributes={"error": "Reference or candidate text is empty."})

        ref_tokens = word_tokenize(reference_text)
        cand_tokens = word_tokenize(candidate_text)

        # sentence_bleu expects a LIST of reference token lists.
        score = sentence_bleu([ref_tokens], cand_tokens, weights=weights)
        return Metric(value=score)

    return Scorer.from_callable(evaluate, name=name)
diff --git a/dreadnode/task.py b/dreadnode/task.py
index 37bd9b0e..7e99bcb1 100644
--- a/dreadnode/task.py
+++ b/dreadnode/task.py
@@ -20,10 +20,6 @@ class TaskFailedWarning(UserWarning):
pass
-class TaskGeneratorWarning(UserWarning):
- pass
-
-
class TaskSpanList(list[TaskSpan[R]]):
"""
Lightweight wrapper around a list of TaskSpans to provide some convenience methods.
@@ -214,7 +210,7 @@ def with_(
else task.log_execution_metrics
)
- new_scorers = [Scorer.from_callable(self.tracer, scorer) for scorer in (scorers or [])]
+ new_scorers = [Scorer.from_callable(scorer) for scorer in (scorers or [])]
new_tags = list(tags or [])
if append:
@@ -287,7 +283,10 @@ async def run(self, *args: P.args, **kwargs: P.kwargs) -> TaskSpan[R]:
input_object_hashes: list[str] = [
span.log_input(
- name, value, label=f"{self.label}.input.{name}", attributes={"auto": True}
+ name,
+ value,
+ label=f"{self.label}.input.{name}",
+ attributes={"auto": True},
)
for name, value in inputs_to_log.items()
]
@@ -325,7 +324,10 @@ async def run(self, *args: P.args, **kwargs: P.kwargs) -> TaskSpan[R]:
)
):
output_object_hash = span.log_output(
- "output", output, label=f"{self.label}.output", attributes={"auto": True}
+ "output",
+ output,
+ label=f"{self.label}.output",
+ attributes={"auto": True},
)
# Link the output to the inputs
@@ -503,3 +505,87 @@ async def try_map(self, count: int, *args: P.args, **kwargs: P.kwargs) -> list[R
"""
spans = await self.try_map_run(count, *args, **kwargs)
return [span.output for span in spans if span]
+
+
# Warning category emitted when a TaskInput cannot be resolved, processed,
# or cast — resolution degrades to a warning + fallback value, never a raise.
class TaskInputWarning(UserWarning):
    pass


CastT = t.TypeVar("CastT")


class TaskInput:
    """
    A placeholder to dynamically retrieve an input from the active TaskSpan.
    """

    def __init__(self, name: str, *, process: t.Callable[[t.Any], t.Any] | None = None) -> None:
        """
        Args:
            name: The name of the input to retrieve, as logged via `task.log_input(name=...)`.
            process: An optional function to process the input value before returning it.
                This can be used to transform or extract from the raw input value.
        """
        self.name = name
        self.process = process

    def __repr__(self) -> str:
        return f"TaskInput(name='{self.name}')"

    @t.overload
    def resolve(self, *, cast_as: None = None) -> t.Any: ...

    @t.overload
    def resolve(self, *, cast_as: type[CastT]) -> CastT: ...

    def resolve(self, *, cast_as: type[CastT] | None = None) -> t.Any:  # noqa: PLR0911
        """
        Resolve the input from the current TaskSpan.

        Never raises: when there is no active span, the input name is
        missing, `process` fails, or the cast fails, a `TaskInputWarning`
        is emitted and a fallback (`None` or the raw value) is returned.

        Args:
            cast_as: Optionally cast the resolved value to a specific type.

        Returns:
            The value of the input from the current TaskSpan.
        """
        # Local import to avoid a circular dependency with the tracing module.
        from dreadnode.tracing.span import current_task_span

        if (task := current_task_span.get()) is None:
            warn_at_user_stacklevel(
                "TaskInput.resolve() called outside of an active TaskSpan context. "
                "This will raise an error in future versions.",
                TaskInputWarning,
            )
            return None

        try:
            task_input = task.inputs[self.name].value
        except KeyError:
            warn_at_user_stacklevel(
                f"Input '{self.name}' not found in the active TaskSpan. "
                f"Available inputs are: {list(task.inputs.keys())}",
                TaskInputWarning,
            )
            return None

        # NOTE: when `process` succeeds its result is returned directly and
        # `cast_as` is NOT applied; casting only applies to the raw value.
        try:
            if self.process is not None:
                return self.process(task_input)
        except Exception as e:  # noqa: BLE001
            warn_at_user_stacklevel(
                f"Error processing TaskInput '{self.name}': {e}",
                TaskInputWarning,
            )
            # Fall back to the unprocessed value when `process` fails.
            return task_input

        if cast_as is not None:
            try:
                return cast_as(task_input)  # type: ignore [call-arg]
            except Exception as e:  # noqa: BLE001
                warn_at_user_stacklevel(
                    f"Error casting TaskInput '{self.name}' to {cast_as.__name__}: {e}",
                    TaskInputWarning,
                )
                # Fall back to the uncast value when the cast fails.
                return task_input

        return task_input
diff --git a/dreadnode/tracing/span.py b/dreadnode/tracing/span.py
index 525d5124..47baf284 100644
--- a/dreadnode/tracing/span.py
+++ b/dreadnode/tracing/span.py
@@ -583,6 +583,7 @@ def log_object(
if composite_hash not in self._objects:
# Create a new object, but use the data_hash for deduplication of storage
obj = self._create_object_by_hash(serialized, composite_hash)
+ obj.runtime_value = value # Store the original value for runtime access
# Store with composite hash so we can look it up by the combination
self._objects[composite_hash] = obj
@@ -650,7 +651,7 @@ def _create_object_by_hash(self, serialized: Serialized, object_hash: str) -> Ob
size=data_len,
)
- def get_object(self, hash_: str) -> t.Any:
+ def get_object(self, hash_: str) -> Object:
return self._objects[hash_]
def link_objects(
@@ -979,7 +980,7 @@ def log_output(
return hash_
@property
- def inputs(self) -> AnyDict:
+ def inputs(self) -> dict[str, Object]:
if self._run is None:
return {}
return {ref.name: self._run.get_object(ref.hash) for ref in self._inputs}
diff --git a/dreadnode/util.py b/dreadnode/util.py
index 89262d23..f9883d6a 100644
--- a/dreadnode/util.py
+++ b/dreadnode/util.py
@@ -13,9 +13,12 @@
from logfire import suppress_instrumentation
from logfire._internal.stack_info import add_non_user_code_prefix, is_user_code
+from logfire._internal.stack_info import warn_at_user_stacklevel as _warn_at_user_stacklevel
import dreadnode
+warn_at_user_stacklevel = _warn_at_user_stacklevel
+
SysExcInfo = (
tuple[type[BaseException], BaseException, TracebackType | None] | tuple[None, None, None]
)
@@ -28,11 +31,14 @@
add_non_user_code_prefix(Path(dreadnode.__file__).parent)
-def clean_str(s: str) -> str:
+def clean_str(string: str, *, max_length: int | None = None) -> str:
"""
Clean a string by replacing all non-alphanumeric characters (except `/` and `@`) with underscores.
"""
- return re.sub(r"[^\w/@]+", "_", s.lower()).strip("_")
+ result = re.sub(r"[^\w/@]+", "_", string.lower()).strip("_")
+ if max_length is not None:
+ result = result[:max_length]
+ return result
def safe_repr(obj: t.Any) -> str:
diff --git a/poetry.lock b/poetry.lock
index cdb90985..d0077857 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
[[package]]
name = "aiohappyeyeballs"
@@ -6,7 +6,7 @@ version = "2.6.1"
description = "Happy Eyeballs for asyncio"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main", "dev"]
files = [
{file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"},
{file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"},
@@ -18,7 +18,7 @@ version = "3.11.18"
description = "Async http client/server framework (asyncio)"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main", "dev"]
files = [
{file = "aiohttp-3.11.18-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:96264854fedbea933a9ca4b7e0c745728f01380691687b7365d18d9e977179c4"},
{file = "aiohttp-3.11.18-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9602044ff047043430452bc3a2089743fa85da829e6fc9ee0025351d66c332b6"},
@@ -122,7 +122,7 @@ version = "1.3.2"
description = "aiosignal: a list of registered asynchronous callbacks"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main", "dev"]
files = [
{file = "aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"},
{file = "aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"},
@@ -137,7 +137,7 @@ version = "0.7.0"
description = "Reusable constraint types to use with typing.Annotated"
optional = false
python-versions = ">=3.8"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"},
{file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
@@ -149,7 +149,7 @@ version = "4.9.0"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
optional = false
python-versions = ">=3.9"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c"},
{file = "anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028"},
@@ -172,8 +172,8 @@ version = "5.0.1"
description = "Timeout context manager for asyncio programs"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
-markers = "python_version == \"3.10\""
+groups = ["main", "dev"]
+markers = "python_version < \"3.11\""
files = [
{file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
{file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
@@ -185,7 +185,7 @@ version = "25.3.0"
description = "Classes Without Boilerplate"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main", "dev"]
files = [
{file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"},
{file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"},
@@ -205,7 +205,7 @@ version = "1.38.14"
description = "The AWS SDK for Python"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "boto3-1.38.14-py3-none-any.whl", hash = "sha256:44bc15285104683cd25dfb60abc5aac65b75d9e79b06f43094d18ed5c2739302"},
{file = "boto3-1.38.14.tar.gz", hash = "sha256:2cba851374c9b15facd6e7fe3adf7988c216537182d2c139e96da5c101f4cbcf"},
@@ -225,7 +225,7 @@ version = "1.38.14"
description = "Type annotations for boto3 1.38.14 generated with mypy-boto3-builder 8.11.0"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "boto3_stubs-1.38.14-py3-none-any.whl", hash = "sha256:8efd0912ed472422426b8645d41b4947ffcd18a4ce861a15ae1e9d066459788d"},
{file = "boto3_stubs-1.38.14.tar.gz", hash = "sha256:7ed7e98dfdca6aa30aa21cd0524031c530f16e2eb209ce346d18674d967ff822"},
@@ -653,7 +653,7 @@ version = "1.38.14"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">=3.9"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "botocore-1.38.14-py3-none-any.whl", hash = "sha256:3125ed92e9ee6137c28fd32c56934a531a372346a7b13cb86de4328d7629e156"},
{file = "botocore-1.38.14.tar.gz", hash = "sha256:8ac91de6c33651a5c699268f1d22fadd5e99f370230dbea97d29e4164de4e5f2"},
@@ -673,7 +673,7 @@ version = "1.38.14"
description = "Type annotations and code completion for botocore"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "botocore_stubs-1.38.14-py3-none-any.whl", hash = "sha256:d0f65980feeef3daa9203da45832c0331c008fa50ca42431c23932a7cd160f1d"},
{file = "botocore_stubs-1.38.14.tar.gz", hash = "sha256:adfb5d81ebeb8ba8373d3e296c2bedf6889550c13029475e0338cc8852ddb574"},
@@ -898,7 +898,7 @@ version = "8.2.0"
description = "Composable command line interface toolkit"
optional = false
python-versions = ">=3.10"
-groups = ["dev"]
+groups = ["main", "dev"]
files = [
{file = "click-8.2.0-py3-none-any.whl", hash = "sha256:6b303f0b2aa85f1cb4e5303078fadcbcd4e476f114fab9b5007005711839325c"},
{file = "click-8.2.0.tar.gz", hash = "sha256:f5452aeddd9988eefa20f90f05ab66f17fce1ee2a36907fd30b05bbb5953814d"},
@@ -918,7 +918,7 @@ files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
-markers = {main = "(extra == \"training\" or extra == \"multimodal\") and platform_system == \"Windows\""}
+markers = {dev = "platform_system == \"Windows\" or sys_platform == \"win32\""}
[[package]]
name = "coolname"
@@ -1040,72 +1040,12 @@ version = "1.9.0"
description = "Distro - an OS platform information API"
optional = false
python-versions = ">=3.6"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
{file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
]
-[[package]]
-name = "elastic-transport"
-version = "8.17.1"
-description = "Transport classes and utilities shared among Python Elastic client libraries"
-optional = false
-python-versions = ">=3.8"
-groups = ["dev"]
-files = [
- {file = "elastic_transport-8.17.1-py3-none-any.whl", hash = "sha256:192718f498f1d10c5e9aa8b9cf32aed405e469a7f0e9d6a8923431dbb2c59fb8"},
- {file = "elastic_transport-8.17.1.tar.gz", hash = "sha256:5edef32ac864dca8e2f0a613ef63491ee8d6b8cfb52881fa7313ba9290cac6d2"},
-]
-
-[package.dependencies]
-certifi = "*"
-urllib3 = ">=1.26.2,<3"
-
-[package.extras]
-develop = ["aiohttp", "furo", "httpx", "opentelemetry-api", "opentelemetry-sdk", "orjson", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "respx", "sphinx (>2)", "sphinx-autodoc-typehints", "trustme"]
-
-[[package]]
-name = "elasticsearch"
-version = "8.18.1"
-description = "Python client for Elasticsearch"
-optional = false
-python-versions = ">=3.8"
-groups = ["dev"]
-files = [
- {file = "elasticsearch-8.18.1-py3-none-any.whl", hash = "sha256:1a8c8b5ec3ce5be88f96d2f898375671648e96272978bce0dee3137d9326aabb"},
- {file = "elasticsearch-8.18.1.tar.gz", hash = "sha256:998035f17a8c1fba7ae26b183dca797dcf95db86da6a7ecba56d31afc40f07c7"},
-]
-
-[package.dependencies]
-elastic-transport = ">=8.15.1,<9"
-python-dateutil = "*"
-typing-extensions = "*"
-
-[package.extras]
-async = ["aiohttp (>=3,<4)"]
-dev = ["aiohttp", "black", "build", "coverage", "isort", "jinja2", "mapbox-vector-tile", "mypy", "nltk", "nox", "numpy", "orjson", "pandas", "pyarrow", "pyright", "pytest", "pytest-asyncio", "pytest-cov", "pytest-mock", "python-dateutil", "pyyaml (>=5.4)", "requests (>=2,<3)", "sentence-transformers", "simsimd", "tqdm", "twine", "types-python-dateutil", "types-tqdm", "unasync"]
-docs = ["sphinx", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=2.0)"]
-orjson = ["orjson (>=3)"]
-pyarrow = ["pyarrow (>=1)"]
-requests = ["requests (>=2.4.0,!=2.32.2,<3.0.0)"]
-vectorstore-mmr = ["numpy (>=1)", "simsimd (>=3)"]
-
-[[package]]
-name = "eval-type-backport"
-version = "0.2.2"
-description = "Like `typing._eval_type`, but lets older Python versions use newer typing features."
-optional = false
-python-versions = ">=3.8"
-groups = ["dev"]
-files = [
- {file = "eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a"},
- {file = "eval_type_backport-0.2.2.tar.gz", hash = "sha256:f0576b4cf01ebb5bd358d02314d31846af5e07678387486e2c798af0e7d849c1"},
-]
-
-[package.extras]
-tests = ["pytest"]
-
[[package]]
name = "exceptiongroup"
version = "1.3.0"
@@ -1113,7 +1053,7 @@ description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
groups = ["main", "dev"]
-markers = "python_version == \"3.10\""
+markers = "python_version < \"3.11\""
files = [
{file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"},
{file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"},
@@ -1151,7 +1091,6 @@ files = [
{file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"},
{file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"},
]
-markers = {main = "extra == \"training\""}
[package.extras]
docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"]
@@ -1164,7 +1103,7 @@ version = "1.6.0"
description = "A list-like structure which implements collections.abc.MutableSequence"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main", "dev"]
files = [
{file = "frozenlist-1.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e6e558ea1e47fd6fa8ac9ccdad403e5dd5ecc6ed8dda94343056fa4277d5c65e"},
{file = "frozenlist-1.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4b3cd7334a4bbc0c472164f3744562cb72d05002cc6fcf58adb104630bbc352"},
@@ -1340,7 +1279,7 @@ version = "0.16.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
optional = false
python-versions = ">=3.8"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"},
{file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"},
@@ -1353,6 +1292,7 @@ description = ""
optional = false
python-versions = ">=3.8"
groups = ["main", "dev"]
+markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""
files = [
{file = "hf_xet-1.1.1-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e39a8513f0854656116c837d387d9a41e9d78430b1a181442f04c223cbc4e8f8"},
{file = "hf_xet-1.1.1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:c60cd67be384cb9e592fa6dfd29a10fddffa1feb2f3b31f53e980630d1ca0fd6"},
@@ -1363,7 +1303,6 @@ files = [
{file = "hf_xet-1.1.1-cp37-abi3-win_amd64.whl", hash = "sha256:215a4e95009a0b9795ca3cf33db4e8d1248139593d7e1185661cd19b062d2b82"},
{file = "hf_xet-1.1.1.tar.gz", hash = "sha256:3e75d6e04c38c80115b640c025d68c3dc14d62f8b244011dfe547363674a1e87"},
]
-markers = {main = "extra == \"training\" and (platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\")", dev = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""}
[package.extras]
tests = ["pytest"]
@@ -1374,7 +1313,7 @@ version = "1.0.9"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"},
{file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"},
@@ -1396,7 +1335,7 @@ version = "0.28.1"
description = "The next generation HTTP client."
optional = false
python-versions = ">=3.8"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"},
{file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"},
@@ -1415,6 +1354,18 @@ http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
zstd = ["zstandard (>=0.18.0)"]
+[[package]]
+name = "httpx-sse"
+version = "0.4.1"
+description = "Consume Server-Sent Event (SSE) messages with HTTPX."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+ {file = "httpx_sse-0.4.1-py3-none-any.whl", hash = "sha256:cba42174344c3a5b06f255ce65b350880f962d99ead85e776f23c6618a377a37"},
+ {file = "httpx_sse-0.4.1.tar.gz", hash = "sha256:8f44d34414bc7b21bf3602713005c5df4917884f76072479b21f68befa4ea26e"},
+]
+
[[package]]
name = "huggingface-hub"
version = "0.31.1"
@@ -1426,7 +1377,6 @@ files = [
{file = "huggingface_hub-0.31.1-py3-none-any.whl", hash = "sha256:43f73124819b48b42d140cbc0d7a2e6bd15b2853b1b9d728d4d55ad1750cac5b"},
{file = "huggingface_hub-0.31.1.tar.gz", hash = "sha256:492bb5f545337aa9e2f59b75ef4c5f535a371e8958a6ce90af056387e67f1180"},
]
-markers = {main = "extra == \"training\""}
[package.dependencies]
filelock = "*"
@@ -1542,7 +1492,7 @@ version = "8.6.1"
description = "Read metadata from Python packages"
optional = false
python-versions = ">=3.9"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e"},
{file = "importlib_metadata-8.6.1.tar.gz", hash = "sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580"},
@@ -1578,7 +1528,7 @@ version = "3.1.6"
description = "A very fast and expressive template engine."
optional = false
python-versions = ">=3.7"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"},
{file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"},
@@ -1596,7 +1546,7 @@ version = "0.9.0"
description = "Fast iterable JSON parser."
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "jiter-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad"},
{file = "jiter-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea"},
@@ -1682,7 +1632,7 @@ version = "1.0.1"
description = "JSON Matching Expressions"
optional = false
python-versions = ">=3.7"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"},
{file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
@@ -1694,7 +1644,7 @@ version = "1.7.0"
description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming."
optional = false
python-versions = "*"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
{file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
@@ -1710,7 +1660,7 @@ version = "1.1.0"
description = "jsonref is a library for automatic dereferencing of JSON Reference objects for Python."
optional = false
python-versions = ">=3.7"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9"},
{file = "jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552"},
@@ -1722,7 +1672,7 @@ version = "4.23.0"
description = "An implementation of JSON Schema validation for Python"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"},
{file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"},
@@ -1744,7 +1694,7 @@ version = "2025.4.1"
description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "jsonschema_specifications-2025.4.1-py3-none-any.whl", hash = "sha256:4653bffbd6584f7de83a67e0d620ef16900b390ddc7939d56684d6c81e33f1af"},
{file = "jsonschema_specifications-2025.4.1.tar.gz", hash = "sha256:630159c9f4dbea161a6a2205c3011cc4f18ff381b189fff48bb39b9bf26ae608"},
@@ -1759,7 +1709,7 @@ version = "1.69.1"
description = "Library to easily interface with LLM API providers"
optional = false
python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "litellm-1.69.1-py3-none-any.whl", hash = "sha256:43eb76d16d2c19881856740491adc43474b4a1e7cd405e65edc42e5c8ccfb65d"},
{file = "litellm-1.69.1.tar.gz", hash = "sha256:96886aec050b93c76da7d45b3d633c287cfedf18de046542c3c681c386f56136"},
@@ -1832,7 +1782,7 @@ version = "3.15.1"
description = "Shim for the Logfire SDK which does nothing unless Logfire is installed"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "logfire_api-3.15.1-py3-none-any.whl", hash = "sha256:dfe344ce3e67ccada347c98112ee044ff0a52e8900f7dc0ff20c683d0c81c13f"},
{file = "logfire_api-3.15.1.tar.gz", hash = "sha256:50705b905408d007163c82ba4cb76a36dd85b7d401568ea9321d1de2171eb157"},
@@ -1844,7 +1794,7 @@ version = "0.7.3"
description = "Python logging made (stupidly) simple"
optional = false
python-versions = "<4.0,>=3.5"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c"},
{file = "loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6"},
@@ -1888,7 +1838,7 @@ version = "3.0.2"
description = "Safely add untrusted strings to HTML/XML markup."
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"},
{file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"},
@@ -1953,6 +1903,36 @@ files = [
{file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"},
]
+[[package]]
+name = "mcp"
+version = "1.12.1"
+description = "Model Context Protocol SDK"
+optional = false
+python-versions = ">=3.10"
+groups = ["main"]
+files = [
+ {file = "mcp-1.12.1-py3-none-any.whl", hash = "sha256:34147f62891417f8b000c39718add844182ba424c8eb2cea250b4267bda4b08b"},
+ {file = "mcp-1.12.1.tar.gz", hash = "sha256:d1d0bdeb09e4b17c1a72b356248bf3baf75ab10db7008ef865c4afbeb0eb810e"},
+]
+
+[package.dependencies]
+anyio = ">=4.5"
+httpx = ">=0.27"
+httpx-sse = ">=0.4"
+jsonschema = ">=4.20.0"
+pydantic = ">=2.8.0,<3.0.0"
+pydantic-settings = ">=2.5.2"
+python-multipart = ">=0.0.9"
+pywin32 = {version = ">=310", markers = "sys_platform == \"win32\""}
+sse-starlette = ">=1.6.1"
+starlette = ">=0.27"
+uvicorn = {version = ">=0.23.1", markers = "sys_platform != \"emscripten\""}
+
+[package.extras]
+cli = ["python-dotenv (>=1.0.0)", "typer (>=0.16.0)"]
+rich = ["rich (>=13.9.4)"]
+ws = ["websockets (>=15.0.1)"]
+
[[package]]
name = "mdurl"
version = "0.1.2"
@@ -1998,7 +1978,7 @@ version = "6.4.3"
description = "multidict implementation"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main", "dev"]
files = [
{file = "multidict-6.4.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:32a998bd8a64ca48616eac5a8c1cc4fa38fb244a3facf2eeb14abe186e0f6cc5"},
{file = "multidict-6.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a54ec568f1fc7f3c313c2f3b16e5db346bf3660e1309746e7fccbbfded856188"},
@@ -2194,7 +2174,7 @@ version = "1.38.0"
description = "Type annotations for boto3 S3 1.38.0 service generated with mypy-boto3-builder 8.10.1"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "mypy_boto3_s3-1.38.0-py3-none-any.whl", hash = "sha256:5cd9449df0ef6cf89e00e6fc9130a0ab641f703a23ab1d2146c394da058e8282"},
{file = "mypy_boto3_s3-1.38.0.tar.gz", hash = "sha256:f8fe586e45123ffcd305a0c30847128f3931d888649e2b4c5a52f412183c840a"},
@@ -2298,7 +2278,7 @@ version = "1.75.0"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "openai-1.75.0-py3-none-any.whl", hash = "sha256:fe6f932d2ded3b429ff67cc9ad118c71327db32eb9d32dd723de3acfca337125"},
{file = "openai-1.75.0.tar.gz", hash = "sha256:fb3ea907efbdb1bcfd0c44507ad9c961afd7dce3147292b54505ecfd17be8fd1"},
@@ -2717,7 +2697,7 @@ version = "3.11"
description = "Python Lex & Yacc"
optional = false
python-versions = "*"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"},
{file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"},
@@ -2764,7 +2744,7 @@ version = "0.3.1"
description = "Accelerated property cache"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main", "dev"]
files = [
{file = "propcache-0.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f27785888d2fdd918bc36de8b8739f2d6c791399552333721b58193f68ea3e98"},
{file = "propcache-0.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4e89cde74154c7b5957f87a355bb9c8ec929c167b59c83d90654ea36aeb6180"},
@@ -2961,7 +2941,7 @@ version = "2.11.7"
description = "Data validation using Python type hints"
optional = false
python-versions = ">=3.9"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b"},
{file = "pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db"},
@@ -2983,7 +2963,7 @@ version = "2.33.2"
description = "Core functionality for Pydantic validation and serialization"
optional = false
python-versions = ">=3.9"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8"},
{file = "pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d"},
@@ -3089,13 +3069,37 @@ files = [
[package.dependencies]
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
+[[package]]
+name = "pydantic-settings"
+version = "2.10.1"
+description = "Settings management using Pydantic"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+ {file = "pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796"},
+ {file = "pydantic_settings-2.10.1.tar.gz", hash = "sha256:06f0062169818d0f5524420a360d632d5857b83cffd4d42fe29597807a1614ee"},
+]
+
+[package.dependencies]
+pydantic = ">=2.7.0"
+python-dotenv = ">=0.21.0"
+typing-inspection = ">=0.4.0"
+
+[package.extras]
+aws-secrets-manager = ["boto3 (>=1.35.0)", "boto3-stubs[secretsmanager]"]
+azure-key-vault = ["azure-identity (>=1.16.0)", "azure-keyvault-secrets (>=4.8.0)"]
+gcp-secret-manager = ["google-cloud-secret-manager (>=2.23.1)"]
+toml = ["tomli (>=2.0.1)"]
+yaml = ["pyyaml (>=6.0.1)"]
+
[[package]]
name = "pydantic-xml"
version = "2.16.0"
description = "pydantic xml extension"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "pydantic_xml-2.16.0-py3-none-any.whl", hash = "sha256:e1ecd513287e30070ce0a9f8c0e461187ebf5b18da79ca62f5dd4219fb93b68e"},
{file = "pydantic_xml-2.16.0.tar.gz", hash = "sha256:64ae5d8538a23706471f0b2007c9252ef290dff40c216dbc3051c79030aaf03f"},
@@ -3188,16 +3192,27 @@ version = "1.1.0"
description = "Read key-value pairs from a .env file and set them as environment variables"
optional = false
python-versions = ">=3.9"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"},
{file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"},
]
-markers = {main = "extra == \"multimodal\""}
[package.extras]
cli = ["click (>=5.0)"]
+[[package]]
+name = "python-multipart"
+version = "0.0.20"
+description = "A streaming multipart parser for Python"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+ {file = "python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104"},
+ {file = "python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13"},
+]
+
[[package]]
name = "python-ulid"
version = "3.0.0"
@@ -3225,6 +3240,37 @@ files = [
{file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"},
]
+[[package]]
+name = "pywin32"
+version = "311"
+description = "Python for Window Extensions"
+optional = false
+python-versions = "*"
+groups = ["main"]
+markers = "sys_platform == \"win32\""
+files = [
+ {file = "pywin32-311-cp310-cp310-win32.whl", hash = "sha256:d03ff496d2a0cd4a5893504789d4a15399133fe82517455e78bad62efbb7f0a3"},
+ {file = "pywin32-311-cp310-cp310-win_amd64.whl", hash = "sha256:797c2772017851984b97180b0bebe4b620bb86328e8a884bb626156295a63b3b"},
+ {file = "pywin32-311-cp310-cp310-win_arm64.whl", hash = "sha256:0502d1facf1fed4839a9a51ccbcc63d952cf318f78ffc00a7e78528ac27d7a2b"},
+ {file = "pywin32-311-cp311-cp311-win32.whl", hash = "sha256:184eb5e436dea364dcd3d2316d577d625c0351bf237c4e9a5fabbcfa5a58b151"},
+ {file = "pywin32-311-cp311-cp311-win_amd64.whl", hash = "sha256:3ce80b34b22b17ccbd937a6e78e7225d80c52f5ab9940fe0506a1a16f3dab503"},
+ {file = "pywin32-311-cp311-cp311-win_arm64.whl", hash = "sha256:a733f1388e1a842abb67ffa8e7aad0e70ac519e09b0f6a784e65a136ec7cefd2"},
+ {file = "pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31"},
+ {file = "pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067"},
+ {file = "pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852"},
+ {file = "pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d"},
+ {file = "pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d"},
+ {file = "pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a"},
+ {file = "pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee"},
+ {file = "pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87"},
+ {file = "pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42"},
+ {file = "pywin32-311-cp38-cp38-win32.whl", hash = "sha256:6c6f2969607b5023b0d9ce2541f8d2cbb01c4f46bc87456017cf63b73f1e2d8c"},
+ {file = "pywin32-311-cp38-cp38-win_amd64.whl", hash = "sha256:c8015b09fb9a5e188f83b7b04de91ddca4658cee2ae6f3bc483f0b21a77ef6cd"},
+ {file = "pywin32-311-cp39-cp39-win32.whl", hash = "sha256:aba8f82d551a942cb20d4a83413ccbac30790b50efb89a75e4f586ac0bb8056b"},
+ {file = "pywin32-311-cp39-cp39-win_amd64.whl", hash = "sha256:e0c4cfb0621281fe40387df582097fd796e80430597cb9944f0ae70447bacd91"},
+ {file = "pywin32-311-cp39-cp39-win_arm64.whl", hash = "sha256:62ea666235135fee79bb154e695f3ff67370afefd71bd7fea7512fc70ef31e3d"},
+]
+
[[package]]
name = "pyyaml"
version = "6.0.2"
@@ -3287,7 +3333,6 @@ files = [
{file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"},
{file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
]
-markers = {main = "extra == \"training\""}
[[package]]
name = "referencing"
@@ -3295,7 +3340,7 @@ version = "0.36.2"
description = "JSON Referencing + Python"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"},
{file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"},
@@ -3312,7 +3357,7 @@ version = "2024.11.6"
description = "Alternative regular expression module, to replace re."
optional = false
python-versions = ">=3.8"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"},
{file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"},
@@ -3409,7 +3454,6 @@ files = [
{file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"},
{file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"},
]
-markers = {main = "extra == \"training\""}
[[package]]
name = "requests"
@@ -3455,27 +3499,26 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"]
[[package]]
name = "rigging"
-version = "2.3.0"
+version = "3.1.1"
description = "LLM Interaction Framework"
optional = false
-python-versions = "<4.0,>=3.9"
-groups = ["dev"]
+python-versions = "<4.0,>=3.10"
+groups = ["main"]
files = [
- {file = "rigging-2.3.0-py3-none-any.whl", hash = "sha256:e17a78acb4c36651fc30eb55c8166858402d2f51b359bcbf717833883a6dad8f"},
- {file = "rigging-2.3.0.tar.gz", hash = "sha256:2c021cbfeaf6c6cd80762ba6bba310ef77443bf792eaadadef54795a877a8883"},
+ {file = "rigging-3.1.1-py3-none-any.whl", hash = "sha256:245a6d4886db3c1eb36e2cd24bc451e32c850532f177ac74e232449a5597bdcf"},
+ {file = "rigging-3.1.1.tar.gz", hash = "sha256:6e81295a67a73d70f79a9f904d0ba61cc863a41bf4fad4f3587238fa7a97a492"},
]
[package.dependencies]
boto3 = ">=1.35.0,<2.0.0"
boto3-stubs = {version = ">=1.35.0,<2.0.0", extras = ["s3"]}
colorama = ">=0.4.6,<0.5.0"
-elasticsearch = ">=8.13.2,<9.0.0"
-eval-type-backport = ">=0.2.0,<0.3.0"
jsonpath-ng = ">=1.7.0,<2.0.0"
jsonref = ">=1.1.0,<2.0.0"
-litellm = ">=1.60.0,<2.0.0"
+litellm = ">=1.67.2,<2.0.0"
logfire-api = ">=3.1.1,<4.0.0"
loguru = ">=0.7.2,<0.8.0"
+mcp = ">=1.5.0,<2.0.0"
pandas = ">=2.2.2,<3.0.0"
pydantic = ">=2.7.3,<3.0.0"
pydantic-xml = ">=2.11.0,<3.0.0"
@@ -3483,7 +3526,7 @@ ruamel-yaml = ">=0.18.10,<0.19.0"
xmltodict = ">=0.13.0,<0.14.0"
[package.extras]
-all = ["accelerate (>=0.30.1,<0.31.0)", "aiodocker (>=0.22.2,<0.23.0)", "asyncssh (>=2.14.2,<3.0.0)", "click (>=8.1.7,<9.0.0)", "httpx (>=0.27.0,<0.28.0)", "transformers (>=4.41.0,<5.0.0)", "vllm (>=0.5.0,<0.6.0)", "websockets (>=13.0,<14.0)"]
+all = ["accelerate (>=0.30.1,<0.31.0)", "aiodocker (>=0.22.2,<0.23.0)", "asyncssh (>=2.14.2,<3.0.0)", "click (>=8.1.7,<9.0.0)", "elasticsearch (>=8.13.2,<9.0.0)", "httpx (>=0.27.0,<0.28.0)", "transformers (>=4.41.0,<5.0.0)", "vllm (>=0.5.0,<0.6.0)", "websockets (>=13.0,<14.0)"]
examples = ["aiodocker (>=0.22.2,<0.23.0)", "asyncssh (>=2.14.2,<3.0.0)", "click (>=8.1.7,<9.0.0)", "httpx (>=0.27.0,<0.28.0)", "websockets (>=13.0,<14.0)"]
[[package]]
@@ -3492,7 +3535,7 @@ version = "0.24.0"
description = "Python bindings to Rust's persistent data structures (rpds)"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "rpds_py-0.24.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:006f4342fe729a368c6df36578d7a348c7c716be1da0a1a0f86e3021f8e98724"},
{file = "rpds_py-0.24.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2d53747da70a4e4b17f559569d5f9506420966083a31c5fbd84e764461c4444b"},
@@ -3616,7 +3659,7 @@ version = "0.18.10"
description = "ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order"
optional = false
python-versions = ">=3.7"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "ruamel.yaml-0.18.10-py3-none-any.whl", hash = "sha256:30f22513ab2301b3d2b577adc121c6471f28734d3d9728581245f1e76468b4f1"},
{file = "ruamel.yaml-0.18.10.tar.gz", hash = "sha256:20c86ab29ac2153f80a428e1254a8adf686d3383df04490514ca3b79a362db58"},
@@ -3635,7 +3678,7 @@ version = "0.2.12"
description = "C version of reader, parser and emitter for ruamel.yaml derived from libyaml"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main"]
markers = "platform_python_implementation == \"CPython\" and python_version < \"3.13\""
files = [
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:11f891336688faf5156a36293a9c362bdc7c88f03a8a027c2c1d8e0bcde998e5"},
@@ -3644,7 +3687,6 @@ files = [
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76"},
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6"},
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd"},
- {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a52d48f4e7bf9005e8f0a89209bf9a73f7190ddf0489eee5eb51377385f59f2a"},
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da"},
{file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28"},
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6"},
@@ -3653,7 +3695,6 @@ files = [
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52"},
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642"},
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2"},
- {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1492a6051dab8d912fc2adeef0e8c72216b24d57bd896ea607cb90bb0c4981d3"},
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4"},
{file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb"},
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632"},
@@ -3662,7 +3703,6 @@ files = [
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd"},
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31"},
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680"},
- {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d"},
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5"},
{file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4"},
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a"},
@@ -3671,7 +3711,6 @@ files = [
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6"},
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf"},
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1"},
- {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01"},
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6"},
{file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3"},
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fc4b630cd3fa2cf7fce38afa91d7cfe844a9f75d7f0f36393fa98815e911d987"},
@@ -3680,7 +3719,6 @@ files = [
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2f1c3765db32be59d18ab3953f43ab62a761327aafc1594a2a1fbe038b8b8a7"},
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d85252669dc32f98ebcd5d36768f5d4faeaeaa2d655ac0473be490ecdae3c285"},
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e143ada795c341b56de9418c58d028989093ee611aa27ffb9b7f609c00d813ed"},
- {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2c59aa6170b990d8d2719323e628aaf36f3bfbc1c26279c0eeeb24d05d2d11c7"},
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win32.whl", hash = "sha256:beffaed67936fbbeffd10966a4eb53c402fafd3d6833770516bf7314bc6ffa12"},
{file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win_amd64.whl", hash = "sha256:040ae85536960525ea62868b642bdb0c2cc6021c9f9d507810c0c604e66f5a7b"},
{file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"},
@@ -3736,7 +3774,7 @@ version = "0.12.0"
description = "An Amazon S3 Transfer Manager"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "s3transfer-0.12.0-py3-none-any.whl", hash = "sha256:35b314d7d82865756edab59f7baebc6b477189e6ab4c53050e28c1de4d9cce18"},
{file = "s3transfer-0.12.0.tar.gz", hash = "sha256:8ac58bc1989a3fdb7c7f3ee0918a66b160d038a147c7b5db1500930a607e9a1c"},
@@ -3817,7 +3855,7 @@ version = "1.3.1"
description = "Sniff out which async library your code is running under"
optional = false
python-versions = ">=3.7"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
@@ -3846,13 +3884,53 @@ files = [
cffi = ">=1.0"
numpy = "*"
+[[package]]
+name = "sse-starlette"
+version = "2.4.1"
+description = "SSE plugin for Starlette"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+ {file = "sse_starlette-2.4.1-py3-none-any.whl", hash = "sha256:08b77ea898ab1a13a428b2b6f73cfe6d0e607a7b4e15b9bb23e4a37b087fd39a"},
+ {file = "sse_starlette-2.4.1.tar.gz", hash = "sha256:7c8a800a1ca343e9165fc06bbda45c78e4c6166320707ae30b416c42da070926"},
+]
+
+[package.dependencies]
+anyio = ">=4.7.0"
+
+[package.extras]
+daphne = ["daphne (>=4.2.0)"]
+examples = ["aiosqlite (>=0.21.0)", "fastapi (>=0.115.12)", "sqlalchemy[asyncio,examples] (>=2.0.41)", "starlette (>=0.41.3)", "uvicorn (>=0.34.0)"]
+granian = ["granian (>=2.3.1)"]
+uvicorn = ["uvicorn (>=0.34.0)"]
+
+[[package]]
+name = "starlette"
+version = "0.47.2"
+description = "The little ASGI library that shines."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+ {file = "starlette-0.47.2-py3-none-any.whl", hash = "sha256:c5847e96134e5c5371ee9fac6fdf1a67336d5815e09eb2a01fdb57a351ef915b"},
+ {file = "starlette-0.47.2.tar.gz", hash = "sha256:6ae9aa5db235e4846decc1e7b79c4f346adf41e9777aebeb49dfd09bbd7023d8"},
+]
+
+[package.dependencies]
+anyio = ">=3.6.2,<5"
+typing-extensions = {version = ">=4.10.0", markers = "python_version < \"3.13\""}
+
+[package.extras]
+full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.18)", "pyyaml"]
+
[[package]]
name = "tiktoken"
version = "0.9.0"
description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "tiktoken-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:586c16358138b96ea804c034b8acf3f5d3f0258bd2bc3b0227af4af5d622e382"},
{file = "tiktoken-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d9c59ccc528c6c5dd51820b3474402f69d9a9e1d656226848ad68a8d5b2e5108"},
@@ -3900,7 +3978,7 @@ version = "0.21.1"
description = ""
optional = false
python-versions = ">=3.9"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "tokenizers-0.21.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e78e413e9e668ad790a29456e677d9d3aa50a9ad311a40905d6861ba7692cf41"},
{file = "tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:cd51cd0a91ecc801633829fcd1fda9cf8682ed3477c6243b9a095539de4aecf3"},
@@ -3918,7 +3996,6 @@ files = [
{file = "tokenizers-0.21.1-cp39-abi3-win_amd64.whl", hash = "sha256:0f0dcbcc9f6e13e675a66d7a5f2f225a736745ce484c1a4e07476a89ccdad382"},
{file = "tokenizers-0.21.1.tar.gz", hash = "sha256:a1bb04dc5b448985f86ecd4b05407f5a8d97cb2c0532199b2a302a604a0165ab"},
]
-markers = {main = "extra == \"training\""}
[package.dependencies]
huggingface-hub = ">=0.16.4,<1.0"
@@ -3935,7 +4012,7 @@ description = "A lil' TOML parser"
optional = false
python-versions = ">=3.8"
groups = ["main", "dev"]
-markers = "python_version == \"3.10\""
+markers = "python_version < \"3.11\""
files = [
{file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
{file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
@@ -3982,7 +4059,6 @@ files = [
{file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
{file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
]
-markers = {main = "extra == \"training\" or extra == \"multimodal\""}
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
@@ -4092,7 +4168,7 @@ version = "0.27.1"
description = "Type annotations and code completion for awscrt"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "types_awscrt-0.27.1-py3-none-any.whl", hash = "sha256:e86b83d0fd8c770f985b8c458c28e232dae9adee0689d0a9671868a8bf397b0a"},
{file = "types_awscrt-0.27.1.tar.gz", hash = "sha256:3c2bee52ee45022daaf4f106d5d1b5f0ff0a8e3e6093dda65f5315b7669bc418"},
@@ -4143,7 +4219,7 @@ version = "0.12.0"
description = "Type annotations and code completion for s3transfer"
optional = false
python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "types_s3transfer-0.12.0-py3-none-any.whl", hash = "sha256:101bbc5b7f00b71512374df881f480fc6bf63c948b5098ab024bf3370fbfb0e8"},
{file = "types_s3transfer-0.12.0.tar.gz", hash = "sha256:f8f59201481e904362873bf0be3267f259d60ad946ebdfcb847d092a1fa26f98"},
@@ -4167,7 +4243,7 @@ version = "0.4.0"
description = "Runtime typing introspection tools"
optional = false
python-versions = ">=3.9"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f"},
{file = "typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122"},
@@ -4206,6 +4282,27 @@ h2 = ["h2 (>=4,<5)"]
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
zstd = ["zstandard (>=0.18.0)"]
+[[package]]
+name = "uvicorn"
+version = "0.35.0"
+description = "The lightning-fast ASGI server."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+markers = "sys_platform != \"emscripten\""
+files = [
+ {file = "uvicorn-0.35.0-py3-none-any.whl", hash = "sha256:197535216b25ff9b785e29a0b79199f55222193d47f820816e7da751e9bc8d4a"},
+ {file = "uvicorn-0.35.0.tar.gz", hash = "sha256:bc662f087f7cf2ce11a1d7fd70b90c9f98ef2e2831556dd078d131b96cc94a01"},
+]
+
+[package.dependencies]
+click = ">=7.0"
+h11 = ">=0.8"
+typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""}
+
+[package.extras]
+standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"]
+
[[package]]
name = "virtualenv"
version = "20.31.2"
@@ -4233,7 +4330,7 @@ version = "1.2.0"
description = "A small Python utility to set file creation time on Windows"
optional = false
python-versions = ">=3.5"
-groups = ["dev"]
+groups = ["main"]
markers = "sys_platform == \"win32\""
files = [
{file = "win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390"},
@@ -4338,7 +4435,7 @@ version = "0.13.0"
description = "Makes working with XML feel like you are working with JSON"
optional = false
python-versions = ">=3.4"
-groups = ["dev"]
+groups = ["main"]
files = [
{file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"},
{file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"},
@@ -4483,7 +4580,7 @@ version = "1.20.0"
description = "Yet another URL library"
optional = false
python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main", "dev"]
files = [
{file = "yarl-1.20.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f1f6670b9ae3daedb325fa55fbe31c22c8228f6e0b513772c2e1c623caa6ab22"},
{file = "yarl-1.20.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:85a231fa250dfa3308f3c7896cc007a47bc76e9e8e8595c20b7426cac4884c62"},
@@ -4602,7 +4699,7 @@ version = "3.21.0"
description = "Backport of pathlib-compatible object wrapper for zip files"
optional = false
python-versions = ">=3.9"
-groups = ["main", "dev"]
+groups = ["main"]
files = [
{file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"},
{file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"},
@@ -4624,4 +4721,4 @@ training = ["transformers"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.14"
-content-hash = "21fe5cf29eefa6f77e8bb811529fa19adff4f32d8e64f13432402631c4d3808f"
+content-hash = "d1fa3ef52a831c079d8ecb5de986b244a3d39608b07bbb1dde40d6cf4b6e2956"
diff --git a/pyproject.toml b/pyproject.toml
index f7359063..d9c42299 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,7 @@ pandas = "^2.2.3"
fsspec = { version = ">=2023.1.0,<=2025.3.0", extras = [
"s3",
] } # Pinned for datasets compatibility
+rigging = "^3.1.1"
transformers = { version = "^4.41.0", optional = true }
soundfile = { version = "^0.13.1", optional = true }
@@ -43,7 +44,6 @@ pytest-asyncio = "^0.26.0"
types-protobuf = "^5.29.1.20250208"
pandas-stubs = "^2.2.3.250308"
types-requests = "^2.32.0.20250306"
-rigging = "^2.3.0"
typer = "^0.15.2"
datasets = "^3.5.0"
pyarrow = "^19.0.1"