82 changes: 59 additions & 23 deletions docs/sdk/api.mdx
@@ -170,7 +170,6 @@ def export_metrics(
     Returns:
         A DataFrame containing the exported metric data.
     """
-    import pandas as pd

     response = self.request(
         "GET",
@@ -265,7 +264,6 @@ def export_parameters(
     Returns:
         A DataFrame containing the exported parameter data.
     """
-    import pandas as pd

     response = self.request(
         "GET",
@@ -294,36 +292,48 @@ export_runs(
     filter: str | None = None,
     status: StatusFilter = "completed",
     aggregations: list[MetricAggregationType] | None = None,
-) -> pd.DataFrame
+    format: ExportFormat = "parquet",
+    base_dir: str | None = None,
+) -> str
 ```

-Exports run data for a specific project.
+Export runs using pagination - always writes to disk.

 **Parameters:**

 * **`project`**
   (`str`)
-  –The project identifier.
+  –The project identifier
 * **`filter`**
   (`str | None`, default:
   `None`
   )
-  –A filter to apply to the exported data. Defaults to None.
+  –A filter to apply to the exported data
 * **`status`**
   (`StatusFilter`, default:
   `'completed'`
   )
-  –The status of runs to include. Defaults to "completed".
+  –The status of runs to include
 * **`aggregations`**
   (`list[MetricAggregationType] | None`, default:
   `None`
   )
-  –A list of aggregation types to apply. Defaults to None.
+  –A list of aggregation types to apply
+* **`format`**
+  (`ExportFormat`, default:
+  `'parquet'`
+  )
+  –Output format - "parquet", "csv", "json", "jsonl"
+* **`base_dir`**
+  (`str | None`, default:
+  `None`
+  )
+  –Base directory for export (defaults to "./strikes-data")

 **Returns:**

-* `DataFrame`
-  –A DataFrame containing the exported run data.
+* **`str`** ( `str`
+  ) –Path to the export directory

 <Accordion title="Source code in dreadnode/api/client.py" icon="code">
 ```python
@@ -332,35 +342,61 @@ def export_runs(
     project: str,
     *,
     filter: str | None = None,
-    # format: ExportFormat = "parquet",
     status: StatusFilter = "completed",
     aggregations: list[MetricAggregationType] | None = None,
-) -> "pd.DataFrame":
+    format: ExportFormat = "parquet",
+    base_dir: str | None = None,
+) -> str:
     """
-    Exports run data for a specific project.
+    Export runs using pagination - always writes to disk.

     Args:
-        project: The project identifier.
-        filter: A filter to apply to the exported data. Defaults to None.
-        status: The status of runs to include. Defaults to "completed".
-        aggregations: A list of aggregation types to apply. Defaults to None.
+        project: The project identifier
+        filter: A filter to apply to the exported data
+        status: The status of runs to include
+        aggregations: A list of aggregation types to apply
+        format: Output format - "parquet", "csv", "json", "jsonl"
+        base_dir: Base directory for export (defaults to "./strikes-data")

     Returns:
-        A DataFrame containing the exported run data.
+        str: Path to the export directory
     """
-    import pandas as pd
-
-    response = self.request(
+    logger.info(f"Starting paginated export for project '{project}', format='{format}'")
+
+    page = 1
+    first_response = self.request(
         "GET",
-        f"/strikes/projects/{project!s}/export",
+        f"/strikes/projects/{project!s}/export/paginated",
         params={
             "format": "parquet",
+            "page": page,
             "status": status,
             **({"filter": filter} if filter else {}),
             **({"aggregations": aggregations} if aggregations else {}),
         },
     )
-    return pd.read_parquet(io.BytesIO(response.content))
+
+    if not first_response.content:
+        logger.info("No data found")
+
+    first_chunk = pd.read_parquet(io.BytesIO(first_response.content))
+
+    total_runs = int(first_response.headers.get("x-total", "0"))
+    has_more = first_response.headers.get("x-has-more", "false") == "true"
+
+    logger.info(f"Total runs: {total_runs}, Has more: {has_more}")
+
+    logger.info(f"Writing {total_runs} runs to disk")
+    return self._export_to_disk(
+        project,
+        first_chunk,
+        dict(first_response.headers),
+        filter,
+        status,
+        aggregations,
+        format,
+        str(base_dir) if base_dir else None,
+    )
 ```
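The new method fetches only the first page itself, then hands everything to `_export_to_disk`, whose body is outside this diff. Below is a minimal sketch of how the remaining pages could be drained, assuming the `/export/paginated` endpoint accepts increasing `page` values exactly like the first request and keeps reporting progress through the `x-has-more` header read above:

```python
# Hypothetical continuation loop; _export_to_disk is not shown in this diff,
# so this only illustrates the pagination contract visible above: a `page`
# query parameter going in, x-total / x-has-more headers coming back.
import io

import pandas as pd


def fetch_remaining_pages(client, project: str, status: str = "completed") -> pd.DataFrame:
    chunks: list[pd.DataFrame] = []
    page = 2  # page 1 was already consumed by export_runs as first_chunk
    while True:
        response = client.request(
            "GET",
            f"/strikes/projects/{project}/export/paginated",
            params={"format": "parquet", "page": page, "status": status},
        )
        if not response.content:
            break  # empty body: nothing further to read
        chunks.append(pd.read_parquet(io.BytesIO(response.content)))
        if response.headers.get("x-has-more", "false") != "true":
            break  # server says this was the last page
        page += 1
    return pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()
```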
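From the caller's side, the visible change is that `export_runs` no longer returns a DataFrame but a path to an export directory. A usage sketch, assuming a constructed API client (its class name and constructor are not part of this diff) and that the export directory holds one parquet file per page:

```python
# Usage sketch for the new disk-backed export_runs. `ApiClient()` stands in
# for however the client is actually constructed, and the one-file-per-page
# parquet layout is an assumption about _export_to_disk's output.
from pathlib import Path

import pandas as pd

client = ApiClient()  # hypothetical constructor; only client.py is named in this diff

export_dir = client.export_runs(
    "my-project",
    status="completed",
    format="parquet",
    base_dir="./strikes-data",  # the documented default
)

# Reassemble the exported chunks locally if a single DataFrame is still wanted.
frames = [pd.read_parquet(path) for path in sorted(Path(export_dir).glob("*.parquet"))]
runs = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
print(f"Loaded {len(runs)} runs from {export_dir}")
```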
2 changes: 1 addition & 1 deletion docs/sdk/data_types.mdx
@@ -643,7 +643,7 @@ def to_serializable(self) -> tuple[bytes, dict[str, t.Any]]:
     Returns:
         A tuple of (video_bytes, metadata_dict)
     """
-    import numpy as np  # type: ignore[import,unused-ignore]
+    import numpy as np  # type: ignore[import,unused-ignore] # noqa: PLC0415

     try:
         from moviepy.video.VideoClip import (  # type: ignore[import,unused-ignore,import-untyped]
2 changes: 1 addition & 1 deletion docs/sdk/main.mdx
@@ -972,7 +972,7 @@ with dreadnode.run("my_run"):
 def log_metric(
     self,
     name: str,
-    value: float | bool | Metric,
+    value: float | bool | Metric,  # noqa: FBT001
     *,
     step: int = 0,
     origin: t.Any | None = None,
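FBT001 is Ruff's boolean-typed-positional-argument rule; it is suppressed here because `value` deliberately accepts a bare `bool` in the positional slot, for pass/fail metrics. A short usage sketch, assuming the module-level API that main.mdx documents around this signature:

```python
# Why the # noqa: FBT001 is needed: passing a bool positionally is part of
# the documented interface, not an accident.
import dreadnode

with dreadnode.run("my_run"):
    dreadnode.log_metric("accuracy", 0.93)  # float metric
    dreadnode.log_metric("passed", True)  # bool metric, exactly what FBT001 would flag
```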
10 changes: 5 additions & 5 deletions docs/sdk/scorers.mdx
@@ -1941,7 +1941,7 @@ def zero_shot_classification(
     )

     try:
-        from transformers import (  # type: ignore [attr-defined,import-not-found,unused-ignore]
+        from transformers import (  # type: ignore [attr-defined,import-not-found,unused-ignore] # noqa: PLC0415
             pipeline,
         )
     except ImportError:
@@ -2661,7 +2661,7 @@ def detect_harm_with_openai(
         model: The moderation model to use.
         name: Name of the scorer.
     """
-    import openai
+    import openai  # noqa: PLC0415

     async def evaluate(
         data: t.Any, *, api_key: str | None = Config(api_key), model: str = Config(model)
@@ -3373,7 +3373,7 @@ def detect_pii_with_presidio(
     )

     try:
-        import presidio_analyzer  # type: ignore[import-not-found,unused-ignore]  # noqa: F401
+        import presidio_analyzer  # type: ignore[import-not-found,unused-ignore]  # noqa: F401, PLC0415
     except ImportError:
         warn_at_user_stacklevel(presidio_import_error_msg, UserWarning)
@@ -3589,7 +3589,7 @@ def wrap_chat(
     """

     async def evaluate(chat: "Chat") -> Metric:
-        from rigging.chat import Chat
+        from rigging.chat import Chat  # noqa: PLC0415

         # Fall through to the inner scorer if chat is not a Chat instance
         if not isinstance(chat, Chat):
@@ -4215,7 +4215,7 @@ def similarity_with_litellm(
             or self-hosted models.
         name: Name of the scorer.
     """
-    import litellm
+    import litellm  # noqa: PLC0415

     async def evaluate(
         data: t.Any,
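All five changes in this file are the same suppression: PLC0415 is Ruff's import-outside-top-level rule, and these imports are deferred on purpose so that heavyweight or optional dependencies (transformers, openai, presidio_analyzer, rigging, litellm) are only required when the corresponding scorer is actually built. A minimal sketch of that pattern, with a hypothetical `heavylib` standing in for the real optional dependency:

```python
# Deferred-import pattern preserved by the # noqa: PLC0415 suppressions above.
# `heavylib` is hypothetical; importing this module stays cheap, and the
# dependency is only resolved when the scorer factory is actually called.
import typing as t


def detect_with_heavylib(threshold: float = 0.5) -> t.Callable[[t.Any], bool]:
    import heavylib  # noqa: PLC0415  # deferred: optional dependency

    def evaluate(data: t.Any) -> bool:
        return heavylib.score(data) >= threshold

    return evaluate
```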