-
Notifications
You must be signed in to change notification settings - Fork 3
chore: Add more benchmarks #146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| """Benchmarks for ID generation. | ||
|
|
||
| get_span_id and get_trace_id are called on every span creation, so their | ||
| cost accumulates in high-throughput tracing workloads. This module | ||
| compares the two generators: UUIDGenerator (default) and OTELIDGenerator | ||
| (enabled via BRAINTRUST_OTEL_COMPAT=true). | ||
| """ | ||
|
|
||
| import pathlib | ||
| import sys | ||
|
|
||
| import pyperf | ||
|
|
||
|
|
||
# Allow running this file directly (outside the `benchmarks` package): put
# parents[2] — presumably the repository root, TODO confirm — on sys.path so
# the absolute `braintrust` / `benchmarks` imports below resolve.
if __package__ in (None, ""):
    sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[2]))
|
|
||
| from braintrust.id_gen import OTELIDGenerator, UUIDGenerator | ||
|
|
||
| from benchmarks._utils import disable_pyperf_psutil | ||
|
|
||
|
|
||
def main(runner: pyperf.Runner | None = None) -> None:
    """Register ID-generation benchmarks on *runner*, creating one if absent."""
    if runner is None:
        disable_pyperf_psutil()
        runner = pyperf.Runner()

    # Same registration order as before: uuid span/trace, then otel span/trace.
    for label, generator in (("uuid", UUIDGenerator()), ("otel", OTELIDGenerator())):
        runner.bench_func(f"id_gen.{label}.span_id", generator.get_span_id)
        runner.bench_func(f"id_gen.{label}.trace_id", generator.get_trace_id)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| """Benchmarks for merge_dicts and merge_dicts_with_paths. | ||
|
|
||
| merge_dicts is called on every span log update and during row merging, | ||
| making it one of the most frequently executed SDK functions. | ||
|
|
||
| Note: merge_dicts mutates merge_into, so each benchmark wrapper creates a | ||
| fresh copy of the target dict before calling. This means each bench_func | ||
| measures a shallow/deep copy plus the merge itself — the copy cost is | ||
| intentionally kept proportional to the input size so relative comparisons | ||
| remain valid. | ||
| """ | ||
|
|
||
| import copy | ||
| import pathlib | ||
| import sys | ||
| from typing import Any | ||
|
|
||
| import pyperf | ||
|
|
||
|
|
||
# Allow running this file directly (outside the `benchmarks` package): put
# parents[2] — presumably the repository root, TODO confirm — on sys.path so
# the absolute `braintrust` / `benchmarks` imports below resolve.
if __package__ in (None, ""):
    sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[2]))
|
|
||
| from braintrust.util import merge_dicts | ||
|
|
||
| from benchmarks._utils import disable_pyperf_psutil | ||
| from benchmarks.fixtures import make_large_payload, make_medium_payload, make_small_payload | ||
|
|
||
|
|
||
# Updates are pre-built once; only merge_into is copied per iteration.
_SMALL_UPDATE: dict[str, Any] = {
    "metadata": {"extra_key": "extra_value"},
    "scores": {"relevance": 0.8},
    "tags": ["new_tag"],
}

_MEDIUM_UPDATE: dict[str, Any] = {
    "metadata": {"workspace_id": "workspace-789", "new_flag": True},
    "metrics": {"cached_tokens": 64},
    "tags": ["updated", "benchmark"],
}

_LARGE_UPDATE: dict[str, Any] = {
    "metadata": {"routing": {"tier": "standard"}, "extra": "value"},
    "metrics": {"cached_tokens": 512},
    "tags": ["updated"],
    "output": {"summary": "revised"},
}

# Pre-built base payloads (copied per iteration, not mutated at module level).
_SMALL_BASE = make_small_payload()
_MEDIUM_BASE = make_medium_payload()
_LARGE_BASE = make_large_payload()

# Deeply nested base/update pair: exercises the recursive-merge path, where
# the update descends several dict levels before touching a leaf value.
_NESTED_BASE: dict[str, Any] = {
    "a": {"b": {"c": {"d": 1, "e": 2}, "f": 3}, "g": 4},
    "h": {"i": {"j": {"k": 5}}},
}
_NESTED_UPDATE: dict[str, Any] = {
    "a": {"b": {"c": {"d": 99}, "new": "value"}, "g": 99},
    "h": {"i": {"j": {"new_key": "hello"}}},
}

# Tags set-union: top-level "tags" field uses set-union semantics in merge_dicts.
_TAGS_UPDATE: dict[str, Any] = {"tags": ["c", "d", "e"]}
|
|
||
|
|
||
def _bench_small() -> None:
    """Merge the small update into a fresh shallow copy of the small base."""
    target = dict(_SMALL_BASE)
    merge_dicts(target, _SMALL_UPDATE)


def _bench_medium() -> None:
    """Merge the medium update; a shallow copy is enough because
    _MEDIUM_UPDATE only touches top-level dict values."""
    target = dict(_MEDIUM_BASE)
    merge_dicts(target, _MEDIUM_UPDATE)


def _bench_large() -> None:
    """Merge the large update into a fresh shallow copy of the large base."""
    target = dict(_LARGE_BASE)
    merge_dicts(target, _LARGE_UPDATE)


def _bench_nested() -> None:
    """Merge into a deep copy: the update recurses into nested dicts, so a
    shallow copy would let iterations contaminate the shared base."""
    target = copy.deepcopy(_NESTED_BASE)
    merge_dicts(target, _NESTED_UPDATE)


def _bench_tags_union() -> None:
    """Tags list grows on each merge, so start from a fresh literal each call."""
    merge_dicts({"tags": ["a", "b"], "value": 1}, _TAGS_UPDATE)
|
|
||
|
|
||
def main(runner: pyperf.Runner | None = None) -> None:
    """Register merge_dicts benchmarks on *runner*, creating one if absent."""
    if runner is None:
        disable_pyperf_psutil()
        runner = pyperf.Runner()

    # Registration order matches the original hand-written sequence.
    benches = (
        ("merge_dicts[small]", _bench_small),
        ("merge_dicts[medium]", _bench_medium),
        ("merge_dicts[large]", _bench_large),
        ("merge_dicts[nested-deep]", _bench_nested),
        ("merge_dicts[tags-union]", _bench_tags_union),
    )
    for name, fn in benches:
        runner.bench_func(name, fn)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,148 @@ | ||
| """Benchmarks for merge_row_batch and batch_items. | ||
|
|
||
| merge_row_batch is called before every flush to the Braintrust API to | ||
| de-duplicate and merge rows in a pending batch. batch_items is used to | ||
| split the resulting rows into API-request-sized chunks. | ||
|
|
||
| Both functions mutate their inputs, so each benchmark wrapper builds fresh | ||
| row lists per iteration. | ||
| """ | ||
|
|
||
| import pathlib | ||
| import sys | ||
|
|
||
| import pyperf | ||
|
|
||
|
|
||
# Allow running this file directly (outside the `benchmarks` package): put
# parents[2] — presumably the repository root, TODO confirm — on sys.path so
# the absolute `braintrust` / `benchmarks` imports below resolve.
if __package__ in (None, ""):
    sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[2]))
|
|
||
| from braintrust.db_fields import IS_MERGE_FIELD | ||
| from braintrust.merge_row_batch import batch_items, merge_row_batch | ||
|
|
||
| from benchmarks._utils import disable_pyperf_psutil | ||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Row factories — called inside each benchmark wrapper to get fresh dicts. | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
| def _unique_rows(n: int) -> list[dict]: | ||
| """n rows, all distinct IDs — no merging needed.""" | ||
| return [{"id": f"row-{i}", "project_id": "proj-1", "value": i} for i in range(n)] | ||
|
|
||
|
|
||
def _merge_rows(n: int) -> list[dict]:
    """n rows forming n//2 pairs: a base row followed by an IS_MERGE update."""
    rows: list[dict] = []
    for i in range(n // 2):
        base = {"id": f"row-{i}", "project_id": "proj-1", "payload": {"a": i}}
        update = {
            "id": f"row-{i}",
            "project_id": "proj-1",
            "payload": {"b": i + 100},
            IS_MERGE_FIELD: True,
        }
        rows.extend((base, update))
    return rows
|
|
||
|
|
||
def _mixed_rows(n: int) -> list[dict]:
    """Roughly half merge pairs and half unique rows."""
    rows: list[dict] = []
    for i in range(n // 4):
        # A pair sharing one ID: base row plus its IS_MERGE update.
        rows.extend(
            (
                {"id": f"merge-{i}", "project_id": "proj-1", "payload": {"a": i}},
                {
                    "id": f"merge-{i}",
                    "project_id": "proj-1",
                    "payload": {"b": i + 100},
                    IS_MERGE_FIELD: True,
                },
            )
        )
    rows.extend({"id": f"unique-{i}", "project_id": "proj-1", "value": i} for i in range(n // 2))
    return rows
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Benchmark wrappers | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
# Row-batch sizes exercised by the benchmark wrappers below.
_SMALL_N = 10
_MEDIUM_N = 50
_LARGE_N = 200
|
|
||
|
|
||
def _bench_no_conflict_small() -> None:
    """Merge a small batch of all-unique rows (no conflicts)."""
    batch = _unique_rows(_SMALL_N)
    merge_row_batch(batch)


def _bench_no_conflict_medium() -> None:
    """Merge a medium batch of all-unique rows (no conflicts)."""
    batch = _unique_rows(_MEDIUM_N)
    merge_row_batch(batch)


def _bench_no_conflict_large() -> None:
    """Merge a large batch of all-unique rows (no conflicts)."""
    batch = _unique_rows(_LARGE_N)
    merge_row_batch(batch)


def _bench_all_merge_small() -> None:
    """Merge a small batch composed entirely of base/IS_MERGE pairs."""
    batch = _merge_rows(_SMALL_N)
    merge_row_batch(batch)


def _bench_all_merge_medium() -> None:
    """Merge a medium batch composed entirely of base/IS_MERGE pairs."""
    batch = _merge_rows(_MEDIUM_N)
    merge_row_batch(batch)


def _bench_mixed_medium() -> None:
    """Merge a medium batch mixing unique rows and merge pairs."""
    batch = _mixed_rows(_MEDIUM_N)
    merge_row_batch(batch)
|
|
||
|
|
||
| # batch_items: split a list of strings by item-count and byte-count limits. | ||
| _BATCH_STRINGS = [f"item-payload-{i:04d}" * 4 for i in range(200)] | ||
| _ITEM_SIZE = len(_BATCH_STRINGS[0].encode()) | ||
|
|
||
|
|
||
| def _bench_batch_items_count_limit() -> None: | ||
| batch_items(_BATCH_STRINGS, batch_max_num_items=20) | ||
|
|
||
|
|
||
| def _bench_batch_items_byte_limit() -> None: | ||
| batch_items( | ||
| _BATCH_STRINGS, | ||
| batch_max_num_bytes=_ITEM_SIZE * 15, | ||
| get_byte_size=lambda s: len(s.encode()), | ||
| ) | ||
|
|
||
|
|
||
| def _bench_batch_items_both_limits() -> None: | ||
| batch_items( | ||
| _BATCH_STRINGS, | ||
| batch_max_num_items=20, | ||
| batch_max_num_bytes=_ITEM_SIZE * 15, | ||
| get_byte_size=lambda s: len(s.encode()), | ||
| ) | ||
|
|
||
|
|
||
def main(runner: pyperf.Runner | None = None) -> None:
    """Register merge_row_batch and batch_items benchmarks on *runner*."""
    if runner is None:
        disable_pyperf_psutil()
        runner = pyperf.Runner()

    # Same registration order as the original hand-written sequence.
    merge_benches = (
        ("merge_row_batch[no-conflict-small]", _bench_no_conflict_small),
        ("merge_row_batch[no-conflict-medium]", _bench_no_conflict_medium),
        ("merge_row_batch[no-conflict-large]", _bench_no_conflict_large),
        ("merge_row_batch[all-merge-small]", _bench_all_merge_small),
        ("merge_row_batch[all-merge-medium]", _bench_all_merge_medium),
        ("merge_row_batch[mixed-medium]", _bench_mixed_medium),
    )
    batch_benches = (
        ("batch_items[count-limit]", _bench_batch_items_count_limit),
        ("batch_items[byte-limit]", _bench_batch_items_byte_limit),
        ("batch_items[both-limits]", _bench_batch_items_both_limits),
    )
    for name, fn in (*merge_benches, *batch_benches):
        runner.bench_func(name, fn)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| """Benchmarks for SpanComponentsV3 and SpanComponentsV4 encode/decode. | ||
|
|
||
| These are on the hot path: every span serializes/deserializes parent context. | ||
| """ | ||
|
|
||
| import pathlib | ||
| import secrets | ||
| import sys | ||
| import uuid | ||
|
|
||
| import pyperf | ||
|
|
||
|
|
||
# Allow running this file directly (outside the `benchmarks` package): put
# parents[2] — presumably the repository root, TODO confirm — on sys.path so
# the absolute `braintrust` / `benchmarks` imports below resolve.
if __package__ in (None, ""):
    sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[2]))
|
|
||
| from braintrust.span_identifier_v3 import SpanComponentsV3, SpanObjectTypeV3 | ||
| from braintrust.span_identifier_v4 import SpanComponentsV4 | ||
|
|
||
| from benchmarks._utils import disable_pyperf_psutil | ||
|
|
||
|
|
||
def main(runner: pyperf.Runner | None = None) -> None:
    """Register V3/V4 span-component encode/decode benchmarks on *runner*."""
    if runner is None:
        disable_pyperf_psutil()
        runner = pyperf.Runner()

    def new_uuid() -> str:
        return str(uuid.uuid4())

    # V3 — UUID-based IDs
    v3_minimal = SpanComponentsV3(
        object_type=SpanObjectTypeV3.PROJECT_LOGS,
        object_id=new_uuid(),
    )
    v3_complete = SpanComponentsV3(
        object_type=SpanObjectTypeV3.EXPERIMENT,
        object_id=new_uuid(),
        row_id=new_uuid(),
        span_id=new_uuid(),
        root_span_id=new_uuid(),
    )
    v3_minimal_encoded = v3_minimal.to_str()
    v3_complete_encoded = v3_complete.to_str()

    runner.bench_func("span_components.v3.to_str[object-only]", v3_minimal.to_str)
    runner.bench_func("span_components.v3.to_str[full-uuid]", v3_complete.to_str)
    runner.bench_func("span_components.v3.from_str[object-only]", SpanComponentsV3.from_str, v3_minimal_encoded)
    runner.bench_func("span_components.v3.from_str[full-uuid]", SpanComponentsV3.from_str, v3_complete_encoded)

    # V4 — OTEL hex IDs for span_id (8-byte) and root_span_id (16-byte)
    v4_minimal = SpanComponentsV4(
        object_type=SpanObjectTypeV3.PROJECT_LOGS,
        object_id=new_uuid(),
    )
    v4_complete = SpanComponentsV4(
        object_type=SpanObjectTypeV3.EXPERIMENT,
        object_id=new_uuid(),
        row_id=new_uuid(),
        span_id=secrets.token_hex(8),
        root_span_id=secrets.token_hex(16),
    )
    v4_minimal_encoded = v4_minimal.to_str()
    v4_complete_encoded = v4_complete.to_str()

    runner.bench_func("span_components.v4.to_str[object-only]", v4_minimal.to_str)
    runner.bench_func("span_components.v4.to_str[full-otel]", v4_complete.to_str)
    runner.bench_func("span_components.v4.from_str[object-only]", SpanComponentsV4.from_str, v4_minimal_encoded)
    runner.bench_func("span_components.v4.from_str[full-otel]", SpanComponentsV4.from_str, v4_complete_encoded)

    # Cross-version: V4 decoder reading a V3-encoded string (backwards-compat path)
    runner.bench_func("span_components.v4.from_str[v3-encoded]", SpanComponentsV4.from_str, v3_complete_encoded)


if __name__ == "__main__":
    main()
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move the function definition outside the benchmarked function body so that function-creation time isn't counted by pyperf?