Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
3e0bdf4
chore: add workflow to sync from public repo
avi-alpert Mar 24, 2026
42348f8
Merge pull request #71 from aws/aalpert/sync-workflow-v2
avi-alpert Mar 24, 2026
20c47e2
chore: sync main with public/main
jariy17 Mar 26, 2026
ad47b56
chore: sync main with public/main
github-actions[bot] Mar 27, 2026
cddcac4
chore: sync main with public/main
github-actions[bot] Mar 27, 2026
7100433
fix: bump memory integration test timeout to 15 minutes (#77)
Hweinstock Mar 27, 2026
27e66fe
chore: sync main with public/main
github-actions[bot] Mar 31, 2026
51d6dca
chore: sync main with public/main
github-actions[bot] Mar 31, 2026
7169857
chore: sync main with public/main
github-actions[bot] Apr 1, 2026
0f30dd0
feat: add config bundle support to runtime (#82)
padmak30 Apr 7, 2026
e1d861d
feat: add batch evaluation (#83)
padmak30 Apr 7, 2026
86d0343
Remove batch collect from agent span collector
padmak30 Apr 8, 2026
a72f159
Update to prod endpoint
padmak30 Apr 8, 2026
4e85852
fix: exclude workflow files from public repo sync (#89)
aidandaly24 Apr 8, 2026
95876b2
chore: sync main with public/main
github-actions[bot] Apr 8, 2026
db780de
Cleanup
padmak30 Apr 8, 2026
e65b5d8
feat: bundle AgentCore service models and restrict ConfigBundleClient…
padmak30 Apr 8, 2026
df0aa73
chore(deps): bump cryptography from 46.0.5 to 46.0.7 (#92)
dependabot[bot] Apr 8, 2026
12f8c60
chore: sync main with public/main
github-actions[bot] Apr 9, 2026
b80a00f
chore: sync main with public/main
github-actions[bot] Apr 10, 2026
415f3a2
chore: sync main with public/main
github-actions[bot] Apr 10, 2026
522b5a9
chore: sync main with public/main
github-actions[bot] Apr 11, 2026
d9e0a1e
chore: sync main with public/main
github-actions[bot] Apr 13, 2026
176f109
chore(deps): bump pillow from 12.1.1 to 12.2.0 (#99)
dependabot[bot] Apr 13, 2026
6e820bd
chore(deps): bump python-multipart from 0.0.22 to 0.0.26 (#100)
dependabot[bot] Apr 15, 2026
3d488be
chore: sync main with public/main
github-actions[bot] Apr 16, 2026
e58bbba
chore: sync main with public/main
github-actions[bot] Apr 17, 2026
1fee391
Merge branch 'main' into feat/evo_main
padmak30 Apr 20, 2026
2881677
chore: sync main with public/main
github-actions[bot] Apr 21, 2026
d0d850d
feat(runtime): stamp OTel spans with routing experiment baggage (#101)
padmak30 Apr 21, 2026
b6b6309
chore(deps): bump python-dotenv from 1.1.1 to 1.2.2 (#102)
dependabot[bot] Apr 21, 2026
ca3c4f1
chore: sync main with public/main
github-actions[bot] Apr 22, 2026
f93bdf0
fix: fix AWS_DEFAULT_REGION fallback and rename get_bundle_config
padmak30 Apr 22, 2026
a94bbfe
Merge pull request #103 from aws/fix/evo_config_bundle
jariy17 Apr 22, 2026
8eebd90
chore: sync main with public/main
github-actions[bot] Apr 23, 2026
5a20030
chore: sync main with public/main
github-actions[bot] Apr 24, 2026
2a0592e
chore: sync main with public/main
github-actions[bot] Apr 25, 2026
1386dca
chore: sync main with public/main
github-actions[bot] Apr 27, 2026
6cbbdb5
chore: sync main with public/main
github-actions[bot] Apr 28, 2026
257235d
fix(eval): update batch eval runner to new API shape
padmak30 Apr 28, 2026
7c9b0dc
Merge pull request #106 from aws/fix/evo_batch_eval_api_update
jariy17 Apr 28, 2026
918fa08
simulated scenario dataset (#94)
padmak30 Apr 28, 2026
fb53a8e
Merge remote-tracking branch 'origin/main' into feat/evo_main
notgitika Apr 28, 2026
6ecc0f0
feat(eval): add SimulatedScenario support to on-demand evaluation runner
padmak30 Apr 28, 2026
a954e34
Merge pull request #107 from aws/feat/simulated_dataset_ondemand_eval
jariy17 Apr 28, 2026
ea1d4f3
chore: sync main with public/main
github-actions[bot] Apr 29, 2026
8a4bc8a
chore: sync main with public/main
github-actions[bot] Apr 29, 2026
493e40c
chore: migrate boto3 injection to public botocore 1.43.0 service mode…
padmak30 Apr 30, 2026
ec92670
Merge remote-tracking branch 'origin/main' into feat/evo_main
padmak30 Apr 30, 2026
e5387ed
fix(batch-eval): handle all statuses in _poll_for_results (#110)
padmak30 Apr 30, 2026
f86ba49
docs: add preview warning docstrings to all new evo methods and classes
padmak30 Apr 30, 2026
40a9b0b
Merge pull request #111 from aws/feat/evo_preview
jariy17 Apr 30, 2026
66303bb
Fix tests, formatting, uv.lock
padmak30 Apr 30, 2026
a2fdff6
Fix jinja autoescape error
padmak30 Apr 30, 2026
708f88a
chore: restore workflow files to main state
padmak30 Apr 30, 2026
7c2f081
Add description to batch eval result
padmak30 Apr 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"boto3>=1.42.86",
"botocore>=1.42.86",
"boto3>=1.43.0",
"botocore>=1.43.0",
"pydantic>=2.0.0,<2.41.3",
"urllib3>=1.26.0",
"starlette>=0.46.2",
Expand Down Expand Up @@ -167,3 +167,7 @@ strands-agents = [
strands-agents-evals = [
"strands-agents-evals>=0.1.0"
]
simulation = [
"jinja2>=3.1.0",
"strands-agents-evals>=0.1.0",
]
2 changes: 1 addition & 1 deletion src/bedrock_agentcore/_utils/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# Environment-configurable constants with fallback defaults
DP_ENDPOINT_OVERRIDE = os.getenv("BEDROCK_AGENTCORE_DP_ENDPOINT")
CP_ENDPOINT_OVERRIDE = os.getenv("BEDROCK_AGENTCORE_CP_ENDPOINT")
DEFAULT_REGION = os.getenv("AWS_REGION", "us-west-2")
DEFAULT_REGION = os.getenv("AWS_REGION") or os.getenv("AWS_DEFAULT_REGION") or "us-west-2"

# Regex for valid AWS region names (e.g., us-east-1, eu-west-2, cn-north-1, us-gov-west-1).
# Uses \A and \Z anchors to prevent newline injection bypass that $ allows.
Expand Down
9 changes: 9 additions & 0 deletions src/bedrock_agentcore/config_bundle/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""Configuration bundle support for BedrockAgentCore."""

from .bundle import ConfigBundleRef
from .client import ConfigBundleClient

__all__ = [
"ConfigBundleRef",
"ConfigBundleClient",
]
97 changes: 97 additions & 0 deletions src/bedrock_agentcore/config_bundle/baggage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Parse W3C baggage headers for configuration bundle references."""

import logging
from typing import Any, Dict, List, Optional
from urllib.parse import unquote

from .bundle import ConfigBundleRef

# Name of the header that carries W3C baggage. Kept lower-case because
# _extract_baggage lower-cases each incoming header name before comparing.
BAGGAGE_HEADER = "baggage"

# Module-level logger used for the "skipping entry" warnings in this file.
logger = logging.getLogger(__name__)


def _extract_baggage(headers: Any) -> Dict[str, List[str]]:
    """Collect every W3C baggage list-member from the request headers.

    Args:
        headers: Either an object exposing ``items()`` (for example a Starlette
            ``Headers`` instance) or an iterable of ``(name, value)`` pairs.
            The container should keep repeated header names as separate
            entries; a plain ``dict`` can hold only one ``baggage`` value, so
            any additional ``baggage`` headers would already be lost before
            this function sees them.

    Returns:
        A mapping from baggage key to the list of decoded values, in the order
        they were seen. A key repeated across several ``baggage`` headers, or
        within one header value, contributes one list element per occurrence.

    Notes:
        - The header name comparison ignores case.
        - Anything after the first ``;`` in a member (its properties) is
          discarded before decoding.
        - Values are percent-decoded; keys are only whitespace-stripped.
        - Members without ``=``, with an empty key, or with an empty decoded
          value are skipped with a warning; blank members are skipped silently.
    """
    collected: Dict[str, List[str]] = {}
    pairs = headers.items() if hasattr(headers, "items") else headers

    for header_name, header_value in pairs:
        if header_name.lower() != BAGGAGE_HEADER:
            continue

        for raw_member in header_value.split(","):
            member = raw_member.strip()
            if not member:
                continue

            name, separator, remainder = member.partition("=")
            if not separator:
                logger.warning("Skipping malformed baggage entry (no '='): %r", member)
                continue

            name = name.strip()
            if not name:
                logger.warning("Skipping baggage entry with empty key in: %r", member)
                continue

            # Keep only the value portion; properties follow the first ';'.
            value_part = remainder.partition(";")[0]
            decoded = unquote(value_part.strip())
            if not decoded:
                logger.warning("Skipping baggage entry with empty value for key %r", name)
                continue

            if name not in collected:
                collected[name] = []
            collected[name].append(decoded)

    return collected


def _parse_config_bundle_baggage(all_baggage: Dict[str, List[str]]) -> Optional[ConfigBundleRef]:
    """Turn extracted baggage entries into a ``ConfigBundleRef`` if one is described.

    ``all_baggage`` is expected to come from :func:`_extract_baggage`. Two keys
    are consulted:

    - ``aws.agentcore.configbundle_arn`` — full ARN of the configuration bundle
    - ``aws.agentcore.configbundle_version`` — version ID of the bundle

    A single bundle per request is supported, e.g.::

        baggage: aws.agentcore.configbundle_arn=<arn>,aws.agentcore.configbundle_version=<version>

    When either key carries more than one value, a warning is logged and the
    first value of each key is used.

    Args:
        all_baggage: Multi-value baggage dict from :func:`_extract_baggage`.

    Returns:
        A ``ConfigBundleRef`` when both keys have at least one value and the
        reference passes validation; ``None`` when either key is missing or
        ``ConfigBundleRef`` raises ``ValueError`` (which is logged as a warning).
    """
    arn_values = all_baggage.get("aws.agentcore.configbundle_arn", [])
    version_values = all_baggage.get("aws.agentcore.configbundle_version", [])

    if not (arn_values and version_values):
        return None

    if max(len(arn_values), len(version_values)) > 1:
        logger.warning("Multiple config bundle ARNs/versions found in baggage — only the first will be used")

    first_arn, first_version = arn_values[0], version_values[0]
    try:
        return ConfigBundleRef(bundle_arn=first_arn, bundle_version=first_version)
    except ValueError as e:
        logger.warning("Skipping invalid config bundle ref (arn=%r, version=%r): %s", first_arn, first_version, e)
        return None
34 changes: 34 additions & 0 deletions src/bedrock_agentcore/config_bundle/bundle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Configuration bundle reference model."""

from dataclasses import dataclass
from typing import Any, Dict

# Type alias for a bundle's component map. Per the original note it mirrors the
# ComponentConfigurationMap value: {componentId: {"configuration": <Document>}}
ConfigBundleComponents = Dict[str, Dict[str, Any]]


@dataclass(frozen=True)
class ConfigBundleRef:
    """Lightweight reference to a configuration bundle version, parsed from OTEL baggage.

    .. warning::
        This feature is in preview and may change in future releases.

    Attributes:
        bundle_arn: ARN of the configuration bundle. The text after its last
            ``/`` is treated as the bundle ID.
        bundle_version: Version identifier of the bundle.

    Raises:
        ValueError: If ``bundle_arn`` or ``bundle_version`` is empty or
            whitespace-only, or if ``bundle_arn`` has no non-blank segment
            after its last ``/``.
    """

    bundle_arn: str
    bundle_version: str

    def __post_init__(self) -> None:
        """Validate bundle ARN and version."""
        # Whitespace-only values are as unusable as empty ones, so reject both.
        # str() keeps non-string truthy values on the same path they took before.
        if not self.bundle_arn or not str(self.bundle_arn).strip():
            raise ValueError("bundle_arn must not be empty")
        if not self.bundle_version or not str(self.bundle_version).strip():
            raise ValueError("bundle_version must not be empty")
        _, slash, bundle_id = self.bundle_arn.rpartition("/")
        if not slash or not bundle_id.strip():
            raise ValueError(f"bundle_arn does not contain a valid bundle ID segment: {self.bundle_arn!r}")

    @property
    def bundle_id(self) -> str:
        """Extract bundle ID from ARN (last path segment after '/')."""
        return self.bundle_arn.rsplit("/", 1)[-1]
75 changes: 75 additions & 0 deletions src/bedrock_agentcore/config_bundle/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Client for fetching configuration bundle versions from the AgentCore control plane."""

import logging
import threading
from typing import Optional

import boto3

from .._utils.endpoints import DEFAULT_REGION, get_control_plane_endpoint

logger = logging.getLogger(__name__)

# Control-plane operation names that ConfigBundleClient forwards to boto3.
# Any attribute not listed here is rejected by ConfigBundleClient.__getattr__.
_ALLOWED_OPERATIONS = frozenset(
    {
        "create_configuration_bundle",
        "delete_configuration_bundle",
        "get_configuration_bundle",
        "get_configuration_bundle_version",
        "list_configuration_bundle_versions",
        "list_configuration_bundles",
        "update_configuration_bundle",
    }
)


class ConfigBundleClient:
    """Client for AgentCore configuration bundle operations.

    .. warning::
        This feature is in preview and may change in future releases.

    Wraps the ``bedrock-agentcore-control`` boto3 client and forwards the
    configuration bundle operations listed in ``_ALLOWED_OPERATIONS`` (e.g.
    ``get_configuration_bundle_version``, ``list_configuration_bundles``) to it
    via ``__getattr__``, so they are available without explicit definitions.
    Any other attribute raises ``AttributeError``.

    Intended to be created once at application startup and reused across requests.
    The underlying boto3 client is created lazily on first use so that agents
    which never receive config bundle baggage incur no startup overhead.
    """

    def __init__(self, region_name: Optional[str] = None, boto3_session: Optional["boto3.Session"] = None):
        """Initialise the client with an optional region and boto3 session.

        Args:
            region_name: Region to call. When omitted, the region of
                ``boto3_session`` is used if it has one, otherwise
                ``DEFAULT_REGION``.
            boto3_session: Session used to create the underlying client. When
                omitted, a default ``boto3.Session()`` is created on first use.
        """
        # Prefer the caller's session region over the process-wide default so
        # the client does not target a different region than the session.
        session_region = getattr(boto3_session, "region_name", None)
        self._region = region_name or session_region or DEFAULT_REGION
        self._boto3_session = boto3_session
        self._client = None
        self._client_lock = threading.Lock()

    def _get_client(self):
        """Return the underlying boto3 client, creating it on first call."""
        # Use __dict__ directly to avoid triggering __getattr__ if _client is
        # not yet set (e.g. during unpickling before __init__ completes).
        if self.__dict__.get("_client") is None:
            with self._client_lock:
                # Check again under the lock: another thread may have created
                # the client while this one was waiting.
                if self.__dict__.get("_client") is None:
                    session = self._boto3_session or boto3.Session()
                    self._client = session.client(
                        "bedrock-agentcore-control",
                        region_name=self._region,
                        endpoint_url=get_control_plane_endpoint(self._region),
                    )
        return self._client

    def __getattr__(self, name: str):
        """Forward configuration bundle method calls to the underlying boto3 client.

        Only operations in ``_ALLOWED_OPERATIONS`` are exposed. Attempts to call
        any other operation raise ``AttributeError``.

        Uses ``object.__getattribute__`` to access ``_get_client`` so that if Python
        looks up dunder attributes during unpickling or deepcopy before instance
        attributes are initialised, this method does not recurse into itself.
        """
        if name not in _ALLOWED_OPERATIONS:
            raise AttributeError(f"'{type(self).__name__}' does not expose operation '{name}'")
        return getattr(object.__getattribute__(self, "_get_client")(), name)
32 changes: 32 additions & 0 deletions src/bedrock_agentcore/evaluation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,31 @@
EvaluatorOutput,
custom_code_based_evaluator,
)
from bedrock_agentcore.evaluation.runner.batch.batch_evaluation_models import (
BatchEvaluationResult,
BatchEvaluationRunConfig,
BatchEvaluationSummary,
BatchEvaluatorConfig,
CloudWatchDataSourceConfig,
CloudWatchOutputDataConfig,
EvaluatorStatistics,
EvaluatorSummary,
FailedScenario,
)
from bedrock_agentcore.evaluation.runner.batch.batch_evaluation_runner import (
BatchEvaluationRunner,
)
from bedrock_agentcore.evaluation.runner.dataset_providers import (
DatasetProvider,
FileDatasetProvider,
)
from bedrock_agentcore.evaluation.runner.dataset_types import (
ActorProfile,
Dataset,
Input,
PredefinedScenario,
Scenario,
SimulatedScenario,
Turn,
)
from bedrock_agentcore.evaluation.runner.invoker_types import (
Expand All @@ -34,6 +50,8 @@
ScenarioExecutionResult,
ScenarioExecutor,
ScenarioResult,
SimulatedScenarioExecutor,
SimulationConfig,
)
from bedrock_agentcore.evaluation.span_to_adot_serializer import (
convert_strands_to_adot,
Expand All @@ -43,7 +61,18 @@
)

__all__ = [
"ActorProfile",
"AgentInvokerFn",
"BatchEvaluationRunner",
"BatchEvaluationResult",
"BatchEvaluationRunConfig",
"CloudWatchOutputDataConfig",
"CloudWatchDataSourceConfig",
"BatchEvaluatorConfig",
"BatchEvaluationSummary",
"EvaluatorStatistics",
"EvaluatorSummary",
"FailedScenario",
"AgentInvokerInput",
"AgentInvokerOutput",
"CloudWatchAgentSpanCollector",
Expand All @@ -65,10 +94,13 @@
"ScenarioExecutor",
"ScenarioResult",
"AgentSpanCollector",
"SimulationConfig",
"StrandsEvalsAgentCoreEvaluator",
"Turn",
"PredefinedScenario",
"PredefinedScenarioExecutor",
"SimulatedScenario",
"SimulatedScenarioExecutor",
"custom_code_based_evaluator",
"convert_strands_to_adot",
"create_strands_evaluator",
Expand Down
20 changes: 16 additions & 4 deletions src/bedrock_agentcore/evaluation/runner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

from .dataset_providers import DatasetProvider, FileDatasetProvider
from .dataset_types import (
ActorProfile,
Dataset,
Input,
PredefinedScenario,
Scenario,
SimulatedScenario,
Turn,
)
from .invoker_types import (
Expand All @@ -22,33 +24,43 @@
EvaluatorResult,
OnDemandEvaluationDatasetRunner,
ScenarioResult,
SimulationConfig,
)
from .scenario_executor import (
AgentCoreActorSimulator,
PredefinedScenarioExecutor,
ScenarioExecutionResult,
ScenarioExecutor,
SimulatedScenarioExecutor,
SimulatorResult,
)

__all__ = [
"ActorProfile",
"AgentInvokerFn",
"AgentInvokerInput",
"AgentInvokerOutput",
"AgentSpanCollector",
"CloudWatchAgentSpanCollector",
"Dataset",
"DatasetProvider",
"EvaluationResult",
"EvaluationRunConfig",
"OnDemandEvaluationDatasetRunner",
"EvaluatorConfig",
"EvaluatorResult",
"FileDatasetProvider",
"Input",
"OnDemandEvaluationDatasetRunner",
"PredefinedScenario",
"AgentCoreActorSimulator",
"PredefinedScenarioExecutor",
"Scenario",
"ScenarioExecutionResult",
"ScenarioExecutor",
"SimulatorResult",
"ScenarioResult",
"AgentSpanCollector",
"SimulatedScenario",
"SimulatedScenarioExecutor",
"SimulationConfig",
"Turn",
"PredefinedScenario",
"PredefinedScenarioExecutor",
]
Loading
Loading