From 3a276dce6db0c50bb3f138e805d0da36b35e7e68 Mon Sep 17 00:00:00 2001 From: Felix Delattre Date: Tue, 16 Sep 2025 16:47:46 +0200 Subject: [PATCH 1/2] Made max header length configurable for cloudevents. --- eoapi_notifier/outputs/cloudevents.py | 20 +++++++++- examples/config.yaml | 1 + tests/test_cloudevents_output.py | 57 +++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/eoapi_notifier/outputs/cloudevents.py b/eoapi_notifier/outputs/cloudevents.py index f775884..8e89bca 100644 --- a/eoapi_notifier/outputs/cloudevents.py +++ b/eoapi_notifier/outputs/cloudevents.py @@ -28,6 +28,7 @@ class CloudEventsConfig(BasePluginConfig): timeout: float = 30.0 max_retries: int = 3 retry_backoff: float = 1.0 + max_header_length: int = 2048 @field_validator("endpoint") @classmethod @@ -53,6 +54,7 @@ def get_sample_config(cls) -> dict[str, Any]: "timeout": 30.0, "max_retries": 3, "retry_backoff": 1.0, + "max_header_length": 2048, } @classmethod @@ -191,6 +193,16 @@ async def send_event(self, event: NotificationEvent) -> bool: ) return False + def _truncate_header(self, value: str | None) -> str | None: + """Truncate header value to max_header_length if needed.""" + if not value: + return value + if len(value.encode("utf-8")) <= self.config.max_header_length: + return value + # Truncate to byte limit, ensuring valid UTF-8 + truncated = value.encode("utf-8")[: self.config.max_header_length] + return truncated.decode("utf-8", errors="ignore") + def _convert_to_cloudevent(self, event: NotificationEvent) -> CloudEvent: """Convert NotificationEvent to CloudEvent.""" # Use config values which now include environment overrides @@ -211,11 +223,15 @@ def _convert_to_cloudevent(self, event: NotificationEvent) -> CloudEvent: # Add subject if item_id exists if event.item_id: - attributes["subject"] = event.item_id + truncated_subject = self._truncate_header(event.item_id) + if truncated_subject: + attributes["subject"] = truncated_subject # Add collection attribute if event.collection: - attributes["collection"] = event.collection + truncated_collection = self._truncate_header(event.collection) + if truncated_collection: + attributes["collection"] = truncated_collection # Event data payload data = { diff --git a/examples/config.yaml b/examples/config.yaml index b466fc4..2f8f4ce 100644 --- a/examples/config.yaml +++ b/examples/config.yaml @@ -61,6 +61,7 @@ outputs: # Optional: HTTP settings # timeout: 30.0 # CLOUDEVENTS_TIMEOUT # max_retries: 3 # CLOUDEVENTS_MAX_RETRIES + # max_header_length: 2048 # CLOUDEVENTS_MAX_HEADER_LENGTH # Example with multiple sources and outputs # sources: diff --git a/tests/test_cloudevents_output.py b/tests/test_cloudevents_output.py index 53c5d7e..cffa507 100644 --- a/tests/test_cloudevents_output.py +++ b/tests/test_cloudevents_output.py @@ -34,6 +34,7 @@ def test_default_configuration(self) -> None: assert config.event_type == "org.eoapi.stac" assert config.timeout == 30.0 assert config.max_retries == 3 + assert config.max_header_length == 2048 def test_endpoint_validation_error(self) -> None: """Test endpoint validation.""" @@ -198,6 +199,62 @@ def test_convert_to_cloudevent( assert cloud_event["subject"] == "test-item" assert cloud_event["collection"] == "test-collection" + def test_truncate_header(self, adapter: CloudEventsAdapter) -> None: + """Test header value truncation.""" + # Short string should not be truncated + short = "short-string" + assert adapter._truncate_header(short) == short + + # None should remain None + assert adapter._truncate_header(None) is None + + # Long string should be truncated to max_header_length bytes + long_string = "a" * 3000 + truncated = adapter._truncate_header(long_string) + assert truncated is not None + assert len(truncated.encode("utf-8")) <= adapter.config.max_header_length + assert len(truncated) <= adapter.config.max_header_length + + # UTF-8 multi-byte characters should be handled correctly + unicode_string = "测试" * 1000 # Chinese characters (3 bytes each) + truncated_unicode = adapter._truncate_header(unicode_string) + assert truncated_unicode is not None + assert ( + len(truncated_unicode.encode("utf-8")) <= adapter.config.max_header_length + ) + # Should not break in the middle of a character + assert truncated_unicode.encode("utf-8").decode("utf-8") == truncated_unicode + + def test_convert_to_cloudevent_with_long_headers( + self, config: CloudEventsConfig + ) -> None: + """Test CloudEvent conversion with long header values.""" + config.max_header_length = 50 # Small limit for testing + adapter = CloudEventsAdapter(config) + + # Create event with long item_id and collection + event = NotificationEvent( + source="/test/source", + type="test.type", + operation="INSERT", + collection="a-very-long-collection-name-that-exceeds-the-limit", + item_id="a-very-long-item-id-that-also-exceeds-the-configured-limit", + ) + + cloud_event = adapter._convert_to_cloudevent(event) + + # Check that long values are truncated in headers + assert "subject" in cloud_event + assert "collection" in cloud_event + assert len(cloud_event["subject"].encode("utf-8")) <= config.max_header_length + assert ( + len(cloud_event["collection"].encode("utf-8")) <= config.max_header_length + ) + + # Original values should still be in data payload + assert cloud_event.data["item_id"] == event.item_id + assert cloud_event.data["collection"] == event.collection + def test_operation_mapping(self, adapter: CloudEventsAdapter) -> None: """Test operation to event type mapping.""" test_cases = [ From 178994c32251cf7920d662d72ace64efe23882a1 Mon Sep 17 00:00:00 2001 From: Felix Delattre Date: Thu, 18 Sep 2025 12:07:37 +0200 Subject: [PATCH 2/2] Updated to 4k as the default header length. --- eoapi_notifier/outputs/cloudevents.py | 4 ++-- examples/config.yaml | 2 +- tests/test_cloudevents_output.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/eoapi_notifier/outputs/cloudevents.py b/eoapi_notifier/outputs/cloudevents.py index 8e89bca..368f282 100644 --- a/eoapi_notifier/outputs/cloudevents.py +++ b/eoapi_notifier/outputs/cloudevents.py @@ -28,7 +28,7 @@ class CloudEventsConfig(BasePluginConfig): timeout: float = 30.0 max_retries: int = 3 retry_backoff: float = 1.0 - max_header_length: int = 2048 + max_header_length: int = 4096 @field_validator("endpoint") @classmethod @@ -54,7 +54,7 @@ def get_sample_config(cls) -> dict[str, Any]: "timeout": 30.0, "max_retries": 3, "retry_backoff": 1.0, - "max_header_length": 2048, + "max_header_length": 4096, } @classmethod diff --git a/examples/config.yaml b/examples/config.yaml index 2f8f4ce..dbb6b61 100644 --- a/examples/config.yaml +++ b/examples/config.yaml @@ -61,7 +61,7 @@ outputs: # Optional: HTTP settings # timeout: 30.0 # CLOUDEVENTS_TIMEOUT # max_retries: 3 # CLOUDEVENTS_MAX_RETRIES - # max_header_length: 2048 # CLOUDEVENTS_MAX_HEADER_LENGTH + # max_header_length: 4096 # CLOUDEVENTS_MAX_HEADER_LENGTH # Example with multiple sources and outputs # sources: diff --git a/tests/test_cloudevents_output.py b/tests/test_cloudevents_output.py index cffa507..64fdfce 100644 --- a/tests/test_cloudevents_output.py +++ b/tests/test_cloudevents_output.py @@ -34,7 +34,7 @@ def test_default_configuration(self) -> None: assert config.event_type == "org.eoapi.stac" assert config.timeout == 30.0 assert config.max_retries == 3 - assert config.max_header_length == 2048 + assert config.max_header_length == 4096 def test_endpoint_validation_error(self) -> None: """Test endpoint validation."""