From bf0886d4c774fd21198f2d2805fe0e1554cfaa60 Mon Sep 17 00:00:00 2001 From: K8sCat Date: Fri, 14 Nov 2025 15:14:20 +0800 Subject: [PATCH 1/3] add ensure_ascii param --- ecs_logging/_stdlib.py | 4 +++- ecs_logging/_structlog.py | 8 +++++++- ecs_logging/_utils.py | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ecs_logging/_stdlib.py b/ecs_logging/_stdlib.py index 28e9f13..8e541d7 100644 --- a/ecs_logging/_stdlib.py +++ b/ecs_logging/_stdlib.py @@ -84,6 +84,7 @@ def __init__( stack_trace_limit: Optional[int] = None, extra: Optional[Dict[str, Any]] = None, exclude_fields: Sequence[str] = (), + ensure_ascii: bool = True, ) -> None: """Initialize the ECS formatter. @@ -133,6 +134,7 @@ def __init__( self._extra = extra self._exclude_fields = frozenset(exclude_fields) self._stack_trace_limit = stack_trace_limit + self.ensure_ascii = ensure_ascii def _record_error_type(self, record: logging.LogRecord) -> Optional[str]: exc_info = record.exc_info @@ -162,7 +164,7 @@ def _record_error_message(self, record: logging.LogRecord) -> Optional[str]: def format(self, record: logging.LogRecord) -> str: result = self.format_to_ecs(record) - return json_dumps(result) + return json_dumps(result, ensure_ascii=self.ensure_ascii) def format_to_ecs(self, record: logging.LogRecord) -> Dict[str, Any]: """Function that can be overridden to add additional fields to diff --git a/ecs_logging/_structlog.py b/ecs_logging/_structlog.py index 5bc65e5..d60af75 100644 --- a/ecs_logging/_structlog.py +++ b/ecs_logging/_structlog.py @@ -26,6 +26,12 @@ class StructlogFormatter: """ECS formatter for the ``structlog`` module""" + def __init__( + self, + ensure_ascii: bool = True, + ) -> None: + self.ensure_ascii = ensure_ascii + def __call__(self, _: Any, name: str, event_dict: Dict[str, Any]) -> str: # Handle event -> message now so that stuff like `event.dataset` doesn't @@ -56,4 +62,4 @@ def format_to_ecs(self, event_dict: Dict[str, Any]) -> Dict[str, Any]: return event_dict def 
_json_dumps(self, value: Dict[str, Any]) -> str: - return json_dumps(value=value) + return json_dumps(value=value, ensure_ascii=self.ensure_ascii) diff --git a/ecs_logging/_utils.py b/ecs_logging/_utils.py index ee5dc6b..33b713a 100644 --- a/ecs_logging/_utils.py +++ b/ecs_logging/_utils.py @@ -98,7 +98,7 @@ def merge_dicts(from_: Dict[Any, Any], into: Dict[Any, Any]) -> Dict[Any, Any]: return into -def json_dumps(value: Dict[str, Any]) -> str: +def json_dumps(value: Dict[str, Any], ensure_ascii: bool = True) -> str: # Ensure that the first three fields are '@timestamp', # 'log.level', and 'message' per ECS spec @@ -124,7 +124,7 @@ def json_dumps(value: Dict[str, Any]) -> str: pass json_dumps = functools.partial( - json.dumps, sort_keys=True, separators=(",", ":"), default=_json_dumps_fallback + json.dumps, sort_keys=True, separators=(",", ":"), default=_json_dumps_fallback, ensure_ascii=ensure_ascii ) # Because we want to use 'sorted_keys=True' we manually build From c0f862b836fb629f77b6412398aaae95fc8c66a4 Mon Sep 17 00:00:00 2001 From: K8sCat Date: Thu, 20 Nov 2025 01:21:55 +0800 Subject: [PATCH 2/3] Enhance JSON formatting by adding `ensure_ascii` parameter to `StdlibFormatter` and `StructlogFormatter` to control non-ASCII character escaping. Update documentation and add tests to verify behavior for different `ensure_ascii` values. 
--- docs/reference/installation.md | 40 +++++++++++++ tests/test_stdlib_formatter.py | 98 +++++++++++++++++++++++++++++++ tests/test_structlog_formatter.py | 98 +++++++++++++++++++++++++++++++ 3 files changed, 236 insertions(+) diff --git a/docs/reference/installation.md b/docs/reference/installation.md index 1f5897b..2ffb853 100644 --- a/docs/reference/installation.md +++ b/docs/reference/installation.md @@ -101,6 +101,25 @@ formatter = StdlibFormatter( ``` +#### Controlling ASCII encoding [_controlling_ascii_encoding] + +By default, the `StdlibFormatter` escapes non-ASCII characters in the JSON output using Unicode escape sequences. If you want to preserve non-ASCII characters (such as Chinese, Japanese, emojis, etc.) in their original form, you can use the `ensure_ascii` parameter: + +```python +from ecs_logging import StdlibFormatter + +# Default behavior - non-ASCII characters are escaped +formatter = StdlibFormatter() +# Output: {"message":"Hello \u4e16\u754c"} + +# Preserve non-ASCII characters +formatter = StdlibFormatter(ensure_ascii=False) +# Output: {"message":"Hello 世界"} +``` + +This is particularly useful when working with internationalized applications or when you need to maintain readability of logs containing non-ASCII characters. + + ### Structlog Example [structlog] Note that the structlog processor should be the last processor in the list, as it handles the conversion to JSON as well as the ECS field enrichment. 
@@ -144,6 +163,27 @@ logger = logger.bind(**{ logger.debug("Example message!") ``` + +#### Controlling ASCII encoding for Structlog [_structlog_ascii_encoding] + +Similar to `StdlibFormatter`, the `StructlogFormatter` also supports the `ensure_ascii` parameter to control whether non-ASCII characters are escaped: + +```python +import structlog +import ecs_logging + +# Configure Structlog with ensure_ascii=False to preserve non-ASCII characters +structlog.configure( + processors=[ecs_logging.StructlogFormatter(ensure_ascii=False)], + wrapper_class=structlog.BoundLogger, + context_class=dict, + logger_factory=structlog.PrintLoggerFactory(), +) + +logger = structlog.get_logger("app") +logger.info("你好世界") # Non-ASCII characters will be preserved in output +``` + ```json { "@timestamp": "2020-03-26T13:08:11.728Z", diff --git a/tests/test_stdlib_formatter.py b/tests/test_stdlib_formatter.py index ee56d95..d95d08e 100644 --- a/tests/test_stdlib_formatter.py +++ b/tests/test_stdlib_formatter.py @@ -367,3 +367,101 @@ def test_apm_data_conflicts(spec_validator): '"log":{"logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},"function":"test_function"},' '"original":"1: hello"},"service":{"environment":"dev","name":"myapp","version":"1.0.0"}}' ) + + +def test_ensure_ascii_default(): + """Test that ensure_ascii defaults to True (escaping non-ASCII characters)""" + record = make_record() + record.msg = "Hello 世界" + record.args = () + + formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"]) + result = formatter.format(record) + + # With ensure_ascii=True (default), non-ASCII characters should be escaped + assert "\\u4e16\\u754c" in result + assert "世界" not in result + + # Verify the JSON is valid + parsed = json.loads(result) + assert parsed["message"] == "Hello 世界" + + +def test_ensure_ascii_true(): + """Test that ensure_ascii=True escapes non-ASCII characters""" + record = make_record() + record.msg = "Café ☕" + record.args = () + + formatter = 
ecs_logging.StdlibFormatter(exclude_fields=["process"], ensure_ascii=True) + result = formatter.format(record) + + # With ensure_ascii=True, non-ASCII characters should be escaped + assert "\\u00e9" in result # é is escaped + assert "\\u2615" in result # ☕ is escaped + assert "Café" not in result + assert "☕" not in result + + # Verify the JSON is valid and correctly decoded + parsed = json.loads(result) + assert parsed["message"] == "Café ☕" + + +def test_ensure_ascii_false(): + """Test that ensure_ascii=False preserves non-ASCII characters""" + record = make_record() + record.msg = "Hello 世界" + record.args = () + + formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"], ensure_ascii=False) + result = formatter.format(record) + + # With ensure_ascii=False, non-ASCII characters should be preserved + assert "世界" in result + assert "\\u4e16" not in result + + # Verify the JSON is valid + parsed = json.loads(result) + assert parsed["message"] == "Hello 世界" + + +def test_ensure_ascii_false_with_emoji(): + """Test that ensure_ascii=False preserves emoji and special characters""" + record = make_record() + record.msg = "Café ☕ 你好" + record.args = () + + formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"], ensure_ascii=False) + result = formatter.format(record) + + # With ensure_ascii=False, all non-ASCII characters should be preserved + assert "Café" in result + assert "☕" in result + assert "你好" in result + + # Verify the JSON is valid and correctly decoded + parsed = json.loads(result) + assert parsed["message"] == "Café ☕ 你好" + + +def test_ensure_ascii_with_extra_fields(): + """Test that ensure_ascii works with extra fields containing non-ASCII""" + record = make_record() + record.msg = "Test message" + record.args = () + + formatter = ecs_logging.StdlibFormatter( + exclude_fields=["process"], + ensure_ascii=False, + extra={"user": "用户", "city": "北京"} + ) + result = formatter.format(record) + + # With ensure_ascii=False, non-ASCII in extra 
fields should be preserved + assert "用户" in result + assert "北京" in result + + # Verify the JSON is valid + parsed = json.loads(result) + assert parsed["user"] == "用户" + assert parsed["city"] == "北京" diff --git a/tests/test_structlog_formatter.py b/tests/test_structlog_formatter.py index e9a4296..e65a353 100644 --- a/tests/test_structlog_formatter.py +++ b/tests/test_structlog_formatter.py @@ -108,3 +108,101 @@ def test_exception_log_is_ecs_compliant_when_used_with_format_exc_info( assert "error" in formatted_event_dict assert "stack_trace" in formatted_event_dict["error"] assert "" in formatted_event_dict["error"]["stack_trace"] + + +@mock.patch("time.time") +def test_ensure_ascii_default(time): + """Test that ensure_ascii defaults to True (escaping non-ASCII characters)""" + time.return_value = 1584720997.187709 + + formatter = ecs_logging.StructlogFormatter() + result = formatter(None, "debug", {"event": "Hello 世界", "log.logger": "test"}) + + # With ensure_ascii=True (default), non-ASCII characters should be escaped + assert "\\u4e16\\u754c" in result + assert "世界" not in result + + # Verify the JSON is valid + parsed = json.loads(result) + assert parsed["message"] == "Hello 世界" + + +@mock.patch("time.time") +def test_ensure_ascii_true(time): + """Test that ensure_ascii=True escapes non-ASCII characters""" + time.return_value = 1584720997.187709 + + formatter = ecs_logging.StructlogFormatter(ensure_ascii=True) + result = formatter(None, "info", {"event": "Café ☕", "log.logger": "test"}) + + # With ensure_ascii=True, non-ASCII characters should be escaped + assert "\\u00e9" in result # é is escaped + assert "\\u2615" in result # ☕ is escaped + assert "Café" not in result + assert "☕" not in result + + # Verify the JSON is valid and correctly decoded + parsed = json.loads(result) + assert parsed["message"] == "Café ☕" + + +@mock.patch("time.time") +def test_ensure_ascii_false(time): + """Test that ensure_ascii=False preserves non-ASCII characters""" + 
time.return_value = 1584720997.187709 + + formatter = ecs_logging.StructlogFormatter(ensure_ascii=False) + result = formatter(None, "debug", {"event": "Hello 世界", "log.logger": "test"}) + + # With ensure_ascii=False, non-ASCII characters should be preserved + assert "世界" in result + assert "\\u4e16" not in result + + # Verify the JSON is valid + parsed = json.loads(result) + assert parsed["message"] == "Hello 世界" + + +@mock.patch("time.time") +def test_ensure_ascii_false_with_emoji(time): + """Test that ensure_ascii=False preserves emoji and special characters""" + time.return_value = 1584720997.187709 + + formatter = ecs_logging.StructlogFormatter(ensure_ascii=False) + result = formatter(None, "info", {"event": "Café ☕ 你好", "log.logger": "test"}) + + # With ensure_ascii=False, all non-ASCII characters should be preserved + assert "Café" in result + assert "☕" in result + assert "你好" in result + + # Verify the JSON is valid and correctly decoded + parsed = json.loads(result) + assert parsed["message"] == "Café ☕ 你好" + + +@mock.patch("time.time") +def test_ensure_ascii_with_custom_fields(time): + """Test that ensure_ascii works with custom fields containing non-ASCII""" + time.return_value = 1584720997.187709 + + formatter = ecs_logging.StructlogFormatter(ensure_ascii=False) + result = formatter( + None, + "info", + { + "event": "Test", + "log.logger": "test", + "user": "用户", + "city": "北京", + }, + ) + + # With ensure_ascii=False, non-ASCII in custom fields should be preserved + assert "用户" in result + assert "北京" in result + + # Verify the JSON is valid + parsed = json.loads(result) + assert parsed["user"] == "用户" + assert parsed["city"] == "北京" From 67a5030c3f9c2773a3778f346c27f6800175e926 Mon Sep 17 00:00:00 2001 From: K8sCat Date: Mon, 24 Nov 2025 15:37:26 +0800 Subject: [PATCH 3/3] reformat files --- ecs_logging/_utils.py | 6 ++++- tests/test_stdlib_formatter.py | 44 ++++++++++++++++++------------- tests/test_structlog_formatter.py | 30 ++++++++++----------- 3 
files changed, 45 insertions(+), 35 deletions(-) diff --git a/ecs_logging/_utils.py b/ecs_logging/_utils.py index 33b713a..4750763 100644 --- a/ecs_logging/_utils.py +++ b/ecs_logging/_utils.py @@ -124,7 +124,11 @@ def json_dumps(value: Dict[str, Any], ensure_ascii: bool = True) -> str: pass json_dumps = functools.partial( - json.dumps, sort_keys=True, separators=(",", ":"), default=_json_dumps_fallback, ensure_ascii=ensure_ascii + json.dumps, + sort_keys=True, + separators=(",", ":"), + default=_json_dumps_fallback, + ensure_ascii=ensure_ascii, ) # Because we want to use 'sorted_keys=True' we manually build diff --git a/tests/test_stdlib_formatter.py b/tests/test_stdlib_formatter.py index d95d08e..d993186 100644 --- a/tests/test_stdlib_formatter.py +++ b/tests/test_stdlib_formatter.py @@ -374,14 +374,14 @@ def test_ensure_ascii_default(): record = make_record() record.msg = "Hello 世界" record.args = () - + formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"]) result = formatter.format(record) - + # With ensure_ascii=True (default), non-ASCII characters should be escaped assert "\\u4e16\\u754c" in result assert "世界" not in result - + # Verify the JSON is valid parsed = json.loads(result) assert parsed["message"] == "Hello 世界" @@ -392,16 +392,18 @@ def test_ensure_ascii_true(): record = make_record() record.msg = "Café ☕" record.args = () - - formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"], ensure_ascii=True) + + formatter = ecs_logging.StdlibFormatter( + exclude_fields=["process"], ensure_ascii=True + ) result = formatter.format(record) - + # With ensure_ascii=True, non-ASCII characters should be escaped assert "\\u00e9" in result # é is escaped assert "\\u2615" in result # ☕ is escaped assert "Café" not in result assert "☕" not in result - + # Verify the JSON is valid and correctly decoded parsed = json.loads(result) assert parsed["message"] == "Café ☕" @@ -412,14 +414,16 @@ def test_ensure_ascii_false(): record = make_record() 
record.msg = "Hello 世界" record.args = () - - formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"], ensure_ascii=False) + + formatter = ecs_logging.StdlibFormatter( + exclude_fields=["process"], ensure_ascii=False + ) result = formatter.format(record) - + # With ensure_ascii=False, non-ASCII characters should be preserved assert "世界" in result assert "\\u4e16" not in result - + # Verify the JSON is valid parsed = json.loads(result) assert parsed["message"] == "Hello 世界" @@ -430,15 +434,17 @@ def test_ensure_ascii_false_with_emoji(): record = make_record() record.msg = "Café ☕ 你好" record.args = () - - formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"], ensure_ascii=False) + + formatter = ecs_logging.StdlibFormatter( + exclude_fields=["process"], ensure_ascii=False + ) result = formatter.format(record) - + # With ensure_ascii=False, all non-ASCII characters should be preserved assert "Café" in result assert "☕" in result assert "你好" in result - + # Verify the JSON is valid and correctly decoded parsed = json.loads(result) assert parsed["message"] == "Café ☕ 你好" @@ -449,18 +455,18 @@ def test_ensure_ascii_with_extra_fields(): record = make_record() record.msg = "Test message" record.args = () - + formatter = ecs_logging.StdlibFormatter( exclude_fields=["process"], ensure_ascii=False, - extra={"user": "用户", "city": "北京"} + extra={"user": "用户", "city": "北京"}, ) result = formatter.format(record) - + # With ensure_ascii=False, non-ASCII in extra fields should be preserved assert "用户" in result assert "北京" in result - + # Verify the JSON is valid parsed = json.loads(result) assert parsed["user"] == "用户" diff --git a/tests/test_structlog_formatter.py b/tests/test_structlog_formatter.py index e65a353..6a14c08 100644 --- a/tests/test_structlog_formatter.py +++ b/tests/test_structlog_formatter.py @@ -114,14 +114,14 @@ def test_exception_log_is_ecs_compliant_when_used_with_format_exc_info( def test_ensure_ascii_default(time): """Test that 
ensure_ascii defaults to True (escaping non-ASCII characters)""" time.return_value = 1584720997.187709 - + formatter = ecs_logging.StructlogFormatter() result = formatter(None, "debug", {"event": "Hello 世界", "log.logger": "test"}) - + # With ensure_ascii=True (default), non-ASCII characters should be escaped assert "\\u4e16\\u754c" in result assert "世界" not in result - + # Verify the JSON is valid parsed = json.loads(result) assert parsed["message"] == "Hello 世界" @@ -131,16 +131,16 @@ def test_ensure_ascii_default(time): def test_ensure_ascii_true(time): """Test that ensure_ascii=True escapes non-ASCII characters""" time.return_value = 1584720997.187709 - + formatter = ecs_logging.StructlogFormatter(ensure_ascii=True) result = formatter(None, "info", {"event": "Café ☕", "log.logger": "test"}) - + # With ensure_ascii=True, non-ASCII characters should be escaped assert "\\u00e9" in result # é is escaped assert "\\u2615" in result # ☕ is escaped assert "Café" not in result assert "☕" not in result - + # Verify the JSON is valid and correctly decoded parsed = json.loads(result) assert parsed["message"] == "Café ☕" @@ -150,14 +150,14 @@ def test_ensure_ascii_true(time): def test_ensure_ascii_false(time): """Test that ensure_ascii=False preserves non-ASCII characters""" time.return_value = 1584720997.187709 - + formatter = ecs_logging.StructlogFormatter(ensure_ascii=False) result = formatter(None, "debug", {"event": "Hello 世界", "log.logger": "test"}) - + # With ensure_ascii=False, non-ASCII characters should be preserved assert "世界" in result assert "\\u4e16" not in result - + # Verify the JSON is valid parsed = json.loads(result) assert parsed["message"] == "Hello 世界" @@ -167,15 +167,15 @@ def test_ensure_ascii_false(time): def test_ensure_ascii_false_with_emoji(time): """Test that ensure_ascii=False preserves emoji and special characters""" time.return_value = 1584720997.187709 - + formatter = ecs_logging.StructlogFormatter(ensure_ascii=False) result = formatter(None, 
"info", {"event": "Café ☕ 你好", "log.logger": "test"}) - + # With ensure_ascii=False, all non-ASCII characters should be preserved assert "Café" in result assert "☕" in result assert "你好" in result - + # Verify the JSON is valid and correctly decoded parsed = json.loads(result) assert parsed["message"] == "Café ☕ 你好" @@ -185,7 +185,7 @@ def test_ensure_ascii_false_with_emoji(time): def test_ensure_ascii_with_custom_fields(time): """Test that ensure_ascii works with custom fields containing non-ASCII""" time.return_value = 1584720997.187709 - + formatter = ecs_logging.StructlogFormatter(ensure_ascii=False) result = formatter( None, @@ -197,11 +197,11 @@ def test_ensure_ascii_with_custom_fields(time): "city": "北京", }, ) - + # With ensure_ascii=False, non-ASCII in custom fields should be preserved assert "用户" in result assert "北京" in result - + # Verify the JSON is valid parsed = json.loads(result) assert parsed["user"] == "用户"