Skip to content

Commit c0f862b

Browse files
committed
Enhance JSON formatting by adding ensure_ascii parameter to StdlibFormatter and StructlogFormatter to control non-ASCII character escaping. Update documentation and add tests to verify behavior for different ensure_ascii values.
1 parent bf0886d commit c0f862b

File tree

3 files changed

+236
-0
lines changed

3 files changed

+236
-0
lines changed

docs/reference/installation.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,25 @@ formatter = StdlibFormatter(
101101
```
102102

103103

104+
#### Controlling ASCII encoding [_controlling_ascii_encoding]
105+
106+
By default, the `StdlibFormatter` escapes non-ASCII characters in the JSON output as `\uXXXX` Unicode escape sequences. To keep non-ASCII characters (such as Chinese or Japanese text, or emoji) in their original form, set the `ensure_ascii` parameter to `False`:
107+
108+
```python
109+
from ecs_logging import StdlibFormatter
110+
111+
# Default behavior - non-ASCII characters are escaped
112+
formatter = StdlibFormatter()
113+
# Output: {"message":"Hello \u4e16\u754c"}
114+
115+
# Preserve non-ASCII characters
116+
formatter = StdlibFormatter(ensure_ascii=False)
117+
# Output: {"message":"Hello 世界"}
118+
```
119+
120+
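This is particularly useful when working with internationalized applications or when you need to maintain readability of logs containing non-ASCII characters. When `ensure_ascii=False` is used, make sure the stream or file that the log handler writes to uses an encoding that can represent these characters (for example, UTF-8).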
121+
122+
104123
### Structlog Example [structlog]
105124

106125
Note that the structlog processor should be the last processor in the list, as it handles the conversion to JSON as well as the ECS field enrichment.
@@ -144,6 +163,27 @@ logger = logger.bind(**{
144163
logger.debug("Example message!")
145164
```
146165

166+
167+
#### Controlling ASCII encoding for Structlog [_structlog_ascii_encoding]
168+
169+
Like `StdlibFormatter`, `StructlogFormatter` accepts an `ensure_ascii` parameter that controls whether non-ASCII characters are escaped:
170+
171+
```python
172+
import structlog
173+
import ecs_logging
174+
175+
# Configure Structlog with ensure_ascii=False to preserve non-ASCII characters
176+
structlog.configure(
177+
processors=[ecs_logging.StructlogFormatter(ensure_ascii=False)],
178+
wrapper_class=structlog.BoundLogger,
179+
context_class=dict,
180+
logger_factory=structlog.PrintLoggerFactory(),
181+
)
182+
183+
logger = structlog.get_logger("app")
184+
logger.info("你好世界") # Non-ASCII characters will be preserved in output
185+
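```

The JSON below is the output of the earlier `logger.debug("Example message!")` example, not of the `ensure_ascii` snippet directly above: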
186+
147187
```json
148188
{
149189
"@timestamp": "2020-03-26T13:08:11.728Z",

tests/test_stdlib_formatter.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,3 +367,101 @@ def test_apm_data_conflicts(spec_validator):
367367
'"log":{"logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},"function":"test_function"},'
368368
'"original":"1: hello"},"service":{"environment":"dev","name":"myapp","version":"1.0.0"}}'
369369
)
370+
371+
372+
def test_ensure_ascii_default():
    """A formatter built without ``ensure_ascii`` escapes non-ASCII text."""
    text = "Hello 世界"
    log_record = make_record()
    log_record.msg = text
    log_record.args = ()

    fmt = ecs_logging.StdlibFormatter(exclude_fields=["process"])
    output = fmt.format(log_record)

    # The raw JSON text carries the escape sequences, not the literal
    # characters.
    assert "\\u4e16\\u754c" in output
    assert "世界" not in output

    # Decoding the JSON must give back the original message.
    assert json.loads(output)["message"] == text
388+
389+
390+
def test_ensure_ascii_true():
    """Passing ``ensure_ascii=True`` explicitly escapes non-ASCII text."""
    text = "Café ☕"
    log_record = make_record()
    log_record.msg = text
    log_record.args = ()

    fmt = ecs_logging.StdlibFormatter(
        exclude_fields=["process"],
        ensure_ascii=True,
    )
    output = fmt.format(log_record)

    # Escapes for "é" and "☕" respectively must be present ...
    for escaped in ("\\u00e9", "\\u2615"):
        assert escaped in output
    # ... and the literal forms must be absent.
    for literal in ("Café", "☕"):
        assert literal not in output

    # The output is still valid JSON that decodes to the original message.
    assert json.loads(output)["message"] == text
408+
409+
410+
def test_ensure_ascii_false():
    """With ``ensure_ascii=False`` non-ASCII text is emitted verbatim."""
    text = "Hello 世界"
    log_record = make_record()
    log_record.msg = text
    log_record.args = ()

    fmt = ecs_logging.StdlibFormatter(
        exclude_fields=["process"],
        ensure_ascii=False,
    )
    output = fmt.format(log_record)

    # Literal characters are present and no escape sequence replaces them.
    assert "世界" in output
    assert "\\u4e16" not in output

    # The output is still valid JSON that decodes to the original message.
    assert json.loads(output)["message"] == text
426+
427+
428+
def test_ensure_ascii_false_with_emoji():
    """``ensure_ascii=False`` keeps accented, symbol and CJK characters."""
    text = "Café ☕ 你好"
    log_record = make_record()
    log_record.msg = text
    log_record.args = ()

    fmt = ecs_logging.StdlibFormatter(
        exclude_fields=["process"],
        ensure_ascii=False,
    )
    output = fmt.format(log_record)

    # Every non-ASCII fragment of the message appears literally in the output.
    for literal in ("Café", "☕", "你好"):
        assert literal in output

    # The output is still valid JSON that decodes to the original message.
    assert json.loads(output)["message"] == text
445+
446+
447+
def test_ensure_ascii_with_extra_fields():
    """Non-ASCII values passed via ``extra`` survive ``ensure_ascii=False``."""
    # Kept as an immutable tuple so the checks below do not depend on what
    # the formatter does with the dict it receives.
    expected = (("user", "用户"), ("city", "北京"))

    log_record = make_record()
    log_record.msg = "Test message"
    log_record.args = ()

    fmt = ecs_logging.StdlibFormatter(
        exclude_fields=["process"],
        ensure_ascii=False,
        extra=dict(expected),
    )
    output = fmt.format(log_record)

    # The extra values appear literally in the raw JSON text.
    for _, value in expected:
        assert value in output

    # The output is valid JSON and exposes each extra field under its key.
    decoded = json.loads(output)
    for key, value in expected:
        assert decoded[key] == value

tests/test_structlog_formatter.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,101 @@ def test_exception_log_is_ecs_compliant_when_used_with_format_exc_info(
108108
assert "error" in formatted_event_dict
109109
assert "stack_trace" in formatted_event_dict["error"]
110110
assert "<stack trace here>" in formatted_event_dict["error"]["stack_trace"]
111+
112+
113+
@mock.patch("time.time")
def test_ensure_ascii_default(time):
    """A processor built without ``ensure_ascii`` escapes non-ASCII text."""
    # time.time is patched so the call returns a fixed value.
    time.return_value = 1584720997.187709

    processor = ecs_logging.StructlogFormatter()
    event_dict = {"event": "Hello 世界", "log.logger": "test"}
    output = processor(None, "debug", event_dict)

    # The raw JSON text carries the escape sequences, not the literal
    # characters.
    assert "\\u4e16\\u754c" in output
    assert "世界" not in output

    # Decoding the JSON must give back the original message.
    decoded = json.loads(output)
    assert decoded["message"] == "Hello 世界"
128+
129+
130+
@mock.patch("time.time")
def test_ensure_ascii_true(time):
    """Passing ``ensure_ascii=True`` explicitly escapes non-ASCII text."""
    # time.time is patched so the call returns a fixed value.
    time.return_value = 1584720997.187709

    processor = ecs_logging.StructlogFormatter(ensure_ascii=True)
    event_dict = {"event": "Café ☕", "log.logger": "test"}
    output = processor(None, "info", event_dict)

    # Escapes for "é" and "☕" respectively must be present ...
    for escaped in ("\\u00e9", "\\u2615"):
        assert escaped in output
    # ... and the literal forms must be absent.
    for literal in ("Café", "☕"):
        assert literal not in output

    # The output is still valid JSON that decodes to the original message.
    decoded = json.loads(output)
    assert decoded["message"] == "Café ☕"
147+
148+
149+
@mock.patch("time.time")
def test_ensure_ascii_false(time):
    """With ``ensure_ascii=False`` non-ASCII text is emitted verbatim."""
    # time.time is patched so the call returns a fixed value.
    time.return_value = 1584720997.187709

    processor = ecs_logging.StructlogFormatter(ensure_ascii=False)
    event_dict = {"event": "Hello 世界", "log.logger": "test"}
    output = processor(None, "debug", event_dict)

    # Literal characters are present and no escape sequence replaces them.
    assert "世界" in output
    assert "\\u4e16" not in output

    # The output is still valid JSON that decodes to the original message.
    decoded = json.loads(output)
    assert decoded["message"] == "Hello 世界"
164+
165+
166+
@mock.patch("time.time")
def test_ensure_ascii_false_with_emoji(time):
    """``ensure_ascii=False`` keeps accented, symbol and CJK characters."""
    # time.time is patched so the call returns a fixed value.
    time.return_value = 1584720997.187709

    processor = ecs_logging.StructlogFormatter(ensure_ascii=False)
    event_dict = {"event": "Café ☕ 你好", "log.logger": "test"}
    output = processor(None, "info", event_dict)

    # Every non-ASCII fragment of the event appears literally in the output.
    for literal in ("Café", "☕", "你好"):
        assert literal in output

    # The output is still valid JSON that decodes to the original message.
    decoded = json.loads(output)
    assert decoded["message"] == "Café ☕ 你好"
182+
183+
184+
@mock.patch("time.time")
def test_ensure_ascii_with_custom_fields(time):
    """Non-ASCII values in custom event keys survive ``ensure_ascii=False``."""
    # time.time is patched so the call returns a fixed value.
    time.return_value = 1584720997.187709

    # Kept as an immutable tuple so the checks below do not depend on what
    # the processor does with the event dict it receives.
    expected = (("user", "用户"), ("city", "北京"))
    event_dict = {"event": "Test", "log.logger": "test"}
    event_dict.update(expected)

    processor = ecs_logging.StructlogFormatter(ensure_ascii=False)
    output = processor(None, "info", event_dict)

    # The custom values appear literally in the raw JSON text.
    for _, value in expected:
        assert value in output

    # The output is valid JSON and exposes each custom field under its key.
    decoded = json.loads(output)
    for key, value in expected:
        assert decoded[key] == value

0 commit comments

Comments
 (0)