Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions docs/reference/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,25 @@ formatter = StdlibFormatter(
```


#### Controlling ASCII encoding [_controlling_ascii_encoding]

By default, `StdlibFormatter` escapes non-ASCII characters in the JSON output as `\uXXXX` Unicode escape sequences. To keep non-ASCII characters (for example Chinese or Japanese text, or emoji) in their original form, pass `ensure_ascii=False`:

```python
from ecs_logging import StdlibFormatter

# Default behavior - non-ASCII characters are escaped
formatter = StdlibFormatter()
# Output: {"message":"Hello \u4e16\u754c"}

# Preserve non-ASCII characters
formatter = StdlibFormatter(ensure_ascii=False)
# Output: {"message":"Hello 世界"}
```

This is particularly useful when working with internationalized applications or when you need to maintain readability of logs containing non-ASCII characters.


### Structlog Example [structlog]

Note that the structlog processor should be the last processor in the list, as it handles the conversion to JSON as well as the ECS field enrichment.
Expand Down Expand Up @@ -144,6 +163,27 @@ logger = logger.bind(**{
logger.debug("Example message!")
```


#### Controlling ASCII encoding for Structlog [_structlog_ascii_encoding]

Like `StdlibFormatter`, `StructlogFormatter` accepts an `ensure_ascii` parameter (default `True`) that controls whether non-ASCII characters are escaped:

```python
import structlog
import ecs_logging

# Configure Structlog with ensure_ascii=False to preserve non-ASCII characters
structlog.configure(
    processors=[ecs_logging.StructlogFormatter(ensure_ascii=False)],
    wrapper_class=structlog.BoundLogger,
    context_class=dict,
    logger_factory=structlog.PrintLoggerFactory(),
)

logger = structlog.get_logger("app")
logger.info("你好世界") # Non-ASCII characters will be preserved in output
```

```json
{
"@timestamp": "2020-03-26T13:08:11.728Z",
Expand Down
4 changes: 3 additions & 1 deletion ecs_logging/_stdlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def __init__(
stack_trace_limit: Optional[int] = None,
extra: Optional[Dict[str, Any]] = None,
exclude_fields: Sequence[str] = (),
ensure_ascii: bool = True,
) -> None:
"""Initialize the ECS formatter.

Expand Down Expand Up @@ -133,6 +134,7 @@ def __init__(
self._extra = extra
self._exclude_fields = frozenset(exclude_fields)
self._stack_trace_limit = stack_trace_limit
self.ensure_ascii = ensure_ascii

def _record_error_type(self, record: logging.LogRecord) -> Optional[str]:
exc_info = record.exc_info
Expand Down Expand Up @@ -162,7 +164,7 @@ def _record_error_message(self, record: logging.LogRecord) -> Optional[str]:

def format(self, record: logging.LogRecord) -> str:
    """Return the JSON string produced for ``record``.

    The record is first converted to a dictionary by ``format_to_ecs`` and
    then serialized by ``json_dumps``.

    :param record: The log record to serialize.
    :return: The serialized record, honoring ``self.ensure_ascii``.
    """
    result = self.format_to_ecs(record)
    # Only one return: an earlier bare ``json_dumps(result)`` return would
    # make this one unreachable and silently ignore ``ensure_ascii``.
    return json_dumps(result, ensure_ascii=self.ensure_ascii)

def format_to_ecs(self, record: logging.LogRecord) -> Dict[str, Any]:
"""Function that can be overridden to add additional fields to
Expand Down
8 changes: 7 additions & 1 deletion ecs_logging/_structlog.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@
class StructlogFormatter:
"""ECS formatter for the ``structlog`` module"""

def __init__(
self,
ensure_ascii: bool = True,
) -> None:
self.ensure_ascii = ensure_ascii

def __call__(self, _: Any, name: str, event_dict: Dict[str, Any]) -> str:

# Handle event -> message now so that stuff like `event.dataset` doesn't
Expand Down Expand Up @@ -56,4 +62,4 @@ def format_to_ecs(self, event_dict: Dict[str, Any]) -> Dict[str, Any]:
return event_dict

def _json_dumps(self, value: Dict[str, Any]) -> str:
    """Serialize ``value`` with ``json_dumps``.

    :param value: The event dictionary to serialize.
    :return: The serialized string, honoring ``self.ensure_ascii``.
    """
    # Only one return: an earlier ``json_dumps(value=value)`` return would
    # make this one unreachable and silently ignore ``ensure_ascii``.
    return json_dumps(value=value, ensure_ascii=self.ensure_ascii)
8 changes: 6 additions & 2 deletions ecs_logging/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def merge_dicts(from_: Dict[Any, Any], into: Dict[Any, Any]) -> Dict[Any, Any]:
return into


def json_dumps(value: Dict[str, Any]) -> str:
def json_dumps(value: Dict[str, Any], ensure_ascii: bool = True) -> str:

# Ensure that the first three fields are '@timestamp',
# 'log.level', and 'message' per ECS spec
Expand All @@ -124,7 +124,11 @@ def json_dumps(value: Dict[str, Any]) -> str:
pass

json_dumps = functools.partial(
json.dumps, sort_keys=True, separators=(",", ":"), default=_json_dumps_fallback
json.dumps,
sort_keys=True,
separators=(",", ":"),
default=_json_dumps_fallback,
ensure_ascii=ensure_ascii,
)

# Because we want to use 'sorted_keys=True' we manually build
Expand Down
104 changes: 104 additions & 0 deletions tests/test_stdlib_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,3 +367,107 @@ def test_apm_data_conflicts(spec_validator):
'"log":{"logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},"function":"test_function"},'
'"original":"1: hello"},"service":{"environment":"dev","name":"myapp","version":"1.0.0"}}'
)


def test_ensure_ascii_default():
    """A formatter built without ``ensure_ascii`` escapes non-ASCII text."""
    fmt = ecs_logging.StdlibFormatter(exclude_fields=["process"])

    rec = make_record()
    rec.msg = "Hello 世界"
    rec.args = ()

    rendered = fmt.format(rec)

    # The raw output carries \uXXXX escapes, not the characters themselves.
    assert "\\u4e16\\u754c" in rendered
    assert "世界" not in rendered

    # Decoding the JSON restores the original message.
    decoded = json.loads(rendered)
    assert decoded["message"] == "Hello 世界"


def test_ensure_ascii_true():
    """Passing ``ensure_ascii=True`` explicitly yields escaped output."""
    fmt = ecs_logging.StdlibFormatter(exclude_fields=["process"], ensure_ascii=True)

    rec = make_record()
    rec.msg = "Café ☕"
    rec.args = ()

    rendered = fmt.format(rec)

    # Each non-ASCII character must show up only in its escaped form.
    for escaped in ("\\u00e9", "\\u2615"):  # é and ☕ respectively
        assert escaped in rendered
    for literal in ("Café", "☕"):
        assert literal not in rendered

    # The output is still valid JSON that decodes to the original text.
    decoded = json.loads(rendered)
    assert decoded["message"] == "Café ☕"


def test_ensure_ascii_false():
    """With ``ensure_ascii=False`` the non-ASCII text is written verbatim."""
    fmt = ecs_logging.StdlibFormatter(exclude_fields=["process"], ensure_ascii=False)

    rec = make_record()
    rec.msg = "Hello 世界"
    rec.args = ()

    rendered = fmt.format(rec)

    # The characters appear as-is and no escape sequence is emitted for them.
    assert "世界" in rendered
    assert "\\u4e16" not in rendered

    # The output is still valid JSON.
    decoded = json.loads(rendered)
    assert decoded["message"] == "Hello 世界"


def test_ensure_ascii_false_with_emoji():
    """Accented letters, symbols and CJK text all survive ``ensure_ascii=False``."""
    fmt = ecs_logging.StdlibFormatter(exclude_fields=["process"], ensure_ascii=False)

    rec = make_record()
    rec.msg = "Café ☕ 你好"
    rec.args = ()

    rendered = fmt.format(rec)

    # Every non-ASCII fragment is present unescaped in the raw output.
    for fragment in ("Café", "☕", "你好"):
        assert fragment in rendered

    # The output is valid JSON that decodes to the original text.
    decoded = json.loads(rendered)
    assert decoded["message"] == "Café ☕ 你好"


def test_ensure_ascii_with_extra_fields():
    """Non-ASCII values supplied through ``extra`` are kept with ``ensure_ascii=False``."""
    expected = (("user", "用户"), ("city", "北京"))

    fmt = ecs_logging.StdlibFormatter(
        exclude_fields=["process"],
        ensure_ascii=False,
        extra={"user": "用户", "city": "北京"},
    )

    rec = make_record()
    rec.msg = "Test message"
    rec.args = ()

    rendered = fmt.format(rec)

    # The extra values appear unescaped in the raw output.
    for _, text in expected:
        assert text in rendered

    # The output is valid JSON carrying the extra fields at the top level.
    decoded = json.loads(rendered)
    for key, text in expected:
        assert decoded[key] == text
98 changes: 98 additions & 0 deletions tests/test_structlog_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,101 @@ def test_exception_log_is_ecs_compliant_when_used_with_format_exc_info(
assert "error" in formatted_event_dict
assert "stack_trace" in formatted_event_dict["error"]
assert "<stack trace here>" in formatted_event_dict["error"]["stack_trace"]


@mock.patch("time.time")
def test_ensure_ascii_default(time):
    """A formatter built with no arguments escapes non-ASCII characters."""
    time.return_value = 1584720997.187709
    event = {"event": "Hello 世界", "log.logger": "test"}

    fmt = ecs_logging.StructlogFormatter()
    rendered = fmt(None, "debug", event)

    # The raw output carries \uXXXX escapes, not the characters themselves.
    assert "\\u4e16\\u754c" in rendered
    assert "世界" not in rendered

    # Decoding the JSON restores the original message.
    decoded = json.loads(rendered)
    assert decoded["message"] == "Hello 世界"


@mock.patch("time.time")
def test_ensure_ascii_true(time):
    """Passing ``ensure_ascii=True`` explicitly yields escaped output."""
    time.return_value = 1584720997.187709
    event = {"event": "Café ☕", "log.logger": "test"}

    fmt = ecs_logging.StructlogFormatter(ensure_ascii=True)
    rendered = fmt(None, "info", event)

    # Each non-ASCII character must show up only in its escaped form.
    for escaped in ("\\u00e9", "\\u2615"):  # é and ☕ respectively
        assert escaped in rendered
    for literal in ("Café", "☕"):
        assert literal not in rendered

    # The output is still valid JSON that decodes to the original text.
    decoded = json.loads(rendered)
    assert decoded["message"] == "Café ☕"


@mock.patch("time.time")
def test_ensure_ascii_false(time):
    """With ``ensure_ascii=False`` the non-ASCII text is written verbatim."""
    time.return_value = 1584720997.187709
    event = {"event": "Hello 世界", "log.logger": "test"}

    fmt = ecs_logging.StructlogFormatter(ensure_ascii=False)
    rendered = fmt(None, "debug", event)

    # The characters appear as-is and no escape sequence is emitted for them.
    assert "世界" in rendered
    assert "\\u4e16" not in rendered

    # The output is still valid JSON.
    decoded = json.loads(rendered)
    assert decoded["message"] == "Hello 世界"


@mock.patch("time.time")
def test_ensure_ascii_false_with_emoji(time):
    """Accented letters, symbols and CJK text all survive ``ensure_ascii=False``."""
    time.return_value = 1584720997.187709
    event = {"event": "Café ☕ 你好", "log.logger": "test"}

    fmt = ecs_logging.StructlogFormatter(ensure_ascii=False)
    rendered = fmt(None, "info", event)

    # Every non-ASCII fragment is present unescaped in the raw output.
    for fragment in ("Café", "☕", "你好"):
        assert fragment in rendered

    # The output is valid JSON that decodes to the original text.
    decoded = json.loads(rendered)
    assert decoded["message"] == "Café ☕ 你好"


@mock.patch("time.time")
def test_ensure_ascii_with_custom_fields(time):
    """Non-ASCII values in custom event fields are kept with ``ensure_ascii=False``."""
    time.return_value = 1584720997.187709
    expected = (("user", "用户"), ("city", "北京"))
    event = {
        "event": "Test",
        "log.logger": "test",
        "user": "用户",
        "city": "北京",
    }

    fmt = ecs_logging.StructlogFormatter(ensure_ascii=False)
    rendered = fmt(None, "info", event)

    # The custom values appear unescaped in the raw output.
    for _, text in expected:
        assert text in rendered

    # The output is valid JSON carrying the custom fields at the top level.
    decoded = json.loads(rendered)
    for key, text in expected:
        assert decoded[key] == text