From ba0547a48c9d0b5ea775a246294b1c520c24f6f6 Mon Sep 17 00:00:00 2001 From: "shuizhao.gh" Date: Tue, 18 Nov 2025 10:05:29 +0800 Subject: [PATCH] feat: LogItems/LogTags are optional now --- Cargo.lock | 2 +- Cargo.toml | 2 +- pyproject.toml | 2 +- src/lib.rs | 58 +++++++------- tests/test_error.py | 35 +++++---- tests/test_large_dtaa.py | 4 + tests/test_missing_fields.py | 146 +++++++++++++++++++++++++++++++++++ 7 files changed, 203 insertions(+), 46 deletions(-) create mode 100644 tests/test_missing_fields.py diff --git a/Cargo.lock b/Cargo.lock index a7908d1..8be20c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,7 +4,7 @@ version = 4 [[package]] name = "aliyun-log-fastpb" -version = "0.1.0" +version = "0.2.0" dependencies = [ "pyo3", "quick-protobuf", diff --git a/Cargo.toml b/Cargo.toml index 6320c34..41dd850 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aliyun-log-fastpb" -version = "0.1.0" +version = "0.2.0" edition = "2021" authors = ["Aliyun Log FastPB Contributors"] description = "Fast protobuf serialization for Aliyun Log using PyO3 and quick-protobuf" diff --git a/pyproject.toml b/pyproject.toml index 3d91327..564bb8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "aliyun-log-fastpb" -version = "0.1.0" +version = "0.2.0" description = "Fast protobuf serialization for Aliyun Log using PyO3 and quick-protobuf" readme = "README.md" requires-python = ">=3.7" diff --git a/src/lib.rs b/src/lib.rs index 397c778..97a40c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -274,31 +274,35 @@ fn serialize_log_group_generic( where G: ParseLogGroup<'static>, { - let log_items_obj = log_group_dict - .get_item("LogItems") - .map_err(|_| PyValueError::new_err("LogGroup missing 'LogItems' field"))? 
- .ok_or_else(|| PyValueError::new_err("LogGroup missing 'LogItems' field"))?; - let log_items_list = log_items_obj - .cast::() - .map_err(|_| PyTypeError::new_err("LogItems must be a list"))?; + // Parse LogItems - if not present, default to empty list + let logs = if let Ok(Some(log_items_obj)) = log_group_dict.get_item("LogItems") { + let log_items_list = log_items_obj + .cast::() + .map_err(|_| PyTypeError::new_err("LogItems must be a list"))?; + + let mut logs = Vec::with_capacity(log_items_list.len()); + for log_item in log_items_list.iter() { + logs.push(parse_log_generic::(&log_item)?); + } + logs + } else { + Vec::new() + }; - let mut logs = Vec::with_capacity(log_items_list.len()); - for log_item in log_items_list.iter() { - logs.push(parse_log_generic::(&log_item)?); - } + // Parse LogTags - if not present, default to empty list + let log_tags = if let Ok(Some(log_tags_obj)) = log_group_dict.get_item("LogTags") { + let log_tags_list = log_tags_obj + .cast::() + .map_err(|_| PyTypeError::new_err("LogTags must be a list"))?; - let log_tags_obj = log_group_dict - .get_item("LogTags") - .map_err(|_| PyValueError::new_err("LogGroup missing 'LogTags' field"))? - .ok_or_else(|| PyValueError::new_err("LogGroup missing 'LogTags' field"))?; - let log_tags_list = log_tags_obj - .cast::() - .map_err(|_| PyTypeError::new_err("LogTags must be a list"))?; - - let mut log_tags = Vec::with_capacity(log_tags_list.len()); - for tag_item in log_tags_list.iter() { - log_tags.push(parse_log_tag(&tag_item)?); - } + let mut log_tags = Vec::with_capacity(log_tags_list.len()); + for tag_item in log_tags_list.iter() { + log_tags.push(parse_log_tag(&tag_item)?); + } + log_tags + } else { + Vec::new() + }; let topic = if let Ok(Some(topic_obj)) = log_group_dict.get_item("Topic") { extract_optional_string(&topic_obj, "Topic")?.map(Cow::Owned) @@ -326,14 +330,14 @@ where /// Serialize a LogGroup Python dict to protobuf bytes. 
/// /// Args: -/// log_group_dict: A dict containing LogItems, LogTags, Topic, and Source. +/// log_group_dict: A dict that may contain LogItems, LogTags, Topic, and Source. /// /// Returns: /// bytes: The serialized protobuf data. /// /// Raises: /// TypeError: If the input types are incorrect. -/// ValueError: If required fields are missing. +/// ValueError: If a required field is missing from a Log, LogContent, or LogTag entry. #[pyfunction] fn serialize_log_group( py: Python<'_>, @@ -345,14 +349,14 @@ fn serialize_log_group( /// Serialize a LogGroupRaw Python dict to protobuf bytes. /// /// Args: -/// log_group_dict: A dict containing LogItems, LogTags, Topic, and Source. +/// log_group_dict: A dict that may contain LogItems, LogTags, Topic, and Source. /// /// Returns: /// bytes: The serialized protobuf data. /// /// Raises: /// TypeError: If the input types are incorrect. -/// ValueError: If required fields are missing. +/// ValueError: If a required field is missing from a Log, LogContent, or LogTag entry. #[pyfunction] fn serialize_log_group_raw( py: Python<'_>, diff --git a/tests/test_error.py b/tests/test_error.py index 5cc654d..21b8e1f 100644 --- a/tests/test_error.py +++ b/tests/test_error.py @@ -1,4 +1,3 @@ - import pytest import sys import os @@ -12,20 +11,6 @@ import logs_pb2 -def test_missing_log_items(): - """Test error when LogItems field is missing.""" - log_group_dict = {"LogTags": [], "Topic": "", "Source": ""} - - with pytest.raises(ValueError, match="LogGroup missing 'LogItems' field"): - aliyun_log_fastpb.serialize_log_group(log_group_dict) - -def test_missing_log_tags(): - """Test error when LogTags field is missing.""" - log_group_dict = {"LogItems": [], "Topic": "", "Source": ""} - - with pytest.raises(ValueError, match="LogGroup missing 'LogTags' field"): - aliyun_log_fastpb.serialize_log_group(log_group_dict) - def test_log_items_not_list(): """Test error when LogItems is not a list.""" log_group_dict = { @@ -38,6 +23,7 @@ def test_log_items_not_list(): with 
pytest.raises(TypeError, match="LogItems must be a list"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_log_tags_not_list(): """Test error when LogTags is not a list.""" log_group_dict = { @@ -50,6 +36,7 @@ def test_log_tags_not_list(): with pytest.raises(TypeError, match="LogTags must be a list"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_log_not_dict(): """Test error when Log item is not a dict.""" log_group_dict = { @@ -62,6 +49,7 @@ def test_log_not_dict(): with pytest.raises(TypeError, match="Log must be a dict"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_log_missing_time(): """Test error when Log is missing Time field.""" log_group_dict = { @@ -74,6 +62,7 @@ def test_log_missing_time(): with pytest.raises(ValueError, match="Log missing 'Time' field"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_log_missing_contents(): """Test error when Log is missing Contents field.""" log_group_dict = { @@ -86,6 +75,7 @@ def test_log_missing_contents(): with pytest.raises(ValueError, match="Log missing 'Contents' field"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_contents_not_list(): """Test error when Contents is not a list.""" log_group_dict = { @@ -98,6 +88,7 @@ def test_contents_not_list(): with pytest.raises(TypeError, match="Contents must be a list"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_content_not_dict(): """Test error when Content item is not a dict.""" log_group_dict = { @@ -110,6 +101,7 @@ def test_content_not_dict(): with pytest.raises(TypeError, match="Content must be a dict"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_content_missing_key(): """Test error when Content is missing Key field.""" log_group_dict = { @@ -122,6 +114,7 @@ def test_content_missing_key(): with pytest.raises(ValueError, match="LogContent missing 'Key' field"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def 
test_content_missing_value(): """Test error when Content is missing Value field.""" log_group_dict = { @@ -134,6 +127,7 @@ def test_content_missing_value(): with pytest.raises(ValueError, match="LogContent missing 'Value' field"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_tag_not_dict(): """Test error when Tag item is not a dict.""" log_group_dict = { @@ -146,6 +140,7 @@ def test_tag_not_dict(): with pytest.raises(TypeError, match="LogTag must be a dict"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_tag_missing_key(): """Test error when Tag is missing Key field.""" log_group_dict = { @@ -158,6 +153,7 @@ def test_tag_missing_key(): with pytest.raises(ValueError, match="LogTag missing 'Key' field"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_tag_missing_value(): """Test error when Tag is missing Value field.""" log_group_dict = { @@ -170,6 +166,7 @@ def test_tag_missing_value(): with pytest.raises(ValueError, match="LogTag missing 'Value' field"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_invalid_time_type(): """Test error when Time is wrong type.""" log_group_dict = { @@ -182,6 +179,7 @@ def test_invalid_time_type(): with pytest.raises(TypeError, match="Value must be an unsigned integer"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_invalid_time_ns_type(): """Test error when TimeNs is wrong type.""" log_group_dict = { @@ -194,6 +192,7 @@ def test_invalid_time_ns_type(): with pytest.raises(TypeError, match="Value must be an unsigned integer"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_invalid_topic_type(): """Test error when Topic is wrong type.""" log_group_dict = {"LogItems": [], "LogTags": [], "Topic": 123, "Source": ""} @@ -201,6 +200,7 @@ def test_invalid_topic_type(): with pytest.raises(TypeError, match="Value must be a string"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_invalid_source_type(): """Test error 
when Source is wrong type.""" log_group_dict = {"LogItems": [], "LogTags": [], "Topic": "", "Source": 123} @@ -208,6 +208,7 @@ def test_invalid_source_type(): with pytest.raises(TypeError, match="Value must be a string"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_invalid_content_key_type(): """Test error when Content Key is wrong type.""" log_group_dict = { @@ -220,6 +221,7 @@ def test_invalid_content_key_type(): with pytest.raises(TypeError, match="Value must be a string"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_invalid_content_value_type(): """Test error when Content Value is wrong type.""" log_group_dict = { @@ -232,13 +234,14 @@ def test_invalid_content_value_type(): with pytest.raises(TypeError, match="Value must be a string"): aliyun_log_fastpb.serialize_log_group(log_group_dict) + def test_not_dict_input(): """Test error when input is not a dict.""" with pytest.raises(Exception): # Will raise some exception aliyun_log_fastpb.serialize_log_group("not a dict") + def test_none_input(): """Test error when input is None.""" with pytest.raises(Exception): # Will raise some exception aliyun_log_fastpb.serialize_log_group(None) - diff --git a/tests/test_large_dtaa.py b/tests/test_large_dtaa.py index a82f21b..b04b25c 100644 --- a/tests/test_large_dtaa.py +++ b/tests/test_large_dtaa.py @@ -36,6 +36,7 @@ def test_many_logs(): assert pb_log_group.logs[0].time == 1000 assert pb_log_group.logs[-1].time == 1000 + num_logs - 1 + def test_many_contents_per_log(): """Test Log with many content entries.""" num_contents = 100 @@ -59,6 +60,7 @@ def test_many_contents_per_log(): assert len(pb_log_group.logs[0].contents) == num_contents + def test_many_tags(): """Test LogGroup with many tags.""" num_tags = 100 @@ -75,6 +77,7 @@ def test_many_tags(): assert len(pb_log_group.log_tags) == num_tags + def test_long_strings(): """Test with very long string values.""" long_string = "x" * 10000 @@ -100,6 +103,7 @@ def test_long_strings(): 
assert pb_log_group.topic == long_string assert pb_log_group.source == long_string + def test_realistic_scenario(): """Test realistic logging scenario with 1000 logs.""" # 1000 logs, 5 tags, 15 fields per log diff --git a/tests/test_missing_fields.py b/tests/test_missing_fields.py new file mode 100644 index 0000000..8644337 --- /dev/null +++ b/tests/test_missing_fields.py @@ -0,0 +1,146 @@ +""" +Test handling of missing LogItems and LogTags fields. + +These fields should be treated as empty arrays when not present. +""" + +import pytest +import aliyun_log_fastpb + +try: + from . import logs_pb2 +except ImportError: + import logs_pb2 + + +def test_missing_log_items(): + """Test LogGroup without LogItems field - should default to empty array.""" + log_group_dict = { + "LogTags": [{"Key": "tag1", "Value": "value1"}], + "Topic": "test-topic", + "Source": "test-source", + } + + rust_bytes = aliyun_log_fastpb.serialize_log_group(log_group_dict) + + pb_log_group = logs_pb2.LogGroup() + pb_log_group.ParseFromString(rust_bytes) + + assert len(pb_log_group.logs) == 0 + assert len(pb_log_group.log_tags) == 1 + assert pb_log_group.topic == "test-topic" + assert pb_log_group.source == "test-source" + + +def test_missing_log_tags(): + """Test LogGroup without LogTags field - should default to empty array.""" + log_group_dict = { + "LogItems": [ + { + "Time": 1000, + "Contents": [{"Key": "test", "Value": "data"}], + } + ], + "Topic": "test-topic", + "Source": "test-source", + } + + rust_bytes = aliyun_log_fastpb.serialize_log_group(log_group_dict) + + pb_log_group = logs_pb2.LogGroup() + pb_log_group.ParseFromString(rust_bytes) + + assert len(pb_log_group.logs) == 1 + assert len(pb_log_group.log_tags) == 0 + assert pb_log_group.topic == "test-topic" + assert pb_log_group.source == "test-source" + + +def test_missing_both_log_items_and_log_tags(): + """Test LogGroup without both LogItems and LogTags fields.""" + log_group_dict = { + "Topic": "test-topic", + "Source": 
"test-source", + } + + rust_bytes = aliyun_log_fastpb.serialize_log_group(log_group_dict) + + pb_log_group = logs_pb2.LogGroup() + pb_log_group.ParseFromString(rust_bytes) + + assert len(pb_log_group.logs) == 0 + assert len(pb_log_group.log_tags) == 0 + assert pb_log_group.topic == "test-topic" + assert pb_log_group.source == "test-source" + + +def test_missing_all_optional_fields(): + """Test LogGroup with only empty dict - should produce minimal valid protobuf.""" + log_group_dict = {} + + rust_bytes = aliyun_log_fastpb.serialize_log_group(log_group_dict) + + pb_log_group = logs_pb2.LogGroup() + pb_log_group.ParseFromString(rust_bytes) + + assert len(pb_log_group.logs) == 0 + assert len(pb_log_group.log_tags) == 0 + assert not pb_log_group.HasField("topic") + assert not pb_log_group.HasField("source") + + +def test_missing_log_items_raw(): + """Test LogGroupRaw without LogItems field.""" + log_group_dict = { + "LogTags": [{"Key": "tag1", "Value": "value1"}], + "Topic": "test-topic", + } + + rust_bytes = aliyun_log_fastpb.serialize_log_group_raw(log_group_dict) + + pb_log_group = logs_pb2.LogGroupRaw() + pb_log_group.ParseFromString(rust_bytes) + + assert len(pb_log_group.logs) == 0 + assert len(pb_log_group.log_tags) == 1 + + +def test_missing_log_tags_raw(): + """Test LogGroupRaw without LogTags field.""" + log_group_dict = { + "LogItems": [ + { + "Time": 2000, + "Contents": [{"Key": "binary", "Value": b"data"}], + } + ], + } + + rust_bytes = aliyun_log_fastpb.serialize_log_group_raw(log_group_dict) + + pb_log_group = logs_pb2.LogGroupRaw() + pb_log_group.ParseFromString(rust_bytes) + + assert len(pb_log_group.logs) == 1 + assert len(pb_log_group.log_tags) == 0 + + +def test_mixed_missing_fields(): + """Test various combinations of missing and present fields.""" + test_cases = [ + {"LogItems": []}, + {"LogTags": []}, + {"Topic": "test"}, + {"Source": "test"}, + {"LogItems": [], "Topic": "test"}, + {"LogTags": [], "Source": "test"}, + ] + + for log_group_dict in 
test_cases: + rust_bytes = aliyun_log_fastpb.serialize_log_group(log_group_dict) + + pb_log_group = logs_pb2.LogGroup() + pb_log_group.ParseFromString(rust_bytes) + + # Should succeed without errors + assert pb_log_group is not None