From 20592e6d73ed6d520b4a58bfd8afb3c24e55d983 Mon Sep 17 00:00:00 2001 From: Princy Ballabh Date: Thu, 2 Oct 2025 16:40:51 +0530 Subject: [PATCH 1/2] fix: auto-apply safe defaults for missing fields in load_engine_object --- python/cocoindex/convert.py | 49 ++++++++++++++++ python/cocoindex/tests/test_load_convert.py | 65 +++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/python/cocoindex/convert.py b/python/cocoindex/convert.py index 5e5dfa19..6ade3d9f 100644 --- a/python/cocoindex/convert.py +++ b/python/cocoindex/convert.py @@ -783,9 +783,27 @@ def load_engine_object(expected_type: Any, v: Any) -> Any: # Drop auxiliary discriminator "kind" if present dc_init_kwargs: dict[str, Any] = {} field_types = {f.name: f.type for f in dataclasses.fields(struct_type)} + dataclass_fields = {f.name: f for f in dataclasses.fields(struct_type)} + for name, f_type in field_types.items(): if name in v: dc_init_kwargs[name] = load_engine_object(f_type, v[name]) + else: + # Field is missing from input, check if it has a default or can use auto-default + field = dataclass_fields[name] + if field.default is not dataclasses.MISSING: + # Field has an explicit default value + dc_init_kwargs[name] = field.default + elif field.default_factory is not dataclasses.MISSING: + # Field has a default factory + dc_init_kwargs[name] = field.default_factory() + else: + # No explicit default, try to get auto-default + type_info = analyze_type_info(f_type) + auto_default, is_supported = _get_auto_default_for_type(type_info) + if is_supported: + dc_init_kwargs[name] = auto_default + # If not supported, skip the field (let dataclass constructor handle the error) return struct_type(**dc_init_kwargs) elif is_namedtuple_type(struct_type): if not isinstance(v, Mapping): @@ -793,11 +811,25 @@ def load_engine_object(expected_type: Any, v: Any) -> Any: # Dict format (from dump/load functions) annotations = getattr(struct_type, "__annotations__", {}) field_names = list(getattr(struct_type, "_fields", ())) + field_defaults = getattr(struct_type, "_field_defaults", {}) nt_init_kwargs: dict[str, Any] = {} + for name in field_names: f_type = annotations.get(name, Any) if name in v: nt_init_kwargs[name] = load_engine_object(f_type, v[name]) + else: + # Field is missing from input, check if it has a default or can use auto-default + if name in field_defaults: + # Field has an explicit default value + nt_init_kwargs[name] = field_defaults[name] + else: + # No explicit default, try to get auto-default + type_info = analyze_type_info(f_type) + auto_default, is_supported = _get_auto_default_for_type(type_info) + if is_supported: + nt_init_kwargs[name] = auto_default + # If not supported, skip the field (let NamedTuple constructor handle the error) return struct_type(**nt_init_kwargs) elif is_pydantic_model(struct_type): if not isinstance(v, Mapping): @@ -812,9 +844,26 @@ def load_engine_object(expected_type: Any, v: Any) -> Any: field_types = { name: field.annotation for name, field in model_fields.items() } + for name, f_type in field_types.items(): if name in v: pydantic_init_kwargs[name] = load_engine_object(f_type, v[name]) + else: + # Field is missing from input, check if it has a default or can use auto-default + field = model_fields[name] + if hasattr(field, "default") and field.default is not ...: # ... is Pydantic's sentinel for no default + # Field has an explicit default value + pydantic_init_kwargs[name] = field.default + elif hasattr(field, "default_factory") and field.default_factory is not None: + # Field has a default factory + pydantic_init_kwargs[name] = field.default_factory() + else: + # No explicit default, try to get auto-default + type_info = analyze_type_info(f_type) + auto_default, is_supported = _get_auto_default_for_type(type_info) + if is_supported: + pydantic_init_kwargs[name] = auto_default + # If not supported, skip the field (let Pydantic constructor handle the error) return struct_type(**pydantic_init_kwargs) return v diff --git a/python/cocoindex/tests/test_load_convert.py b/python/cocoindex/tests/test_load_convert.py index 14a18082..e04f91d1 100644 --- a/python/cocoindex/tests/test_load_convert.py +++ b/python/cocoindex/tests/test_load_convert.py @@ -116,3 +116,68 @@ def test_namedtuple_roundtrip_via_dump_load() -> None: loaded = load_engine_object(LocalPoint, dumped) assert isinstance(loaded, LocalPoint) assert loaded == p + + +def test_dataclass_missing_fields_with_auto_defaults() -> None: + """Test that missing fields are automatically assigned safe default values.""" + + @dataclasses.dataclass + class TestClass: + required_field: str + optional_field: str | None # Should get None + list_field: list[str] # Should get [] + dict_field: dict[str, int] # Should get {} + explicit_default: str = "default" # Should use explicit default + + # Input missing optional_field, list_field, dict_field (but has explicit_default via class definition) + input_data = {"required_field": "test_value"} + + loaded = load_engine_object(TestClass, input_data) + + assert isinstance(loaded, TestClass) + assert loaded.required_field == "test_value" + assert loaded.optional_field is None # Auto-default for Optional + assert loaded.list_field == [] # Auto-default for list + assert loaded.dict_field == {} # Auto-default for dict + assert loaded.explicit_default == "default" # Explicit default from class + + +def test_namedtuple_missing_fields_with_auto_defaults() -> None: + """Test that missing fields in NamedTuple are automatically assigned safe default values.""" + from typing import NamedTuple + + class TestTuple(NamedTuple): + required_field: str + optional_field: str | None # Should get None + list_field: list[str] # Should get [] + dict_field: dict[str, int] # Should get {} + + # Input missing optional_field, list_field, dict_field + input_data = {"required_field": "test_value"} + + loaded = load_engine_object(TestTuple, input_data) + + assert isinstance(loaded, TestTuple) + assert loaded.required_field == "test_value" + assert loaded.optional_field is None # Auto-default for Optional + assert loaded.list_field == [] # Auto-default for list + assert loaded.dict_field == {} # Auto-default for dict + + +def test_dataclass_unsupported_type_still_fails() -> None: + """Test that fields with unsupported types still cause errors when missing.""" + + @dataclasses.dataclass + class TestClass: + required_field1: str + required_field2: int # No auto-default for int + + # Input missing required_field2 which has no safe auto-default + input_data = {"required_field1": "test_value"} + + # Should still raise an error because int has no safe auto-default + try: + load_engine_object(TestClass, input_data) + assert False, "Expected TypeError to be raised" + except TypeError: + pass # Expected behavior From 23b0bcc08855f138367b65036da191a84b21e69d Mon Sep 17 00:00:00 2001 From: Princy Ballabh Date: Thu, 2 Oct 2025 23:40:14 +0530 Subject: [PATCH 2/2] style: apply pre-commit formatting to modified files --- python/cocoindex/convert.py | 27 ++++++++++----- python/cocoindex/tests/test_load_convert.py | 38 ++++++++++----------- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/python/cocoindex/convert.py b/python/cocoindex/convert.py index 6ade3d9f..f4f60dec 100644 --- a/python/cocoindex/convert.py +++ b/python/cocoindex/convert.py @@ -784,7 +784,7 @@ def load_engine_object(expected_type: Any, v: Any) -> Any: dc_init_kwargs: dict[str, Any] = {} field_types = {f.name: f.type for f in dataclasses.fields(struct_type)} dataclass_fields = {f.name: f for f in dataclasses.fields(struct_type)} - + for name, f_type in field_types.items(): if name in v: dc_init_kwargs[name] = load_engine_object(f_type, v[name]) @@ -800,7 +800,9 @@ def load_engine_object(expected_type: Any, v: Any) -> Any: else: # No explicit default, try to get auto-default type_info = analyze_type_info(f_type) - auto_default, is_supported = _get_auto_default_for_type(type_info) + auto_default, is_supported = _get_auto_default_for_type( + type_info + ) if is_supported: dc_init_kwargs[name] = auto_default # If not supported, skip the field (let dataclass constructor handle the error) @@ -813,7 +815,7 @@ def load_engine_object(expected_type: Any, v: Any) -> Any: field_names = list(getattr(struct_type, "_fields", ())) field_defaults = getattr(struct_type, "_field_defaults", {}) nt_init_kwargs: dict[str, Any] = {} - + for name in field_names: f_type = annotations.get(name, Any) if name in v: @@ -826,7 +828,9 @@ def load_engine_object(expected_type: Any, v: Any) -> Any: else: # No explicit default, try to get auto-default type_info = analyze_type_info(f_type) - auto_default, is_supported = _get_auto_default_for_type(type_info) + auto_default, is_supported = _get_auto_default_for_type( + type_info + ) if is_supported: nt_init_kwargs[name] = auto_default # If not supported, skip the field (let NamedTuple constructor handle the error) @@ -844,23 +848,30 @@ def load_engine_object(expected_type: Any, v: Any) -> Any: field_types = { name: field.annotation for name, field in model_fields.items() } - + for name, f_type in field_types.items(): if name in v: pydantic_init_kwargs[name] = load_engine_object(f_type, v[name]) else: # Field is missing from input, check if it has a default or can use auto-default field = model_fields[name] - if hasattr(field, "default") and field.default is not ...: # ... is Pydantic's sentinel for no default + if ( + hasattr(field, "default") and field.default is not ... + ): # ... is Pydantic's sentinel for no default # Field has an explicit default value pydantic_init_kwargs[name] = field.default - elif hasattr(field, "default_factory") and field.default_factory is not None: + elif ( + hasattr(field, "default_factory") + and field.default_factory is not None + ): # Field has a default factory pydantic_init_kwargs[name] = field.default_factory() else: # No explicit default, try to get auto-default type_info = analyze_type_info(f_type) - auto_default, is_supported = _get_auto_default_for_type(type_info) + auto_default, is_supported = _get_auto_default_for_type( + type_info + ) if is_supported: pydantic_init_kwargs[name] = auto_default # If not supported, skip the field (let Pydantic constructor handle the error) diff --git a/python/cocoindex/tests/test_load_convert.py b/python/cocoindex/tests/test_load_convert.py index e04f91d1..4018bff1 100644 --- a/python/cocoindex/tests/test_load_convert.py +++ b/python/cocoindex/tests/test_load_convert.py @@ -120,61 +120,61 @@ def test_namedtuple_roundtrip_via_dump_load() -> None: def test_dataclass_missing_fields_with_auto_defaults() -> None: """Test that missing fields are automatically assigned safe default values.""" - + @dataclasses.dataclass class TestClass: required_field: str optional_field: str | None # Should get None - list_field: list[str] # Should get [] + list_field: list[str] # Should get [] dict_field: dict[str, int] # Should get {} explicit_default: str = "default" # Should use explicit default - + # Input missing optional_field, list_field, dict_field (but has explicit_default via class definition) input_data = {"required_field": "test_value"} - + loaded = load_engine_object(TestClass, input_data) - + assert isinstance(loaded, TestClass) assert loaded.required_field == "test_value" - assert loaded.optional_field is None # Auto-default for Optional - assert loaded.list_field == [] # Auto-default for list - assert loaded.dict_field == {} # Auto-default for dict + assert loaded.optional_field is None # Auto-default for Optional + assert loaded.list_field == [] # Auto-default for list + assert loaded.dict_field == {} # Auto-default for dict assert loaded.explicit_default == "default" # Explicit default from class def test_namedtuple_missing_fields_with_auto_defaults() -> None: """Test that missing fields in NamedTuple are automatically assigned safe default values.""" from typing import NamedTuple - + class TestTuple(NamedTuple): required_field: str optional_field: str | None # Should get None - list_field: list[str] # Should get [] + list_field: list[str] # Should get [] dict_field: dict[str, int] # Should get {} - + # Input missing optional_field, list_field, dict_field input_data = {"required_field": "test_value"} - + loaded = load_engine_object(TestTuple, input_data) - + assert isinstance(loaded, TestTuple) assert loaded.required_field == "test_value" - assert loaded.optional_field is None # Auto-default for Optional - assert loaded.list_field == [] # Auto-default for list - assert loaded.dict_field == {} # Auto-default for dict + assert loaded.optional_field is None # Auto-default for Optional + assert loaded.list_field == [] # Auto-default for list + assert loaded.dict_field == {} # Auto-default for dict def test_dataclass_unsupported_type_still_fails() -> None: """Test that fields with unsupported types still cause errors when missing.""" - + @dataclasses.dataclass class TestClass: required_field1: str required_field2: int # No auto-default for int - + # Input missing required_field2 which has no safe auto-default input_data = {"required_field1": "test_value"} - + # Should still raise an error because int has no safe auto-default try: load_engine_object(TestClass, input_data)