diff --git a/python/cocoindex/convert.py b/python/cocoindex/convert.py index 5e5dfa19..f4f60dec 100644 --- a/python/cocoindex/convert.py +++ b/python/cocoindex/convert.py @@ -783,9 +783,29 @@ def load_engine_object(expected_type: Any, v: Any) -> Any: # Drop auxiliary discriminator "kind" if present dc_init_kwargs: dict[str, Any] = {} field_types = {f.name: f.type for f in dataclasses.fields(struct_type)} + dataclass_fields = {f.name: f for f in dataclasses.fields(struct_type)} + for name, f_type in field_types.items(): if name in v: dc_init_kwargs[name] = load_engine_object(f_type, v[name]) + else: + # Field is missing from input, check if it has a default or can use auto-default + field = dataclass_fields[name] + if field.default is not dataclasses.MISSING: + # Field has an explicit default value + dc_init_kwargs[name] = field.default + elif field.default_factory is not dataclasses.MISSING: + # Field has a default factory + dc_init_kwargs[name] = field.default_factory() + else: + # No explicit default, try to get auto-default + type_info = analyze_type_info(f_type) + auto_default, is_supported = _get_auto_default_for_type( + type_info + ) + if is_supported: + dc_init_kwargs[name] = auto_default + # If not supported, skip the field (let dataclass constructor handle the error) return struct_type(**dc_init_kwargs) elif is_namedtuple_type(struct_type): if not isinstance(v, Mapping): @@ -793,11 +813,27 @@ def load_engine_object(expected_type: Any, v: Any) -> Any: # Dict format (from dump/load functions) annotations = getattr(struct_type, "__annotations__", {}) field_names = list(getattr(struct_type, "_fields", ())) + field_defaults = getattr(struct_type, "_field_defaults", {}) nt_init_kwargs: dict[str, Any] = {} + for name in field_names: f_type = annotations.get(name, Any) if name in v: nt_init_kwargs[name] = load_engine_object(f_type, v[name]) + else: + # Field is missing from input, check if it has a default or can use auto-default + if name in field_defaults: + # Field has an explicit default value + nt_init_kwargs[name] = field_defaults[name] + else: + # No explicit default, try to get auto-default + type_info = analyze_type_info(f_type) + auto_default, is_supported = _get_auto_default_for_type( + type_info + ) + if is_supported: + nt_init_kwargs[name] = auto_default + # If not supported, skip the field (let NamedTuple constructor handle the error) return struct_type(**nt_init_kwargs) elif is_pydantic_model(struct_type): if not isinstance(v, Mapping): @@ -812,9 +848,33 @@ def load_engine_object(expected_type: Any, v: Any) -> Any: field_types = { name: field.annotation for name, field in model_fields.items() } + for name, f_type in field_types.items(): if name in v: pydantic_init_kwargs[name] = load_engine_object(f_type, v[name]) + else: + # Field is missing from input, check if it has a default or can use auto-default + field = model_fields[name] + if ( + hasattr(field, "default") and field.default is not ... + ): # ... is Pydantic's sentinel for no default + # Field has an explicit default value + pydantic_init_kwargs[name] = field.default + elif ( + hasattr(field, "default_factory") + and field.default_factory is not None + ): + # Field has a default factory + pydantic_init_kwargs[name] = field.default_factory() + else: + # No explicit default, try to get auto-default + type_info = analyze_type_info(f_type) + auto_default, is_supported = _get_auto_default_for_type( + type_info + ) + if is_supported: + pydantic_init_kwargs[name] = auto_default + # If not supported, skip the field (let Pydantic constructor handle the error) return struct_type(**pydantic_init_kwargs) return v diff --git a/python/cocoindex/tests/test_load_convert.py b/python/cocoindex/tests/test_load_convert.py index 14a18082..4018bff1 100644 --- a/python/cocoindex/tests/test_load_convert.py +++ b/python/cocoindex/tests/test_load_convert.py @@ -116,3 +116,68 @@ def test_namedtuple_roundtrip_via_dump_load() -> None: loaded = load_engine_object(LocalPoint, dumped) assert isinstance(loaded, LocalPoint) assert loaded == p + + +def test_dataclass_missing_fields_with_auto_defaults() -> None: + """Test that missing fields are automatically assigned safe default values.""" + + @dataclasses.dataclass + class TestClass: + required_field: str + optional_field: str | None # Should get None + list_field: list[str] # Should get [] + dict_field: dict[str, int] # Should get {} + explicit_default: str = "default" # Should use explicit default + + # Input missing optional_field, list_field, dict_field (but has explicit_default via class definition) + input_data = {"required_field": "test_value"} + + loaded = load_engine_object(TestClass, input_data) + + assert isinstance(loaded, TestClass) + assert loaded.required_field == "test_value" + assert loaded.optional_field is None # Auto-default for Optional + assert loaded.list_field == [] # Auto-default for list + assert loaded.dict_field == {} # Auto-default for dict + assert loaded.explicit_default == "default" # Explicit default from class + + +def test_namedtuple_missing_fields_with_auto_defaults() -> None: + """Test that missing fields in NamedTuple are automatically assigned safe default values.""" + from typing import NamedTuple + + class TestTuple(NamedTuple): + required_field: str + optional_field: str | None # Should get None + list_field: list[str] # Should get [] + dict_field: dict[str, int] # Should get {} + + # Input missing optional_field, list_field, dict_field + input_data = {"required_field": "test_value"} + + loaded = load_engine_object(TestTuple, input_data) + + assert isinstance(loaded, TestTuple) + assert loaded.required_field == "test_value" + assert loaded.optional_field is None # Auto-default for Optional + assert loaded.list_field == [] # Auto-default for list + assert loaded.dict_field == {} # Auto-default for dict + + +def test_dataclass_unsupported_type_still_fails() -> None: + """Test that fields with unsupported types still cause errors when missing.""" + + @dataclasses.dataclass + class TestClass: + required_field1: str + required_field2: int # No auto-default for int + + # Input missing required_field2 which has no safe auto-default + input_data = {"required_field1": "test_value"} + + # Should still raise an error because int has no safe auto-default + try: + load_engine_object(TestClass, input_data) + assert False, "Expected TypeError to be raised" + except TypeError: + pass # Expected behavior