From 62f947a3bd7bd34c0809e53aa5d5aa2592e1ea06 Mon Sep 17 00:00:00 2001 From: Siddhesh Khairnar Date: Fri, 24 Apr 2026 23:49:48 +0530 Subject: [PATCH 1/5] fix: Handle numpy ndarray in Array(String) materialization Signed-off-by: Siddhesh Khairnar --- sdk/python/feast/type_map.py | 36 ++++++++++++-- sdk/python/tests/unit/test_type_map.py | 65 ++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 001cb1d5b62..383714b82b6 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -694,7 +694,7 @@ def _validate_collection_item_types( """ if sample is None: return - if all(type(item) in valid_types for item in sample): + if all(item is None or type(item) in valid_types for item in sample): return # to_numpy() upcasts INT32/INT64 with NULL to Float64 automatically @@ -863,6 +863,38 @@ def _convert_list_values_to_proto( ] raise _type_err(sample, valid_types[0]) + # Arrow/Athena may deserialize array columns as numpy.ndarray with + # object dtype instead of plain Python lists. Normalise every value + # to a Python list so that protobuf constructors accept them, and + # replace None elements with a type-appropriate default (protobuf + # repeated fields do not accept None). + _LIST_NONE_DEFAULTS: Dict[ValueType, Any] = { + ValueType.STRING_LIST: "", + ValueType.BYTES_LIST: b"", + ValueType.INT32_LIST: 0, + ValueType.INT64_LIST: 0, + ValueType.FLOAT_LIST: 0.0, + ValueType.DOUBLE_LIST: 0.0, + ValueType.BOOL_LIST: False, + ValueType.UNIX_TIMESTAMP_LIST: 0, + ValueType.UUID_LIST: "", + ValueType.TIME_UUID_LIST: "", + ValueType.DECIMAL_LIST: "", + } + none_default = _LIST_NONE_DEFAULTS.get(feast_value_type) + + def _sanitize(value: Any) -> Any: + """Convert ndarray to list and replace None elements.""" + if isinstance(value, np.ndarray): + value = value.tolist() + if none_default is not None and isinstance(value, list): + value = [none_default if v is None else v for v in value] + return value + + values = [_sanitize(v) if v is not None else v for v in values] + if sample is not None: + sample = _sanitize(sample) + # Validate item types using shared helper _validate_collection_item_types(sample, valid_types, feast_value_type) @@ -875,7 +907,6 @@ def _convert_list_values_to_proto( return _convert_bool_collection_to_proto(values, field_name, proto_type) if feast_value_type in (ValueType.UUID_LIST, ValueType.TIME_UUID_LIST): - # uuid.UUID objects must be converted to str for StringList proto. return [ ( ProtoValue( @@ -888,7 +919,6 @@ def _convert_list_values_to_proto( ] if feast_value_type == ValueType.DECIMAL_LIST: - # decimal.Decimal objects must be converted to str for StringList proto. return [ ( ProtoValue( diff --git a/sdk/python/tests/unit/test_type_map.py b/sdk/python/tests/unit/test_type_map.py index 4f87aa46f19..b1395262044 100644 --- a/sdk/python/tests/unit/test_type_map.py +++ b/sdk/python/tests/unit/test_type_map.py @@ -1953,3 +1953,68 @@ def test_non_empty_array_treated_as_null_unix_timestamp(self): "non-empty array in UNIX_TIMESTAMP scalar column should produce null" ) assert result[1].unix_timestamp_val == int(ts.timestamp()) + + +class TestNdarrayListConversion: + """Regression tests for https://github.com/feast-dev/feast/issues/6325 + Arrow/Athena deserializes Array(String) columns as numpy.ndarray with + object dtype instead of plain Python lists. Ensure these are converted + to proto without raising ValueError or TypeError. + """ + + def test_ndarray_string_list_roundtrip(self): + """ndarray of strings converts to STRING_LIST proto and back.""" + values = [np.array(["tag1", "tag2"], dtype=object)] + protos = python_values_to_proto_values(values, ValueType.STRING_LIST) + converted = feast_value_type_to_python_type(protos[0]) + assert converted == ["tag1", "tag2"] + + def test_ndarray_string_list_with_none_elements(self): + """None elements inside an ndarray are replaced with empty string.""" + values = [np.array(["tag1", None, "tag3"], dtype=object)] + protos = python_values_to_proto_values(values, ValueType.STRING_LIST) + converted = feast_value_type_to_python_type(protos[0]) + assert converted == ["tag1", "", "tag3"] + + def test_ndarray_empty_string_list(self): + """An empty ndarray in a list column produces an empty ProtoValue (null).""" + from feast.protos.feast.types.Value_pb2 import Value as ProtoValue + + values = [np.array([], dtype=object)] + protos = python_values_to_proto_values(values, ValueType.STRING_LIST) + assert protos[0] == ProtoValue() + + def test_ndarray_string_list_mixed_batch(self): + """Batch with populated ndarray, None, and empty ndarray.""" + from feast.protos.feast.types.Value_pb2 import Value as ProtoValue + + values = [ + np.array(["a", "b"], dtype=object), + None, + np.array([], dtype=object), + ] + protos = python_values_to_proto_values(values, ValueType.STRING_LIST) + assert feast_value_type_to_python_type(protos[0]) == ["a", "b"] + assert protos[1] == ProtoValue() + assert protos[2] == ProtoValue() + + def test_ndarray_int64_list_roundtrip(self): + """ndarray of ints converts to INT64_LIST proto and back.""" + values = [np.array([1, 2, 3], dtype=object)] + protos = python_values_to_proto_values(values, ValueType.INT64_LIST) + converted = feast_value_type_to_python_type(protos[0]) + assert converted == [1, 2, 3] + + def test_ndarray_double_list_with_none_elements(self): + """None elements in a DOUBLE_LIST ndarray are replaced with 0.0.""" + values = [np.array([1.5, None, 3.5], dtype=object)] + protos = python_values_to_proto_values(values, ValueType.DOUBLE_LIST) + converted = feast_value_type_to_python_type(protos[0]) + assert converted == [1.5, 0.0, 3.5] + + def test_ndarray_bool_list_roundtrip(self): + """ndarray of bools converts to BOOL_LIST proto and back.""" + values = [np.array([True, False, True], dtype=object)] + protos = python_values_to_proto_values(values, ValueType.BOOL_LIST) + converted = feast_value_type_to_python_type(protos[0]) + assert converted == [True, False, True] From e754d58271f3f9938b72244b1782c31b0cc5803c Mon Sep 17 00:00:00 2001 From: Siddhesh Khairnar Date: Sat, 25 Apr 2026 00:05:03 +0530 Subject: [PATCH 2/5] fix: Use NULL_TIMESTAMP_INT_VALUE for UNIX_TIMESTAMP_LIST default Signed-off-by: Siddhesh Khairnar --- sdk/python/feast/type_map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 383714b82b6..15f6f00cefa 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -876,7 +876,7 @@ def _convert_list_values_to_proto( ValueType.FLOAT_LIST: 0.0, ValueType.DOUBLE_LIST: 0.0, ValueType.BOOL_LIST: False, - ValueType.UNIX_TIMESTAMP_LIST: 0, + ValueType.UNIX_TIMESTAMP_LIST: NULL_TIMESTAMP_INT_VALUE, ValueType.UUID_LIST: "", ValueType.TIME_UUID_LIST: "", ValueType.DECIMAL_LIST: "", From c4b639851e891dff7f2251611e78a013f95eb80e Mon Sep 17 00:00:00 2001 From: Siddhesh Khairnar Date: Sat, 25 Apr 2026 23:57:27 +0530 Subject: [PATCH 3/5] fix: correct test expectations for empty ndarray list conversion Signed-off-by: Siddhesh Khairnar --- sdk/python/tests/unit/test_type_map.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sdk/python/tests/unit/test_type_map.py b/sdk/python/tests/unit/test_type_map.py index b1395262044..73848113ddc 100644 --- a/sdk/python/tests/unit/test_type_map.py +++ b/sdk/python/tests/unit/test_type_map.py @@ -1977,12 +1977,11 @@ def test_ndarray_string_list_with_none_elements(self): assert converted == ["tag1", "", "tag3"] def test_ndarray_empty_string_list(self): - """An empty ndarray in a list column produces an empty ProtoValue (null).""" - from feast.protos.feast.types.Value_pb2 import Value as ProtoValue - + """An empty ndarray in a list column round-trips as an empty list.""" values = [np.array([], dtype=object)] protos = python_values_to_proto_values(values, ValueType.STRING_LIST) - assert protos[0] == ProtoValue() + converted = feast_value_type_to_python_type(protos[0]) + assert converted == [] def test_ndarray_string_list_mixed_batch(self): """Batch with populated ndarray, None, and empty ndarray.""" @@ -1996,7 +1995,7 @@ def test_ndarray_string_list_mixed_batch(self): protos = python_values_to_proto_values(values, ValueType.STRING_LIST) assert feast_value_type_to_python_type(protos[0]) == ["a", "b"] assert protos[1] == ProtoValue() - assert protos[2] == ProtoValue() + assert feast_value_type_to_python_type(protos[2]) == [] def test_ndarray_int64_list_roundtrip(self): """ndarray of ints converts to INT64_LIST proto and back.""" From c77302a3bfb1d75e8771e1087cfb95f27c770749 Mon Sep 17 00:00:00 2001 From: Siddhesh Khairnar Date: Sun, 26 Apr 2026 00:10:59 +0530 Subject: [PATCH 4/5] fix: null semantics for empty ndarray and revert set-type validation change Signed-off-by: Siddhesh Khairnar --- sdk/python/feast/type_map.py | 4 +++- sdk/python/tests/unit/test_type_map.py | 9 +++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 15f6f00cefa..a2b31bfcddb 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -694,7 +694,7 @@ def _validate_collection_item_types( """ if sample is None: return - if all(item is None or type(item) in valid_types for item in sample): + if all(type(item) in valid_types for item in sample): return # to_numpy() upcasts INT32/INT64 with NULL to Float64 automatically @@ -887,6 +887,8 @@ def _sanitize(value: Any) -> Any: """Convert ndarray to list and replace None elements.""" if isinstance(value, np.ndarray): value = value.tolist() + if isinstance(value, list) and len(value) == 0: + return None if none_default is not None and isinstance(value, list): value = [none_default if v is None else v for v in value] return value diff --git a/sdk/python/tests/unit/test_type_map.py b/sdk/python/tests/unit/test_type_map.py index 73848113ddc..b1395262044 100644 --- a/sdk/python/tests/unit/test_type_map.py +++ b/sdk/python/tests/unit/test_type_map.py @@ -1977,11 +1977,12 @@ def test_ndarray_string_list_with_none_elements(self): assert converted == ["tag1", "", "tag3"] def test_ndarray_empty_string_list(self): - """An empty ndarray in a list column round-trips as an empty list.""" + """An empty ndarray in a list column produces an empty ProtoValue (null).""" + from feast.protos.feast.types.Value_pb2 import Value as ProtoValue + values = [np.array([], dtype=object)] protos = python_values_to_proto_values(values, ValueType.STRING_LIST) - converted = feast_value_type_to_python_type(protos[0]) - assert converted == [] + assert protos[0] == ProtoValue() def test_ndarray_string_list_mixed_batch(self): """Batch with populated ndarray, None, and empty ndarray.""" @@ -1995,7 +1996,7 @@ def test_ndarray_string_list_mixed_batch(self): protos = python_values_to_proto_values(values, ValueType.STRING_LIST) assert feast_value_type_to_python_type(protos[0]) == ["a", "b"] assert protos[1] == ProtoValue() - assert feast_value_type_to_python_type(protos[2]) == [] + assert protos[2] == ProtoValue() def test_ndarray_int64_list_roundtrip(self): """ndarray of ints converts to INT64_LIST proto and back.""" From e6c69d5f38059c5fae0227fc82fa13ef70818505 Mon Sep 17 00:00:00 2001 From: Siddhesh Khairnar Date: Sun, 26 Apr 2026 00:21:17 +0530 Subject: [PATCH 5/5] fix: only convert empty ndarray to null, preserve empty Python lists Signed-off-by: Siddhesh Khairnar --- sdk/python/feast/type_map.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index a2b31bfcddb..5b42cc3bc78 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -887,8 +887,8 @@ def _sanitize(value: Any) -> Any: """Convert ndarray to list and replace None elements.""" if isinstance(value, np.ndarray): value = value.tolist() - if isinstance(value, list) and len(value) == 0: - return None + if isinstance(value, list) and len(value) == 0: + return None if none_default is not None and isinstance(value, list): value = [none_default if v is None else v for v in value] return value