# File: sdk/python/feast/null_utils.py
"""Utilities for safely checking null/missing values in scalar columns.

The standard ``pd.isnull()`` is vectorized: when given a numpy array
(including an empty one), it returns an array of booleans instead of a
scalar bool. Applying Python's ``not`` operator to such an array raises:

    ValueError: The truth value of an empty array is ambiguous.

These helpers wrap ``pd.isnull()`` to handle array-like values safely.

See: https://github.com/feast-dev/feast/issues/6255
"""

from typing import Any

import numpy as np
import pandas as pd


def is_scalar_null(value: Any) -> bool:
    """Check if a scalar value is null, safely handling array-like values.

    Unlike a bare ``pd.isnull()`` call, the result is always a plain
    ``bool``, even when an array-like object ended up in a scalar column.

    Args:
        value: A scalar value that might be None, NaN, or an array-like
            object (numpy array, list, tuple, ...) that ended up in a
            scalar feature column.

    Returns:
        True if the value should be treated as null/missing, i.e. it is
        ``None``, a null scalar according to ``pd.isnull()``, an empty
        numpy array / list / tuple, or a numpy array / list / tuple that
        contains at least one null element. ``str`` and ``bytes`` values
        (including empty ones) are never considered null.
    """
    # Fast path for common cases.
    if value is None:
        return True
    if isinstance(value, (str, bytes)):
        return False

    # Numpy arrays: an empty array carries no value, so it counts as null.
    if isinstance(value, np.ndarray):
        if value.size == 0:
            return True
        result = pd.isnull(value)
        return bool(result.any()) if hasattr(result, "any") else bool(result)

    # Lists and tuples follow the same rule as arrays: empty means null.
    # Without this check, pd.isnull([]) yields an empty boolean array whose
    # .any() is False, so an empty list would be reported as "not null".
    if isinstance(value, (list, tuple)):
        if not value:
            return True
        if isinstance(value, tuple):
            # Normalize tuples to lists so they go through the same
            # element-wise check as lists below.
            value = list(value)

    # Other sized objects (and the non-empty lists from above).
    if hasattr(value, "__len__"):
        try:
            result = pd.isnull(value)
            if hasattr(result, "any"):
                return bool(result.any())
            return bool(result)
        except (ValueError, TypeError):
            # Best effort: a value that cannot be inspected is not null.
            return False

    # Plain scalar.
    try:
        return bool(pd.isnull(value))
    except (ValueError, TypeError):
        return False


# File: sdk/python/tests/unit/test_null_utils.py
#
# Tests for feast.null_utils — safe null checking for scalar columns.
#
# Reproduces the crash from https://github.com/feast-dev/feast/issues/6255
# and verifies the fix handles all edge cases.
#
# In the repository this test module obtains ``is_scalar_null`` via
# ``from feast.null_utils import is_scalar_null``; in this combined listing
# the function is defined above.

import pytest  # noqa: E402,F401  (imported by the original test module)


class TestIsScalarNull:
    """Tests for is_scalar_null."""

    def test_none_is_null(self):
        assert is_scalar_null(None) is True

    def test_nan_is_null(self):
        assert is_scalar_null(float("nan")) is True

    def test_np_nan_is_null(self):
        assert is_scalar_null(np.nan) is True

    def test_empty_numpy_array_is_null(self):
        """This is the exact crash scenario from issue #6255."""
        assert is_scalar_null(np.array([])) is True

    def test_numpy_array_with_nan_is_null(self):
        assert is_scalar_null(np.array([np.nan])) is True

    def test_numpy_array_with_values_is_not_null(self):
        assert is_scalar_null(np.array([1.0, 2.0])) is False

    def test_int_is_not_null(self):
        assert is_scalar_null(42) is False

    def test_zero_is_not_null(self):
        assert is_scalar_null(0) is False

    def test_float_is_not_null(self):
        assert is_scalar_null(3.14) is False

    def test_string_is_not_null(self):
        assert is_scalar_null("hello") is False

    def test_empty_string_is_not_null(self):
        assert is_scalar_null("") is False

    def test_bytes_is_not_null(self):
        assert is_scalar_null(b"data") is False

    def test_bool_true_is_not_null(self):
        assert is_scalar_null(True) is False

    def test_bool_false_is_not_null(self):
        assert is_scalar_null(False) is False

    def test_np_bool_is_not_null(self):
        assert is_scalar_null(np.bool_(True)) is False

    def test_empty_list_is_null(self):
        """Empty list in a scalar column should be treated as null."""
        assert is_scalar_null([]) is True

    def test_list_with_values_is_not_null(self):
        assert is_scalar_null([1, 2, 3]) is False

    def test_empty_tuple_is_null(self):
        """Empty tuple is treated like an empty list or array."""
        assert is_scalar_null(()) is True

    def test_tuple_with_nan_is_null(self):
        assert is_scalar_null((float("nan"),)) is True