diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py index c3b5ae74d6..a8c0fdf4ee 100644 --- a/pyiceberg/expressions/__init__.py +++ b/pyiceberg/expressions/__init__.py @@ -696,7 +696,7 @@ def __new__( # type: ignore # pylint: disable=W0221 if count == 0: return AlwaysFalse() elif count == 1: - return EqualTo(term, next(iter(literals))) # type: ignore + return EqualTo(term, next(iter(literals))) else: return super().__new__(cls) diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py index 921e24e29f..0847f19c84 100644 --- a/pyiceberg/expressions/literals.py +++ b/pyiceberg/expressions/literals.py @@ -30,7 +30,9 @@ from typing import Any, Generic, Type from uuid import UUID -from pyiceberg.typedef import L +from pydantic import Field, model_serializer + +from pyiceberg.typedef import IcebergRootModel, L from pyiceberg.types import ( BinaryType, BooleanType, @@ -52,7 +54,9 @@ date_str_to_days, date_to_days, datetime_to_micros, + days_to_date, micros_to_days, + micros_to_timestamp, time_str_to_micros, time_to_micros, timestamp_to_micros, @@ -64,21 +68,24 @@ UUID_BYTES_LENGTH = 16 -class Literal(Generic[L], ABC): +class Literal(IcebergRootModel[L], Generic[L], ABC): # type: ignore """Literal which has a value and can be converted between types.""" - _value: L + root: L = Field() + + def __init__(self, value: L, value_type: Type[L], /, **data): # type: ignore + if value is None: + raise TypeError("Invalid literal value: None") - def __init__(self, value: L, value_type: Type[L]): + super().__init__(value) if value is None or not isinstance(value, value_type): raise TypeError(f"Invalid literal value: {value!r} (not a {value_type})") if isinstance(value, float) and isnan(value): raise ValueError("Cannot create expression literal from NaN.") - self._value = value @property def value(self) -> L: - return self._value + return self.root @singledispatchmethod @abstractmethod @@ -136,7 +143,7 @@ def literal(value: L) -> Literal[L]: LongLiteral(123) """ if isinstance(value, float): - return DoubleLiteral(value) # type: ignore + return DoubleLiteral(value) elif isinstance(value, bool): return BooleanLiteral(value) elif isinstance(value, int): @@ -144,17 +151,17 @@ def literal(value: L) -> Literal[L]: elif isinstance(value, str): return StringLiteral(value) elif isinstance(value, UUID): - return UUIDLiteral(value.bytes) # type: ignore + return UUIDLiteral(value.bytes) elif isinstance(value, bytes): return BinaryLiteral(value) elif isinstance(value, Decimal): return DecimalLiteral(value) elif isinstance(value, datetime): - return TimestampLiteral(datetime_to_micros(value)) # type: ignore + return TimestampLiteral(datetime_to_micros(value)) elif isinstance(value, date): - return DateLiteral(date_to_days(value)) # type: ignore + return DateLiteral(date_to_days(value)) elif isinstance(value, time): - return TimeLiteral(time_to_micros(value)) # type: ignore + return TimeLiteral(time_to_micros(value)) else: raise TypeError(f"Invalid literal value: {repr(value)}") @@ -411,6 +418,10 @@ class DateLiteral(Literal[int]): def __init__(self, value: int) -> None: super().__init__(value, int) + @model_serializer + def ser_model(self) -> date: + return days_to_date(self.root) + def increment(self) -> Literal[int]: return DateLiteral(self.value + 1) @@ -443,6 +454,10 @@ class TimestampLiteral(Literal[int]): def __init__(self, value: int) -> None: super().__init__(value, int) + @model_serializer + def ser_model(self) -> str: + return micros_to_timestamp(self.root).isoformat() + def increment(self) -> Literal[int]: return TimestampLiteral(self.value + 1) @@ -635,6 +650,10 @@ class UUIDLiteral(Literal[bytes]): def __init__(self, value: bytes) -> None: super().__init__(value, bytes) + @model_serializer + def ser_model(self) -> UUID: + return UUID(bytes=self.root) + @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert UUIDLiteral into {type_var}") @@ -661,6 +680,10 @@ class FixedLiteral(Literal[bytes]): def __init__(self, value: bytes) -> None: super().__init__(value, bytes) + @model_serializer + def ser_model(self) -> str: + return self.root.hex() + @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert FixedLiteral into {type_var}") @@ -692,6 +715,10 @@ class BinaryLiteral(Literal[bytes]): def __init__(self, value: bytes) -> None: super().__init__(value, bytes) + @model_serializer + def ser_model(self) -> str: + return self.root.hex() + @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert BinaryLiteral into {type_var}") diff --git a/tests/expressions/test_evaluator.py b/tests/expressions/test_evaluator.py index 7b15099105..cfc32d9b6b 100644 --- a/tests/expressions/test_evaluator.py +++ b/tests/expressions/test_evaluator.py @@ -683,7 +683,7 @@ def data_file_nan() -> DataFile: def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_file_nan: Schema, data_file_nan: DataFile) -> None: - for operator in [LessThan, LessThanOrEqual]: + for operator in [LessThan, LessThanOrEqual]: # type: ignore should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] assert not should_read, "Should not match: all nan column doesn't contain number" @@ -711,7 +711,7 @@ def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_f def test_inclusive_metrics_evaluator_greater_than_and_greater_than_equal( schema_data_file_nan: Schema, data_file_nan: DataFile ) -> None: - for operator in [GreaterThan, GreaterThanOrEqual]: + for operator in [GreaterThan, GreaterThanOrEqual]: # type: ignore should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type] assert not should_read, "Should not match: all nan column doesn't contain number" diff --git a/tests/expressions/test_literals.py b/tests/expressions/test_literals.py index 4d8f5557f6..2137681e79 100644 --- a/tests/expressions/test_literals.py +++ b/tests/expressions/test_literals.py @@ -319,8 +319,8 @@ def test_string_to_time_literal() -> None: avro_val = 51661919000 - assert isinstance(time_lit, TimeLiteral) # type: ignore - assert avro_val == time_lit.value # type: ignore + assert isinstance(time_lit, TimeLiteral) + assert avro_val == time_lit.value def test_string_to_timestamp_literal() -> None: @@ -428,8 +428,8 @@ def test_python_date_conversion() -> None: from_str_lit = literal(one_day_str).to(DateType()) - assert isinstance(from_str_lit, DateLiteral) # type: ignore - assert from_str_lit.value == 19079 # type: ignore + assert isinstance(from_str_lit, DateLiteral) + assert from_str_lit.value == 19079 @pytest.mark.parametrize( @@ -911,7 +911,7 @@ def test_uuid_to_fixed() -> None: with pytest.raises(TypeError) as e: uuid_literal.to(FixedType(15)) assert "Cannot convert UUIDLiteral into fixed[15], different length: 15 <> 16" in str(e.value) - assert isinstance(fixed_literal, FixedLiteral) # type: ignore + assert isinstance(fixed_literal, FixedLiteral) def test_uuid_to_binary() -> None: @@ -919,7 +919,7 @@ def test_uuid_to_binary() -> None: uuid_literal = literal(test_uuid) binary_literal = uuid_literal.to(BinaryType()) assert test_uuid.bytes == binary_literal.value - assert isinstance(binary_literal, BinaryLiteral) # type: ignore + assert isinstance(binary_literal, BinaryLiteral) def test_literal_from_datetime() -> None: @@ -930,6 +930,22 @@ def test_literal_from_date() -> None: assert isinstance(literal(datetime.date.today()), DateLiteral) +def test_to_json() -> None: + assert literal(True).model_dump_json() == "true" + assert literal(float(123)).model_dump_json() == "123.0" + assert literal(123).model_dump_json() == "123" + assert literal("vo").model_dump_json() == '"vo"' + assert ( + literal(uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7")).model_dump_json() == '"f79c3e09-677c-4bbd-a479-3f349cb785e7"' + ) + assert literal(bytes([0x01, 0x02, 0x03])).model_dump_json() == '"010203"' + assert literal(Decimal("19.25")).model_dump_json() == '"19.25"' + assert literal(datetime.date.fromisoformat("2022-03-28")).model_dump_json() == '"2022-03-28"' + assert ( + literal(datetime.datetime.fromisoformat("1970-11-22T00:00:00.000000+00:00")).model_dump_json() == '"1970-11-22T00:00:00"' + ) + + # __ __ ___ # | \/ |_ _| _ \_ _ # | |\/| | || | _/ || |