Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyiceberg/expressions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,7 @@ def __new__( # type: ignore # pylint: disable=W0221
if count == 0:
return AlwaysFalse()
elif count == 1:
return EqualTo(term, next(iter(literals))) # type: ignore
return EqualTo(term, next(iter(literals)))
else:
return super().__new__(cls)

Expand Down
49 changes: 38 additions & 11 deletions pyiceberg/expressions/literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
from typing import Any, Generic, Type
from uuid import UUID

from pyiceberg.typedef import L
from pydantic import Field, model_serializer

from pyiceberg.typedef import IcebergRootModel, L
from pyiceberg.types import (
BinaryType,
BooleanType,
Expand All @@ -52,7 +54,9 @@
date_str_to_days,
date_to_days,
datetime_to_micros,
days_to_date,
micros_to_days,
micros_to_timestamp,
time_str_to_micros,
time_to_micros,
timestamp_to_micros,
Expand All @@ -64,21 +68,24 @@
UUID_BYTES_LENGTH = 16


class Literal(Generic[L], ABC):
class Literal(IcebergRootModel[L], Generic[L], ABC): # type: ignore
"""Literal which has a value and can be converted between types."""

_value: L
root: L = Field()

def __init__(self, value: L, value_type: Type[L], /, **data): # type: ignore
if value is None:
raise TypeError("Invalid literal value: None")

def __init__(self, value: L, value_type: Type[L]):
super().__init__(value)
if value is None or not isinstance(value, value_type):
raise TypeError(f"Invalid literal value: {value!r} (not a {value_type})")
if isinstance(value, float) and isnan(value):
raise ValueError("Cannot create expression literal from NaN.")
self._value = value

@property
def value(self) -> L:
return self._value
return self.root

@singledispatchmethod
@abstractmethod
Expand Down Expand Up @@ -136,25 +143,25 @@ def literal(value: L) -> Literal[L]:
LongLiteral(123)
"""
if isinstance(value, float):
return DoubleLiteral(value) # type: ignore
return DoubleLiteral(value)
elif isinstance(value, bool):
return BooleanLiteral(value)
elif isinstance(value, int):
return LongLiteral(value)
elif isinstance(value, str):
return StringLiteral(value)
elif isinstance(value, UUID):
return UUIDLiteral(value.bytes) # type: ignore
return UUIDLiteral(value.bytes)
elif isinstance(value, bytes):
return BinaryLiteral(value)
elif isinstance(value, Decimal):
return DecimalLiteral(value)
elif isinstance(value, datetime):
return TimestampLiteral(datetime_to_micros(value)) # type: ignore
return TimestampLiteral(datetime_to_micros(value))
elif isinstance(value, date):
return DateLiteral(date_to_days(value)) # type: ignore
return DateLiteral(date_to_days(value))
elif isinstance(value, time):
return TimeLiteral(time_to_micros(value)) # type: ignore
return TimeLiteral(time_to_micros(value))
else:
raise TypeError(f"Invalid literal value: {repr(value)}")

Expand Down Expand Up @@ -411,6 +418,10 @@ class DateLiteral(Literal[int]):
# Forwards the value to the base-class initializer, naming int as the expected value type.
def __init__(self, value: int) -> None:
super().__init__(value, int)

# Serializer hook: emits the result of days_to_date(root) instead of the raw
# integer root value (test_to_json in this diff expects '"2022-03-28"' as the JSON form).
@model_serializer
def ser_model(self) -> date:
return days_to_date(self.root)

# Returns a new DateLiteral whose value is one greater than this literal's value.
def increment(self) -> Literal[int]:
return DateLiteral(self.value + 1)

Expand Down Expand Up @@ -443,6 +454,10 @@ class TimestampLiteral(Literal[int]):
# Forwards the value to the base-class initializer, naming int as the expected value type.
def __init__(self, value: int) -> None:
super().__init__(value, int)

# Serializer hook: converts the integer root via micros_to_timestamp and emits
# its isoformat() string (test_to_json in this diff expects '"1970-11-22T00:00:00"',
# i.e. no UTC offset in the output).
@model_serializer
def ser_model(self) -> str:
return micros_to_timestamp(self.root).isoformat()

# Returns a new TimestampLiteral whose value is one greater than this literal's value.
def increment(self) -> Literal[int]:
return TimestampLiteral(self.value + 1)

Expand Down Expand Up @@ -635,6 +650,10 @@ class UUIDLiteral(Literal[bytes]):
# Forwards the value to the base-class initializer, naming bytes as the expected value type.
def __init__(self, value: bytes) -> None:
super().__init__(value, bytes)

# Serializer hook: rebuilds a uuid.UUID from the raw bytes held in root, so the
# serialized form is the UUID rather than the byte string (test_to_json in this
# diff expects the hyphenated UUID text as a JSON string).
@model_serializer
def ser_model(self) -> UUID:
return UUID(bytes=self.root)

# Fallback of the singledispatchmethod: raises TypeError naming the requested
# target type. Type-specific overloads, if any, are registered outside this view.
@singledispatchmethod
def to(self, type_var: IcebergType) -> Literal:  # type: ignore
raise TypeError(f"Cannot convert UUIDLiteral into {type_var}")
Expand All @@ -661,6 +680,10 @@ class FixedLiteral(Literal[bytes]):
# Forwards the value to the base-class initializer, naming bytes as the expected value type.
def __init__(self, value: bytes) -> None:
super().__init__(value, bytes)

# Serializer hook: emits the root bytes as a hexadecimal string via bytes.hex().
@model_serializer
def ser_model(self) -> str:
return self.root.hex()

# Fallback of the singledispatchmethod: raises TypeError naming the requested
# target type. Type-specific overloads, if any, are registered outside this view.
@singledispatchmethod
def to(self, type_var: IcebergType) -> Literal:  # type: ignore
raise TypeError(f"Cannot convert FixedLiteral into {type_var}")
Expand Down Expand Up @@ -692,6 +715,10 @@ class BinaryLiteral(Literal[bytes]):
# Forwards the value to the base-class initializer, naming bytes as the expected value type.
def __init__(self, value: bytes) -> None:
super().__init__(value, bytes)

# Serializer hook: emits the root bytes as a hexadecimal string via bytes.hex()
# (test_to_json in this diff expects bytes 01 02 03 to serialize as '"010203"').
@model_serializer
def ser_model(self) -> str:
return self.root.hex()

# Fallback of the singledispatchmethod: raises TypeError naming the requested
# target type. Type-specific overloads, if any, are registered outside this view.
@singledispatchmethod
def to(self, type_var: IcebergType) -> Literal:  # type: ignore
raise TypeError(f"Cannot convert BinaryLiteral into {type_var}")
Expand Down
4 changes: 2 additions & 2 deletions tests/expressions/test_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ def data_file_nan() -> DataFile:


def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_file_nan: Schema, data_file_nan: DataFile) -> None:
for operator in [LessThan, LessThanOrEqual]:
for operator in [LessThan, LessThanOrEqual]: # type: ignore
should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert not should_read, "Should not match: all nan column doesn't contain number"

Expand Down Expand Up @@ -711,7 +711,7 @@ def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_f
def test_inclusive_metrics_evaluator_greater_than_and_greater_than_equal(
schema_data_file_nan: Schema, data_file_nan: DataFile
) -> None:
for operator in [GreaterThan, GreaterThanOrEqual]:
for operator in [GreaterThan, GreaterThanOrEqual]: # type: ignore
should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert not should_read, "Should not match: all nan column doesn't contain number"

Expand Down
28 changes: 22 additions & 6 deletions tests/expressions/test_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,8 +319,8 @@ def test_string_to_time_literal() -> None:

avro_val = 51661919000

assert isinstance(time_lit, TimeLiteral) # type: ignore
assert avro_val == time_lit.value # type: ignore
assert isinstance(time_lit, TimeLiteral)
assert avro_val == time_lit.value


def test_string_to_timestamp_literal() -> None:
Expand Down Expand Up @@ -428,8 +428,8 @@ def test_python_date_conversion() -> None:

from_str_lit = literal(one_day_str).to(DateType())

assert isinstance(from_str_lit, DateLiteral) # type: ignore
assert from_str_lit.value == 19079 # type: ignore
assert isinstance(from_str_lit, DateLiteral)
assert from_str_lit.value == 19079


@pytest.mark.parametrize(
Expand Down Expand Up @@ -911,15 +911,15 @@ def test_uuid_to_fixed() -> None:
with pytest.raises(TypeError) as e:
uuid_literal.to(FixedType(15))
assert "Cannot convert UUIDLiteral into fixed[15], different length: 15 <> 16" in str(e.value)
assert isinstance(fixed_literal, FixedLiteral) # type: ignore
assert isinstance(fixed_literal, FixedLiteral)


def test_uuid_to_binary() -> None:
test_uuid = uuid.uuid4()
uuid_literal = literal(test_uuid)
binary_literal = uuid_literal.to(BinaryType())
assert test_uuid.bytes == binary_literal.value
assert isinstance(binary_literal, BinaryLiteral) # type: ignore
assert isinstance(binary_literal, BinaryLiteral)


def test_literal_from_datetime() -> None:
Expand All @@ -930,6 +930,22 @@ def test_literal_from_date() -> None:
assert isinstance(literal(datetime.date.today()), DateLiteral)


def test_to_json() -> None:
    """Check the JSON form produced by ``model_dump_json`` for each kind of literal."""
    # Booleans, floats, ints and strings serialize as the matching bare JSON scalar.
    assert literal(True).model_dump_json() == "true"
    assert literal(float(123)).model_dump_json() == "123.0"
    assert literal(123).model_dump_json() == "123"
    assert literal("vo").model_dump_json() == '"vo"'
    # A UUID is expected back as its hyphenated text, not as raw bytes.
    assert (
        literal(uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7")).model_dump_json() == '"f79c3e09-677c-4bbd-a479-3f349cb785e7"'
    )
    # Bytes are expected as a hexadecimal string.
    assert literal(bytes([0x01, 0x02, 0x03])).model_dump_json() == '"010203"'
    # Decimals are expected as a quoted string.
    assert literal(Decimal("19.25")).model_dump_json() == '"19.25"'
    # Dates are expected as ISO calendar dates rather than a day count.
    assert literal(datetime.date.fromisoformat("2022-03-28")).model_dump_json() == '"2022-03-28"'
    # Timestamps are expected as an ISO string; the input's +00:00 offset does not
    # appear in the expected output.
    assert (
        literal(datetime.datetime.fromisoformat("1970-11-22T00:00:00.000000+00:00")).model_dump_json() == '"1970-11-22T00:00:00"'
    )


# __ __ ___
# | \/ |_ _| _ \_ _
# | |\/| | || | _/ || |
Expand Down