Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions pyiceberg/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ class DayTransform(TimeTransform[S]):
"""Transforms a datetime value into a day value.

Example:
>>> transform = MonthTransform()
>>> transform = DayTransform()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ty!

>>> transform.transform(DateType())(17501)
17501
"""
Expand All @@ -517,9 +517,6 @@ def day_func(v: Any) -> int:
def can_transform(self, source: IcebergType) -> bool:
return isinstance(source, (DateType, TimestampType, TimestamptzType))

def result_type(self, source: IcebergType) -> IcebergType:
return DateType()

Comment on lines -520 to -522
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: can we be explicit here and do the following?

    def result_type(self, source: IcebergType) -> IntegerType:
        return IntegerType()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm matching the behavior of the other transforms that extend TimeTransform, which all just implicitly use TimeTransform.result_type instead of overriding it. Should we change this for all of them?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

gotcha, thanks for the context. This is fine!

@property
def granularity(self) -> TimeResolution:
return TimeResolution.DAY
Expand Down
6 changes: 4 additions & 2 deletions tests/integration/test_inspect_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def test_inspect_entries_partitioned(spark: SparkSession, session_catalog: Catal

df = session_catalog.load_table(identifier).inspect.entries()

assert df.to_pydict()["data_file"][0]["partition"] == {"dt_day": date(2021, 2, 1), "dt_month": None}
assert df.to_pydict()["data_file"][0]["partition"] == {"dt_day": 18659, "dt_month": None}
assert df.to_pydict()["data_file"][1]["partition"] == {"dt_day": None, "dt_month": 612}


Expand Down Expand Up @@ -452,7 +452,9 @@ def test_inspect_partitions_partitioned(spark: SparkSession, session_catalog: Ca

def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> None:
lhs = df.to_pandas().sort_values("spec_id")
rhs = spark_df.toPandas().sort_values("spec_id")
# Spark does not store day partition values in the right type so we need to convert them
spark_df_arrow = pa.Table.from_pandas(spark_df.toPandas(), schema=df.schema)
rhs = spark_df_arrow.to_pandas().sort_values("spec_id")
for column in df.column_names:
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
assert left == right, f"Difference in column {column}: {left} != {right}"
Expand Down
4 changes: 1 addition & 3 deletions tests/integration/test_writes/test_partitioned_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,9 +450,7 @@ def test_append_ymd_transform_partitioned(
[
pytest.param(YearTransform(), {53, 54, None}, id="year_transform"),
pytest.param(MonthTransform(), {647, 648, 649, None}, id="month_transform"),
pytest.param(
DayTransform(), {date(2023, 12, 31), date(2024, 1, 1), date(2024, 1, 31), date(2024, 2, 1), None}, id="day_transform"
),
pytest.param(DayTransform(), {19722, 19723, 19753, 19754, None}, id="day_transform"),
pytest.param(HourTransform(), {473328, 473352, 474072, 474096, 474102, None}, id="hour_transform"),
],
)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def test_time_methods(type_var: PrimitiveType) -> None:
assert DayTransform().preserves_order
assert YearTransform().result_type(type_var) == IntegerType()
assert MonthTransform().result_type(type_var) == IntegerType()
assert DayTransform().result_type(type_var) == DateType()
assert DayTransform().result_type(type_var) == IntegerType()
assert YearTransform().dedup_name == "time"
assert MonthTransform().dedup_name == "time"
assert DayTransform().dedup_name == "time"
Expand Down
Loading