diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.cc b/cpp/src/arrow/compute/kernels/codegen_internal.cc index 8e2669bd3dfb9..00a833742f957 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.cc +++ b/cpp/src/arrow/compute/kernels/codegen_internal.cc @@ -251,6 +251,7 @@ TypeHolder CommonTemporal(const TypeHolder* begin, size_t count) { bool saw_date32 = false; bool saw_date64 = false; bool saw_duration = false; + bool saw_time_since_midnight = false; const TypeHolder* end = begin + count; for (auto it = begin; it != end; it++) { auto id = it->type->id(); @@ -271,6 +272,18 @@ TypeHolder CommonTemporal(const TypeHolder* begin, size_t count) { finest_unit = std::max(finest_unit, ty.unit()); continue; } + case Type::TIME32: { + const auto& type = checked_cast(*it->type); + finest_unit = std::max(finest_unit, type.unit()); + saw_time_since_midnight = true; + continue; + } + case Type::TIME64: { + const auto& type = checked_cast(*it->type); + finest_unit = std::max(finest_unit, type.unit()); + saw_time_since_midnight = true; + continue; + } case Type::DURATION: { const auto& ty = checked_cast(*it->type); finest_unit = std::max(finest_unit, ty.unit()); @@ -282,15 +295,33 @@ TypeHolder CommonTemporal(const TypeHolder* begin, size_t count) { } } - if (timezone) { - // At least one timestamp seen - return timestamp(finest_unit, *timezone); - } else if (saw_date64) { - return date64(); - } else if (saw_date32) { - return date32(); - } else if (saw_duration) { - return duration(finest_unit); + bool saw_timestamp_or_date = timezone || saw_date64 || saw_date32 || saw_duration; + + if (saw_time_since_midnight && saw_timestamp_or_date) { + // Cannot find common type + return TypeHolder(nullptr); + } + if (saw_timestamp_or_date) { + if (timezone) { + // At least one timestamp seen + return timestamp(finest_unit, *timezone); + } else if (saw_date64) { + return date64(); + } else if (saw_date32) { + return date32(); + } else if (saw_duration) { + return duration(finest_unit); + } + } + if (saw_time_since_midnight) { + switch (finest_unit) { + case TimeUnit::SECOND: + case TimeUnit::MILLI: + return time32(finest_unit); + case TimeUnit::MICRO: + case TimeUnit::NANO: + return time64(finest_unit); + } } return TypeHolder(nullptr); } diff --git a/cpp/src/arrow/compute/kernels/codegen_internal_test.cc b/cpp/src/arrow/compute/kernels/codegen_internal_test.cc index af024fb8d6e08..6bb5568d2ff38 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal_test.cc +++ b/cpp/src/arrow/compute/kernels/codegen_internal_test.cc @@ -159,6 +159,18 @@ TEST(TestDispatchBest, CommonTemporal) { args = {timestamp(TimeUnit::SECOND, "America/Phoenix"), timestamp(TimeUnit::SECOND, "UTC")}; ASSERT_EQ(CommonTemporal(args.data(), args.size()), nullptr); + + args = {time32(TimeUnit::SECOND), time32(TimeUnit::MILLI)}; + AssertTypeEqual(*time32(TimeUnit::MILLI), *CommonTemporal(args.data(), args.size())); + + args = {time32(TimeUnit::SECOND), time64(TimeUnit::NANO)}; + AssertTypeEqual(*time64(TimeUnit::NANO), *CommonTemporal(args.data(), args.size())); + + args = {date32(), time32(TimeUnit::SECOND)}; + ASSERT_EQ(CommonTemporal(args.data(), args.size()), nullptr); + + args = {timestamp(TimeUnit::SECOND), time32(TimeUnit::SECOND)}; + ASSERT_EQ(CommonTemporal(args.data(), args.size()), nullptr); } TEST(TestDispatchBest, CommonTemporalResolution) { diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc index 177ca824179a8..dc9e085df3c4c 100644 --- a/cpp/src/arrow/dataset/file_parquet_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_test.cc @@ -730,6 +730,31 @@ TEST_P(TestParquetFileFormatScan, PredicatePushdownRowGroupFragmentsUsingDuratio CountRowGroupsInFragment(fragment, {0}, expr); } +TEST_P(TestParquetFileFormatScan, + PredicatePushdownRowGroupFragmentsUsingTimestampColumn) { + // GH-37799: Parquet arrow will change TimeUnit::SECOND to TimeUnit::MILLI + // because parquet LogicalType doesn't support SECOND. + for (auto time_unit : {TimeUnit::MILLI, TimeUnit::SECOND}) { + auto table = TableFromJSON(schema({field("t", time32(time_unit))}), + { + R"([{"t": 1}])", + R"([{"t": 2}, {"t": 3}])", + }); + TableBatchReader table_reader(*table); + ARROW_SCOPED_TRACE("time_unit=", time_unit); + ASSERT_OK_AND_ASSIGN( + auto source, + ParquetFormatHelper::Write( + &table_reader, ArrowWriterProperties::Builder().store_schema()->build()) + .As()); + SetSchema({field("t", time32(time_unit))}); + ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(source)); + + auto expr = equal(field_ref("t"), literal(::arrow::Time32Scalar(1, time_unit))); + CountRowGroupsInFragment(fragment, {0}, expr); + } +} + // Tests projection with nested/indexed FieldRefs. // https://github.com/apache/arrow/issues/35579 TEST_P(TestParquetFileFormatScan, ProjectWithNonNamedFieldRefs) {