-
Notifications
You must be signed in to change notification settings - Fork 1k
[Variant] Add variant to arrow primitive support for boolean/timestamp/time #8516
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
2134dcc
76f51ad
fdfb93c
48047fe
b074728
b62e15b
d1a7d06
6762eff
2505dfa
f325746
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,8 @@ | |
|
||
//! Module for transforming a typed arrow `Array` to `VariantArray`. | ||
|
||
use arrow::datatypes::{self, ArrowPrimitiveType}; | ||
use arrow::datatypes::{self, ArrowPrimitiveType, ArrowTimestampType, Date32Type}; | ||
use chrono::{DateTime, Utc}; | ||
use parquet_variant::Variant; | ||
|
||
/// Options for controlling the behavior of `cast_to_variant_with_options`. | ||
|
@@ -38,12 +39,40 @@ pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType { | |
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>; | ||
} | ||
|
||
/// Extension trait for Arrow timestamp types that can extract their native value from a Variant | ||
/// We can't use [`PrimitiveFromVariant`] directly because we need _two_ implementations for each | ||
/// timestamp type -- the `NTZ` param here. | ||
pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType { | ||
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>; | ||
} | ||
|
||
/// Extension trait that `ArrowTimestampType` handle `DateTime<Utc>` like `NaiveDateTime` | ||
trait MakeValueTz: ArrowTimestampType { | ||
fn make_value_tz(timestamp: DateTime<Utc>) -> Option<i64> { | ||
Self::make_value(timestamp.naive_utc()) | ||
} | ||
} | ||
|
||
impl<T: ArrowTimestampType> MakeValueTz for T {} | ||
|
||
/// Macro to generate PrimitiveFromVariant implementations for Arrow primitive types | ||
macro_rules! impl_primitive_from_variant { | ||
($arrow_type:ty, $variant_method:ident) => { | ||
($arrow_type:ty, $variant_method:ident $(, $cast_fn:expr)?) => { | ||
impl PrimitiveFromVariant for $arrow_type { | ||
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> { | ||
variant.$variant_method() | ||
let value = variant.$variant_method(); | ||
$( let value = value.map($cast_fn); )? | ||
value | ||
} | ||
} | ||
}; | ||
} | ||
|
||
macro_rules! impl_timestamp_from_variant { | ||
($timestamp_type:ty, $variant_method:ident, ntz=$ntz:ident, $cast_fn:expr $(,)?) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After this change, I removed the the macro rule in my head now is something like below
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The micro -> nano conversion actually happens inside |
||
impl TimestampFromVariant<{ $ntz }> for $timestamp_type { | ||
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> { | ||
variant.$variant_method().and_then($cast_fn) | ||
} | ||
} | ||
}; | ||
|
@@ -60,6 +89,35 @@ impl_primitive_from_variant!(datatypes::UInt64Type, as_u64); | |
impl_primitive_from_variant!(datatypes::Float16Type, as_f16); | ||
impl_primitive_from_variant!(datatypes::Float32Type, as_f32); | ||
impl_primitive_from_variant!(datatypes::Float64Type, as_f64); | ||
impl_primitive_from_variant!( | ||
datatypes::Date32Type, | ||
as_naive_date, | ||
Date32Type::from_naive_date | ||
); | ||
impl_timestamp_from_variant!( | ||
datatypes::TimestampMicrosecondType, | ||
as_timestamp_ntz_micros, | ||
ntz = true, | ||
Self::make_value, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here use |
||
); | ||
impl_timestamp_from_variant!( | ||
datatypes::TimestampMicrosecondType, | ||
as_timestamp_micros, | ||
ntz = false, | ||
Self::make_value_tz | ||
); | ||
impl_timestamp_from_variant!( | ||
datatypes::TimestampNanosecondType, | ||
as_timestamp_ntz_nanos, | ||
ntz = true, | ||
Self::make_value | ||
); | ||
impl_timestamp_from_variant!( | ||
datatypes::TimestampNanosecondType, | ||
as_timestamp_nanos, | ||
ntz = false, | ||
Self::make_value_tz | ||
); | ||
|
||
/// Convert the value at a specific index in the given array into a `Variant`. | ||
macro_rules! non_generic_conversion_single_value { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note: this name is fine, but because it's a different trait we should also be able to "overload"
make_value
if you want.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for didn't add a comment for this, use
make_value_tz
here because it can't compile if I try to "overload"make_value
, the compiler needs me to convert type to a specific type inMakeValueTz::make_value
and macroimpl_timestamp_from_variant
(something like<Self as ArrowTimestampType>::make_value(...)
and<Self as MakeValueTz>::make_value(...)
. and googled that seems Rust didn't support "overload" with same func name and different parameters. not sure if I missed anything here