diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index 00f2e57d2..d1c895222 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -84,7 +84,7 @@ pub enum PrimitiveLiteral { /// /// By default, we decouple the type and value of a literal, so we can use avoid the cost of storing extra type info /// for each literal. But associate type with literal can be useful in some cases, for example, in unbound expression. -#[derive(Debug, PartialEq, Hash, Eq)] +#[derive(Debug, PartialEq, Hash, Eq, Clone)] pub struct Datum { r#type: PrimitiveType, literal: PrimitiveLiteral, @@ -673,6 +673,16 @@ impl Datum { )), } } + + /// Get the primitive literal from datum. + pub fn literal(&self) -> &PrimitiveLiteral { + &self.literal + } + + /// Get the primitive type from datum. + pub fn data_type(&self) -> &PrimitiveType { + &self.r#type + } } /// Values present in iceberg type diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index 435426191..015aceaf4 100644 --- a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use arrow_array::ArrayRef; use arrow_schema::{DataType, TimeUnit}; -use crate::spec::{Literal, PrimitiveLiteral}; +use crate::spec::{Datum, PrimitiveLiteral}; use super::TransformFunction; @@ -208,45 +208,42 @@ impl TransformFunction for Bucket { .iter() .map(|v| v.map(|v| self.bucket_bytes(v))), ), - _ => unreachable!("Unsupported data type: {:?}", input.data_type()), + _ => { + return Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, + format!( + "Unsupported data type for bucket transform: {:?}", + input.data_type() + ), + )) + } }; Ok(Arc::new(res)) } - fn transform_literal(&self, input: &Literal) -> crate::Result<Option<Literal>> { - match input { - Literal::Primitive(PrimitiveLiteral::Int(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(self.bucket_int(*v)), - ))), -
Literal::Primitive(PrimitiveLiteral::Long(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(self.bucket_long(*v)), - ))), - Literal::Primitive(PrimitiveLiteral::Decimal(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(self.bucket_decimal(*v)), - ))), - Literal::Primitive(PrimitiveLiteral::Date(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(self.bucket_date(*v)), - ))), - Literal::Primitive(PrimitiveLiteral::Time(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(self.bucket_time(*v)), - ))), - Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(self.bucket_timestamp(*v)), - ))), - Literal::Primitive(PrimitiveLiteral::String(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(self.bucket_str(v)), - ))), - Literal::Primitive(PrimitiveLiteral::UUID(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(self.bucket_bytes(v.as_ref())), - ))), - Literal::Primitive(PrimitiveLiteral::Binary(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(self.bucket_bytes(v.as_ref())), - ))), - Literal::Primitive(PrimitiveLiteral::Fixed(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(self.bucket_bytes(v.as_ref())), - ))), - _ => unreachable!("Unsupported literal: {:?}", input), - } + fn transform_literal(&self, input: &Datum) -> crate::Result<Option<Datum>> { + let val = match input.literal() { + PrimitiveLiteral::Int(v) => self.bucket_int(*v), + PrimitiveLiteral::Long(v) => self.bucket_long(*v), + PrimitiveLiteral::Decimal(v) => self.bucket_decimal(*v), + PrimitiveLiteral::Date(v) => self.bucket_date(*v), + PrimitiveLiteral::Time(v) => self.bucket_time(*v), + PrimitiveLiteral::Timestamp(v) => self.bucket_timestamp(*v), + PrimitiveLiteral::String(v) => self.bucket_str(v.as_str()), + PrimitiveLiteral::UUID(v) => self.bucket_bytes(v.as_ref()), + PrimitiveLiteral::Binary(v) => self.bucket_bytes(v.as_ref()), + PrimitiveLiteral::Fixed(v) => self.bucket_bytes(v.as_ref()), + _ => { +
return Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, + format!( + "Unsupported data type for bucket transform: {:?}", + input.data_type() + ), + )) + } + }; + Ok(Some(Datum::int(val))) } } @@ -254,15 +251,10 @@ impl TransformFunction for Bucket { mod test { use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime}; - use crate::transform::TransformFunction; + use crate::{spec::Datum, transform::TransformFunction}; use super::Bucket; #[test] - fn t() { - let bucket = Bucket::new(10); - println!("{}", bucket.bucket_n(-653330422)); - } - #[test] fn test_hash() { // test int assert_eq!(Bucket::hash_int(34), 2017239379); @@ -341,13 +333,8 @@ mod test { fn test_int_literal() { let bucket = Bucket::new(10); assert_eq!( - bucket - .transform_literal(&crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::Int(34) - )) - .unwrap() - .unwrap(), - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(9)) + bucket.transform_literal(&Datum::int(34)).unwrap().unwrap(), + Datum::int(9) ); } @@ -355,13 +342,8 @@ mod test { fn test_long_literal() { let bucket = Bucket::new(10); assert_eq!( - bucket - .transform_literal(&crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::Long(34) - )) - .unwrap() - .unwrap(), - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(9)) + bucket.transform_literal(&Datum::long(34)).unwrap().unwrap(), + Datum::int(9) ); } @@ -370,12 +352,10 @@ mod test { let bucket = Bucket::new(10); assert_eq!( bucket - .transform_literal(&crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::Decimal(1420) - )) + .transform_literal(&Datum::decimal(1420).unwrap()) .unwrap() .unwrap(), - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(9)) + Datum::int(9) ); } @@ -384,12 +364,10 @@ mod test { let bucket = Bucket::new(100); assert_eq!( bucket - .transform_literal(&crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::Date(17486) - )) + 
.transform_literal(&Datum::date(17486)) .unwrap() .unwrap(), - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(26)) + Datum::int(26) ); } @@ -398,12 +376,10 @@ mod test { let bucket = Bucket::new(100); assert_eq!( bucket - .transform_literal(&crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::Time(81068000000) - )) + .transform_literal(&Datum::time_micros(81068000000).unwrap()) .unwrap() .unwrap(), - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(59)) + Datum::int(59) ); } @@ -412,12 +388,10 @@ mod test { let bucket = Bucket::new(100); assert_eq!( bucket - .transform_literal(&crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::Timestamp(1510871468000000) - )) + .transform_literal(&Datum::timestamp_micros(1510871468000000)) .unwrap() .unwrap(), - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(7)) + Datum::int(7) ); } @@ -426,12 +400,10 @@ mod test { let bucket = Bucket::new(100); assert_eq!( bucket - .transform_literal(&crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::String("iceberg".to_string()) - )) + .transform_literal(&Datum::string("iceberg")) .unwrap() .unwrap(), - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(89)) + Datum::int(89) ); } @@ -440,14 +412,12 @@ mod test { let bucket = Bucket::new(100); assert_eq!( bucket - .transform_literal(&crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::UUID( - "F79C3E09-677C-4BBD-A479-3F349CB785E7".parse().unwrap() - ) + .transform_literal(&Datum::uuid( + "F79C3E09-677C-4BBD-A479-3F349CB785E7".parse().unwrap() )) .unwrap() .unwrap(), - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(40)) + Datum::int(40) ); } @@ -456,12 +426,10 @@ mod test { let bucket = Bucket::new(128); assert_eq!( bucket - .transform_literal(&crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::Binary(b"\x00\x01\x02\x03".to_vec()) - )) + 
.transform_literal(&Datum::binary(b"\x00\x01\x02\x03".to_vec())) .unwrap() .unwrap(), - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(57)) + Datum::int(57) ); } @@ -470,12 +438,10 @@ mod test { let bucket = Bucket::new(128); assert_eq!( bucket - .transform_literal(&crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::Fixed(b"foo".to_vec()) - )) + .transform_literal(&Datum::fixed(b"foo".to_vec())) .unwrap() .unwrap(), - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(32)) + Datum::int(32) ); } } diff --git a/crates/iceberg/src/transform/identity.rs b/crates/iceberg/src/transform/identity.rs index c53757494..49ab612aa 100644 --- a/crates/iceberg/src/transform/identity.rs +++ b/crates/iceberg/src/transform/identity.rs @@ -29,10 +29,7 @@ impl TransformFunction for Identity { Ok(input) } - fn transform_literal( - &self, - input: &crate::spec::Literal, - ) -> Result<Option<crate::spec::Literal>> { + fn transform_literal(&self, input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> { Ok(Some(input.clone())) } } diff --git a/crates/iceberg/src/transform/mod.rs b/crates/iceberg/src/transform/mod.rs index 6bc935138..7effdbec3 100644 --- a/crates/iceberg/src/transform/mod.rs +++ b/crates/iceberg/src/transform/mod.rs @@ -17,7 +17,7 @@ //! Transform function used to compute partition values. use crate::{ - spec::{Literal, Transform}, + spec::{Datum, Transform}, Result, }; use arrow_array::ArrayRef; @@ -35,7 +35,7 @@ pub trait TransformFunction: Send { /// type. fn transform(&self, input: ArrayRef) -> Result<ArrayRef>; /// transform_literal will take an input literal and transform it into a new literal. - fn transform_literal(&self, input: &Literal) -> Result<Option<Literal>>; + fn transform_literal(&self, input: &Datum) -> Result<Option<Datum>>; } /// BoxedTransformFunction is a boxed trait object of TransformFunction.
diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index 356e9f8f9..c6b9a127f 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -16,7 +16,7 @@ // under the License. use super::TransformFunction; -use crate::spec::{Literal, PrimitiveLiteral}; +use crate::spec::{Datum, PrimitiveLiteral}; use crate::{Error, ErrorKind, Result}; use arrow_arith::temporal::DatePart; use arrow_arith::{arity::binary, temporal::date_part}; @@ -24,7 +24,7 @@ use arrow_array::{ types::Date32Type, Array, ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray, }; use arrow_schema::{DataType, TimeUnit}; -use chrono::{DateTime, Datelike, NaiveDate}; +use chrono::{DateTime, Datelike}; use std::sync::Arc; /// Hour in one second. @@ -41,11 +41,7 @@ pub struct Year; impl Year { #[inline] fn timestamp_to_year(timestamp: i64) -> i32 { - (DateTime::from_timestamp_micros(timestamp) - .unwrap() - .signed_duration_since(DateTime::from_timestamp(0, 0).unwrap()) - .num_days() - / 365) as i32 + DateTime::from_timestamp_micros(timestamp).unwrap().year() - UNIX_EPOCH_YEAR } } @@ -62,22 +58,22 @@ impl TransformFunction for Year { )) } - fn transform_literal( - &self, - input: &crate::spec::Literal, - ) -> Result<Option<crate::spec::Literal>> { - match input { - Literal::Primitive(PrimitiveLiteral::Date(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR), - ))), - Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(Self::timestamp_to_year(*v)), - ))), - Literal::Primitive(PrimitiveLiteral::TimestampTZ(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(Self::timestamp_to_year(*v)), - ))), - _ => unreachable!("Should not call internally for unsupported literal type"), - } + fn transform_literal(&self, input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> { + let val = match input.literal() { + PrimitiveLiteral::Date(v) =>
Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR, + PrimitiveLiteral::Timestamp(v) => Self::timestamp_to_year(*v), + PrimitiveLiteral::TimestampTZ(v) => Self::timestamp_to_year(*v), + _ => { + return Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, + format!( + "Unsupported data type for year transform: {:?}", + input.data_type() + ), + )) + } + }; + Ok(Some(Datum::int(val))) } } @@ -88,17 +84,17 @@ pub struct Month; impl Month { #[inline] fn timestamp_to_month(timestamp: i64) -> i32 { - let day = DateTime::from_timestamp_micros(timestamp) - .unwrap() - .signed_duration_since(DateTime::from_timestamp_micros(0).unwrap()) - .num_days(); - let m = NaiveDate::from_num_days_from_ce_opt(day as i32) - .unwrap() - .month0(); - if day < 0 { - m as i32 - 12 + // date: aaaa-aa-aa + // unix epoch date: 1970-01-01 + // if date > unix epoch date, delta month = (aa - 1) + 12 * (aaaa-1970) + // if date < unix epoch date, delta month = (12 - (aa - 1)) + 12 * (1970-aaaa-1) + let date = DateTime::from_timestamp_micros(timestamp).unwrap(); + let unix_epoch_date = DateTime::from_timestamp_micros(0).unwrap(); + if date > unix_epoch_date { + (date.month0() as i32) + 12 * (date.year() - UNIX_EPOCH_YEAR) } else { - m as i32 + let delta = (12 - date.month0() as i32) + 12 * (UNIX_EPOCH_YEAR - date.year() - 1); + -delta } } } @@ -125,25 +121,25 @@ impl TransformFunction for Month { )) } - fn transform_literal( - &self, - input: &crate::spec::Literal, - ) -> Result<Option<crate::spec::Literal>> { - match input { - Literal::Primitive(PrimitiveLiteral::Date(v)) => { - Ok(Some(Literal::Primitive(PrimitiveLiteral::Int( - (Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR) * 12 - + Date32Type::to_naive_date(*v).month0() as i32, - )))) + fn transform_literal(&self, input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> { + let val = match input.literal() { + PrimitiveLiteral::Date(v) => { + (Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR) * 12 + + Date32Type::to_naive_date(*v).month0() as i32 } -
Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(Self::timestamp_to_month(*v)), - ))), - Literal::Primitive(PrimitiveLiteral::TimestampTZ(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(Self::timestamp_to_month(*v)), - ))), - _ => unreachable!("Should not call internally for unsupported literal type"), - } + PrimitiveLiteral::Timestamp(v) => Self::timestamp_to_month(*v), + PrimitiveLiteral::TimestampTZ(v) => Self::timestamp_to_month(*v), + _ => { + return Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, + format!( + "Unsupported data type for month transform: {:?}", + input.data_type() + ), + )) + } + }; + Ok(Some(Datum::int(val))) } } @@ -184,22 +180,22 @@ impl TransformFunction for Day { Ok(Arc::new(res)) } - fn transform_literal( - &self, - input: &crate::spec::Literal, - ) -> Result<Option<crate::spec::Literal>> { - match input { - Literal::Primitive(PrimitiveLiteral::Date(v)) => { - Ok(Some(Literal::Primitive(PrimitiveLiteral::Int(*v)))) + fn transform_literal(&self, input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> { + let val = match input.literal() { + PrimitiveLiteral::Date(v) => *v, + PrimitiveLiteral::Timestamp(v) => Self::day_timestamp_micro(*v), + PrimitiveLiteral::TimestampTZ(v) => Self::day_timestamp_micro(*v), + _ => { + return Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, + format!( + "Unsupported data type for day transform: {:?}", + input.data_type() + ), + )) } - Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(Self::day_timestamp_micro(*v)), - ))), - Literal::Primitive(PrimitiveLiteral::TimestampTZ(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(Self::day_timestamp_micro(*v)), - ))), - _ => unreachable!("Should not call internally for unsupported literal type"), - } + }; + Ok(Some(Datum::int(val))) } } @@ -223,31 +219,33 @@ impl TransformFunction for Hour { .unwrap() .unary(|v| -> i32 { Self::hour_timestamp_micro(v) }),
_ => { - return Err(Error::new( - ErrorKind::Unexpected, + return Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, format!( - "Should not call internally for unsupported data type {:?}", + "Unsupported data type for hour transform: {:?}", input.data_type() ), - )) + )); } }; Ok(Arc::new(res)) } - fn transform_literal( - &self, - input: &crate::spec::Literal, - ) -> Result<Option<crate::spec::Literal>> { - match input { - Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(Self::hour_timestamp_micro(*v)), - ))), - Literal::Primitive(PrimitiveLiteral::TimestampTZ(v)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::Int(Self::hour_timestamp_micro(*v)), - ))), - _ => unreachable!("Should not call internally for unsupported literal type"), - } + fn transform_literal(&self, input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> { + let val = match input.literal() { + PrimitiveLiteral::Timestamp(v) => Self::hour_timestamp_micro(*v), + PrimitiveLiteral::TimestampTZ(v) => Self::hour_timestamp_micro(*v), + _ => { + return Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, + format!( + "Unsupported data type for hour transform: {:?}", + input.data_type() + ), + )) + } + }; + Ok(Some(Datum::int(val))) } } @@ -257,7 +255,10 @@ mod test { use chrono::{NaiveDate, NaiveDateTime}; use std::sync::Arc; - use crate::transform::TransformFunction; + use crate::{ + spec::Datum, + transform::{BoxedTransformFunction, TransformFunction}, + }; #[test] fn test_transform_years() { @@ -329,71 +330,63 @@ mod test { assert_eq!(res.value(4), -1); } - #[test] - fn test_transform_year_literal() { - let year = super::Year; - - // Test Date32 - let date = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(18628)); - let res = year.transform_literal(&date).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int( - 2021 - super::UNIX_EPOCH_YEAR - )) - ); - let date =
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(-365)); - let res = year.transform_literal(&date).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1)) - ); - - // Test TimestampMicrosecond - let timestamp = - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp(186280000000)); - let res = year.transform_literal(&timestamp).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int( - 1970 - super::UNIX_EPOCH_YEAR - )) + fn test_timestamp_and_tz_transform( + time: &str, + transform: &BoxedTransformFunction, + expect: Datum, + ) { + let timestamp = Datum::timestamp_micros( + NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f") + .unwrap() + .and_utc() + .timestamp_micros(), ); - let timestamp = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp( - NaiveDateTime::parse_from_str("1969-01-01 00:00:00.00", "%Y-%m-%d %H:%M:%S.%f") + let timestamp_tz = Datum::timestamptz_micros( + NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f") .unwrap() .and_utc() .timestamp_micros(), - )); - let res = year.transform_literal(&timestamp).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1)) ); + let res = transform.transform_literal(&timestamp).unwrap().unwrap(); + assert_eq!(res, expect); + let res = transform.transform_literal(&timestamp_tz).unwrap().unwrap(); + assert_eq!(res, expect); + } - // Test TimestampMicrosecond with timezone - let timestamp_tz = crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::TimestampTZ(186280000000), - ); - let res = year.transform_literal(&timestamp_tz).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int( - 1970 - super::UNIX_EPOCH_YEAR - )) - ); - let timestamp_tz = - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::TimestampTZ( -
NaiveDateTime::parse_from_str("1969-01-01 00:00:00.00", "%Y-%m-%d %H:%M:%S.%f") - .unwrap() - .and_utc() - .timestamp_micros(), - )); - let res = year.transform_literal(&timestamp_tz).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1)) + fn test_timestamp_and_tz_transform_using_i64( + time: i64, + transform: &BoxedTransformFunction, + expect: Datum, + ) { + let timestamp = Datum::timestamp_micros(time); + let timestamp_tz = Datum::timestamptz_micros(time); + let res = transform.transform_literal(&timestamp).unwrap().unwrap(); + assert_eq!(res, expect); + let res = transform.transform_literal(&timestamp_tz).unwrap().unwrap(); + assert_eq!(res, expect); + } + + fn test_date(date: i32, transform: &BoxedTransformFunction, expect: Datum) { + let date = Datum::date(date); + let res = transform.transform_literal(&date).unwrap().unwrap(); + assert_eq!(res, expect); + } + + #[test] + fn test_transform_year_literal() { + let year = Box::new(super::Year) as BoxedTransformFunction; + + // Test Date32 + test_date(18628, &year, Datum::int(2021 - super::UNIX_EPOCH_YEAR)); + test_date(-365, &year, Datum::int(-1)); + + // Test TimestampMicrosecond + test_timestamp_and_tz_transform_using_i64( + 186280000000, + &year, + Datum::int(1970 - super::UNIX_EPOCH_YEAR), ); + test_timestamp_and_tz_transform("1969-01-01 00:00:00.00", &year, Datum::int(-1)); } #[test] @@ -468,71 +461,26 @@ mod test { #[test] fn test_transform_month_literal() { - let month = super::Month; - // Test Date32 - let date = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(18628)); - let res = month.transform_literal(&date).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int( - (2021 - super::UNIX_EPOCH_YEAR) * 12 - )) - ); + let month = Box::new(super::Month) as BoxedTransformFunction; - let date = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(-31)); - let res =
month.transform_literal(&date).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1)) + // Test Date32 + test_date( + 18628, + &month, + Datum::int((2021 - super::UNIX_EPOCH_YEAR) * 12), ); + test_date(-31, &month, Datum::int(-1)); // Test TimestampMicrosecond - let timestamp = - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp(186280000000)); - let res = month.transform_literal(&timestamp).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int( - (1970 - super::UNIX_EPOCH_YEAR) * 12 - )) - ); - - let timestamp = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp( - NaiveDateTime::parse_from_str("1969-12-01 23:00:00.00", "%Y-%m-%d %H:%M:%S.%f") - .unwrap() - .and_utc() - .timestamp_micros(), - )); - let res = month.transform_literal(&timestamp).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1)) - ); - - // Test TimestampMicrosecond with timezone - let timestamp_tz = crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::TimestampTZ(186280000000), - ); - let res = month.transform_literal(&timestamp_tz).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int( - (1970 - super::UNIX_EPOCH_YEAR) * 12 - )) - ); - - let timestamp_tz = - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::TimestampTZ( - NaiveDateTime::parse_from_str("1969-12-01 23:00:00.00", "%Y-%m-%d %H:%M:%S.%f") - .unwrap() - .and_utc() - .timestamp_micros(), - )); - let res = month.transform_literal(&timestamp_tz).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1)) + test_timestamp_and_tz_transform_using_i64( + 186280000000, + &month, + Datum::int((1970 - super::UNIX_EPOCH_YEAR) * 12), ); + test_timestamp_and_tz_transform("1969-12-01 23:00:00.00", &month,
Datum::int(-1)); + test_timestamp_and_tz_transform("2017-12-01 00:00:00.00", &month, Datum::int(575)); + test_timestamp_and_tz_transform("1970-01-01 00:00:00.00", &month, Datum::int(0)); + test_timestamp_and_tz_transform("1969-12-31 00:00:00.00", &month, Datum::int(-1)); } #[test] @@ -615,63 +563,15 @@ mod test { #[test] fn test_transform_days_literal() { - let day = super::Day; + let day = Box::new(super::Day) as BoxedTransformFunction; // Test Date32 - let date = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(18628)); - let res = day.transform_literal(&date).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(18628)) - ); - - let date = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(-31)); - let res = day.transform_literal(&date).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-31)) - ); + test_date(18628, &day, Datum::int(18628)); + test_date(-31, &day, Datum::int(-31)); // Test TimestampMicrosecond - let timestamp = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp( - 1512151975038194, - )); - let res = day.transform_literal(&timestamp).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(17501)) - ); - - let timestamp_tz = crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::TimestampTZ(-115200000000), - ); - let res = day.transform_literal(&timestamp_tz).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1)) - ); - - // Test TimestampMicrosecond with timezone - let timestamp_tz = - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::TimestampTZ( - NaiveDateTime::parse_from_str("2017-12-01 10:30:42.123", "%Y-%m-%d %H:%M:%S.%f") - .unwrap() - .and_utc() - .timestamp_micros(), - )); - let res = day.transform_literal(&timestamp_tz).unwrap().unwrap(); -
assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(17501)) - ); - - let timestamp_tz = crate::spec::Literal::Primitive( - crate::spec::PrimitiveLiteral::TimestampTZ(-115200000000), - ); - let res = day.transform_literal(&timestamp_tz).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1)) - ); + test_timestamp_and_tz_transform_using_i64(1512151975038194, &day, Datum::int(17501)); + test_timestamp_and_tz_transform_using_i64(-115200000000, &day, Datum::int(-1)); + test_timestamp_and_tz_transform("2017-12-01 10:30:42.123", &day, Datum::int(17501)); } #[test] @@ -735,58 +635,10 @@ mod test { #[test] fn test_transform_hours_literal() { - let hour = super::Hour; + let hour = Box::new(super::Hour) as BoxedTransformFunction; // Test TimestampMicrosecond - let timestamp = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp( - NaiveDateTime::parse_from_str("2017-12-01 18:00:00.00", "%Y-%m-%d %H:%M:%S.%f") - .unwrap() - .and_utc() - .timestamp_micros(), - )); - let res = hour.transform_literal(&timestamp).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(420042)) - ); - - let timestamp = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp( - NaiveDateTime::parse_from_str("1969-12-31 23:00:00.00", "%Y-%m-%d %H:%M:%S.%f") - .unwrap() - .and_utc() - .timestamp_micros(), - )); - let res = hour.transform_literal(&timestamp).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1)) - ); - - // Test TimestampMicrosecond with timezone - let timestamp_tz = - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::TimestampTZ( - NaiveDateTime::parse_from_str("2017-12-01 18:00:00.00", "%Y-%m-%d %H:%M:%S.%f") - .unwrap() - .and_utc() - .timestamp_micros(), - )); - let res = hour.transform_literal(&timestamp_tz).unwrap().unwrap(); - assert_eq!( - res, -
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(420042)) - ); - - let timestamp = - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::TimestampTZ( - NaiveDateTime::parse_from_str("1969-12-31 23:00:00.00", "%Y-%m-%d %H:%M:%S.%f") - .unwrap() - .and_utc() - .timestamp_micros(), - )); - let res = hour.transform_literal(&timestamp).unwrap().unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1)) - ); + test_timestamp_and_tz_transform("2017-12-01 18:00:00.00", &hour, Datum::int(420042)); + test_timestamp_and_tz_transform("1969-12-31 23:00:00.00", &hour, Datum::int(-1)); } } diff --git a/crates/iceberg/src/transform/truncate.rs b/crates/iceberg/src/transform/truncate.rs index d4a8e8480..767ca0036 100644 --- a/crates/iceberg/src/transform/truncate.rs +++ b/crates/iceberg/src/transform/truncate.rs @@ -21,7 +21,7 @@ use arrow_array::ArrayRef; use arrow_schema::DataType; use crate::{ - spec::{Literal, PrimitiveLiteral}, + spec::{Datum, PrimitiveLiteral}, Error, }; @@ -122,38 +122,46 @@ impl TransformFunction for Truncate { ); Ok(Arc::new(res)) } - _ => unreachable!("Truncate transform only supports (int,long,decimal,string) types"), + _ => Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, + format!( + "Unsupported data type for truncate transform: {:?}", + input.data_type() + ), + )), } } - fn transform_literal(&self, input: &Literal) -> crate::Result<Option<Literal>> { - match input { - Literal::Primitive(PrimitiveLiteral::Int(v)) => Ok(Some({ + fn transform_literal(&self, input: &Datum) -> crate::Result<Option<Datum>> { + match input.literal() { + PrimitiveLiteral::Int(v) => Ok(Some({ let width: i32 = self.width.try_into().map_err(|_| { Error::new( crate::ErrorKind::DataInvalid, "width is failed to convert to i32 when truncate Int32Array", ) })?; - Literal::Primitive(PrimitiveLiteral::Int(Self::truncate_i32(*v, width))) + Datum::int(Self::truncate_i32(*v, width)) })), - Literal::Primitive(PrimitiveLiteral::Long(v))
=> Ok(Some({ + PrimitiveLiteral::Long(v) => Ok(Some({ let width = self.width as i64; - Literal::Primitive(PrimitiveLiteral::Long(Self::truncate_i64(*v, width))) + Datum::long(Self::truncate_i64(*v, width)) })), - Literal::Primitive(PrimitiveLiteral::Decimal(v)) => Ok(Some({ + PrimitiveLiteral::Decimal(v) => Ok(Some({ let width = self.width as i128; - Literal::Primitive(PrimitiveLiteral::Decimal(Self::truncate_decimal_i128( - *v, width, - ))) + Datum::decimal(Self::truncate_decimal_i128(*v, width))? })), - Literal::Primitive(PrimitiveLiteral::String(v)) => Ok(Some({ + PrimitiveLiteral::String(v) => Ok(Some({ let len = self.width as usize; - Literal::Primitive(PrimitiveLiteral::String( - Self::truncate_str(v, len).to_string(), - )) + Datum::string(Self::truncate_str(v, len).to_string()) })), - _ => unreachable!("Truncate transform only supports (int,long,decimal,string) types"), + _ => Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, + format!( + "Unsupported data type for truncate transform: {:?}", + input.data_type() + ), + )), } } } @@ -166,7 +174,7 @@ mod test { builder::PrimitiveBuilder, types::Decimal128Type, Decimal128Array, Int32Array, Int64Array, }; - use crate::transform::TransformFunction; + use crate::{spec::Datum, transform::TransformFunction}; // Test case ref from: https://iceberg.apache.org/spec/#truncate-transform-details #[test] @@ -256,77 +264,55 @@ mod test { #[test] fn test_literal_int() { - let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(1)); + let input = Datum::int(1); let res = super::Truncate::new(10) .transform_literal(&input) .unwrap() .unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(0)) - ); + assert_eq!(res, Datum::int(0),); - let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1)); + let input = Datum::int(-1); let res = super::Truncate::new(10) .transform_literal(&input) .unwrap() .unwrap(); - assert_eq!( - res, 
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-10)) - ); + assert_eq!(res, Datum::int(-10),); } #[test] fn test_literal_long() { - let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Long(1)); + let input = Datum::long(1); let res = super::Truncate::new(10) .transform_literal(&input) .unwrap() .unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Long(0)) - ); + assert_eq!(res, Datum::long(0),); - let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Long(-1)); + let input = Datum::long(-1); let res = super::Truncate::new(10) .transform_literal(&input) .unwrap() .unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Long(-10)) - ); + assert_eq!(res, Datum::long(-10),); } #[test] fn test_decimal_literal() { - let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Decimal(1065)); + let input = Datum::decimal(1065).unwrap(); let res = super::Truncate::new(50) .transform_literal(&input) .unwrap() .unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Decimal(1050)) - ); + assert_eq!(res, Datum::decimal(1050).unwrap(),); } #[test] fn test_string_literal() { - let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::String( - "iceberg".to_string(), - )); + let input = Datum::string("iceberg".to_string()); let res = super::Truncate::new(3) .transform_literal(&input) .unwrap() .unwrap(); - assert_eq!( - res, - crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::String( - "ice".to_string() - )) - ); + assert_eq!(res, Datum::string("ice".to_string()),); } } diff --git a/crates/iceberg/src/transform/void.rs b/crates/iceberg/src/transform/void.rs index fae85f5be..7cbee27ca 100644 --- a/crates/iceberg/src/transform/void.rs +++ b/crates/iceberg/src/transform/void.rs @@ -28,10 +28,7 @@ impl TransformFunction for Void { 
Ok(new_null_array(input.data_type(), input.len())) } - fn transform_literal( - &self, - _input: &crate::spec::Literal, - ) -> Result<Option<crate::spec::Literal>> { + fn transform_literal(&self, _input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> { Ok(None) } }