diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs
index 00f2e57d2..d1c895222 100644
--- a/crates/iceberg/src/spec/values.rs
+++ b/crates/iceberg/src/spec/values.rs
@@ -84,7 +84,7 @@ pub enum PrimitiveLiteral {
///
/// By default, we decouple the type and value of a literal, so we can use avoid the cost of storing extra type info
/// for each literal. But associate type with literal can be useful in some cases, for example, in unbound expression.
-#[derive(Debug, PartialEq, Hash, Eq)]
+#[derive(Debug, PartialEq, Hash, Eq, Clone)]
pub struct Datum {
r#type: PrimitiveType,
literal: PrimitiveLiteral,
@@ -673,6 +673,16 @@ impl Datum {
)),
}
}
+
+ /// Get the primitive literal from datum.
+ pub fn literal(&self) -> &PrimitiveLiteral {
+ &self.literal
+ }
+
+ /// Get the primitive type from datum.
+ pub fn data_type(&self) -> &PrimitiveType {
+ &self.r#type
+ }
}
/// Values present in iceberg type
diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs
index 435426191..015aceaf4 100644
--- a/crates/iceberg/src/transform/bucket.rs
+++ b/crates/iceberg/src/transform/bucket.rs
@@ -20,7 +20,7 @@ use std::sync::Arc;
use arrow_array::ArrayRef;
use arrow_schema::{DataType, TimeUnit};
-use crate::spec::{Literal, PrimitiveLiteral};
+use crate::spec::{Datum, PrimitiveLiteral};
use super::TransformFunction;
@@ -208,45 +208,42 @@ impl TransformFunction for Bucket {
.iter()
.map(|v| v.map(|v| self.bucket_bytes(v))),
),
- _ => unreachable!("Unsupported data type: {:?}", input.data_type()),
+ _ => {
+ return Err(crate::Error::new(
+ crate::ErrorKind::FeatureUnsupported,
+ format!(
+ "Unsupported data type for bucket transform: {:?}",
+ input.data_type()
+ ),
+ ))
+ }
};
Ok(Arc::new(res))
}
- fn transform_literal(&self, input: &Literal) -> crate::Result<Option<Literal>> {
- match input {
- Literal::Primitive(PrimitiveLiteral::Int(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(self.bucket_int(*v)),
- ))),
- Literal::Primitive(PrimitiveLiteral::Long(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(self.bucket_long(*v)),
- ))),
- Literal::Primitive(PrimitiveLiteral::Decimal(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(self.bucket_decimal(*v)),
- ))),
- Literal::Primitive(PrimitiveLiteral::Date(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(self.bucket_date(*v)),
- ))),
- Literal::Primitive(PrimitiveLiteral::Time(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(self.bucket_time(*v)),
- ))),
- Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(self.bucket_timestamp(*v)),
- ))),
- Literal::Primitive(PrimitiveLiteral::String(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(self.bucket_str(v)),
- ))),
- Literal::Primitive(PrimitiveLiteral::UUID(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(self.bucket_bytes(v.as_ref())),
- ))),
- Literal::Primitive(PrimitiveLiteral::Binary(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(self.bucket_bytes(v.as_ref())),
- ))),
- Literal::Primitive(PrimitiveLiteral::Fixed(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(self.bucket_bytes(v.as_ref())),
- ))),
- _ => unreachable!("Unsupported literal: {:?}", input),
- }
+ fn transform_literal(&self, input: &Datum) -> crate::Result<Option<Datum>> {
+ let val = match input.literal() {
+ PrimitiveLiteral::Int(v) => self.bucket_int(*v),
+ PrimitiveLiteral::Long(v) => self.bucket_long(*v),
+ PrimitiveLiteral::Decimal(v) => self.bucket_decimal(*v),
+ PrimitiveLiteral::Date(v) => self.bucket_date(*v),
+ PrimitiveLiteral::Time(v) => self.bucket_time(*v),
+ PrimitiveLiteral::Timestamp(v) => self.bucket_timestamp(*v),
+ PrimitiveLiteral::String(v) => self.bucket_str(v.as_str()),
+ PrimitiveLiteral::UUID(v) => self.bucket_bytes(v.as_ref()),
+ PrimitiveLiteral::Binary(v) => self.bucket_bytes(v.as_ref()),
+ PrimitiveLiteral::Fixed(v) => self.bucket_bytes(v.as_ref()),
+ _ => {
+ return Err(crate::Error::new(
+ crate::ErrorKind::FeatureUnsupported,
+ format!(
+ "Unsupported data type for bucket transform: {:?}",
+ input.data_type()
+ ),
+ ))
+ }
+ };
+ Ok(Some(Datum::int(val)))
}
}
@@ -254,15 +251,10 @@ impl TransformFunction for Bucket {
mod test {
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime};
- use crate::transform::TransformFunction;
+ use crate::{spec::Datum, transform::TransformFunction};
use super::Bucket;
#[test]
- fn t() {
- let bucket = Bucket::new(10);
- println!("{}", bucket.bucket_n(-653330422));
- }
- #[test]
fn test_hash() {
// test int
assert_eq!(Bucket::hash_int(34), 2017239379);
@@ -341,13 +333,8 @@ mod test {
fn test_int_literal() {
let bucket = Bucket::new(10);
assert_eq!(
- bucket
- .transform_literal(&crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::Int(34)
- ))
- .unwrap()
- .unwrap(),
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(9))
+ bucket.transform_literal(&Datum::int(34)).unwrap().unwrap(),
+ Datum::int(9)
);
}
@@ -355,13 +342,8 @@ mod test {
fn test_long_literal() {
let bucket = Bucket::new(10);
assert_eq!(
- bucket
- .transform_literal(&crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::Long(34)
- ))
- .unwrap()
- .unwrap(),
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(9))
+ bucket.transform_literal(&Datum::long(34)).unwrap().unwrap(),
+ Datum::int(9)
);
}
@@ -370,12 +352,10 @@ mod test {
let bucket = Bucket::new(10);
assert_eq!(
bucket
- .transform_literal(&crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::Decimal(1420)
- ))
+ .transform_literal(&Datum::decimal(1420).unwrap())
.unwrap()
.unwrap(),
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(9))
+ Datum::int(9)
);
}
@@ -384,12 +364,10 @@ mod test {
let bucket = Bucket::new(100);
assert_eq!(
bucket
- .transform_literal(&crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::Date(17486)
- ))
+ .transform_literal(&Datum::date(17486))
.unwrap()
.unwrap(),
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(26))
+ Datum::int(26)
);
}
@@ -398,12 +376,10 @@ mod test {
let bucket = Bucket::new(100);
assert_eq!(
bucket
- .transform_literal(&crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::Time(81068000000)
- ))
+ .transform_literal(&Datum::time_micros(81068000000).unwrap())
.unwrap()
.unwrap(),
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(59))
+ Datum::int(59)
);
}
@@ -412,12 +388,10 @@ mod test {
let bucket = Bucket::new(100);
assert_eq!(
bucket
- .transform_literal(&crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::Timestamp(1510871468000000)
- ))
+ .transform_literal(&Datum::timestamp_micros(1510871468000000))
.unwrap()
.unwrap(),
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(7))
+ Datum::int(7)
);
}
@@ -426,12 +400,10 @@ mod test {
let bucket = Bucket::new(100);
assert_eq!(
bucket
- .transform_literal(&crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::String("iceberg".to_string())
- ))
+ .transform_literal(&Datum::string("iceberg"))
.unwrap()
.unwrap(),
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(89))
+ Datum::int(89)
);
}
@@ -440,14 +412,12 @@ mod test {
let bucket = Bucket::new(100);
assert_eq!(
bucket
- .transform_literal(&crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::UUID(
- "F79C3E09-677C-4BBD-A479-3F349CB785E7".parse().unwrap()
- )
+ .transform_literal(&Datum::uuid(
+ "F79C3E09-677C-4BBD-A479-3F349CB785E7".parse().unwrap()
))
.unwrap()
.unwrap(),
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(40))
+ Datum::int(40)
);
}
@@ -456,12 +426,10 @@ mod test {
let bucket = Bucket::new(128);
assert_eq!(
bucket
- .transform_literal(&crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::Binary(b"\x00\x01\x02\x03".to_vec())
- ))
+ .transform_literal(&Datum::binary(b"\x00\x01\x02\x03".to_vec()))
.unwrap()
.unwrap(),
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(57))
+ Datum::int(57)
);
}
@@ -470,12 +438,10 @@ mod test {
let bucket = Bucket::new(128);
assert_eq!(
bucket
- .transform_literal(&crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::Fixed(b"foo".to_vec())
- ))
+ .transform_literal(&Datum::fixed(b"foo".to_vec()))
.unwrap()
.unwrap(),
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(32))
+ Datum::int(32)
);
}
}
diff --git a/crates/iceberg/src/transform/identity.rs b/crates/iceberg/src/transform/identity.rs
index c53757494..49ab612aa 100644
--- a/crates/iceberg/src/transform/identity.rs
+++ b/crates/iceberg/src/transform/identity.rs
@@ -29,10 +29,7 @@ impl TransformFunction for Identity {
Ok(input)
}
- fn transform_literal(
- &self,
- input: &crate::spec::Literal,
- ) -> Result<Option<crate::spec::Literal>> {
+ fn transform_literal(&self, input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> {
Ok(Some(input.clone()))
}
}
diff --git a/crates/iceberg/src/transform/mod.rs b/crates/iceberg/src/transform/mod.rs
index 6bc935138..7effdbec3 100644
--- a/crates/iceberg/src/transform/mod.rs
+++ b/crates/iceberg/src/transform/mod.rs
@@ -17,7 +17,7 @@
//! Transform function used to compute partition values.
use crate::{
- spec::{Literal, Transform},
+ spec::{Datum, Transform},
Result,
};
use arrow_array::ArrayRef;
@@ -35,7 +35,7 @@ pub trait TransformFunction: Send {
/// type.
fn transform(&self, input: ArrayRef) -> Result<ArrayRef>;
/// transform_literal will take an input literal and transform it into a new literal.
- fn transform_literal(&self, input: &Literal) -> Result<Option<Literal>>;
+ fn transform_literal(&self, input: &Datum) -> Result<Option<Datum>>;
}
/// BoxedTransformFunction is a boxed trait object of TransformFunction.
diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs
index 356e9f8f9..c6b9a127f 100644
--- a/crates/iceberg/src/transform/temporal.rs
+++ b/crates/iceberg/src/transform/temporal.rs
@@ -16,7 +16,7 @@
// under the License.
use super::TransformFunction;
-use crate::spec::{Literal, PrimitiveLiteral};
+use crate::spec::{Datum, PrimitiveLiteral};
use crate::{Error, ErrorKind, Result};
use arrow_arith::temporal::DatePart;
use arrow_arith::{arity::binary, temporal::date_part};
@@ -24,7 +24,7 @@ use arrow_array::{
types::Date32Type, Array, ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray,
};
use arrow_schema::{DataType, TimeUnit};
-use chrono::{DateTime, Datelike, NaiveDate};
+use chrono::{DateTime, Datelike};
use std::sync::Arc;
/// Hour in one second.
@@ -41,11 +41,7 @@ pub struct Year;
impl Year {
#[inline]
fn timestamp_to_year(timestamp: i64) -> i32 {
- (DateTime::from_timestamp_micros(timestamp)
- .unwrap()
- .signed_duration_since(DateTime::from_timestamp(0, 0).unwrap())
- .num_days()
- / 365) as i32
+ DateTime::from_timestamp_micros(timestamp).unwrap().year() - UNIX_EPOCH_YEAR
}
}
@@ -62,22 +58,22 @@ impl TransformFunction for Year {
))
}
- fn transform_literal(
- &self,
- input: &crate::spec::Literal,
- ) -> Result<Option<crate::spec::Literal>> {
- match input {
- Literal::Primitive(PrimitiveLiteral::Date(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR),
- ))),
- Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(Self::timestamp_to_year(*v)),
- ))),
- Literal::Primitive(PrimitiveLiteral::TimestampTZ(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(Self::timestamp_to_year(*v)),
- ))),
- _ => unreachable!("Should not call internally for unsupported literal type"),
- }
+ fn transform_literal(&self, input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> {
+ let val = match input.literal() {
+ PrimitiveLiteral::Date(v) => Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR,
+ PrimitiveLiteral::Timestamp(v) => Self::timestamp_to_year(*v),
+ PrimitiveLiteral::TimestampTZ(v) => Self::timestamp_to_year(*v),
+ _ => {
+ return Err(crate::Error::new(
+ crate::ErrorKind::FeatureUnsupported,
+ format!(
+ "Unsupported data type for year transform: {:?}",
+ input.data_type()
+ ),
+ ))
+ }
+ };
+ Ok(Some(Datum::int(val)))
}
}
@@ -88,17 +84,17 @@ pub struct Month;
impl Month {
#[inline]
fn timestamp_to_month(timestamp: i64) -> i32 {
- let day = DateTime::from_timestamp_micros(timestamp)
- .unwrap()
- .signed_duration_since(DateTime::from_timestamp_micros(0).unwrap())
- .num_days();
- let m = NaiveDate::from_num_days_from_ce_opt(day as i32)
- .unwrap()
- .month0();
- if day < 0 {
- m as i32 - 12
+ // date: aaaa-aa-aa
+ // unix epoch date: 1970-01-01
+ // if date > unix epoch date, delta month = (aa - 1) + 12 * (aaaa-1970)
+ // if date < unix epoch date, delta month = (12 - (aa - 1)) + 12 * (1970-aaaa-1)
+ let date = DateTime::from_timestamp_micros(timestamp).unwrap();
+ let unix_epoch_date = DateTime::from_timestamp_micros(0).unwrap();
+ if date > unix_epoch_date {
+ (date.month0() as i32) + 12 * (date.year() - UNIX_EPOCH_YEAR)
} else {
- m as i32
+ let delta = (12 - date.month0() as i32) + 12 * (UNIX_EPOCH_YEAR - date.year() - 1);
+ -delta
}
}
}
@@ -125,25 +121,25 @@ impl TransformFunction for Month {
))
}
- fn transform_literal(
- &self,
- input: &crate::spec::Literal,
- ) -> Result<Option<crate::spec::Literal>> {
- match input {
- Literal::Primitive(PrimitiveLiteral::Date(v)) => {
- Ok(Some(Literal::Primitive(PrimitiveLiteral::Int(
- (Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR) * 12
- + Date32Type::to_naive_date(*v).month0() as i32,
- ))))
+ fn transform_literal(&self, input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> {
+ let val = match input.literal() {
+ PrimitiveLiteral::Date(v) => {
+ (Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR) * 12
+ + Date32Type::to_naive_date(*v).month0() as i32
}
- Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(Self::timestamp_to_month(*v)),
- ))),
- Literal::Primitive(PrimitiveLiteral::TimestampTZ(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(Self::timestamp_to_month(*v)),
- ))),
- _ => unreachable!("Should not call internally for unsupported literal type"),
- }
+ PrimitiveLiteral::Timestamp(v) => Self::timestamp_to_month(*v),
+ PrimitiveLiteral::TimestampTZ(v) => Self::timestamp_to_month(*v),
+ _ => {
+ return Err(crate::Error::new(
+ crate::ErrorKind::FeatureUnsupported,
+ format!(
+ "Unsupported data type for month transform: {:?}",
+ input.data_type()
+ ),
+ ))
+ }
+ };
+ Ok(Some(Datum::int(val)))
}
}
@@ -184,22 +180,22 @@ impl TransformFunction for Day {
Ok(Arc::new(res))
}
- fn transform_literal(
- &self,
- input: &crate::spec::Literal,
- ) -> Result<Option<crate::spec::Literal>> {
- match input {
- Literal::Primitive(PrimitiveLiteral::Date(v)) => {
- Ok(Some(Literal::Primitive(PrimitiveLiteral::Int(*v))))
+ fn transform_literal(&self, input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> {
+ let val = match input.literal() {
+ PrimitiveLiteral::Date(v) => *v,
+ PrimitiveLiteral::Timestamp(v) => Self::day_timestamp_micro(*v),
+ PrimitiveLiteral::TimestampTZ(v) => Self::day_timestamp_micro(*v),
+ _ => {
+ return Err(crate::Error::new(
+ crate::ErrorKind::FeatureUnsupported,
+ format!(
+ "Unsupported data type for day transform: {:?}",
+ input.data_type()
+ ),
+ ))
}
- Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(Self::day_timestamp_micro(*v)),
- ))),
- Literal::Primitive(PrimitiveLiteral::TimestampTZ(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(Self::day_timestamp_micro(*v)),
- ))),
- _ => unreachable!("Should not call internally for unsupported literal type"),
- }
+ };
+ Ok(Some(Datum::int(val)))
}
}
@@ -223,31 +219,33 @@ impl TransformFunction for Hour {
.unwrap()
.unary(|v| -> i32 { Self::hour_timestamp_micro(v) }),
_ => {
- return Err(Error::new(
- ErrorKind::Unexpected,
+ return Err(crate::Error::new(
+ crate::ErrorKind::FeatureUnsupported,
format!(
- "Should not call internally for unsupported data type {:?}",
+ "Unsupported data type for hour transform: {:?}",
input.data_type()
),
- ))
+ ));
}
};
Ok(Arc::new(res))
}
- fn transform_literal(
- &self,
- input: &crate::spec::Literal,
- ) -> Result<Option<crate::spec::Literal>> {
- match input {
- Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(Self::hour_timestamp_micro(*v)),
- ))),
- Literal::Primitive(PrimitiveLiteral::TimestampTZ(v)) => Ok(Some(Literal::Primitive(
- PrimitiveLiteral::Int(Self::hour_timestamp_micro(*v)),
- ))),
- _ => unreachable!("Should not call internally for unsupported literal type"),
- }
+ fn transform_literal(&self, input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> {
+ let val = match input.literal() {
+ PrimitiveLiteral::Timestamp(v) => Self::hour_timestamp_micro(*v),
+ PrimitiveLiteral::TimestampTZ(v) => Self::hour_timestamp_micro(*v),
+ _ => {
+ return Err(crate::Error::new(
+ crate::ErrorKind::FeatureUnsupported,
+ format!(
+ "Unsupported data type for hour transform: {:?}",
+ input.data_type()
+ ),
+ ))
+ }
+ };
+ Ok(Some(Datum::int(val)))
}
}
@@ -257,7 +255,10 @@ mod test {
use chrono::{NaiveDate, NaiveDateTime};
use std::sync::Arc;
- use crate::transform::TransformFunction;
+ use crate::{
+ spec::Datum,
+ transform::{BoxedTransformFunction, TransformFunction},
+ };
#[test]
fn test_transform_years() {
@@ -329,71 +330,63 @@ mod test {
assert_eq!(res.value(4), -1);
}
- #[test]
- fn test_transform_year_literal() {
- let year = super::Year;
-
- // Test Date32
- let date = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(18628));
- let res = year.transform_literal(&date).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(
- 2021 - super::UNIX_EPOCH_YEAR
- ))
- );
- let date = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(-365));
- let res = year.transform_literal(&date).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1))
- );
-
- // Test TimestampMicrosecond
- let timestamp =
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp(186280000000));
- let res = year.transform_literal(&timestamp).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(
- 1970 - super::UNIX_EPOCH_YEAR
- ))
+ fn test_timestamp_and_tz_transform(
+ time: &str,
+ transform: &BoxedTransformFunction,
+ expect: Datum,
+ ) {
+ let timestamp = Datum::timestamp_micros(
+ NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f")
+ .unwrap()
+ .and_utc()
+ .timestamp_micros(),
);
- let timestamp = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp(
- NaiveDateTime::parse_from_str("1969-01-01 00:00:00.00", "%Y-%m-%d %H:%M:%S.%f")
+ let timestamp_tz = Datum::timestamptz_micros(
+ NaiveDateTime::parse_from_str(time, "%Y-%m-%d %H:%M:%S.%f")
.unwrap()
.and_utc()
.timestamp_micros(),
- ));
- let res = year.transform_literal(&timestamp).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1))
);
+ let res = transform.transform_literal(&timestamp).unwrap().unwrap();
+ assert_eq!(res, expect);
+ let res = transform.transform_literal(&timestamp_tz).unwrap().unwrap();
+ assert_eq!(res, expect);
+ }
- // Test TimestampMicrosecond with timezone
- let timestamp_tz = crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::TimestampTZ(186280000000),
- );
- let res = year.transform_literal(&timestamp_tz).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(
- 1970 - super::UNIX_EPOCH_YEAR
- ))
- );
- let timestamp_tz =
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::TimestampTZ(
- NaiveDateTime::parse_from_str("1969-01-01 00:00:00.00", "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_micros(),
- ));
- let res = year.transform_literal(&timestamp_tz).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1))
+ fn test_timestamp_and_tz_transform_using_i64(
+ time: i64,
+ transform: &BoxedTransformFunction,
+ expect: Datum,
+ ) {
+ let timestamp = Datum::timestamp_micros(time);
+ let timestamp_tz = Datum::timestamptz_micros(time);
+ let res = transform.transform_literal(&timestamp).unwrap().unwrap();
+ assert_eq!(res, expect);
+ let res = transform.transform_literal(&timestamp_tz).unwrap().unwrap();
+ assert_eq!(res, expect);
+ }
+
+ fn test_date(date: i32, transform: &BoxedTransformFunction, expect: Datum) {
+ let date = Datum::date(date);
+ let res = transform.transform_literal(&date).unwrap().unwrap();
+ assert_eq!(res, expect);
+ }
+
+ #[test]
+ fn test_transform_year_literal() {
+ let year = Box::new(super::Year) as BoxedTransformFunction;
+
+ // Test Date32
+ test_date(18628, &year, Datum::int(2021 - super::UNIX_EPOCH_YEAR));
+ test_date(-365, &year, Datum::int(-1));
+
+ // Test TimestampMicrosecond
+ test_timestamp_and_tz_transform_using_i64(
+ 186280000000,
+ &year,
+ Datum::int(1970 - super::UNIX_EPOCH_YEAR),
);
+ test_timestamp_and_tz_transform("1969-01-01 00:00:00.00", &year, Datum::int(-1));
}
#[test]
@@ -468,71 +461,26 @@ mod test {
#[test]
fn test_transform_month_literal() {
- let month = super::Month;
- // Test Date32
- let date = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(18628));
- let res = month.transform_literal(&date).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(
- (2021 - super::UNIX_EPOCH_YEAR) * 12
- ))
- );
+ let month = Box::new(super::Month) as BoxedTransformFunction;
- let date = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(-31));
- let res = month.transform_literal(&date).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1))
+ // Test Date32
+ test_date(
+ 18628,
+ &month,
+ Datum::int((2021 - super::UNIX_EPOCH_YEAR) * 12),
);
+ test_date(-31, &month, Datum::int(-1));
// Test TimestampMicrosecond
- let timestamp =
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp(186280000000));
- let res = month.transform_literal(&timestamp).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(
- (1970 - super::UNIX_EPOCH_YEAR) * 12
- ))
- );
-
- let timestamp = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp(
- NaiveDateTime::parse_from_str("1969-12-01 23:00:00.00", "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_micros(),
- ));
- let res = month.transform_literal(&timestamp).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1))
- );
-
- // Test TimestampMicrosecond with timezone
- let timestamp_tz = crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::TimestampTZ(186280000000),
- );
- let res = month.transform_literal(&timestamp_tz).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(
- (1970 - super::UNIX_EPOCH_YEAR) * 12
- ))
- );
-
- let timestamp_tz =
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::TimestampTZ(
- NaiveDateTime::parse_from_str("1969-12-01 23:00:00.00", "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_micros(),
- ));
- let res = month.transform_literal(&timestamp_tz).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1))
+ test_timestamp_and_tz_transform_using_i64(
+ 186280000000,
+ &month,
+ Datum::int((1970 - super::UNIX_EPOCH_YEAR) * 12),
);
+ test_timestamp_and_tz_transform("1969-12-01 23:00:00.00", &month, Datum::int(-1));
+ test_timestamp_and_tz_transform("2017-12-01 00:00:00.00", &month, Datum::int(575));
+ test_timestamp_and_tz_transform("1970-01-01 00:00:00.00", &month, Datum::int(0));
+ test_timestamp_and_tz_transform("1969-12-31 00:00:00.00", &month, Datum::int(-1));
}
#[test]
@@ -615,63 +563,15 @@ mod test {
#[test]
fn test_transform_days_literal() {
- let day = super::Day;
+ let day = Box::new(super::Day) as BoxedTransformFunction;
// Test Date32
- let date = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(18628));
- let res = day.transform_literal(&date).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(18628))
- );
-
- let date = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Date(-31));
- let res = day.transform_literal(&date).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-31))
- );
+ test_date(18628, &day, Datum::int(18628));
+ test_date(-31, &day, Datum::int(-31));
// Test TimestampMicrosecond
- let timestamp = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp(
- 1512151975038194,
- ));
- let res = day.transform_literal(&timestamp).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(17501))
- );
-
- let timestamp_tz = crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::TimestampTZ(-115200000000),
- );
- let res = day.transform_literal(&timestamp_tz).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1))
- );
-
- // Test TimestampMicrosecond with timezone
- let timestamp_tz =
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::TimestampTZ(
- NaiveDateTime::parse_from_str("2017-12-01 10:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_micros(),
- ));
- let res = day.transform_literal(&timestamp_tz).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(17501))
- );
-
- let timestamp_tz = crate::spec::Literal::Primitive(
- crate::spec::PrimitiveLiteral::TimestampTZ(-115200000000),
- );
- let res = day.transform_literal(&timestamp_tz).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1))
- );
+ test_timestamp_and_tz_transform_using_i64(1512151975038194, &day, Datum::int(17501));
+ test_timestamp_and_tz_transform_using_i64(-115200000000, &day, Datum::int(-1));
+ test_timestamp_and_tz_transform("2017-12-01 10:30:42.123", &day, Datum::int(17501));
}
#[test]
@@ -735,58 +635,10 @@ mod test {
#[test]
fn test_transform_hours_literal() {
- let hour = super::Hour;
+ let hour = Box::new(super::Hour) as BoxedTransformFunction;
// Test TimestampMicrosecond
- let timestamp = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp(
- NaiveDateTime::parse_from_str("2017-12-01 18:00:00.00", "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_micros(),
- ));
- let res = hour.transform_literal(&timestamp).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(420042))
- );
-
- let timestamp = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Timestamp(
- NaiveDateTime::parse_from_str("1969-12-31 23:00:00.00", "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_micros(),
- ));
- let res = hour.transform_literal(&timestamp).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1))
- );
-
- // Test TimestampMicrosecond with timezone
- let timestamp_tz =
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::TimestampTZ(
- NaiveDateTime::parse_from_str("2017-12-01 18:00:00.00", "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_micros(),
- ));
- let res = hour.transform_literal(&timestamp_tz).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(420042))
- );
-
- let timestamp =
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::TimestampTZ(
- NaiveDateTime::parse_from_str("1969-12-31 23:00:00.00", "%Y-%m-%d %H:%M:%S.%f")
- .unwrap()
- .and_utc()
- .timestamp_micros(),
- ));
- let res = hour.transform_literal(&timestamp).unwrap().unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1))
- );
+ test_timestamp_and_tz_transform("2017-12-01 18:00:00.00", &hour, Datum::int(420042));
+ test_timestamp_and_tz_transform("1969-12-31 23:00:00.00", &hour, Datum::int(-1));
}
}
diff --git a/crates/iceberg/src/transform/truncate.rs b/crates/iceberg/src/transform/truncate.rs
index d4a8e8480..767ca0036 100644
--- a/crates/iceberg/src/transform/truncate.rs
+++ b/crates/iceberg/src/transform/truncate.rs
@@ -21,7 +21,7 @@ use arrow_array::ArrayRef;
use arrow_schema::DataType;
use crate::{
- spec::{Literal, PrimitiveLiteral},
+ spec::{Datum, PrimitiveLiteral},
Error,
};
@@ -122,38 +122,46 @@ impl TransformFunction for Truncate {
);
Ok(Arc::new(res))
}
- _ => unreachable!("Truncate transform only supports (int,long,decimal,string) types"),
+ _ => Err(crate::Error::new(
+ crate::ErrorKind::FeatureUnsupported,
+ format!(
+ "Unsupported data type for truncate transform: {:?}",
+ input.data_type()
+ ),
+ )),
}
}
- fn transform_literal(&self, input: &Literal) -> crate::Result<Option<Literal>> {
- match input {
- Literal::Primitive(PrimitiveLiteral::Int(v)) => Ok(Some({
+ fn transform_literal(&self, input: &Datum) -> crate::Result<Option<Datum>> {
+ match input.literal() {
+ PrimitiveLiteral::Int(v) => Ok(Some({
let width: i32 = self.width.try_into().map_err(|_| {
Error::new(
crate::ErrorKind::DataInvalid,
"width is failed to convert to i32 when truncate Int32Array",
)
})?;
- Literal::Primitive(PrimitiveLiteral::Int(Self::truncate_i32(*v, width)))
+ Datum::int(Self::truncate_i32(*v, width))
})),
- Literal::Primitive(PrimitiveLiteral::Long(v)) => Ok(Some({
+ PrimitiveLiteral::Long(v) => Ok(Some({
let width = self.width as i64;
- Literal::Primitive(PrimitiveLiteral::Long(Self::truncate_i64(*v, width)))
+ Datum::long(Self::truncate_i64(*v, width))
})),
- Literal::Primitive(PrimitiveLiteral::Decimal(v)) => Ok(Some({
+ PrimitiveLiteral::Decimal(v) => Ok(Some({
let width = self.width as i128;
- Literal::Primitive(PrimitiveLiteral::Decimal(Self::truncate_decimal_i128(
- *v, width,
- )))
+ Datum::decimal(Self::truncate_decimal_i128(*v, width))?
})),
- Literal::Primitive(PrimitiveLiteral::String(v)) => Ok(Some({
+ PrimitiveLiteral::String(v) => Ok(Some({
let len = self.width as usize;
- Literal::Primitive(PrimitiveLiteral::String(
- Self::truncate_str(v, len).to_string(),
- ))
+ Datum::string(Self::truncate_str(v, len).to_string())
})),
- _ => unreachable!("Truncate transform only supports (int,long,decimal,string) types"),
+ _ => Err(crate::Error::new(
+ crate::ErrorKind::FeatureUnsupported,
+ format!(
+ "Unsupported data type for truncate transform: {:?}",
+ input.data_type()
+ ),
+ )),
}
}
}
@@ -166,7 +174,7 @@ mod test {
builder::PrimitiveBuilder, types::Decimal128Type, Decimal128Array, Int32Array, Int64Array,
};
- use crate::transform::TransformFunction;
+ use crate::{spec::Datum, transform::TransformFunction};
// Test case ref from: https://iceberg.apache.org/spec/#truncate-transform-details
#[test]
@@ -256,77 +264,55 @@ mod test {
#[test]
fn test_literal_int() {
- let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(1));
+ let input = Datum::int(1);
let res = super::Truncate::new(10)
.transform_literal(&input)
.unwrap()
.unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(0))
- );
+ assert_eq!(res, Datum::int(0),);
- let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-1));
+ let input = Datum::int(-1);
let res = super::Truncate::new(10)
.transform_literal(&input)
.unwrap()
.unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(-10))
- );
+ assert_eq!(res, Datum::int(-10),);
}
#[test]
fn test_literal_long() {
- let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Long(1));
+ let input = Datum::long(1);
let res = super::Truncate::new(10)
.transform_literal(&input)
.unwrap()
.unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Long(0))
- );
+ assert_eq!(res, Datum::long(0),);
- let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Long(-1));
+ let input = Datum::long(-1);
let res = super::Truncate::new(10)
.transform_literal(&input)
.unwrap()
.unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Long(-10))
- );
+ assert_eq!(res, Datum::long(-10),);
}
#[test]
fn test_decimal_literal() {
- let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Decimal(1065));
+ let input = Datum::decimal(1065).unwrap();
let res = super::Truncate::new(50)
.transform_literal(&input)
.unwrap()
.unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Decimal(1050))
- );
+ assert_eq!(res, Datum::decimal(1050).unwrap(),);
}
#[test]
fn test_string_literal() {
- let input = crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::String(
- "iceberg".to_string(),
- ));
+ let input = Datum::string("iceberg".to_string());
let res = super::Truncate::new(3)
.transform_literal(&input)
.unwrap()
.unwrap();
- assert_eq!(
- res,
- crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::String(
- "ice".to_string()
- ))
- );
+ assert_eq!(res, Datum::string("ice".to_string()),);
}
}
diff --git a/crates/iceberg/src/transform/void.rs b/crates/iceberg/src/transform/void.rs
index fae85f5be..7cbee27ca 100644
--- a/crates/iceberg/src/transform/void.rs
+++ b/crates/iceberg/src/transform/void.rs
@@ -28,10 +28,7 @@ impl TransformFunction for Void {
Ok(new_null_array(input.data_type(), input.len()))
}
- fn transform_literal(
- &self,
- _input: &crate::spec::Literal,
- ) -> Result<Option<crate::spec::Literal>> {
+ fn transform_literal(&self, _input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> {
Ok(None)
}
}