From 8d014b19a1b61afe1fe3bc09b72aa749e2a94846 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 27 Mar 2024 11:29:09 +0100 Subject: [PATCH 01/46] add project bucket_unary --- crates/iceberg/src/expr/predicate.rs | 3 ++ crates/iceberg/src/spec/transform.rs | 44 ++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/crates/iceberg/src/expr/predicate.rs b/crates/iceberg/src/expr/predicate.rs index da8a863d2..4ab6c8880 100644 --- a/crates/iceberg/src/expr/predicate.rs +++ b/crates/iceberg/src/expr/predicate.rs @@ -116,6 +116,9 @@ impl UnaryExpression { debug_assert!(op.is_unary()); Self { op, term } } + pub(crate) fn op(&self) -> PredicateOperator { + self.op + } } /// Binary predicate, for example, `a > 10`. diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 839d582dc..4d745d3de 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -18,7 +18,9 @@ //! Transforms in iceberg. use crate::error::{Error, Result}; +use crate::expr::{BoundPredicate, Predicate, Reference, UnaryExpression}; use crate::spec::datatypes::{PrimitiveType, Type}; +use crate::transform::create_transform_function; use crate::ErrorKind; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt::{Display, Formatter}; @@ -261,6 +263,23 @@ impl Transform { _ => self == other, } } + /// Projects predicate based on transform + pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result> { + let _func = create_transform_function(self)?; + + let projection = match self { + Transform::Bucket(_) => match predicate { + BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( + expr.op(), + Reference::new(name), + ))), + _ => unimplemented!(), + }, + _ => unimplemented!(), + }; + + Ok(projection) + } } impl Display for Transform { @@ -358,6 +377,10 @@ impl<'de> Deserialize<'de> for Transform { #[cfg(test)] mod tests { + use super::*; + use 
std::sync::Arc; + + use crate::expr::{BoundPredicate, BoundReference, PredicateOperator, UnaryExpression}; use crate::spec::datatypes::PrimitiveType::{ Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, Timestamptz, Uuid, @@ -365,6 +388,7 @@ mod tests { use crate::spec::datatypes::Type::{Primitive, Struct}; use crate::spec::datatypes::{NestedField, StructType, Type}; use crate::spec::transform::Transform; + use crate::spec::PrimitiveType; struct TestParameter { display: String, @@ -398,6 +422,26 @@ mod tests { } } + #[test] + fn test_bucket_project_unary() -> Result<()> { + let name = "projected_name".to_string(); + + let field = NestedField::required(1, "a", Type::Primitive(PrimitiveType::Int)); + + let predicate = BoundPredicate::Unary(UnaryExpression::new( + PredicateOperator::IsNull, + BoundReference::new("original_name", Arc::new(field)), + )); + + let transform = Transform::Bucket(8); + + let result = transform.project(name, &predicate)?.unwrap(); + + assert_eq!(format!("{}", result), "projected_name IS NULL"); + + Ok(()) + } + #[test] fn test_bucket_transform() { let trans = Transform::Bucket(8); From 507caa267264d32cb62c004745d024c07a6889aa Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 27 Mar 2024 11:57:17 +0100 Subject: [PATCH 02/46] add project bucket_binary --- crates/iceberg/src/expr/predicate.rs | 6 +++ crates/iceberg/src/spec/transform.rs | 62 ++++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/crates/iceberg/src/expr/predicate.rs b/crates/iceberg/src/expr/predicate.rs index 4ab6c8880..5303b8f2f 100644 --- a/crates/iceberg/src/expr/predicate.rs +++ b/crates/iceberg/src/expr/predicate.rs @@ -147,6 +147,12 @@ impl BinaryExpression { debug_assert!(op.is_binary()); Self { op, term, literal } } + pub(crate) fn op(&self) -> PredicateOperator { + self.op + } + pub(crate) fn literal(&self) -> Datum { + self.literal.clone() + } } impl Display for BinaryExpression { diff --git 
a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 4d745d3de..044462595 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -18,7 +18,9 @@ //! Transforms in iceberg. use crate::error::{Error, Result}; -use crate::expr::{BoundPredicate, Predicate, Reference, UnaryExpression}; +use crate::expr::{ + BinaryExpression, BoundPredicate, Predicate, PredicateOperator, Reference, UnaryExpression, +}; use crate::spec::datatypes::{PrimitiveType, Type}; use crate::transform::create_transform_function; use crate::ErrorKind; @@ -265,20 +267,37 @@ impl Transform { } /// Projects predicate based on transform pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result> { - let _func = create_transform_function(self)?; + let func = create_transform_function(self)?; let projection = match self { Transform::Bucket(_) => match predicate { - BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( - expr.op(), - Reference::new(name), - ))), + BoundPredicate::Unary(expr) => { + Predicate::Unary(UnaryExpression::new(expr.op(), Reference::new(name))) + } + BoundPredicate::Binary(expr) => { + if expr.op() != PredicateOperator::Eq { + return Ok(None); + } + + let new_datum = func.transform_literal(&expr.literal())?.ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + "Transformed literal must not be 'None'", + ) + })?; + + Predicate::Binary(BinaryExpression::new( + expr.op(), + Reference::new(name), + new_datum, + )) + } _ => unimplemented!(), }, _ => unimplemented!(), }; - Ok(projection) + Ok(Some(projection)) } } @@ -388,7 +407,7 @@ mod tests { use crate::spec::datatypes::Type::{Primitive, Struct}; use crate::spec::datatypes::{NestedField, StructType, Type}; use crate::spec::transform::Transform; - use crate::spec::PrimitiveType; + use crate::spec::{Datum, PrimitiveType}; struct TestParameter { display: String, @@ -422,6 +441,33 @@ mod tests { } } + #[test] + fn 
test_bucket_project_binary() -> Result<()> { + let name = "projected_name".to_string(); + + let field = NestedField::required(1, "a", Type::Primitive(PrimitiveType::Int)); + + let predicate = BoundPredicate::Binary(BinaryExpression::new( + PredicateOperator::Eq, + BoundReference::new("original_name", Arc::new(field)), + Datum::int(5), + )); + + let transform = Transform::Bucket(8); + + let expected = Some(Predicate::Binary(BinaryExpression::new( + PredicateOperator::Eq, + Reference::new(&name), + Datum::int(7), + ))); + + let result = transform.project(name, &predicate)?; + + assert_eq!(result, expected); + + Ok(()) + } + #[test] fn test_bucket_project_unary() -> Result<()> { let name = "projected_name".to_string(); From 09eda3fd1d1eb13d2ef5046c301d08a4f4886747 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 27 Mar 2024 12:13:10 +0100 Subject: [PATCH 03/46] add project bucket_set --- crates/iceberg/src/expr/predicate.rs | 6 +++ crates/iceberg/src/spec/transform.rs | 78 ++++++++++++++++++++++++---- 2 files changed, 75 insertions(+), 9 deletions(-) diff --git a/crates/iceberg/src/expr/predicate.rs b/crates/iceberg/src/expr/predicate.rs index 5303b8f2f..982049e3e 100644 --- a/crates/iceberg/src/expr/predicate.rs +++ b/crates/iceberg/src/expr/predicate.rs @@ -200,6 +200,12 @@ impl SetExpression { debug_assert!(op.is_set()); Self { op, term, literals } } + pub(crate) fn op(&self) -> PredicateOperator { + self.op + } + pub(crate) fn literals(&self) -> FnvHashSet { + self.literals.clone() + } } impl Bind for SetExpression { diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 044462595..ba4dea175 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -19,11 +19,14 @@ use crate::error::{Error, Result}; use crate::expr::{ - BinaryExpression, BoundPredicate, Predicate, PredicateOperator, Reference, UnaryExpression, + BinaryExpression, BoundPredicate, Predicate, PredicateOperator, 
Reference, SetExpression, + UnaryExpression, }; use crate::spec::datatypes::{PrimitiveType, Type}; +use crate::spec::Datum; use crate::transform::create_transform_function; use crate::ErrorKind; +use fnv::FnvHashSet; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt::{Display, Formatter}; use std::str::FromStr; @@ -265,15 +268,16 @@ impl Transform { _ => self == other, } } - /// Projects predicate based on transform + /// Projects predicate based on `Transform` pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result> { let func = create_transform_function(self)?; let projection = match self { Transform::Bucket(_) => match predicate { - BoundPredicate::Unary(expr) => { - Predicate::Unary(UnaryExpression::new(expr.op(), Reference::new(name))) - } + BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( + expr.op(), + Reference::new(name), + ))), BoundPredicate::Binary(expr) => { if expr.op() != PredicateOperator::Eq { return Ok(None); @@ -286,18 +290,47 @@ impl Transform { ) })?; - Predicate::Binary(BinaryExpression::new( + Some(Predicate::Binary(BinaryExpression::new( expr.op(), Reference::new(name), new_datum, - )) + ))) + } + BoundPredicate::Set(expr) => { + if expr.op() != PredicateOperator::In { + return Ok(None); + } + + let projected_set: Result> = expr + .literals() + .iter() + .map(|d| { + func.transform_literal(d).and_then(|opt_datum| { + opt_datum.ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + "Transformed literal must not be 'None'", + ) + }) + }) + }) + .collect(); + + match projected_set { + Err(err) => return Err(err), + Ok(set) => Some(Predicate::Set(SetExpression::new( + expr.op(), + Reference::new(name), + set, + ))), + } } - _ => unimplemented!(), + _ => None, }, _ => unimplemented!(), }; - Ok(Some(projection)) + Ok(projection) } } @@ -441,6 +474,33 @@ mod tests { } } + #[test] + fn test_bucket_project_set() -> Result<()> { + let name = "projected_name".to_string(); + + let 
field = NestedField::required(1, "a", Type::Primitive(PrimitiveType::Int)); + + let predicate = BoundPredicate::Set(SetExpression::new( + PredicateOperator::In, + BoundReference::new("original_name", Arc::new(field)), + FnvHashSet::from_iter([Datum::int(5), Datum::int(6)]), + )); + + let transform = Transform::Bucket(8); + + let expected = Some(Predicate::Set(SetExpression::new( + PredicateOperator::In, + Reference::new(&name), + FnvHashSet::from_iter([Datum::int(7), Datum::int(1)]), + ))); + + let result = transform.project(name, &predicate)?; + + assert_eq!(result, expected); + + Ok(()) + } + #[test] fn test_bucket_project_binary() -> Result<()> { let name = "projected_name".to_string(); From 41f90f79c90be71097c2edc431d332659bef2662 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 27 Mar 2024 13:00:16 +0100 Subject: [PATCH 04/46] add project identity --- crates/iceberg/src/expr/predicate.rs | 8 ++-- crates/iceberg/src/spec/transform.rs | 65 +++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/crates/iceberg/src/expr/predicate.rs b/crates/iceberg/src/expr/predicate.rs index 982049e3e..530923f15 100644 --- a/crates/iceberg/src/expr/predicate.rs +++ b/crates/iceberg/src/expr/predicate.rs @@ -150,8 +150,8 @@ impl BinaryExpression { pub(crate) fn op(&self) -> PredicateOperator { self.op } - pub(crate) fn literal(&self) -> Datum { - self.literal.clone() + pub(crate) fn literal(&self) -> &Datum { + &self.literal } } @@ -203,8 +203,8 @@ impl SetExpression { pub(crate) fn op(&self) -> PredicateOperator { self.op } - pub(crate) fn literals(&self) -> FnvHashSet { - self.literals.clone() + pub(crate) fn literals(&self) -> &FnvHashSet { + &self.literals } } diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index ba4dea175..152853bcf 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -283,7 +283,7 @@ impl Transform { return Ok(None); } - let 
new_datum = func.transform_literal(&expr.literal())?.ok_or_else(|| { + let new_datum = func.transform_literal(expr.literal())?.ok_or_else(|| { Error::new( ErrorKind::DataInvalid, "Transformed literal must not be 'None'", @@ -327,7 +327,30 @@ impl Transform { } _ => None, }, - _ => unimplemented!(), + Transform::Identity => match predicate { + BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( + expr.op(), + Reference::new(name), + ))), + BoundPredicate::Binary(expr) => Some(Predicate::Binary(BinaryExpression::new( + expr.op(), + Reference::new(name), + expr.literal().to_owned(), + ))), + BoundPredicate::Set(expr) => Some(Predicate::Set(SetExpression::new( + expr.op(), + Reference::new(name), + expr.literals().to_owned(), + ))), + _ => None, + }, + Transform::Year => todo!(), + Transform::Month => todo!(), + Transform::Day => todo!(), + Transform::Hour => todo!(), + Transform::Truncate(_) => todo!(), + Transform::Void => todo!(), + Transform::Unknown => todo!(), }; Ok(projection) @@ -474,6 +497,44 @@ mod tests { } } + #[test] + fn test_identity_project() -> Result<()> { + let name = "projected_name".to_string(); + + let field = Arc::new(NestedField::required( + 1, + "a", + Type::Primitive(PrimitiveType::Int), + )); + + let predicate_unary = BoundPredicate::Unary(UnaryExpression::new( + PredicateOperator::IsNull, + BoundReference::new("original_name", field.clone()), + )); + let predicate_binary = BoundPredicate::Binary(BinaryExpression::new( + PredicateOperator::Eq, + BoundReference::new("original_name", field.clone()), + Datum::int(5), + )); + let predicate_set = BoundPredicate::Set(SetExpression::new( + PredicateOperator::In, + BoundReference::new("original_name", field.clone()), + FnvHashSet::from_iter([Datum::int(5), Datum::int(6)]), + )); + + let transform = Transform::Identity; + + let result_unary = transform.project(name.clone(), &predicate_unary)?.unwrap(); + let result_binary = transform.project(name.clone(), 
&predicate_binary)?.unwrap(); + let result_set = transform.project(name.clone(), &predicate_set)?.unwrap(); + + assert_eq!(format!("{}", result_unary), "projected_name IS NULL"); + assert_eq!(format!("{}", result_binary), "projected_name = 5"); + assert_eq!(format!("{}", result_set), "projected_name IN (5, 6)"); + + Ok(()) + } + #[test] fn test_bucket_project_set() -> Result<()> { let name = "projected_name".to_string(); From 73f1e3d7e8a3a5d14ddc850e0574467cc23cd4b9 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 27 Mar 2024 18:25:05 +0100 Subject: [PATCH 05/46] add project truncate --- crates/iceberg/src/spec/transform.rs | 273 ++++++++++++++++++--------- crates/iceberg/src/transform/mod.rs | 1 + 2 files changed, 185 insertions(+), 89 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 152853bcf..82a166a6b 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -23,14 +23,16 @@ use crate::expr::{ UnaryExpression, }; use crate::spec::datatypes::{PrimitiveType, Type}; -use crate::spec::Datum; -use crate::transform::create_transform_function; +use crate::spec::PrimitiveLiteral; +use crate::transform::{create_transform_function, BoxedTransformFunction}; use crate::ErrorKind; use fnv::FnvHashSet; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt::{Display, Formatter}; use std::str::FromStr; +use super::Datum; + /// Transform is used to transform predicates to partition predicates, /// in addition to transforming data values. /// @@ -255,7 +257,7 @@ impl Transform { /// result. /// /// For example, sorting by day(ts) will produce an ordering that is also by month(ts) or - // year(ts). However, sorting by day(ts) will not satisfy the order of hour(ts) or identity(ts). + /// year(ts). However, sorting by day(ts) will not satisfy the order of hour(ts) or identity(ts). 
pub fn satisfies_order_of(&self, other: &Self) -> bool { match self { Transform::Identity => other.preserves_order(), @@ -268,6 +270,7 @@ impl Transform { _ => self == other, } } + /// Projects predicate based on `Transform` pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result<Option<Predicate>> { let func = create_transform_function(self)?; @@ -286,7 +289,7 @@ impl Transform { let new_datum = func.transform_literal(expr.literal())?.ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - "Transformed literal must not be 'None'", + "Transformed datum must not be 'None'", ) })?; @@ -301,29 +304,11 @@ impl Transform { return Ok(None); } - let projected_set: Result<FnvHashSet<Datum>> = expr - .literals() - .iter() - .map(|d| { - func.transform_literal(d).and_then(|opt_datum| { - opt_datum.ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - "Transformed literal must not be 'None'", - ) - }) - }) - }) - .collect(); - - match projected_set { - Err(err) => return Err(err), - Ok(set) => Some(Predicate::Set(SetExpression::new( - expr.op(), - Reference::new(name), - set, - ))), - } + Some(Predicate::Set(SetExpression::new( + expr.op(), + Reference::new(name), + self.apply_transform_on_set(expr.literals(), &func)?, + ))) } _ => None, }, @@ -348,13 +333,105 @@ impl Transform { Transform::Month => todo!(), Transform::Day => todo!(), Transform::Hour => todo!(), - Transform::Truncate(_) => todo!(), + Transform::Truncate(_) => match predicate { + BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( + expr.op(), + Reference::new(name), + ))), + BoundPredicate::Binary(expr) => { + let op = expr.op(); + let primitive = expr.literal().literal(); + + match primitive { + PrimitiveLiteral::Int(v) => { + self.apply_transform_boundary(name, v, op, &func)? + } + PrimitiveLiteral::Long(v) => { + self.apply_transform_boundary(name, v, op, &func)? + } + PrimitiveLiteral::Decimal(v) => { + self.apply_transform_boundary(name, v, op, &func)?
+ } + PrimitiveLiteral::Fixed(v) => { + self.apply_transform_boundary(name, v, op, &func)? + } + _ => return Ok(None), + } + } + BoundPredicate::Set(expr) => { + if expr.op() != PredicateOperator::In { + return Ok(None); + } + + Some(Predicate::Set(SetExpression::new( + expr.op(), + Reference::new(name), + self.apply_transform_on_set(expr.literals(), &func)?, + ))) + } + _ => None, + }, Transform::Void => todo!(), Transform::Unknown => todo!(), }; Ok(projection) } + + /// Transform each literal value of `FnvHashSet<Datum>` + fn apply_transform_on_set( + &self, + literals: &FnvHashSet<Datum>, + func: &BoxedTransformFunction, + ) -> Result<FnvHashSet<Datum>> { + literals + .iter() + .map(|lit| { + func.transform_literal(lit).and_then(|d| { + d.ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + "Transformed datum must not be 'None'", + ) + }) + }) + }) + .collect() + } + + /// Apply truncate transform on `Datum` with new boundaries + /// and adjusted `PredicateOperator` + fn apply_transform_boundary<T: TransformBoundary>( + &self, + name: String, + value: &T, + op: PredicateOperator, + func: &BoxedTransformFunction, + ) -> Result<Option<Predicate>> { + match value.boundary(op)?
{ + None => Ok(None), + Some(boundary) => { + let literal = func.transform_literal(&boundary)?.ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + "Transformed datum must not be 'None'", + ) + })?; + + let new_op = match op { + PredicateOperator::LessThan => PredicateOperator::LessThanOrEq, + PredicateOperator::GreaterThan => PredicateOperator::GreaterThanOrEq, + _ => op, + }; + + Ok(Some(Predicate::Binary(BinaryExpression::new( + new_op, + Reference::new(name), + literal, + )))) + } + } + } } impl Display for Transform { @@ -450,8 +527,59 @@ impl<'de> Deserialize<'de> for Transform { } } +trait TransformBoundary { + fn boundary(&self, op: PredicateOperator) -> Result<Option<Datum>>; +} + +impl TransformBoundary for i32 { + fn boundary(&self, op: PredicateOperator) -> Result<Option<Datum>> { + match op { + PredicateOperator::LessThan => Ok(Some(Datum::int(self - 1))), + PredicateOperator::GreaterThan => Ok(Some(Datum::int(self + 1))), + PredicateOperator::Eq + | PredicateOperator::LessThanOrEq + | PredicateOperator::GreaterThanOrEq => Ok(Some(Datum::int(*self))), + _ => Ok(None), + } + } +} + +impl TransformBoundary for i64 { + fn boundary(&self, op: PredicateOperator) -> Result<Option<Datum>> { + match op { + PredicateOperator::LessThan => Ok(Some(Datum::long(self - 1))), + PredicateOperator::GreaterThan => Ok(Some(Datum::long(self + 1))), + PredicateOperator::Eq + | PredicateOperator::LessThanOrEq + | PredicateOperator::GreaterThanOrEq => Ok(Some(Datum::long(*self))), + _ => Ok(None), + } + } +} + +impl TransformBoundary for i128 { + fn boundary(&self, op: PredicateOperator) -> Result<Option<Datum>> { + match op { + PredicateOperator::LessThan => Ok(Some(Datum::decimal(self - 1)?)), + PredicateOperator::GreaterThan => Ok(Some(Datum::decimal(self + 1)?)), + PredicateOperator::Eq + | PredicateOperator::LessThanOrEq + | PredicateOperator::GreaterThanOrEq => Ok(Some(Datum::decimal(*self)?)), + _ => Ok(None), + } + } +} + +impl TransformBoundary for Vec<u8> { + fn boundary(&self, _op: PredicateOperator) -> Result<Option<Datum>> { +
Ok(Some(Datum::fixed(self.clone()))) + } +} + #[cfg(test)] mod tests { + use fnv::FnvHashSet; + use super::*; use std::sync::Arc; @@ -497,10 +625,7 @@ mod tests { } } - #[test] - fn test_identity_project() -> Result<()> { - let name = "projected_name".to_string(); - + fn create_predicates() -> (BoundPredicate, BoundPredicate, BoundPredicate) { let field = Arc::new(NestedField::required( 1, "a", @@ -522,89 +647,59 @@ mod tests { FnvHashSet::from_iter([Datum::int(5), Datum::int(6)]), )); - let transform = Transform::Identity; - - let result_unary = transform.project(name.clone(), &predicate_unary)?.unwrap(); - let result_binary = transform.project(name.clone(), &predicate_binary)?.unwrap(); - let result_set = transform.project(name.clone(), &predicate_set)?.unwrap(); - - assert_eq!(format!("{}", result_unary), "projected_name IS NULL"); - assert_eq!(format!("{}", result_binary), "projected_name = 5"); - assert_eq!(format!("{}", result_set), "projected_name IN (5, 6)"); - - Ok(()) + (predicate_unary, predicate_binary, predicate_set) } #[test] - fn test_bucket_project_set() -> Result<()> { + fn test_truncate_project() -> Result<()> { let name = "projected_name".to_string(); + let (unary, binary, set) = create_predicates(); - let field = NestedField::required(1, "a", Type::Primitive(PrimitiveType::Int)); - - let predicate = BoundPredicate::Set(SetExpression::new( - PredicateOperator::In, - BoundReference::new("original_name", Arc::new(field)), - FnvHashSet::from_iter([Datum::int(5), Datum::int(6)]), - )); - - let transform = Transform::Bucket(8); + let transform = Transform::Truncate(10); - let expected = Some(Predicate::Set(SetExpression::new( - PredicateOperator::In, - Reference::new(&name), - FnvHashSet::from_iter([Datum::int(7), Datum::int(1)]), - ))); - - let result = transform.project(name, &predicate)?; + let result_unary = transform.project(name.clone(), &unary)?.unwrap(); + let result_binary = transform.project(name.clone(), &binary)?.unwrap(); + let result_set 
= transform.project(name.clone(), &set)?.unwrap(); - assert_eq!(result, expected); + assert_eq!(format!("{}", result_unary), "projected_name IS NULL"); + assert_eq!(format!("{}", result_binary), "projected_name = 0"); + assert_eq!(format!("{}", result_set), "projected_name IN (0)"); Ok(()) } #[test] - fn test_bucket_project_binary() -> Result<()> { + fn test_identity_project() -> Result<()> { let name = "projected_name".to_string(); + let (unary, binary, set) = create_predicates(); - let field = NestedField::required(1, "a", Type::Primitive(PrimitiveType::Int)); - - let predicate = BoundPredicate::Binary(BinaryExpression::new( - PredicateOperator::Eq, - BoundReference::new("original_name", Arc::new(field)), - Datum::int(5), - )); - - let transform = Transform::Bucket(8); - - let expected = Some(Predicate::Binary(BinaryExpression::new( - PredicateOperator::Eq, - Reference::new(&name), - Datum::int(7), - ))); + let transform = Transform::Identity; - let result = transform.project(name, &predicate)?; + let result_unary = transform.project(name.clone(), &unary)?.unwrap(); + let result_binary = transform.project(name.clone(), &binary)?.unwrap(); + let result_set = transform.project(name.clone(), &set)?.unwrap(); - assert_eq!(result, expected); + assert_eq!(format!("{}", result_unary), "projected_name IS NULL"); + assert_eq!(format!("{}", result_binary), "projected_name = 5"); + assert_eq!(format!("{}", result_set), "projected_name IN (5, 6)"); Ok(()) } #[test] - fn test_bucket_project_unary() -> Result<()> { + fn test_bucket_project() -> Result<()> { let name = "projected_name".to_string(); - - let field = NestedField::required(1, "a", Type::Primitive(PrimitiveType::Int)); - - let predicate = BoundPredicate::Unary(UnaryExpression::new( - PredicateOperator::IsNull, - BoundReference::new("original_name", Arc::new(field)), - )); + let (unary, binary, set) = create_predicates(); let transform = Transform::Bucket(8); - let result = transform.project(name, 
&predicate)?.unwrap(); + let result_unary = transform.project(name.clone(), &unary)?.unwrap(); + let result_binary = transform.project(name.clone(), &binary)?.unwrap(); + let result_set = transform.project(name.clone(), &set)?.unwrap(); - assert_eq!(format!("{}", result), "projected_name IS NULL"); + assert_eq!(format!("{}", result_unary), "projected_name IS NULL"); + assert_eq!(format!("{}", result_binary), "projected_name = 7"); + assert_eq!(format!("{}", result_set), "projected_name IN (1, 7)"); Ok(()) } diff --git a/crates/iceberg/src/transform/mod.rs b/crates/iceberg/src/transform/mod.rs index 7effdbec3..605808341 100644 --- a/crates/iceberg/src/transform/mod.rs +++ b/crates/iceberg/src/transform/mod.rs @@ -16,6 +16,7 @@ // under the License. //! Transform function used to compute partition values. + use crate::{ spec::{Datum, Transform}, Result, From fd79c14abc546996b4947a5a1f7ea798e573aed8 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 27 Mar 2024 19:55:24 +0100 Subject: [PATCH 06/46] fixed array boundary --- crates/iceberg/src/spec/transform.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 82a166a6b..ad8c757de 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -571,8 +571,16 @@ impl TransformBoundary for i128 { } impl TransformBoundary for Vec<u8> { - fn boundary(&self, _op: PredicateOperator) -> Result<Option<Datum>> { - Ok(Some(Datum::fixed(self.clone()))) + fn boundary(&self, op: PredicateOperator) -> Result<Option<Datum>> { + match op { + PredicateOperator::LessThan + | PredicateOperator::LessThanOrEq + | PredicateOperator::GreaterThan + | PredicateOperator::GreaterThanOrEq + | PredicateOperator::Eq + | PredicateOperator::StartsWith => Ok(Some(Datum::fixed(self.clone()))), + _ => Ok(None), + } } } From 7885483ab3a940c407750d01eb7e9fa374f032d2 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 27 Mar 2024
20:00:03 +0100 Subject: [PATCH 07/46] add project void --- crates/iceberg/src/spec/transform.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index ad8c757de..cee14a78a 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -371,7 +371,7 @@ impl Transform { } _ => None, }, - Transform::Void => todo!(), + Transform::Void => None, Transform::Unknown => todo!(), }; @@ -658,6 +658,24 @@ mod tests { (predicate_unary, predicate_binary, predicate_set) } + #[test] + fn test_void_project() -> Result<()> { + let name = "projected_name".to_string(); + let (unary, binary, set) = create_predicates(); + + let transform = Transform::Void; + + let result_unary = transform.project(name.clone(), &unary)?; + let result_binary = transform.project(name.clone(), &binary)?; + let result_set = transform.project(name.clone(), &set)?; + + assert!(result_unary.is_none()); + assert!(result_binary.is_none()); + assert!(result_set.is_none()); + + Ok(()) + } + #[test] fn test_truncate_project() -> Result<()> { let name = "projected_name".to_string(); From bb84d2b5976e41bd6552b6fe4fd622dc1077d510 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 27 Mar 2024 20:03:01 +0100 Subject: [PATCH 08/46] add project unknown --- crates/iceberg/src/spec/transform.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index cee14a78a..640cbb972 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -372,7 +372,7 @@ impl Transform { _ => None, }, Transform::Void => None, - Transform::Unknown => todo!(), + Transform::Unknown => None, }; Ok(projection) From a5dc6ef7f55151a62145019dcf819edbe2fc4854 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 27 Mar 2024 20:24:38 +0100 Subject: [PATCH 09/46] add docs + none 
projections --- crates/iceberg/src/spec/transform.rs | 112 +++++++++++++++------------ 1 file changed, 62 insertions(+), 50 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 640cbb972..a4ca295dd 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -329,10 +329,6 @@ impl Transform { ))), _ => None, }, - Transform::Year => todo!(), - Transform::Month => todo!(), - Transform::Day => todo!(), - Transform::Hour => todo!(), Transform::Truncate(_) => match predicate { BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( expr.op(), @@ -371,8 +367,7 @@ impl Transform { } _ => None, }, - Transform::Void => None, - Transform::Unknown => None, + _ => None, }; Ok(projection) @@ -527,6 +522,13 @@ impl<'de> Deserialize<'de> for Transform { } } +/// `TransformBoundary` is a trait designed to provide boundary values +/// based on a given predicate operator. +/// +/// Implementations of this trait return a modified version +/// of the implementing value that represents a boundary condition. +/// This is useful for operations that need to adjust values +/// based on comparison predicates like `<`, `>`, `<=`, `>=`, and `==`. 
trait TransformBoundary { fn boundary(&self, op: PredicateOperator) -> Result>; } @@ -610,6 +612,39 @@ mod tests { trans_types: Vec<(Type, Option)>, } + struct TestPredicates { + unary: BoundPredicate, + binary: BoundPredicate, + set: BoundPredicate, + } + + impl TestPredicates { + fn new() -> Self { + let field = Arc::new(NestedField::required( + 1, + "a", + Type::Primitive(PrimitiveType::Int), + )); + + let unary = BoundPredicate::Unary(UnaryExpression::new( + PredicateOperator::IsNull, + BoundReference::new("original_name", field.clone()), + )); + let binary = BoundPredicate::Binary(BinaryExpression::new( + PredicateOperator::Eq, + BoundReference::new("original_name", field.clone()), + Datum::int(5), + )); + let set = BoundPredicate::Set(SetExpression::new( + PredicateOperator::In, + BoundReference::new("original_name", field.clone()), + FnvHashSet::from_iter([Datum::int(5), Datum::int(6)]), + )); + + TestPredicates { unary, binary, set } + } + } + fn check_transform(trans: Transform, param: TestParameter) { assert_eq!(param.display, format!("{trans}")); assert_eq!(param.json, serde_json::to_string(&trans).unwrap()); @@ -633,44 +668,21 @@ mod tests { } } - fn create_predicates() -> (BoundPredicate, BoundPredicate, BoundPredicate) { - let field = Arc::new(NestedField::required( - 1, - "a", - Type::Primitive(PrimitiveType::Int), - )); - - let predicate_unary = BoundPredicate::Unary(UnaryExpression::new( - PredicateOperator::IsNull, - BoundReference::new("original_name", field.clone()), - )); - let predicate_binary = BoundPredicate::Binary(BinaryExpression::new( - PredicateOperator::Eq, - BoundReference::new("original_name", field.clone()), - Datum::int(5), - )); - let predicate_set = BoundPredicate::Set(SetExpression::new( - PredicateOperator::In, - BoundReference::new("original_name", field.clone()), - FnvHashSet::from_iter([Datum::int(5), Datum::int(6)]), - )); - - (predicate_unary, predicate_binary, predicate_set) - } - #[test] - fn test_void_project() -> 
Result<()> { + fn test_none_projection() -> Result<()> { let name = "projected_name".to_string(); - let (unary, binary, set) = create_predicates(); + let preds = TestPredicates::new(); let transform = Transform::Void; - - let result_unary = transform.project(name.clone(), &unary)?; - let result_binary = transform.project(name.clone(), &binary)?; - let result_set = transform.project(name.clone(), &set)?; - + let result_unary = transform.project(name.clone(), &preds.unary)?; assert!(result_unary.is_none()); + + let transform = Transform::Year; + let result_binary = transform.project(name.clone(), &preds.binary)?; assert!(result_binary.is_none()); + + let transform = Transform::Month; + let result_set = transform.project(name.clone(), &preds.set)?; assert!(result_set.is_none()); Ok(()) @@ -679,13 +691,13 @@ mod tests { #[test] fn test_truncate_project() -> Result<()> { let name = "projected_name".to_string(); - let (unary, binary, set) = create_predicates(); + let preds = TestPredicates::new(); let transform = Transform::Truncate(10); - let result_unary = transform.project(name.clone(), &unary)?.unwrap(); - let result_binary = transform.project(name.clone(), &binary)?.unwrap(); - let result_set = transform.project(name.clone(), &set)?.unwrap(); + let result_unary = transform.project(name.clone(), &preds.unary)?.unwrap(); + let result_binary = transform.project(name.clone(), &preds.binary)?.unwrap(); + let result_set = transform.project(name.clone(), &preds.set)?.unwrap(); assert_eq!(format!("{}", result_unary), "projected_name IS NULL"); assert_eq!(format!("{}", result_binary), "projected_name = 0"); @@ -697,13 +709,13 @@ mod tests { #[test] fn test_identity_project() -> Result<()> { let name = "projected_name".to_string(); - let (unary, binary, set) = create_predicates(); + let preds = TestPredicates::new(); let transform = Transform::Identity; - let result_unary = transform.project(name.clone(), &unary)?.unwrap(); - let result_binary = 
transform.project(name.clone(), &binary)?.unwrap(); - let result_set = transform.project(name.clone(), &set)?.unwrap(); + let result_unary = transform.project(name.clone(), &preds.unary)?.unwrap(); + let result_binary = transform.project(name.clone(), &preds.binary)?.unwrap(); + let result_set = transform.project(name.clone(), &preds.set)?.unwrap(); assert_eq!(format!("{}", result_unary), "projected_name IS NULL"); assert_eq!(format!("{}", result_binary), "projected_name = 5"); @@ -715,13 +727,13 @@ mod tests { #[test] fn test_bucket_project() -> Result<()> { let name = "projected_name".to_string(); - let (unary, binary, set) = create_predicates(); + let preds = TestPredicates::new(); let transform = Transform::Bucket(8); - let result_unary = transform.project(name.clone(), &unary)?.unwrap(); - let result_binary = transform.project(name.clone(), &binary)?.unwrap(); - let result_set = transform.project(name.clone(), &set)?.unwrap(); + let result_unary = transform.project(name.clone(), &preds.unary)?.unwrap(); + let result_binary = transform.project(name.clone(), &preds.binary)?.unwrap(); + let result_set = transform.project(name.clone(), &preds.set)?.unwrap(); assert_eq!(format!("{}", result_unary), "projected_name IS NULL"); assert_eq!(format!("{}", result_binary), "projected_name = 7"); From bba3629a51cd82808fb17735ba7f410d40d6342d Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 27 Mar 2024 20:33:23 +0100 Subject: [PATCH 10/46] docs --- crates/iceberg/src/spec/transform.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index a4ca295dd..8a8008573 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -271,7 +271,11 @@ impl Transform { } } - /// Projects predicate based on `Transform` + /// Projects a given predicate according to the transformation + /// specified by the `Transform` instance. 
+ /// This allows predicates to be effectively applied to data + /// that has undergone transformation, enabling efficient querying + /// and filtering based on the original, untransformed data. pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result> { let func = create_transform_function(self)?; From 066a69c33a3b7290fe3bb4a5b3298099bcd0e557 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Wed, 27 Mar 2024 20:37:18 +0100 Subject: [PATCH 11/46] docs --- crates/iceberg/src/spec/transform.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 8a8008573..1c36f379e 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -273,9 +273,16 @@ impl Transform { /// Projects a given predicate according to the transformation /// specified by the `Transform` instance. + /// /// This allows predicates to be effectively applied to data /// that has undergone transformation, enabling efficient querying /// and filtering based on the original, untransformed data. 
+ /// + /// # Example + /// Suppose, we have row filter `a = 10`, and a partition spec + /// `bucket(a, 37) as bs`, if one row matches `a = 10`, then its partition + /// value should match `bucket(10, 37) as bs`, and we project `a = 10` to + /// `bs = bucket(10, 37)` pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result> { let func = create_transform_function(self)?; From ac86baaca9524c52cb3fd75f7b724534b0204bb0 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 12:35:03 +0100 Subject: [PATCH 12/46] remove trait + impl boundary on Datum --- crates/iceberg/src/spec/transform.rs | 146 ++++++++++----------------- crates/iceberg/src/spec/values.rs | 42 ++++++++ 2 files changed, 97 insertions(+), 91 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 1c36f379e..e2d0909a4 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -23,7 +23,6 @@ use crate::expr::{ UnaryExpression, }; use crate::spec::datatypes::{PrimitiveType, Type}; -use crate::spec::PrimitiveLiteral; use crate::transform::{create_transform_function, BoxedTransformFunction}; use crate::ErrorKind; use fnv::FnvHashSet; @@ -347,23 +346,31 @@ impl Transform { ))), BoundPredicate::Binary(expr) => { let op = expr.op(); - let primitive = expr.literal().literal(); - - match primitive { - PrimitiveLiteral::Int(v) => { - self.apply_transform_boundary(name, v, op, &func)? - } - PrimitiveLiteral::Long(v) => { - self.apply_transform_boundary(name, v, op, &func)? - } - PrimitiveLiteral::Decimal(v) => { - self.apply_transform_boundary(name, v, op, &func)? - } - PrimitiveLiteral::Fixed(v) => { - self.apply_transform_boundary(name, v, op, &func)? - } - _ => return Ok(None), + let datum = expr.literal(); + self.apply_transform_boundary(name, datum, &op, &func)? 
+ } + BoundPredicate::Set(expr) => { + if expr.op() != PredicateOperator::In { + return Ok(None); } + + Some(Predicate::Set(SetExpression::new( + expr.op(), + Reference::new(name), + self.apply_transform_on_set(expr.literals(), &func)?, + ))) + } + _ => None, + }, + Transform::Year | Transform::Month | Transform::Day => match predicate { + BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( + expr.op(), + Reference::new(name), + ))), + BoundPredicate::Binary(expr) => { + let op = expr.op(); + let datum = expr.literal(); + self.apply_transform_boundary(name, datum, &op, &func)? } BoundPredicate::Set(expr) => { if expr.op() != PredicateOperator::In { @@ -407,14 +414,14 @@ impl Transform { /// Apply truncate transform on `Datum` with new boundaries /// and adjusted `PredicateOperator` - fn apply_transform_boundary( + fn apply_transform_boundary( &self, name: String, - value: &T, - op: PredicateOperator, + datum: &Datum, + op: &PredicateOperator, func: &BoxedTransformFunction, ) -> Result> { - match value.boundary(op)? { + match datum.boundary(op)? { None => Ok(None), Some(boundary) => { let literal = func.transform_literal(&boundary)?.ok_or_else(|| { @@ -427,7 +434,7 @@ impl Transform { let new_op = match op { PredicateOperator::LessThan => PredicateOperator::LessThanOrEq, PredicateOperator::GreaterThan => PredicateOperator::GreaterThanOrEq, - _ => op, + _ => *op, }; Ok(Some(Predicate::Binary(BinaryExpression::new( @@ -533,70 +540,6 @@ impl<'de> Deserialize<'de> for Transform { } } -/// `TransformBoundary` is a trait designed to provide boundary values -/// based on a given predicate operator. -/// -/// Implementations of this trait return a modified version -/// of the implementing value that represents a boundary condition. -/// This is useful for operations that need to adjust values -/// based on comparison predicates like `<`, `>`, `<=`, `>=`, and `==`. 
-trait TransformBoundary { - fn boundary(&self, op: PredicateOperator) -> Result>; -} - -impl TransformBoundary for i32 { - fn boundary(&self, op: PredicateOperator) -> Result> { - match op { - PredicateOperator::LessThan => Ok(Some(Datum::int(self - 1))), - PredicateOperator::GreaterThan => Ok(Some(Datum::int(self + 1))), - PredicateOperator::Eq - | PredicateOperator::LessThanOrEq - | PredicateOperator::GreaterThanOrEq => Ok(Some(Datum::int(*self))), - _ => Ok(None), - } - } -} - -impl TransformBoundary for i64 { - fn boundary(&self, op: PredicateOperator) -> Result> { - match op { - PredicateOperator::LessThan => Ok(Some(Datum::long(self - 1))), - PredicateOperator::GreaterThan => Ok(Some(Datum::long(self + 1))), - PredicateOperator::Eq - | PredicateOperator::LessThanOrEq - | PredicateOperator::GreaterThanOrEq => Ok(Some(Datum::long(*self))), - _ => Ok(None), - } - } -} - -impl TransformBoundary for i128 { - fn boundary(&self, op: PredicateOperator) -> Result> { - match op { - PredicateOperator::LessThan => Ok(Some(Datum::decimal(self - 1)?)), - PredicateOperator::GreaterThan => Ok(Some(Datum::decimal(self + 1)?)), - PredicateOperator::Eq - | PredicateOperator::LessThanOrEq - | PredicateOperator::GreaterThanOrEq => Ok(Some(Datum::decimal(*self)?)), - _ => Ok(None), - } - } -} - -impl TransformBoundary for Vec { - fn boundary(&self, op: PredicateOperator) -> Result> { - match op { - PredicateOperator::LessThan - | PredicateOperator::LessThanOrEq - | PredicateOperator::GreaterThan - | PredicateOperator::GreaterThanOrEq - | PredicateOperator::Eq - | PredicateOperator::StartsWith => Ok(Some(Datum::fixed(self.clone()))), - _ => Ok(None), - } - } -} - #[cfg(test)] mod tests { use fnv::FnvHashSet; @@ -679,6 +622,31 @@ mod tests { } } + #[test] + fn test_projection_dates_year() -> Result<()> { + let name = "projected_name".to_string(); + + let field = Arc::new(NestedField::required( + 1, + "date", + Type::Primitive(PrimitiveType::Date), + )); + + let predicate = 
BoundPredicate::Binary(BinaryExpression::new( + PredicateOperator::LessThan, + BoundReference::new("date", field), + Datum::date_from_str("1971-01-01".to_string())?, + )); + + let transform = Transform::Year; + + let result = transform.project(name.clone(), &predicate)?.unwrap(); + + assert_eq!(format!("{}", result), "projected_name <= 0"); + + Ok(()) + } + #[test] fn test_none_projection() -> Result<()> { let name = "projected_name".to_string(); @@ -688,14 +656,10 @@ mod tests { let result_unary = transform.project(name.clone(), &preds.unary)?; assert!(result_unary.is_none()); - let transform = Transform::Year; + let transform = Transform::Unknown; let result_binary = transform.project(name.clone(), &preds.binary)?; assert!(result_binary.is_none()); - let transform = Transform::Month; - let result_set = transform.project(name.clone(), &preds.set)?; - assert!(result_set.is_none()); - Ok(()) } diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index 6f62f2902..8775402ba 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -34,6 +34,7 @@ use uuid::Uuid; pub use _serde::RawLiteral; use crate::error::Result; +use crate::expr::PredicateOperator; use crate::spec::values::date::{date_from_naive_date, days_to_date, unix_epoch}; use crate::spec::values::time::microseconds_to_time; use crate::spec::values::timestamp::microseconds_to_datetime; @@ -683,6 +684,47 @@ impl Datum { pub fn data_type(&self) -> &PrimitiveType { &self.r#type } + + /// Create a new `Datum` with adjusted boundary for projection + pub fn boundary(&self, op: &PredicateOperator) -> Result> { + let literal = self.literal(); + + let adj_datum = match op { + PredicateOperator::LessThan => match literal { + PrimitiveLiteral::Int(v) => Some(Datum::int(v - 1)), + PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)), + PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?), + PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + 
PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)), + _ => None, + }, + PredicateOperator::GreaterThan => match literal { + PrimitiveLiteral::Int(v) => Some(Datum::int(v + 1)), + PrimitiveLiteral::Long(v) => Some(Datum::long(v + 1)), + PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?), + PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)), + _ => None, + }, + PredicateOperator::Eq + | PredicateOperator::LessThanOrEq + | PredicateOperator::GreaterThanOrEq => match literal { + PrimitiveLiteral::Int(v) => Some(Datum::int(*v)), + PrimitiveLiteral::Long(v) => Some(Datum::long(*v)), + PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(*v)?), + PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + PrimitiveLiteral::Date(v) => Some(Datum::date(*v)), + _ => None, + }, + PredicateOperator::StartsWith => match literal { + PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + _ => None, + }, + _ => None, + }; + + Ok(adj_datum) + } } /// Values present in iceberg type From 4f113b6070d3f73dc6bcd3df10c9545309b1a456 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 12:36:08 +0100 Subject: [PATCH 13/46] fix: clippy --- crates/iceberg/src/spec/transform.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index e2d0909a4..b2c93697b 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -635,7 +635,7 @@ mod tests { let predicate = BoundPredicate::Binary(BinaryExpression::new( PredicateOperator::LessThan, BoundReference::new("date", field), - Datum::date_from_str("1971-01-01".to_string())?, + Datum::date_from_str("1971-01-01")?, )); let transform = Transform::Year; From 3f99f38dd4d491c6e850afe1dd4c41ea300345df Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 12:46:20 +0100 Subject: [PATCH 14/46] fix: test 
Transform::Unknown --- crates/iceberg/src/spec/transform.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index b2c93697b..f0f430b99 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -648,7 +648,7 @@ mod tests { } #[test] - fn test_none_projection() -> Result<()> { + fn test_void_projection() -> Result<()> { let name = "projected_name".to_string(); let preds = TestPredicates::new(); @@ -656,10 +656,6 @@ mod tests { let result_unary = transform.project(name.clone(), &preds.unary)?; assert!(result_unary.is_none()); - let transform = Transform::Unknown; - let result_binary = transform.project(name.clone(), &preds.binary)?; - assert!(result_binary.is_none()); - Ok(()) } From 32aef76b585740445df20d3e86fb7758dedd64f2 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 13:47:10 +0100 Subject: [PATCH 15/46] add: transform_literal_result --- crates/iceberg/src/spec/transform.rs | 29 +++------------------------- crates/iceberg/src/transform/mod.rs | 11 ++++++++++- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index f0f430b99..6e9c87780 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -296,17 +296,10 @@ impl Transform { return Ok(None); } - let new_datum = func.transform_literal(expr.literal())?.ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - "Transformed datum must not be 'None'", - ) - })?; - Some(Predicate::Binary(BinaryExpression::new( expr.op(), Reference::new(name), - new_datum, + func.transform_literal_result(expr.literal())?, ))) } BoundPredicate::Set(expr) => { @@ -399,16 +392,7 @@ impl Transform { ) -> Result> { literals .iter() - .map(|lit| { - func.transform_literal(lit).and_then(|d| { - d.ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - 
"Transformed datum must not be 'None'", - ) - }) - }) - }) + .map(|d| func.transform_literal_result(d)) .collect() } @@ -424,13 +408,6 @@ impl Transform { match datum.boundary(op)? { None => Ok(None), Some(boundary) => { - let literal = func.transform_literal(&boundary)?.ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - "Transformed datum must not be 'None'", - ) - })?; - let new_op = match op { PredicateOperator::LessThan => PredicateOperator::LessThanOrEq, PredicateOperator::GreaterThan => PredicateOperator::GreaterThanOrEq, @@ -440,7 +417,7 @@ impl Transform { Ok(Some(Predicate::Binary(BinaryExpression::new( new_op, Reference::new(name), - literal, + func.transform_literal_result(&boundary)?, )))) } } diff --git a/crates/iceberg/src/transform/mod.rs b/crates/iceberg/src/transform/mod.rs index 605808341..e6270f2dd 100644 --- a/crates/iceberg/src/transform/mod.rs +++ b/crates/iceberg/src/transform/mod.rs @@ -19,7 +19,7 @@ use crate::{ spec::{Datum, Transform}, - Result, + Error, ErrorKind, Result, }; use arrow_array::ArrayRef; @@ -37,6 +37,15 @@ pub trait TransformFunction: Send { fn transform(&self, input: ArrayRef) -> Result; /// transform_literal will take an input literal and transform it into a new literal. fn transform_literal(&self, input: &Datum) -> Result>; + /// wrapper + fn transform_literal_result(&self, input: &Datum) -> Result { + self.transform_literal(input)?.ok_or_else(|| { + Error::new( + ErrorKind::Unexpected, + format!("Error. Returns 'None' for literal {}", input), + ) + }) + } } /// BoxedTransformFunction is a boxed trait object of TransformFunction. 
From 736bb91990a232eb75a59c5e7ea4f7f4979d21d9 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 13:47:10 +0100 Subject: [PATCH 16/46] add: transform_literal_result --- crates/iceberg/src/transform/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/transform/mod.rs b/crates/iceberg/src/transform/mod.rs index e6270f2dd..0528232ae 100644 --- a/crates/iceberg/src/transform/mod.rs +++ b/crates/iceberg/src/transform/mod.rs @@ -42,7 +42,7 @@ pub trait TransformFunction: Send { self.transform_literal(input)?.ok_or_else(|| { Error::new( ErrorKind::Unexpected, - format!("Error. Returns 'None' for literal {}", input), + format!("Returns 'None' for literal {}", input), ) }) } From d476993f548e48d00e6ca5ebff94d220dc948ce7 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 13:53:29 +0100 Subject: [PATCH 17/46] remove: whitespace --- crates/iceberg/src/transform/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/iceberg/src/transform/mod.rs b/crates/iceberg/src/transform/mod.rs index 0528232ae..3b5d16448 100644 --- a/crates/iceberg/src/transform/mod.rs +++ b/crates/iceberg/src/transform/mod.rs @@ -16,7 +16,6 @@ // under the License. //! Transform function used to compute partition values. 
- use crate::{ spec::{Datum, Transform}, Error, ErrorKind, Result, From 9385084529b19cae4cb07a676e5c11cd8edc7201 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 14:11:43 +0100 Subject: [PATCH 18/46] move `boundary` to transform.rs --- crates/iceberg/src/spec/transform.rs | 49 ++++++++++++++++++++++++++-- crates/iceberg/src/spec/values.rs | 42 ------------------------ 2 files changed, 47 insertions(+), 44 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 6e9c87780..3a575121d 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -30,7 +30,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt::{Display, Formatter}; use std::str::FromStr; -use super::Datum; +use super::{Datum, PrimitiveLiteral}; /// Transform is used to transform predicates to partition predicates, /// in addition to transforming data values. @@ -405,7 +405,9 @@ impl Transform { op: &PredicateOperator, func: &BoxedTransformFunction, ) -> Result> { - match datum.boundary(op)? 
{ + let boundary = self.datum_with_boundary(op, datum)?; + + match boundary { None => Ok(None), Some(boundary) => { let new_op = match op { @@ -422,6 +424,47 @@ impl Transform { } } } + + /// Create a new `Datum` with adjusted boundary for projection + fn datum_with_boundary(&self, op: &PredicateOperator, datum: &Datum) -> Result> { + let literal = datum.literal(); + + let adj_datum = match op { + PredicateOperator::LessThan => match literal { + PrimitiveLiteral::Int(v) => Some(Datum::int(v - 1)), + PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)), + PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?), + PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)), + _ => None, + }, + PredicateOperator::GreaterThan => match literal { + PrimitiveLiteral::Int(v) => Some(Datum::int(v + 1)), + PrimitiveLiteral::Long(v) => Some(Datum::long(v + 1)), + PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?), + PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)), + _ => None, + }, + PredicateOperator::Eq + | PredicateOperator::LessThanOrEq + | PredicateOperator::GreaterThanOrEq => match literal { + PrimitiveLiteral::Int(v) => Some(Datum::int(*v)), + PrimitiveLiteral::Long(v) => Some(Datum::long(*v)), + PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(*v)?), + PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + PrimitiveLiteral::Date(v) => Some(Datum::date(*v)), + _ => None, + }, + PredicateOperator::StartsWith => match literal { + PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + _ => None, + }, + _ => None, + }; + + Ok(adj_datum) + } } impl Display for Transform { @@ -517,6 +560,8 @@ impl<'de> Deserialize<'de> for Transform { } } +impl Datum {} + #[cfg(test)] mod tests { use fnv::FnvHashSet; diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index 8775402ba..6f62f2902 100644 
--- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -34,7 +34,6 @@ use uuid::Uuid; pub use _serde::RawLiteral; use crate::error::Result; -use crate::expr::PredicateOperator; use crate::spec::values::date::{date_from_naive_date, days_to_date, unix_epoch}; use crate::spec::values::time::microseconds_to_time; use crate::spec::values::timestamp::microseconds_to_datetime; @@ -684,47 +683,6 @@ impl Datum { pub fn data_type(&self) -> &PrimitiveType { &self.r#type } - - /// Create a new `Datum` with adjusted boundary for projection - pub fn boundary(&self, op: &PredicateOperator) -> Result> { - let literal = self.literal(); - - let adj_datum = match op { - PredicateOperator::LessThan => match literal { - PrimitiveLiteral::Int(v) => Some(Datum::int(v - 1)), - PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)), - PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?), - PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), - PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)), - _ => None, - }, - PredicateOperator::GreaterThan => match literal { - PrimitiveLiteral::Int(v) => Some(Datum::int(v + 1)), - PrimitiveLiteral::Long(v) => Some(Datum::long(v + 1)), - PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?), - PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), - PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)), - _ => None, - }, - PredicateOperator::Eq - | PredicateOperator::LessThanOrEq - | PredicateOperator::GreaterThanOrEq => match literal { - PrimitiveLiteral::Int(v) => Some(Datum::int(*v)), - PrimitiveLiteral::Long(v) => Some(Datum::long(*v)), - PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(*v)?), - PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), - PrimitiveLiteral::Date(v) => Some(Datum::date(*v)), - _ => None, - }, - PredicateOperator::StartsWith => match literal { - PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), - _ => None, - }, - _ => None, - }; - - 
Ok(adj_datum) - } } /// Values present in iceberg type From 9738416002646e0a916473f8edb0a72389ee22d5 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 15:19:40 +0100 Subject: [PATCH 19/46] add check if transform can be applied to data_type --- crates/iceberg/src/spec/transform.rs | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 3a575121d..56378f67a 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -292,7 +292,9 @@ impl Transform { Reference::new(name), ))), BoundPredicate::Binary(expr) => { - if expr.op() != PredicateOperator::Eq { + if expr.op() != PredicateOperator::Eq + || self.can_transform(expr.literal()).is_err() + { return Ok(None); } @@ -338,6 +340,10 @@ impl Transform { Reference::new(name), ))), BoundPredicate::Binary(expr) => { + if self.can_transform(expr.literal()).is_err() { + return Ok(None); + } + let op = expr.op(); let datum = expr.literal(); self.apply_transform_boundary(name, datum, &op, &func)? 
@@ -384,6 +390,14 @@ impl Transform { Ok(projection) } + /// Check if `Transform` is applicable on datum's `PrimitiveType` + fn can_transform(&self, datum: &Datum) -> Result<()> { + let input_type = datum.data_type().clone(); + self.result_type(&Type::Primitive(input_type))?; + + Ok(()) + } + /// Transform each literal value of `FnvHashSet` fn apply_transform_on_set( &self, @@ -392,8 +406,11 @@ impl Transform { ) -> Result> { literals .iter() - .map(|d| func.transform_literal_result(d)) - .collect() + .try_fold(FnvHashSet::default(), |mut acc, d| { + self.can_transform(d)?; + acc.insert(func.transform_literal_result(d)?); + Ok(acc) + }) } /// Apply truncate transform on `Datum` with new boundaries From 912b195644f8ec9f967917f4f1527fa54f76b087 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 15:22:36 +0100 Subject: [PATCH 20/46] add check --- crates/iceberg/src/spec/transform.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 56378f67a..ba54d45a9 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -367,6 +367,10 @@ impl Transform { Reference::new(name), ))), BoundPredicate::Binary(expr) => { + if self.can_transform(expr.literal()).is_err() { + return Ok(None); + } + let op = expr.op(); let datum = expr.literal(); self.apply_transform_boundary(name, datum, &op, &func)? 
From 22c3f9686fac70548924933d5058c9420504c414 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 22:08:39 +0100 Subject: [PATCH 21/46] add: java-testsuite Transform::Bucket --- crates/iceberg/src/spec/transform.rs | 559 ++++++++++++++++++++++----- 1 file changed, 467 insertions(+), 92 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index ba54d45a9..550d528df 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -585,12 +585,11 @@ impl Datum {} #[cfg(test)] mod tests { - use fnv::FnvHashSet; - use super::*; + use std::collections::HashSet; use std::sync::Arc; - use crate::expr::{BoundPredicate, BoundReference, PredicateOperator, UnaryExpression}; + use crate::expr::{BoundPredicate, BoundReference, PredicateOperator}; use crate::spec::datatypes::PrimitiveType::{ Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, Timestamptz, Uuid, @@ -598,7 +597,7 @@ mod tests { use crate::spec::datatypes::Type::{Primitive, Struct}; use crate::spec::datatypes::{NestedField, StructType, Type}; use crate::spec::transform::Transform; - use crate::spec::{Datum, PrimitiveType}; + use crate::spec::{Datum, NestedFieldRef, PrimitiveType}; struct TestParameter { display: String, @@ -609,37 +608,62 @@ mod tests { trans_types: Vec<(Type, Option)>, } - struct TestPredicates { - unary: BoundPredicate, - binary: BoundPredicate, - set: BoundPredicate, + struct TestProjectionParameter { + transform: Transform, + name: String, + field: NestedFieldRef, } - impl TestPredicates { - fn new() -> Self { - let field = Arc::new(NestedField::required( - 1, - "a", - Type::Primitive(PrimitiveType::Int), - )); - - let unary = BoundPredicate::Unary(UnaryExpression::new( - PredicateOperator::IsNull, - BoundReference::new("original_name", field.clone()), - )); - let binary = BoundPredicate::Binary(BinaryExpression::new( - PredicateOperator::Eq, - 
BoundReference::new("original_name", field.clone()), - Datum::int(5), - )); - let set = BoundPredicate::Set(SetExpression::new( - PredicateOperator::In, - BoundReference::new("original_name", field.clone()), - FnvHashSet::from_iter([Datum::int(5), Datum::int(6)]), - )); + impl TestProjectionParameter { + fn new(transform: Transform, name: impl Into, field: NestedField) -> Self { + TestProjectionParameter { + transform, + name: name.into(), + field: Arc::new(field), + } + } + fn name(&self) -> String { + self.name.clone() + } + fn field(&self) -> NestedFieldRef { + self.field.clone() + } + fn project(&self, predicate: &BoundPredicate) -> Result> { + self.transform.project(self.name(), predicate) + } + fn _unary_predicate(&self, op: PredicateOperator) -> BoundPredicate { + BoundPredicate::Unary(UnaryExpression::new( + op, + BoundReference::new(self.name(), self.field()), + )) + } + fn binary_predicate(&self, op: PredicateOperator, literal: Datum) -> BoundPredicate { + BoundPredicate::Binary(BinaryExpression::new( + op, + BoundReference::new(self.name(), self.field()), + literal, + )) + } + fn set_predicate(&self, op: PredicateOperator, literals: Vec) -> BoundPredicate { + BoundPredicate::Set(SetExpression::new( + op, + BoundReference::new(self.name(), self.field()), + HashSet::from_iter(literals), + )) + } + } - TestPredicates { unary, binary, set } + fn assert_projection( + predicate: &BoundPredicate, + parameter: &TestProjectionParameter, + expected: Option<&str>, + ) -> Result<()> { + let result = parameter.project(predicate)?; + match expected { + Some(exp) => assert_eq!(format!("{}", result.unwrap()), exp), + None => assert!(result.is_none()), } + Ok(()) } fn check_transform(trans: Transform, param: TestParameter) { @@ -666,92 +690,443 @@ mod tests { } #[test] - fn test_projection_dates_year() -> Result<()> { - let name = "projected_name".to_string(); - - let field = Arc::new(NestedField::required( - 1, - "date", - Type::Primitive(PrimitiveType::Date), - )); - - 
let predicate = BoundPredicate::Binary(BinaryExpression::new( - PredicateOperator::LessThan, - BoundReference::new("date", field), - Datum::date_from_str("1971-01-01")?, - )); - - let transform = Transform::Year; - - let result = transform.project(name.clone(), &predicate)?.unwrap(); + fn test_projection_bucket_uuid() -> Result<()> { + let value = uuid::Uuid::from_u64_pair(123, 456); + let another_value = uuid::Uuid::from_u64_pair(456, 123); + let fixture = TestProjectionParameter::new( + Transform::Bucket(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Uuid)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::uuid(value.clone())), + &fixture, + Some("name = 4"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::uuid(value.clone())), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::uuid(value.clone())), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::uuid(value.clone())), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::uuid(value.clone())), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::uuid(value.clone()), + ), + &fixture, + None, + )?; - assert_eq!(format!("{}", result), "projected_name <= 0"); + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::uuid(value.clone()), + Datum::uuid(another_value.clone()), + ], + ), + &fixture, + Some("name IN (4, 6)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::uuid(value.clone()), + Datum::uuid(another_value.clone()), + ], + ), + &fixture, + None, + )?; Ok(()) } #[test] - fn test_void_projection() -> Result<()> { - let name = "projected_name".to_string(); - 
let preds = TestPredicates::new(); + fn test_projection_bucket_fixed() -> Result<()> { + let value = "abcdefg".as_bytes().to_vec(); + let another_value = "abcdehij".as_bytes().to_vec(); + let fixture = TestProjectionParameter::new( + Transform::Bucket(10), + "name", + NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Fixed(value.len() as u64)), + ), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::fixed(value.clone())), + &fixture, + Some("name = 4"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::fixed(value.clone())), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::fixed(value.clone())), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::fixed(value.clone())), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::fixed(value.clone())), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::fixed(value.clone()), + ), + &fixture, + None, + )?; - let transform = Transform::Void; - let result_unary = transform.project(name.clone(), &preds.unary)?; - assert!(result_unary.is_none()); + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::fixed(value.clone()), + Datum::fixed(another_value.clone()), + ], + ), + &fixture, + Some("name IN (4, 6)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::fixed(value.clone()), + Datum::fixed(another_value.clone()), + ], + ), + &fixture, + None, + )?; Ok(()) } #[test] - fn test_truncate_project() -> Result<()> { - let name = "projected_name".to_string(); - let preds = TestPredicates::new(); - - let transform = Transform::Truncate(10); - - let result_unary = transform.project(name.clone(), 
&preds.unary)?.unwrap(); - let result_binary = transform.project(name.clone(), &preds.binary)?.unwrap(); - let result_set = transform.project(name.clone(), &preds.set)?.unwrap(); - - assert_eq!(format!("{}", result_unary), "projected_name IS NULL"); - assert_eq!(format!("{}", result_binary), "projected_name = 0"); - assert_eq!(format!("{}", result_set), "projected_name IN (0)"); + fn test_projection_bucket_string() -> Result<()> { + let fixture = TestProjectionParameter::new( + Transform::Bucket(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::string("abcdefg")), + &fixture, + Some("name = 4"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::string("abcdefg")), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::string("abcdefg")), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::string("abcdefg")), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::string("abcdefg")), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::string("abcdefg")), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::string("abcdefg"), Datum::string("abcdefgabc")], + ), + &fixture, + Some("name IN (9, 4)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::string("abcdefg"), Datum::string("abcdefgabc")], + ), + &fixture, + None, + )?; Ok(()) } #[test] - fn test_identity_project() -> Result<()> { - let name = "projected_name".to_string(); - let preds = TestPredicates::new(); - - let transform = Transform::Identity; - - let result_unary = 
transform.project(name.clone(), &preds.unary)?.unwrap(); - let result_binary = transform.project(name.clone(), &preds.binary)?.unwrap(); - let result_set = transform.project(name.clone(), &preds.set)?.unwrap(); + fn test_projection_bucket_decimal() -> Result<()> { + let fixture = TestProjectionParameter::new( + Transform::Bucket(10), + "name", + NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Decimal { + precision: 9, + scale: 2, + }), + ), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str("100.00")?), + &fixture, + Some("name = 2"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str("100.00")?), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::decimal_from_str("100.00")?, + ), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::decimal_from_str("100.00")?, + ), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::decimal_from_str("100.00")?, + ), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::decimal_from_str("100.00")?, + ), + &fixture, + None, + )?; - assert_eq!(format!("{}", result_unary), "projected_name IS NULL"); - assert_eq!(format!("{}", result_binary), "projected_name = 5"); - assert_eq!(format!("{}", result_set), "projected_name IN (5, 6)"); + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str("101.00")?, + Datum::decimal_from_str("100.00")?, + Datum::decimal_from_str("99.00")?, + ], + ), + &fixture, + Some("name IN (6, 2)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str("100.00")?, + Datum::decimal_from_str("101.00")?, + 
], + ), + &fixture, + None, + )?; Ok(()) } #[test] - fn test_bucket_project() -> Result<()> { - let name = "projected_name".to_string(); - let preds = TestPredicates::new(); - - let transform = Transform::Bucket(8); + fn test_projection_bucket_long() -> Result<()> { + let fixture = TestProjectionParameter::new( + Transform::Bucket(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(100)), + &fixture, + Some("name = 6"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(100)), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(100)), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(100)), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::long(100)), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(100)), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::long(99), Datum::long(100), Datum::long(101)], + ), + &fixture, + Some("name IN (8, 7, 6)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(100), Datum::long(101)], + ), + &fixture, + None, + )?; - let result_unary = transform.project(name.clone(), &preds.unary)?.unwrap(); - let result_binary = transform.project(name.clone(), &preds.binary)?.unwrap(); - let result_set = transform.project(name.clone(), &preds.set)?.unwrap(); + Ok(()) + } - assert_eq!(format!("{}", result_unary), "projected_name IS NULL"); - assert_eq!(format!("{}", result_binary), "projected_name = 7"); - assert_eq!(format!("{}", result_set), "projected_name IN (1, 7)"); + #[test] + fn 
test_projection_bucket_integer() -> Result<()> { + let fixture = TestProjectionParameter::new( + Transform::Bucket(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(100)), + &fixture, + Some("name = 6"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(100)), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(100)), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(100)), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::int(100)), + &fixture, + None, + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(100)), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::int(99), Datum::int(100), Datum::int(101)], + ), + &fixture, + Some("name IN (8, 7, 6)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::int(100), Datum::int(101)], + ), + &fixture, + None, + )?; Ok(()) } From 44065a72997d4dfc857d6e91f2a7a107d41c0dcb Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 22:10:25 +0100 Subject: [PATCH 22/46] fix: clippy --- crates/iceberg/src/spec/transform.rs | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 550d528df..eb5565185 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -700,40 +700,37 @@ mod tests { ); assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::uuid(value.clone())), + &fixture.binary_predicate(PredicateOperator::Eq, 
Datum::uuid(value)), &fixture, Some("name = 4"), )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::uuid(value.clone())), + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::uuid(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::uuid(value.clone())), + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::uuid(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::uuid(value.clone())), + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::uuid(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::uuid(value.clone())), + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::uuid(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::uuid(value.clone()), - ), + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::uuid(value)), &fixture, None, )?; @@ -741,10 +738,7 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::In, - vec![ - Datum::uuid(value.clone()), - Datum::uuid(another_value.clone()), - ], + vec![Datum::uuid(value), Datum::uuid(another_value)], ), &fixture, Some("name IN (4, 6)"), @@ -753,10 +747,7 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::NotIn, - vec![ - Datum::uuid(value.clone()), - Datum::uuid(another_value.clone()), - ], + vec![Datum::uuid(value), Datum::uuid(another_value)], ), &fixture, None, From 27d5df831007a721169c2673831b1c977d95b5da Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Thu, 28 Mar 2024 22:33:12 +0100 Subject: [PATCH 23/46] add: timestamps to boundary --- crates/iceberg/src/spec/transform.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 
eb5565185..9dc10d8ba 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -457,6 +457,9 @@ impl Transform { PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?), PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)), + PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v - 1)?), + PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v - 1)), + PrimitiveLiteral::TimestampTZ(v) => Some(Datum::timestamptz_micros(v - 1)), _ => None, }, PredicateOperator::GreaterThan => match literal { @@ -465,6 +468,9 @@ impl Transform { PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?), PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)), + PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v + 1)?), + PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v + 1)), + PrimitiveLiteral::TimestampTZ(v) => Some(Datum::timestamptz_micros(v + 1)), _ => None, }, PredicateOperator::Eq @@ -475,6 +481,9 @@ impl Transform { PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(*v)?), PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), PrimitiveLiteral::Date(v) => Some(Datum::date(*v)), + PrimitiveLiteral::Time(v) => Some(Datum::time_micros(*v)?), + PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(*v)), + PrimitiveLiteral::TimestampTZ(v) => Some(Datum::timestamptz_micros(*v)), _ => None, }, PredicateOperator::StartsWith => match literal { From 7bd5747c06bd3b8ee9a9a74e0100344339065b59 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Fri, 29 Mar 2024 05:35:58 +0100 Subject: [PATCH 24/46] change: return bool from can_transform --- crates/iceberg/src/spec/transform.rs | 56 +++++++++++++++++++++------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 9dc10d8ba..f7c169398 100644 
--- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -285,6 +285,7 @@ impl Transform { pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result> { let func = create_transform_function(self)?; + // TODO: Refactor / flip match order / first predicate then self let projection = match self { Transform::Bucket(_) => match predicate { BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( @@ -292,9 +293,7 @@ impl Transform { Reference::new(name), ))), BoundPredicate::Binary(expr) => { - if expr.op() != PredicateOperator::Eq - || self.can_transform(expr.literal()).is_err() - { + if expr.op() != PredicateOperator::Eq || !self.can_transform(expr.literal()) { return Ok(None); } @@ -340,7 +339,7 @@ impl Transform { Reference::new(name), ))), BoundPredicate::Binary(expr) => { - if self.can_transform(expr.literal()).is_err() { + if !self.can_transform(expr.literal()) { return Ok(None); } @@ -367,7 +366,34 @@ impl Transform { Reference::new(name), ))), BoundPredicate::Binary(expr) => { - if self.can_transform(expr.literal()).is_err() { + if !self.can_transform(expr.literal()) { + return Ok(None); + } + + let op = expr.op(); + let datum = expr.literal(); + self.apply_transform_boundary(name, datum, &op, &func)? 
+ } + BoundPredicate::Set(expr) => { + if expr.op() != PredicateOperator::In { + return Ok(None); + } + + Some(Predicate::Set(SetExpression::new( + expr.op(), + Reference::new(name), + self.apply_transform_on_set(expr.literals(), &func)?, + ))) + } + _ => None, + }, + Transform::Hour => match predicate { + BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( + expr.op(), + Reference::new(name), + ))), + BoundPredicate::Binary(expr) => { + if !self.can_transform(expr.literal()) { return Ok(None); } @@ -395,11 +421,13 @@ impl Transform { } /// Check if `Transform` is applicable on datum's `PrimitiveType` - fn can_transform(&self, datum: &Datum) -> Result<()> { + fn can_transform(&self, datum: &Datum) -> bool { let input_type = datum.data_type().clone(); - self.result_type(&Type::Primitive(input_type))?; - - Ok(()) + if self.result_type(&Type::Primitive(input_type)).is_err() { + false + } else { + true + } } /// Transform each literal value of `FnvHashSet` @@ -408,13 +436,13 @@ impl Transform { literals: &FnvHashSet, func: &BoxedTransformFunction, ) -> Result> { - literals - .iter() - .try_fold(FnvHashSet::default(), |mut acc, d| { - self.can_transform(d)?; + literals.iter().filter(|d| self.can_transform(d)).try_fold( + FnvHashSet::default(), + |mut acc, d| { acc.insert(func.transform_literal_result(d)?); Ok(acc) - }) + }, + ) } /// Apply truncate transform on `Datum` with new boundaries From 5c66f963642055c5934eaf87ae69905e10d8864e Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Fri, 29 Mar 2024 05:36:50 +0100 Subject: [PATCH 25/46] fix: clippy --- crates/iceberg/src/spec/transform.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index f7c169398..bb3a6d2e1 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -423,11 +423,7 @@ impl Transform { /// Check if `Transform` is applicable on datum's 
`PrimitiveType` fn can_transform(&self, datum: &Datum) -> bool { let input_type = datum.data_type().clone(); - if self.result_type(&Type::Primitive(input_type)).is_err() { - false - } else { - true - } + self.result_type(&Type::Primitive(input_type)).is_err() } /// Transform each literal value of `FnvHashSet` From a8a7da608592a4d8c8544c990471cebb6177ecab Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Fri, 29 Mar 2024 10:22:02 +0100 Subject: [PATCH 26/46] refactor: fn project match structure --- crates/iceberg/src/spec/transform.rs | 138 ++++++++------------------- 1 file changed, 38 insertions(+), 100 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index bb3a6d2e1..07e10b185 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -285,124 +285,65 @@ impl Transform { pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result> { let func = create_transform_function(self)?; - // TODO: Refactor / flip match order / first predicate then self - let projection = match self { - Transform::Bucket(_) => match predicate { - BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( + let projection = match predicate { + BoundPredicate::Unary(expr) => match self { + Transform::Identity + | Transform::Bucket(_) + | Transform::Truncate(_) + | Transform::Year + | Transform::Month + | Transform::Day + | Transform::Hour => Some(Predicate::Unary(UnaryExpression::new( expr.op(), Reference::new(name), ))), - BoundPredicate::Binary(expr) => { - if expr.op() != PredicateOperator::Eq || !self.can_transform(expr.literal()) { - return Ok(None); - } - - Some(Predicate::Binary(BinaryExpression::new( - expr.op(), - Reference::new(name), - func.transform_literal_result(expr.literal())?, - ))) - } - BoundPredicate::Set(expr) => { - if expr.op() != PredicateOperator::In { - return Ok(None); - } - - Some(Predicate::Set(SetExpression::new( - expr.op(), - 
Reference::new(name), - self.apply_transform_on_set(expr.literals(), &func)?, - ))) - } _ => None, }, - Transform::Identity => match predicate { - BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( - expr.op(), - Reference::new(name), - ))), - BoundPredicate::Binary(expr) => Some(Predicate::Binary(BinaryExpression::new( + BoundPredicate::Binary(expr) => match self { + Transform::Identity => Some(Predicate::Binary(BinaryExpression::new( expr.op(), Reference::new(name), expr.literal().to_owned(), ))), - BoundPredicate::Set(expr) => Some(Predicate::Set(SetExpression::new( - expr.op(), - Reference::new(name), - expr.literals().to_owned(), - ))), - _ => None, - }, - Transform::Truncate(_) => match predicate { - BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( - expr.op(), - Reference::new(name), - ))), - BoundPredicate::Binary(expr) => { - if !self.can_transform(expr.literal()) { - return Ok(None); - } - - let op = expr.op(); - let datum = expr.literal(); - self.apply_transform_boundary(name, datum, &op, &func)? 
- } - BoundPredicate::Set(expr) => { - if expr.op() != PredicateOperator::In { + Transform::Bucket(_) => { + if expr.op() != PredicateOperator::Eq || !self.can_transform(expr.literal()) { return Ok(None); } - Some(Predicate::Set(SetExpression::new( + Some(Predicate::Binary(BinaryExpression::new( expr.op(), Reference::new(name), - self.apply_transform_on_set(expr.literals(), &func)?, + func.transform_literal_result(expr.literal())?, ))) } - _ => None, - }, - Transform::Year | Transform::Month | Transform::Day => match predicate { - BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( - expr.op(), - Reference::new(name), - ))), - BoundPredicate::Binary(expr) => { + Transform::Truncate(_) + | Transform::Year + | Transform::Month + | Transform::Day + | Transform::Hour => { if !self.can_transform(expr.literal()) { return Ok(None); } - let op = expr.op(); - let datum = expr.literal(); - self.apply_transform_boundary(name, datum, &op, &func)? - } - BoundPredicate::Set(expr) => { - if expr.op() != PredicateOperator::In { - return Ok(None); - } - - Some(Predicate::Set(SetExpression::new( - expr.op(), - Reference::new(name), - self.apply_transform_on_set(expr.literals(), &func)?, - ))) + self.apply_transform_boundary(name, expr.literal(), &expr.op(), &func)? } _ => None, }, - Transform::Hour => match predicate { - BoundPredicate::Unary(expr) => Some(Predicate::Unary(UnaryExpression::new( + BoundPredicate::Set(expr) => match self { + Transform::Identity => Some(Predicate::Set(SetExpression::new( expr.op(), Reference::new(name), + expr.literals().to_owned(), ))), - BoundPredicate::Binary(expr) => { - if !self.can_transform(expr.literal()) { - return Ok(None); - } - - let op = expr.op(); - let datum = expr.literal(); - self.apply_transform_boundary(name, datum, &op, &func)? 
- } - BoundPredicate::Set(expr) => { - if expr.op() != PredicateOperator::In { + Transform::Bucket(_) + | Transform::Truncate(_) + | Transform::Year + | Transform::Month + | Transform::Day + | Transform::Hour => { + if expr.op() != PredicateOperator::In + || expr.literals().iter().any(|d| !self.can_transform(d)) + { return Ok(None); } @@ -423,7 +364,7 @@ impl Transform { /// Check if `Transform` is applicable on datum's `PrimitiveType` fn can_transform(&self, datum: &Datum) -> bool { let input_type = datum.data_type().clone(); - self.result_type(&Type::Primitive(input_type)).is_err() + self.result_type(&Type::Primitive(input_type)).is_ok() } /// Transform each literal value of `FnvHashSet` @@ -432,13 +373,10 @@ impl Transform { literals: &FnvHashSet, func: &BoxedTransformFunction, ) -> Result> { - literals.iter().filter(|d| self.can_transform(d)).try_fold( - FnvHashSet::default(), - |mut acc, d| { - acc.insert(func.transform_literal_result(d)?); - Ok(acc) - }, - ) + literals + .iter() + .map(|d| func.transform_literal_result(d)) + .collect() } /// Apply truncate transform on `Datum` with new boundaries From 35d2699faf8a5d9d1335bbee0aa06f7c98c7f647 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Fri, 29 Mar 2024 13:06:48 +0100 Subject: [PATCH 27/46] add: java-testsuite Transform::Truncate --- crates/iceberg/src/spec/transform.rs | 401 ++++++++++++++++++++++++++- 1 file changed, 393 insertions(+), 8 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 07e10b185..1e99ad058 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -316,16 +316,25 @@ impl Transform { func.transform_literal_result(expr.literal())?, ))) } - Transform::Truncate(_) - | Transform::Year - | Transform::Month - | Transform::Day - | Transform::Hour => { + Transform::Truncate(width) => { if !self.can_transform(expr.literal()) { return Ok(None); } - self.apply_transform_boundary(name, expr.literal(), 
&expr.op(), &func)? + self.apply_transform_boundary( + name, + expr.literal(), + &expr.op(), + &func, + Some(*width), + )? + } + Transform::Year | Transform::Month | Transform::Day | Transform::Hour => { + if !self.can_transform(expr.literal()) { + return Ok(None); + } + + self.apply_transform_boundary(name, expr.literal(), &expr.op(), &func, None)? } _ => None, }, @@ -387,15 +396,47 @@ impl Transform { datum: &Datum, op: &PredicateOperator, func: &BoxedTransformFunction, + width: Option, ) -> Result> { let boundary = self.datum_with_boundary(op, datum)?; match boundary { None => Ok(None), Some(boundary) => { + // TODO: extract into rewrite operator for projection let new_op = match op { PredicateOperator::LessThan => PredicateOperator::LessThanOrEq, PredicateOperator::GreaterThan => PredicateOperator::GreaterThanOrEq, + PredicateOperator::StartsWith => match datum.literal() { + PrimitiveLiteral::String(v) => match width { + Some(w) => { + if v.len() == w as usize { + PredicateOperator::Eq + } else { + *op + } + } + None => *op, + }, + _ => *op, + }, + PredicateOperator::NotStartsWith => match datum.literal() { + PrimitiveLiteral::String(v) => match width { + Some(w) => { + let w = w as usize; + if v.len() == w { + PredicateOperator::NotEq + } else if v.len() < w { + *op + } else { + // cannot be projected + return Ok(None); + } + } + None => *op, + }, + _ => *op, + }, _ => *op, }; @@ -412,12 +453,15 @@ impl Transform { fn datum_with_boundary(&self, op: &PredicateOperator, datum: &Datum) -> Result> { let literal = datum.literal(); + // TODO: check if all Literals are handled let adj_datum = match op { PredicateOperator::LessThan => match literal { PrimitiveLiteral::Int(v) => Some(Datum::int(v - 1)), PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)), PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?), PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + PrimitiveLiteral::Binary(v) => Some(Datum::binary(v.clone())), + 
PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)), PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v - 1)?), PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v - 1)), @@ -429,6 +473,8 @@ impl Transform { PrimitiveLiteral::Long(v) => Some(Datum::long(v + 1)), PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?), PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + PrimitiveLiteral::Binary(v) => Some(Datum::binary(v.clone())), + PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)), PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v + 1)?), PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v + 1)), @@ -442,6 +488,8 @@ impl Transform { PrimitiveLiteral::Long(v) => Some(Datum::long(*v)), PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(*v)?), PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + PrimitiveLiteral::Binary(v) => Some(Datum::binary(v.clone())), + PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), PrimitiveLiteral::Date(v) => Some(Datum::date(*v)), PrimitiveLiteral::Time(v) => Some(Datum::time_micros(*v)?), PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(*v)), @@ -450,6 +498,12 @@ impl Transform { }, PredicateOperator::StartsWith => match literal { PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), + PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), + PrimitiveLiteral::Binary(v) => Some(Datum::binary(v.clone())), + _ => None, + }, + PredicateOperator::NotStartsWith => match literal { + PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), _ => None, }, _ => None, @@ -626,10 +680,10 @@ mod tests { fn assert_projection( predicate: &BoundPredicate, - parameter: &TestProjectionParameter, + fixture: &TestProjectionParameter, expected: Option<&str>, ) -> Result<()> { - let result = parameter.project(predicate)?; 
+ let result = fixture.project(predicate)?; match expected { Some(exp) => assert_eq!(format!("{}", result.unwrap()), exp), None => assert!(result.is_none()), @@ -660,6 +714,337 @@ mod tests { } } + #[test] + fn test_projection_truncate_string_rewrite_op() -> Result<()> { + let fixture = TestProjectionParameter::new( + Transform::Truncate(5), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), + ); + + let value = "abcde"; + assert_projection( + &fixture.binary_predicate(PredicateOperator::StartsWith, Datum::string(value)), + &fixture, + Some(r#"name = "abcde""#), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotStartsWith, Datum::string(value)), + &fixture, + Some(r#"name != "abcde""#), + )?; + + let value = "abcdefg"; + assert_projection( + &fixture.binary_predicate(PredicateOperator::StartsWith, Datum::string(value)), + &fixture, + Some(r#"name STARTS WITH "abcde""#), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotStartsWith, Datum::string(value)), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_string() -> Result<()> { + let value = "abcdefg"; + let fixture = TestProjectionParameter::new( + Transform::Truncate(5), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::string(value)), + &fixture, + Some(r#"name <= "abcde""#), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::string(value)), + &fixture, + Some(r#"name <= "abcde""#), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::string(value)), + &fixture, + Some(r#"name >= "abcde""#), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::string(value)), + &fixture, + Some(r#"name >= "abcde""#), + )?; + + assert_projection( 
+ &fixture.binary_predicate(PredicateOperator::Eq, Datum::string(value)), + &fixture, + Some(r#"name = "abcde""#), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::string(value), Datum::string(format!("{}abc", value))], + ), + &fixture, + Some(r#"name IN ("abcde")"#), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::string(value), Datum::string(format!("{}abc", value))], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_decimal() -> Result<()> { + // test lower and upper bound + for &value in [100.00, 99.99].iter() { + // format as i128 unscaled + let result = if value == 100.00 { "10000" } else { "9990" }; + let value_str = format!("{:.2}", value); + + let fixture = TestProjectionParameter::new( + Transform::Truncate(10), + "name", + NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Decimal { + precision: 9, + scale: 2, + }), + ), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::decimal_from_str(&value_str)?, + ), + &fixture, + Some("name <= 9990"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::decimal_from_str(&value_str)?, + ), + &fixture, + Some(format!("name <= {}", result).as_str()), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::decimal_from_str(&value_str)?, + ), + &fixture, + Some(format!("name >= {}", result).as_str()), + )?; + + assert_projection( + &fixture + .binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(&value_str)?), + &fixture, + Some(format!("name = {}", result).as_str()), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::NotEq, + Datum::decimal_from_str(&value_str)?, + ), + &fixture, + None, + )?; + + let set_result = if value == 100.00 { + "name IN (9900, 10000, 10100)" + } else { + "name IN 
(10090, 9990, 9890)" + }; + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str(format!("{:.2}", value - 1.0))?, + Datum::decimal_from_str(&value_str)?, + Datum::decimal_from_str(format!("{:.2}", value + 1.0))?, + ], + ), + &fixture, + Some(set_result), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str(&value_str)?, + Datum::decimal_from_str(format!("{:.2}", value + 1.0))?, + ], + ), + &fixture, + None, + )?; + } + + Ok(()) + } + + #[test] + fn test_projection_truncate_long() -> Result<()> { + // test lower and upper bound + for &value in [100i64, 99i64].iter() { + let result = if value == 100 { "100" } else { "90" }; + + let fixture = TestProjectionParameter::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), + &fixture, + Some("name <= 90"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), + &fixture, + Some(format!("name <= {}", result).as_str()), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), + &fixture, + Some(format!("name >= {}", result).as_str()), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), + &fixture, + Some(format!("name = {}", result).as_str()), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value + 1), + ], + ), + &fixture, + Some("name IN (100, 90)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(value), 
Datum::long(value + 1)], + ), + &fixture, + None, + )?; + } + + Ok(()) + } + + #[test] + fn test_projection_truncate_integer() -> Result<()> { + // test lower and upper bound + for &value in [100, 99].iter() { + let result = if value == 100 { "100" } else { "90" }; + + let fixture = TestProjectionParameter::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), + &fixture, + Some("name <= 90"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), + &fixture, + Some(format!("name <= {}", result).as_str()), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), + &fixture, + Some(format!("name >= {}", result).as_str()), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), + &fixture, + Some(format!("name = {}", result).as_str()), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], + ), + &fixture, + Some("name IN (100, 90)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), + &fixture, + None, + )?; + } + + Ok(()) + } + #[test] fn test_projection_bucket_uuid() -> Result<()> { let value = uuid::Uuid::from_u64_pair(123, 456); From 91cb1d479bfcaf6d1f4bfba8cfe54a8958384c12 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Sat, 30 Mar 2024 06:10:41 +0100 Subject: [PATCH 28/46] add: java-testsuite Transform::Dates + refactor --- crates/iceberg/src/spec/transform.rs | 1046 ++++++++++++++++++++++++-- 1 file changed, 981 
insertions(+), 65 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 1e99ad058..4109f5975 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -321,7 +321,7 @@ impl Transform { return Ok(None); } - self.apply_transform_boundary( + self.transform_projected_boundary( name, expr.literal(), &expr.op(), @@ -334,7 +334,13 @@ impl Transform { return Ok(None); } - self.apply_transform_boundary(name, expr.literal(), &expr.op(), &func, None)? + self.transform_projected_boundary( + name, + expr.literal(), + &expr.op(), + &func, + None, + )? } _ => None, }, @@ -359,7 +365,7 @@ impl Transform { Some(Predicate::Set(SetExpression::new( expr.op(), Reference::new(name), - self.apply_transform_on_set(expr.literals(), &func)?, + self.transform_set(expr.literals(), &func)?, ))) } _ => None, @@ -377,20 +383,32 @@ impl Transform { } /// Transform each literal value of `FnvHashSet` - fn apply_transform_on_set( + fn transform_set( &self, literals: &FnvHashSet, func: &BoxedTransformFunction, ) -> Result> { - literals - .iter() - .map(|d| func.transform_literal_result(d)) - .collect() + let mut new_set = FnvHashSet::default(); + + for lit in literals { + let datum = func.transform_literal_result(lit)?; + + if let Some(AdjustedProjection::Single(d)) = + self.adjust_projection(&PredicateOperator::In, &datum) + { + new_set.insert(d); + }; + + new_set.insert(datum); + } + + Ok(new_set) } - /// Apply truncate transform on `Datum` with new boundaries - /// and adjusted `PredicateOperator` - fn apply_transform_boundary( + /// Apply transform on `Datum` with adjusted boundaries. 
+ /// Returns Predicate with projection and possibly + /// rewritten `PredicateOperator` + fn transform_projected_boundary( &self, name: String, datum: &Datum, @@ -398,63 +416,50 @@ impl Transform { func: &BoxedTransformFunction, width: Option, ) -> Result> { - let boundary = self.datum_with_boundary(op, datum)?; - - match boundary { - None => Ok(None), - Some(boundary) => { - // TODO: extract into rewrite operator for projection - let new_op = match op { - PredicateOperator::LessThan => PredicateOperator::LessThanOrEq, - PredicateOperator::GreaterThan => PredicateOperator::GreaterThanOrEq, - PredicateOperator::StartsWith => match datum.literal() { - PrimitiveLiteral::String(v) => match width { - Some(w) => { - if v.len() == w as usize { - PredicateOperator::Eq - } else { - *op - } - } - None => *op, - }, - _ => *op, - }, - PredicateOperator::NotStartsWith => match datum.literal() { - PrimitiveLiteral::String(v) => match width { - Some(w) => { - let w = w as usize; - if v.len() == w { - PredicateOperator::NotEq - } else if v.len() < w { - *op - } else { - // cannot be projected - return Ok(None); - } - } - None => *op, - }, - _ => *op, - }, - _ => *op, - }; - - Ok(Some(Predicate::Binary(BinaryExpression::new( - new_op, - Reference::new(name), - func.transform_literal_result(&boundary)?, - )))) + if let Some(boundary) = self.projected_boundary(op, datum)? 
{ + let tran_datum = func.transform_literal_result(&boundary)?; + let adj_datum = self.adjust_projection(op, &tran_datum); + + let proj_op = self.projected_operator(op, datum, width); + + if let Some(op) = proj_op { + match adj_datum { + None => { + return Ok(Some(Predicate::Binary(BinaryExpression::new( + op, + Reference::new(name), + tran_datum, + )))); + } + Some(AdjustedProjection::Single(d)) => { + return Ok(Some(Predicate::Binary(BinaryExpression::new( + op, + Reference::new(name), + d, + )))); + } + Some(AdjustedProjection::Set(d)) => { + return Ok(Some(Predicate::Set(SetExpression::new( + PredicateOperator::In, + Reference::new(name), + d, + )))); + } + } } - } + }; + + Ok(None) } - /// Create a new `Datum` with adjusted boundary for projection - fn datum_with_boundary(&self, op: &PredicateOperator, datum: &Datum) -> Result> { + /// Create a new `Datum` with adjusted projection boundary. + /// Returns `None` if `PredicateOperator` and `PrimitiveLiteral` + /// can not be projected + fn projected_boundary(&self, op: &PredicateOperator, datum: &Datum) -> Result> { let literal = datum.literal(); - // TODO: check if all Literals are handled - let adj_datum = match op { + // TODO: verify all ops and literals are handled + let projected_boundary = match op { PredicateOperator::LessThan => match literal { PrimitiveLiteral::Int(v) => Some(Datum::int(v - 1)), PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)), @@ -509,7 +514,84 @@ impl Transform { _ => None, }; - Ok(adj_datum) + Ok(projected_boundary) + } + + /// Create a new `PredicateOperator`, rewritten for projection + fn projected_operator( + &self, + op: &PredicateOperator, + datum: &Datum, + width: Option, + ) -> Option { + match op { + PredicateOperator::LessThan => Some(PredicateOperator::LessThanOrEq), + PredicateOperator::GreaterThan => Some(PredicateOperator::GreaterThanOrEq), + PredicateOperator::StartsWith => match datum.literal() { + PrimitiveLiteral::String(s) => { + if let Some(w) = width { + if 
s.len() == w as usize { + return Some(PredicateOperator::Eq); + }; + }; + Some(*op) + } + _ => Some(*op), + }, + PredicateOperator::NotStartsWith => match datum.literal() { + PrimitiveLiteral::String(s) => { + if let Some(w) = width { + let w = w as usize; + + if s.len() == w { + return Some(PredicateOperator::NotEq); + } + + if s.len() < w { + return Some(*op); + } + + return None; + }; + Some(*op) + } + _ => Some(*op), + }, + _ => Some(*op), + } + } + + /// Adjust time projection + ///https://github.com/apache/iceberg/blob/main/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java#L275 + fn adjust_projection( + &self, + op: &PredicateOperator, + datum: &Datum, + ) -> Option { + if let Transform::Year | Transform::Month = self { + if let &PrimitiveLiteral::Int(v) = datum.literal() { + match op { + PredicateOperator::LessThan + | PredicateOperator::LessThanOrEq + | PredicateOperator::In => { + if v < 0 { + return Some(AdjustedProjection::Single(Datum::int(v + 1))); + }; + } + PredicateOperator::Eq => { + if v < 0 { + let new_set = + FnvHashSet::from_iter(vec![datum.to_owned(), Datum::int(v + 1)]); + return Some(AdjustedProjection::Set(new_set)); + } + } + _ => { + return None; + } + } + } + }; + None } } @@ -606,7 +688,11 @@ impl<'de> Deserialize<'de> for Transform { } } -impl Datum {} +#[derive(Debug)] +enum AdjustedProjection { + Single(Datum), + Set(FnvHashSet), +} #[cfg(test)] mod tests { @@ -714,6 +800,836 @@ mod tests { } } + #[test] + fn test_projection_date_day_negative() -> Result<()> { + let value = "1969-12-30"; + let another = "1969-12-28"; + + let fixture = TestProjectionParameter::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + &fixture, + Some("name <= -3"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + 
Datum::date_from_str(value)?, + ), + &fixture, + Some("name <= -2"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + &fixture, + Some("name >= -1"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name >= -2"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + &fixture, + Some("name = -2"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + Some("name IN (-2, -4)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_day() -> Result<()> { + let value = "2017-01-01"; + let another = "2017-12-31"; + + let fixture = TestProjectionParameter::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + &fixture, + Some("name <= 17166"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name <= 17167"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + &fixture, + Some("name >= 17168"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name >= 17167"), + )?; + + 
assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + &fixture, + Some("name = 17167"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + Some("name IN (17531, 17167)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_negative_upper_bound() -> Result<()> { + let value = "1969-12-31"; + let another = "1969-01-01"; + + let fixture = TestProjectionParameter::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + // 0 = number of months -> "1970-01" + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + &fixture, + Some("name >= 0"), + )?; + + // -1 = number of months -> "1969-12" + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name >= -1"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + &fixture, + Some("name IN (-1, 0)"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + 
&fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + Some("name IN (-1, -12, -11, 0)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_upper_bound() -> Result<()> { + let value = "2017-12-31"; + let another = "2017-01-01"; + + let fixture = TestProjectionParameter::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + // 574 = number of months -> "2017-11-01" + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + &fixture, + Some("name <= 575"), + )?; + + // 575 = number of months -> "2017-12-01" + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name <= 575"), + )?; + + // 576 = number of months -> "2018-01-01" + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + &fixture, + Some("name >= 576"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name >= 575"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + &fixture, + Some("name = 575"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + &fixture, + None, + )?; + + // 564 = number of months -> "2017-01-01" + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + Some("name IN (575, 564)"), + )?; + + assert_projection( + 
&fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_negative_lower_bound() -> Result<()> { + let value = "1969-01-01"; + let another = "1969-12-31"; + + let fixture = TestProjectionParameter::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + // -12 = number of months -> "1969-01-01" + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + &fixture, + Some("name <= -12"), + )?; + + // -11 = number of months -> "1969-02-01" + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name <= -11"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + &fixture, + Some("name >= -12"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name >= -12"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + &fixture, + Some("name IN (-12, -11)"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + Some("name IN (-1, -12, -11, 0)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_lower_bound() -> Result<()> { + let value = "2017-12-01"; + let another = 
"2017-01-01"; + + let fixture = TestProjectionParameter::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + // 574 = number of months -> "2017-11-01" + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + &fixture, + Some("name <= 574"), + )?; + + // 575 = number of months -> "2017-12-01" + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name <= 575"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + &fixture, + Some("name >= 575"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name >= 575"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + &fixture, + Some("name = 575"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + &fixture, + None, + )?; + + // 564 = number of months -> "2017-01-01" + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + Some("name IN (575, 564)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_epoch() -> Result<()> { + let value = "1970-01-01"; + let another = "1969-12-31"; + + let fixture = TestProjectionParameter::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + // TODO: still need fixInclusiveTimeProjection?? 
+ assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + &fixture, + Some("name >= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name >= 0"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + &fixture, + Some("name = 0"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + Some("name IN (0, -1)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_negative_upper_bound() -> Result<()> { + let value = "1969-12-31"; + let another = "1969-01-01"; + + let fixture = TestProjectionParameter::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + // 0 = number of years -> "1970" + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + 
&fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + &fixture, + Some("name >= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name >= -1"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + &fixture, + Some("name IN (-1, 0)"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + Some("name IN (0, -1)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_upper_bound() -> Result<()> { + let value = "2017-12-31"; + let another = "2016-01-01"; + + let fixture = TestProjectionParameter::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + // 47 = number of years -> "2017" + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + &fixture, + Some("name <= 47"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name <= 47"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + &fixture, + Some("name >= 48"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name >= 47"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, 
Datum::date_from_str(value)?), + &fixture, + Some("name = 47"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + Some("name IN (47, 46)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_negative_lower_bound() -> Result<()> { + let value = "1970-01-01"; + let another = "1969-12-31"; + + let fixture = TestProjectionParameter::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + // 0 = number of years -> "1970" + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + &fixture, + Some("name >= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name >= 0"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + &fixture, + Some("name = 0"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + 
Some("name IN (0, -1)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_lower_bound() -> Result<()> { + let value = "2017-01-01"; + let another = "2016-12-31"; + + let fixture = TestProjectionParameter::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + // 46 = number of years -> "2016" + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + &fixture, + Some("name <= 46"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name <= 47"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + &fixture, + Some("name >= 47"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + &fixture, + Some("name >= 47"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + &fixture, + Some("name = 47"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + Some("name IN (47, 46)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + &fixture, + None, + )?; + + Ok(()) + } + #[test] fn test_projection_truncate_string_rewrite_op() -> Result<()> { let fixture = TestProjectionParameter::new( From 
4ccc3b245cf91a1f743d15d5883a331cd5830cad Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Sat, 30 Mar 2024 06:13:03 +0100 Subject: [PATCH 29/46] fix: doc --- crates/iceberg/src/transform/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/iceberg/src/transform/mod.rs b/crates/iceberg/src/transform/mod.rs index 3b5d16448..b2ac67018 100644 --- a/crates/iceberg/src/transform/mod.rs +++ b/crates/iceberg/src/transform/mod.rs @@ -36,7 +36,8 @@ pub trait TransformFunction: Send { fn transform(&self, input: ArrayRef) -> Result; /// transform_literal will take an input literal and transform it into a new literal. fn transform_literal(&self, input: &Datum) -> Result>; - /// wrapper + /// A thin wrapper around `transform_literal` + /// to return an error even when it's `None`. fn transform_literal_result(&self, input: &Datum) -> Result { self.transform_literal(input)?.ok_or_else(|| { Error::new( From 1455ad3ba4ce9820824aab3f56b982a6e05c24a1 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Sat, 30 Mar 2024 12:12:53 +0100 Subject: [PATCH 30/46] add: timestamp test + refactor --- crates/iceberg/src/spec/transform.rs | 304 +++++++++++++++++++++++++-- 1 file changed, 289 insertions(+), 15 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 4109f5975..cf9b6db2a 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -394,7 +394,7 @@ impl Transform { let datum = func.transform_literal_result(lit)?; if let Some(AdjustedProjection::Single(d)) = - self.adjust_projection(&PredicateOperator::In, &datum) + self.adjust_projection(&PredicateOperator::In, lit, &datum) { new_set.insert(d); }; @@ -418,7 +418,7 @@ impl Transform { ) -> Result> { if let Some(boundary) = self.projected_boundary(op, datum)? 
{ let tran_datum = func.transform_literal_result(&boundary)?; - let adj_datum = self.adjust_projection(op, &tran_datum); + let adj_datum = self.adjust_projection(op, datum, &tran_datum); let proj_op = self.projected_operator(op, datum, width); @@ -457,6 +457,7 @@ impl Transform { /// can not be projected fn projected_boundary(&self, op: &PredicateOperator, datum: &Datum) -> Result> { let literal = datum.literal(); + let day_micros = 86_400_000_000; // TODO: verify all ops and literals are handled let projected_boundary = match op { @@ -468,9 +469,9 @@ impl Transform { PrimitiveLiteral::Binary(v) => Some(Datum::binary(v.clone())), PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)), - PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v - 1)?), - PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v - 1)), - PrimitiveLiteral::TimestampTZ(v) => Some(Datum::timestamptz_micros(v - 1)), + PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v - day_micros)?), + PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v - day_micros)), + PrimitiveLiteral::TimestampTZ(v) => Some(Datum::timestamptz_micros(v - day_micros)), _ => None, }, PredicateOperator::GreaterThan => match literal { @@ -481,9 +482,9 @@ impl Transform { PrimitiveLiteral::Binary(v) => Some(Datum::binary(v.clone())), PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)), - PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v + 1)?), - PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v + 1)), - PrimitiveLiteral::TimestampTZ(v) => Some(Datum::timestamptz_micros(v + 1)), + PrimitiveLiteral::Time(v) => Some(Datum::time_micros(*v)?), + PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(*v)), + PrimitiveLiteral::TimestampTZ(v) => Some(Datum::timestamptz_micros(*v)), _ => None, }, PredicateOperator::Eq @@ -566,10 +567,17 @@ impl Transform { 
fn adjust_projection( &self, op: &PredicateOperator, - datum: &Datum, + original: &Datum, + transformed: &Datum, ) -> Option { - if let Transform::Year | Transform::Month = self { - if let &PrimitiveLiteral::Int(v) = datum.literal() { + let should_adjust = match self { + Transform::Day => matches!(original.literal(), PrimitiveLiteral::Timestamp(_)), + Transform::Year | Transform::Month => true, + _ => false, + }; + + if should_adjust { + if let &PrimitiveLiteral::Int(v) = transformed.literal() { match op { PredicateOperator::LessThan | PredicateOperator::LessThanOrEq @@ -580,8 +588,10 @@ impl Transform { } PredicateOperator::Eq => { if v < 0 { - let new_set = - FnvHashSet::from_iter(vec![datum.to_owned(), Datum::int(v + 1)]); + let new_set = FnvHashSet::from_iter(vec![ + transformed.to_owned(), + Datum::int(v + 1), + ]); return Some(AdjustedProjection::Set(new_set)); } } @@ -589,8 +599,8 @@ impl Transform { return None; } } - } - }; + }; + } None } } @@ -800,6 +810,270 @@ mod tests { } } + #[test] + fn test_projection_timestamp_day_negative_lower_bound() -> Result<()> { + // -365 + let value = "1969-01-01T00:00:00.000000"; + // -364 + let another = "1969-01-02T00:00:00.000000"; + + let fixture = TestProjectionParameter::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= -365"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= -364"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= -365"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + 
&fixture, + Some("name >= -365"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name IN (-364, -365)"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (-363, -364, -365)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_lower_bound() -> Result<()> { + // 17501 + let value = "2017-12-01T00:00:00.00000"; + // 17502 + let another = "2017-12-02T00:00:00.00000"; + + let fixture = TestProjectionParameter::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 17500"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 17501"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 17501"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 17501"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name = 17501"), + )?; + + assert_projection( + 
&fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (17501, 17502)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_epoch() -> Result<()> { + // 0 + let value = "1970-01-01T00:00:00.00000"; + // 1 + let another = "1970-01-02T00:00:00.00000"; + + let fixture = TestProjectionParameter::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 0"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name = 0"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], 
+ ), + &fixture, + Some("name IN (1, 0)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + + Ok(()) + } + #[test] fn test_projection_date_day_negative() -> Result<()> { let value = "1969-12-30"; From 868108cf1f5de88f8f1d69aa19ecb795d5f9282e Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Sat, 30 Mar 2024 12:22:21 +0100 Subject: [PATCH 31/46] refactor: simplify projected_boundary --- crates/iceberg/src/spec/transform.rs | 164 +++++++++++---------------- 1 file changed, 67 insertions(+), 97 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index cf9b6db2a..a81937828 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -459,59 +459,29 @@ impl Transform { let literal = datum.literal(); let day_micros = 86_400_000_000; - // TODO: verify all ops and literals are handled let projected_boundary = match op { PredicateOperator::LessThan => match literal { PrimitiveLiteral::Int(v) => Some(Datum::int(v - 1)), PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)), PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?), - PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), - PrimitiveLiteral::Binary(v) => Some(Datum::binary(v.clone())), - PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)), PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v - day_micros)?), PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v - day_micros)), PrimitiveLiteral::TimestampTZ(v) => Some(Datum::timestamptz_micros(v - day_micros)), - _ => None, + _ => Some(datum.to_owned()), }, PredicateOperator::GreaterThan => match literal { PrimitiveLiteral::Int(v) => Some(Datum::int(v + 1)), PrimitiveLiteral::Long(v) => Some(Datum::long(v + 1)), 
PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?), - PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), - PrimitiveLiteral::Binary(v) => Some(Datum::binary(v.clone())), - PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)), - PrimitiveLiteral::Time(v) => Some(Datum::time_micros(*v)?), - PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(*v)), - PrimitiveLiteral::TimestampTZ(v) => Some(Datum::timestamptz_micros(*v)), - _ => None, + _ => Some(datum.to_owned()), }, PredicateOperator::Eq | PredicateOperator::LessThanOrEq - | PredicateOperator::GreaterThanOrEq => match literal { - PrimitiveLiteral::Int(v) => Some(Datum::int(*v)), - PrimitiveLiteral::Long(v) => Some(Datum::long(*v)), - PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(*v)?), - PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), - PrimitiveLiteral::Binary(v) => Some(Datum::binary(v.clone())), - PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), - PrimitiveLiteral::Date(v) => Some(Datum::date(*v)), - PrimitiveLiteral::Time(v) => Some(Datum::time_micros(*v)?), - PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(*v)), - PrimitiveLiteral::TimestampTZ(v) => Some(Datum::timestamptz_micros(*v)), - _ => None, - }, - PredicateOperator::StartsWith => match literal { - PrimitiveLiteral::Fixed(v) => Some(Datum::fixed(v.clone())), - PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), - PrimitiveLiteral::Binary(v) => Some(Datum::binary(v.clone())), - _ => None, - }, - PredicateOperator::NotStartsWith => match literal { - PrimitiveLiteral::String(v) => Some(Datum::string(v.clone())), - _ => None, - }, + | PredicateOperator::GreaterThanOrEq + | PredicateOperator::StartsWith + | PredicateOperator::NotStartsWith => Some(datum.to_owned()), _ => None, }; @@ -815,7 +785,7 @@ mod tests { // -365 let value = "1969-01-01T00:00:00.000000"; // -364 - let another = 
"1969-01-02T00:00:00.000000"; + let _another = "1969-01-02T00:00:00.000000"; let fixture = TestProjectionParameter::new( Transform::Day, @@ -823,23 +793,23 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), ); - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= -365"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= -364"), - )?; + // assert_projection( + // &fixture.binary_predicate( + // PredicateOperator::LessThan, + // Datum::timestamp_from_str(value)?, + // ), + // &fixture, + // Some("name <= -365"), + // )?; + + // assert_projection( + // &fixture.binary_predicate( + // PredicateOperator::LessThanOrEq, + // Datum::timestamp_from_str(value)?, + // ), + // &fixture, + // Some("name <= -364"), + // )?; assert_projection( &fixture.binary_predicate( @@ -850,50 +820,50 @@ mod tests { Some("name >= -365"), )?; - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= -365"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name IN (-364, -365)"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (-363, -364, -365)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; + // assert_projection( + // 
&fixture.binary_predicate( + // PredicateOperator::GreaterThanOrEq, + // Datum::timestamp_from_str(value)?, + // ), + // &fixture, + // Some("name >= -365"), + // )?; + + // assert_projection( + // &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + // &fixture, + // Some("name IN (-364, -365)"), + // )?; + + // assert_projection( + // &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + // &fixture, + // None, + // )?; + + // assert_projection( + // &fixture.set_predicate( + // PredicateOperator::In, + // vec![ + // Datum::timestamp_from_str(value)?, + // Datum::timestamp_from_str(another)?, + // ], + // ), + // &fixture, + // Some("name IN (-363, -364, -365)"), + // )?; + + // assert_projection( + // &fixture.set_predicate( + // PredicateOperator::NotIn, + // vec![ + // Datum::timestamp_from_str(value)?, + // Datum::timestamp_from_str(another)?, + // ], + // ), + // &fixture, + // None, + // )?; Ok(()) } From e2b6e3fa6e33f8fa752428eeb166dda2017e392c Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Sat, 30 Mar 2024 20:24:37 +0100 Subject: [PATCH 32/46] add: java-testsuite Transform::Timestamp --- crates/iceberg/src/spec/transform.rs | 937 +++++++++++++++++++++++---- 1 file changed, 821 insertions(+), 116 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index a81937828..6821ecc31 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -32,6 +32,9 @@ use std::str::FromStr; use super::{Datum, PrimitiveLiteral}; +/// A `Day` in microseconds +const DAY_IN_MICROS: i64 = 86_400_000_000; + /// Transform is used to transform predicates to partition predicates, /// in addition to transforming data values. /// @@ -417,35 +420,27 @@ impl Transform { width: Option, ) -> Result> { if let Some(boundary) = self.projected_boundary(op, datum)? 
{ - let tran_datum = func.transform_literal_result(&boundary)?; - let adj_datum = self.adjust_projection(op, datum, &tran_datum); - - let proj_op = self.projected_operator(op, datum, width); - - if let Some(op) = proj_op { - match adj_datum { - None => { - return Ok(Some(Predicate::Binary(BinaryExpression::new( - op, - Reference::new(name), - tran_datum, - )))); - } + let transformed = func.transform_literal_result(&boundary)?; + let adjusted = self.adjust_projection(op, datum, &transformed); + let op = self.projected_operator(op, datum, width); + + if let Some(op) = op { + let predicate = match adjusted { + None => Predicate::Binary(BinaryExpression::new( + op, + Reference::new(name), + transformed, + )), Some(AdjustedProjection::Single(d)) => { - return Ok(Some(Predicate::Binary(BinaryExpression::new( - op, - Reference::new(name), - d, - )))); - } - Some(AdjustedProjection::Set(d)) => { - return Ok(Some(Predicate::Set(SetExpression::new( - PredicateOperator::In, - Reference::new(name), - d, - )))); + Predicate::Binary(BinaryExpression::new(op, Reference::new(name), d)) } - } + Some(AdjustedProjection::Set(d)) => Predicate::Set(SetExpression::new( + PredicateOperator::In, + Reference::new(name), + d, + )), + }; + return Ok(Some(predicate)); } }; @@ -457,7 +452,6 @@ impl Transform { /// can not be projected fn projected_boundary(&self, op: &PredicateOperator, datum: &Datum) -> Result> { let literal = datum.literal(); - let day_micros = 86_400_000_000; let projected_boundary = match op { PredicateOperator::LessThan => match literal { @@ -465,9 +459,11 @@ impl Transform { PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)), PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?), PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)), - PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v - day_micros)?), - PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v - day_micros)), - PrimitiveLiteral::TimestampTZ(v) => Some(Datum::timestamptz_micros(v - 
day_micros)), + PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v - DAY_IN_MICROS)?), + PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v - DAY_IN_MICROS)), + PrimitiveLiteral::TimestampTZ(v) => { + Some(Datum::timestamptz_micros(v - DAY_IN_MICROS)) + } _ => Some(datum.to_owned()), }, PredicateOperator::GreaterThan => match literal { @@ -475,6 +471,11 @@ impl Transform { PrimitiveLiteral::Long(v) => Some(Datum::long(v + 1)), PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?), PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)), + PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v + DAY_IN_MICROS)?), + PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v + DAY_IN_MICROS)), + PrimitiveLiteral::TimestampTZ(v) => { + Some(Datum::timestamptz_micros(v + DAY_IN_MICROS)) + } _ => Some(datum.to_owned()), }, PredicateOperator::Eq @@ -781,35 +782,33 @@ mod tests { } #[test] - fn test_projection_timestamp_day_negative_lower_bound() -> Result<()> { - // -365 - let value = "1969-01-01T00:00:00.000000"; - // -364 - let _another = "1969-01-02T00:00:00.000000"; + fn test_projection_timestamp_year_upper_bound() -> Result<()> { + let value = "2017-12-31T23:59:59.999999"; + let another = "2016-12-31T23:59:59.999999"; let fixture = TestProjectionParameter::new( - Transform::Day, + Transform::Year, "name", NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), ); - // assert_projection( - // &fixture.binary_predicate( - // PredicateOperator::LessThan, - // Datum::timestamp_from_str(value)?, - // ), - // &fixture, - // Some("name <= -365"), - // )?; + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 47"), + )?; - // assert_projection( - // &fixture.binary_predicate( - // PredicateOperator::LessThanOrEq, - // Datum::timestamp_from_str(value)?, - // ), - // &fixture, - // Some("name <= -364"), - // )?; + 
assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 47"), + )?; assert_projection( &fixture.binary_predicate( @@ -817,66 +816,63 @@ mod tests { Datum::timestamp_from_str(value)?, ), &fixture, - Some("name >= -365"), + Some("name >= 48"), )?; - // assert_projection( - // &fixture.binary_predicate( - // PredicateOperator::GreaterThanOrEq, - // Datum::timestamp_from_str(value)?, - // ), - // &fixture, - // Some("name >= -365"), - // )?; - - // assert_projection( - // &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - // &fixture, - // Some("name IN (-364, -365)"), - // )?; - - // assert_projection( - // &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - // &fixture, - // None, - // )?; - - // assert_projection( - // &fixture.set_predicate( - // PredicateOperator::In, - // vec![ - // Datum::timestamp_from_str(value)?, - // Datum::timestamp_from_str(another)?, - // ], - // ), - // &fixture, - // Some("name IN (-363, -364, -365)"), - // )?; - - // assert_projection( - // &fixture.set_predicate( - // PredicateOperator::NotIn, - // vec![ - // Datum::timestamp_from_str(value)?, - // Datum::timestamp_from_str(another)?, - // ], - // ), - // &fixture, - // None, - // )?; + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 47"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name = 47"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + 
Some("name IN (47, 46)"), + )?; + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; Ok(()) } #[test] - fn test_projection_timestamp_day_lower_bound() -> Result<()> { - // 17501 - let value = "2017-12-01T00:00:00.00000"; - // 17502 - let another = "2017-12-02T00:00:00.00000"; + fn test_projection_timestamp_year_lower_bound() -> Result<()> { + let value = "2017-01-01T00:00:00.000000"; + let another = "2016-12-02T00:00:00.000000"; let fixture = TestProjectionParameter::new( - Transform::Day, + Transform::Year, "name", NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), ); @@ -887,7 +883,7 @@ mod tests { Datum::timestamp_from_str(value)?, ), &fixture, - Some("name <= 17500"), + Some("name <= 46"), )?; assert_projection( @@ -896,7 +892,7 @@ mod tests { Datum::timestamp_from_str(value)?, ), &fixture, - Some("name <= 17501"), + Some("name <= 47"), )?; assert_projection( @@ -905,7 +901,7 @@ mod tests { Datum::timestamp_from_str(value)?, ), &fixture, - Some("name >= 17501"), + Some("name >= 47"), )?; assert_projection( @@ -914,13 +910,13 @@ mod tests { Datum::timestamp_from_str(value)?, ), &fixture, - Some("name >= 17501"), + Some("name >= 47"), )?; assert_projection( &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), &fixture, - Some("name = 17501"), + Some("name = 47"), )?; assert_projection( @@ -938,7 +934,7 @@ mod tests { ], ), &fixture, - Some("name IN (17501, 17502)"), + Some("name IN (47, 46)"), )?; assert_projection( @@ -957,14 +953,12 @@ mod tests { } #[test] - fn test_projection_timestamp_day_epoch() -> Result<()> { - // 0 - let value = "1970-01-01T00:00:00.00000"; - // 1 - let another = "1970-01-02T00:00:00.00000"; + fn test_projection_timestamp_month_negative_upper_bound() -> Result<()> { + let value = "1969-12-31T23:59:59.999999"; + let another = 
"1970-01-01T00:00:00.000000"; let fixture = TestProjectionParameter::new( - Transform::Day, + Transform::Month, "name", NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), ); @@ -996,6 +990,717 @@ mod tests { Some("name >= 0"), )?; + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= -1"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name IN (-1, 0)"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (0, -1)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_month_upper_bound() -> Result<()> { + let value = "2017-12-01T23:59:59.999999"; + let another = "2017-11-02T00:00:00.000000"; + + let fixture = TestProjectionParameter::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + // TODO: Differs from Java 575 + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 574"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 575"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + 
Some("name >= 575"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 575"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name = 575"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (575, 574)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + Ok(()) + } + + #[test] + fn test_projection_timestamp_month_negative_lower_bound() -> Result<()> { + let value = "1969-01-01T00:00:00.000000"; + let another = "1969-03-01T00:00:00.000000"; + + let fixture = TestProjectionParameter::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= -12"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= -11"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= -12"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= -12"), + )?; + + assert_projection( + 
&fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name IN (-12, -11)"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (-10, -9, -12, -11)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_month_lower_bound() -> Result<()> { + let value = "2017-12-01T00:00:00.000000"; + let another = "2017-12-02T00:00:00.000000"; + + let fixture = TestProjectionParameter::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 574"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 575"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 575"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 575"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name = 575"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + 
None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (575)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_negative_upper_bound() -> Result<()> { + // should be -1 + let value = "1969-12-31T23:59:59.999999"; + // 0 + let another = "1970-01-01T00:00:00.000000"; + + let fixture = TestProjectionParameter::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 0"), + )?; + + // TODO: Differs from Java Test due to + // Timestamp conversion as i32? + // According to Java should be `name >= -1` + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 0"), + )?; + + // TODO: Differs from Java Test due to + // Timestamp conversion as i32? 
+ // According to Java should be `name IN (0, -1)` + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name = 0"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + // TODO: Differs from Java Test due to + // Timestamp conversion as i32? + // According to Java should be `name IN (0, -1)` + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (0)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_upper_bound() -> Result<()> { + // 17501 + let value = "2017-12-01T23:59:59.999999"; + // 17502 + let another = "2017-12-02T00:00:00.000000"; + + let fixture = TestProjectionParameter::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + // TODO: why differ from Java 17501 + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 17500"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 17501"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 17502"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 17501"), + )?; + + assert_projection( + 
&fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name = 17501"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (17501, 17502)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_negative_lower_bound() -> Result<()> { + // -365 + let value = "1969-01-01T00:00:00.000000"; + // -364 + let another = "1969-01-02T00:00:00.000000"; + + let fixture = TestProjectionParameter::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= -365"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= -364"), + )?; + + // TODO: why differ from java -365 + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= -364"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= -365"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name IN (-364, -365)"), + )?; + + assert_projection( + 
&fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (-363, -364, -365)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_lower_bound() -> Result<()> { + // 17501 + let value = "2017-12-01T00:00:00.000000"; + // 17502 + let another = "2017-12-02T00:00:00.000000"; + + let fixture = TestProjectionParameter::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 17500"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 17501"), + )?; + + //TODO: why differ from Java 17501 + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 17502"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 17501"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name = 17501"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + 
Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (17501, 17502)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_epoch() -> Result<()> { + // 0 + let value = "1970-01-01T00:00:00.00000"; + // 1 + let another = "1970-01-02T00:00:00.00000"; + + let fixture = TestProjectionParameter::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 0"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 0"), + )?; + + // TODO: why differ from Java Test 0 + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 1"), + )?; + assert_projection( &fixture.binary_predicate( PredicateOperator::GreaterThanOrEq, From 7a4eb604eed61d99faf6d7b8264514b9bf06e28a Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Sat, 30 Mar 2024 20:59:05 +0100 Subject: [PATCH 33/46] refactor tests --- crates/iceberg/src/spec/transform.rs | 778 +++++++++++++++++---------- 1 file changed, 494 insertions(+), 284 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 6821ecc31..c46fc6ed1 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -1298,7 +1298,7 @@ mod tests { #[test] fn test_projection_timestamp_day_negative_upper_bound() -> Result<()> { - // should be -1 + // -1 let value = "1969-12-31T23:59:59.999999"; // 0 let 
another = "1970-01-01T00:00:00.000000"; @@ -1406,7 +1406,7 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), ); - // TODO: why differ from Java 17501 + // TODO: Differs from Java 17501 assert_projection( &fixture.binary_predicate( PredicateOperator::LessThan, @@ -1513,7 +1513,7 @@ mod tests { Some("name <= -364"), )?; - // TODO: why differ from java -365 + // TODO: Differs from java -365 assert_projection( &fixture.binary_predicate( PredicateOperator::GreaterThan, @@ -1602,7 +1602,7 @@ mod tests { Some("name <= 17501"), )?; - //TODO: why differ from Java 17501 + //TODO: Differs from Java 17501 assert_projection( &fixture.binary_predicate( PredicateOperator::GreaterThan, @@ -1691,7 +1691,7 @@ mod tests { Some("name <= 0"), )?; - // TODO: why differ from Java Test 0 + // TODO: Differs from Java 0 assert_projection( &fixture.binary_predicate( PredicateOperator::GreaterThan, @@ -1751,7 +1751,9 @@ mod tests { #[test] fn test_projection_date_day_negative() -> Result<()> { + // -2 let value = "1969-12-30"; + // -4 let another = "1969-12-28"; let fixture = TestProjectionParameter::new( @@ -1825,7 +1827,9 @@ mod tests { #[test] fn test_projection_date_day() -> Result<()> { + // 17167 let value = "2017-01-01"; + // 17531 let another = "2017-12-31"; let fixture = TestProjectionParameter::new( @@ -1899,7 +1903,9 @@ mod tests { #[test] fn test_projection_date_month_negative_upper_bound() -> Result<()> { + // -1 => 1969-12 let value = "1969-12-31"; + // -12 => 1969-01 let another = "1969-01-01"; let fixture = TestProjectionParameter::new( @@ -1908,7 +1914,6 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), ); - // 0 = number of months -> "1970-01" assert_projection( &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), &fixture, @@ -1930,7 +1935,6 @@ mod tests { Some("name >= 0"), )?; - // -1 = number of months -> "1969-12" assert_projection( 
&fixture.binary_predicate( PredicateOperator::GreaterThanOrEq, @@ -1975,7 +1979,9 @@ mod tests { #[test] fn test_projection_date_month_upper_bound() -> Result<()> { + // 575 => 2017-12 let value = "2017-12-31"; + // 564 => 2017-01 let another = "2017-01-01"; let fixture = TestProjectionParameter::new( @@ -1991,7 +1997,6 @@ mod tests { Some("name <= 575"), )?; - // 575 = number of months -> "2017-12-01" assert_projection( &fixture.binary_predicate( PredicateOperator::LessThanOrEq, @@ -2001,7 +2006,6 @@ mod tests { Some("name <= 575"), )?; - // 576 = number of months -> "2018-01-01" assert_projection( &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), &fixture, @@ -2029,7 +2033,6 @@ mod tests { None, )?; - // 564 = number of months -> "2017-01-01" assert_projection( &fixture.set_predicate( PredicateOperator::In, @@ -2053,7 +2056,9 @@ mod tests { #[test] fn test_projection_date_month_negative_lower_bound() -> Result<()> { + // -12 => 1969-01 let value = "1969-01-01"; + // -1 => 1969-12 let another = "1969-12-31"; let fixture = TestProjectionParameter::new( @@ -2062,14 +2067,12 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), ); - // -12 = number of months -> "1969-01-01" assert_projection( &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), &fixture, Some("name <= -12"), )?; - // -11 = number of months -> "1969-02-01" assert_projection( &fixture.binary_predicate( PredicateOperator::LessThanOrEq, @@ -2129,7 +2132,9 @@ mod tests { #[test] fn test_projection_date_month_lower_bound() -> Result<()> { + // 575 => 2017-12 let value = "2017-12-01"; + // 564 => 2017-01 let another = "2017-01-01"; let fixture = TestProjectionParameter::new( @@ -2138,7 +2143,6 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), ); - // 574 = number of months -> "2017-11-01" assert_projection( &fixture.binary_predicate(PredicateOperator::LessThan, 
Datum::date_from_str(value)?), &fixture, @@ -2206,7 +2210,9 @@ mod tests { #[test] fn test_projection_date_month_epoch() -> Result<()> { + // 0 => 1970-01 let value = "1970-01-01"; + // -1 => 1969-12 let another = "1969-12-31"; let fixture = TestProjectionParameter::new( @@ -2215,7 +2221,6 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), ); - // TODO: still need fixInclusiveTimeProjection?? assert_projection( &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), &fixture, @@ -2281,6 +2286,7 @@ mod tests { #[test] fn test_projection_date_year_negative_upper_bound() -> Result<()> { + // -1 => 1969 let value = "1969-12-31"; let another = "1969-01-01"; @@ -2290,7 +2296,6 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), ); - // 0 = number of years -> "1970" assert_projection( &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), &fixture, @@ -2356,7 +2361,9 @@ mod tests { #[test] fn test_projection_date_year_upper_bound() -> Result<()> { + // 47 => 2017 let value = "2017-12-31"; + // 46 => 2016 let another = "2016-01-01"; let fixture = TestProjectionParameter::new( @@ -2365,7 +2372,6 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), ); - // 47 = number of years -> "2017" assert_projection( &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), &fixture, @@ -2431,7 +2437,9 @@ mod tests { #[test] fn test_projection_date_year_negative_lower_bound() -> Result<()> { + // 0 => 1970 let value = "1970-01-01"; + // -1 => 1969 let another = "1969-12-31"; let fixture = TestProjectionParameter::new( @@ -2440,7 +2448,6 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), ); - // 0 = number of years -> "1970" assert_projection( &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), &fixture, @@ -2506,7 +2513,9 
@@ mod tests { #[test] fn test_projection_date_year_lower_bound() -> Result<()> { + // 47 => 2017 let value = "2017-01-01"; + // 46 => 2016 let another = "2016-12-31"; let fixture = TestProjectionParameter::new( @@ -2515,7 +2524,6 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), ); - // 46 = number of years -> "2016" assert_projection( &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), &fixture, @@ -2677,235 +2685,427 @@ mod tests { } #[test] - fn test_projection_truncate_decimal() -> Result<()> { - // test lower and upper bound - for &value in [100.00, 99.99].iter() { - // format as i128 unscaled - let result = if value == 100.00 { "10000" } else { "9990" }; - let value_str = format!("{:.2}", value); - - let fixture = TestProjectionParameter::new( - Transform::Truncate(10), - "name", - NestedField::required( - 1, - "value", - Type::Primitive(PrimitiveType::Decimal { - precision: 9, - scale: 2, - }), - ), - ); + fn test_projection_truncate_upper_bound_decimal() -> Result<()> { + let prev = "98.99"; + let curr = "99.99"; + let next = "100.99"; - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::decimal_from_str(&value_str)?, - ), - &fixture, - Some("name <= 9990"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::decimal_from_str(&value_str)?, - ), - &fixture, - Some(format!("name <= {}", result).as_str()), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::decimal_from_str(&value_str)?, - ), - &fixture, - Some(format!("name >= {}", result).as_str()), - )?; - - assert_projection( - &fixture - .binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(&value_str)?), - &fixture, - Some(format!("name = {}", result).as_str()), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::NotEq, - 
Datum::decimal_from_str(&value_str)?, - ), - &fixture, - None, - )?; - - let set_result = if value == 100.00 { - "name IN (9900, 10000, 10100)" - } else { - "name IN (10090, 9990, 9890)" - }; - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::decimal_from_str(format!("{:.2}", value - 1.0))?, - Datum::decimal_from_str(&value_str)?, - Datum::decimal_from_str(format!("{:.2}", value + 1.0))?, - ], - ), - &fixture, - Some(set_result), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::decimal_from_str(&value_str)?, - Datum::decimal_from_str(format!("{:.2}", value + 1.0))?, - ], - ), - &fixture, - None, - )?; - } + let fixture = TestProjectionParameter::new( + Transform::Truncate(10), + "name", + NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Decimal { + precision: 9, + scale: 2, + }), + ), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::decimal_from_str(curr)?), + &fixture, + Some("name <= 9990"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::decimal_from_str(curr)?, + ), + &fixture, + Some("name <= 9990"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::decimal_from_str(curr)?, + ), + &fixture, + Some("name >= 9990"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(curr)?), + &fixture, + Some("name = 9990"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str(curr)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str(prev)?, + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + &fixture, + Some("name IN (10090, 9990, 9890)"), + )?; + + assert_projection( + &fixture.set_predicate( + 
PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + &fixture, + None, + )?; Ok(()) } #[test] - fn test_projection_truncate_long() -> Result<()> { - // test lower and upper bound - for &value in [100i64, 99i64].iter() { - let result = if value == 100 { "100" } else { "90" }; - - let fixture = TestProjectionParameter::new( - Transform::Truncate(10), - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), - ); + fn test_projection_truncate_lower_bound_decimal() -> Result<()> { + let prev = "99.00"; + let curr = "100.00"; + let next = "101.00"; - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), - &fixture, - Some("name <= 90"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), - &fixture, - Some(format!("name <= {}", result).as_str()), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), - &fixture, - Some(format!("name >= {}", result).as_str()), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), - &fixture, - Some(format!("name = {}", result).as_str()), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::long(value - 1), - Datum::long(value), - Datum::long(value + 1), - ], - ), - &fixture, - Some("name IN (100, 90)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::long(value), Datum::long(value + 1)], - ), - &fixture, - None, - )?; - } + let fixture = TestProjectionParameter::new( + Transform::Truncate(10), + "name", + NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Decimal { + precision: 9, + scale: 2, + }), + ), + ); + + 
assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::decimal_from_str(curr)?), + &fixture, + Some("name <= 9990"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::decimal_from_str(curr)?, + ), + &fixture, + Some("name <= 10000"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::decimal_from_str(curr)?, + ), + &fixture, + Some("name >= 10000"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(curr)?), + &fixture, + Some("name = 10000"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str(curr)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str(prev)?, + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + &fixture, + Some("name IN (9900, 10000, 10100)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + &fixture, + None, + )?; Ok(()) } #[test] - fn test_projection_truncate_integer() -> Result<()> { - // test lower and upper bound - for &value in [100, 99].iter() { - let result = if value == 100 { "100" } else { "90" }; - - let fixture = TestProjectionParameter::new( - Transform::Truncate(10), - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), - ); + fn test_projection_truncate_upper_bound_long() -> Result<()> { + let value = 99i64; - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), - &fixture, - Some("name <= 90"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), - &fixture, - Some(format!("name <= {}", result).as_str()), - )?; - - assert_projection( - 
&fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), - &fixture, - Some(format!("name >= {}", result).as_str()), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), - &fixture, - Some(format!("name = {}", result).as_str()), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::int(value - 1), - Datum::int(value), - Datum::int(value + 1), - ], - ), - &fixture, - Some("name IN (100, 90)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::int(value), Datum::int(value + 1)], - ), - &fixture, - None, - )?; - } + let fixture = TestProjectionParameter::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), + &fixture, + Some("name <= 90"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), + &fixture, + Some("name <= 90"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), + &fixture, + Some("name >= 90"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), + &fixture, + Some("name = 90"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value + 1), + ], + ), + &fixture, + Some("name IN (100, 90)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(value), Datum::long(value + 1)], 
+ ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_lower_bound_long() -> Result<()> { + let value = 100i64; + + let fixture = TestProjectionParameter::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), + &fixture, + Some("name <= 90"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), + &fixture, + Some("name <= 100"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), + &fixture, + Some("name >= 100"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), + &fixture, + Some("name = 100"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value + 1), + ], + ), + &fixture, + Some("name IN (100, 90)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(value), Datum::long(value + 1)], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_upper_bound_integer() -> Result<()> { + let value = 99; + + let fixture = TestProjectionParameter::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), + &fixture, + Some("name <= 90"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), + &fixture, + Some("name <= 90"), + )?; + + assert_projection( + 
&fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), + &fixture, + Some("name >= 90"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), + &fixture, + Some("name = 90"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], + ), + &fixture, + Some("name IN (100, 90)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), + &fixture, + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_lower_bound_integer() -> Result<()> { + let value = 100; + + let fixture = TestProjectionParameter::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), + ); + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), + &fixture, + Some("name <= 90"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), + &fixture, + Some("name <= 100"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), + &fixture, + Some("name >= 100"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), + &fixture, + Some("name = 100"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], + ), + &fixture, + Some("name IN (100, 90)"), + )?; + + assert_projection( + &fixture.set_predicate( + 
PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), + &fixture, + None, + )?; Ok(()) } @@ -2913,7 +3113,8 @@ mod tests { #[test] fn test_projection_bucket_uuid() -> Result<()> { let value = uuid::Uuid::from_u64_pair(123, 456); - let another_value = uuid::Uuid::from_u64_pair(456, 123); + let another = uuid::Uuid::from_u64_pair(456, 123); + let fixture = TestProjectionParameter::new( Transform::Bucket(10), "name", @@ -2959,7 +3160,7 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::In, - vec![Datum::uuid(value), Datum::uuid(another_value)], + vec![Datum::uuid(value), Datum::uuid(another)], ), &fixture, Some("name IN (4, 6)"), @@ -2968,7 +3169,7 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::NotIn, - vec![Datum::uuid(value), Datum::uuid(another_value)], + vec![Datum::uuid(value), Datum::uuid(another)], ), &fixture, None, @@ -2980,7 +3181,8 @@ mod tests { #[test] fn test_projection_bucket_fixed() -> Result<()> { let value = "abcdefg".as_bytes().to_vec(); - let another_value = "abcdehij".as_bytes().to_vec(); + let another = "abcdehij".as_bytes().to_vec(); + let fixture = TestProjectionParameter::new( Transform::Bucket(10), "name", @@ -3033,10 +3235,7 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::In, - vec![ - Datum::fixed(value.clone()), - Datum::fixed(another_value.clone()), - ], + vec![Datum::fixed(value.clone()), Datum::fixed(another.clone())], ), &fixture, Some("name IN (4, 6)"), @@ -3045,10 +3244,7 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::NotIn, - vec![ - Datum::fixed(value.clone()), - Datum::fixed(another_value.clone()), - ], + vec![Datum::fixed(value.clone()), Datum::fixed(another.clone())], ), &fixture, None, @@ -3059,6 +3255,9 @@ mod tests { #[test] fn test_projection_bucket_string() -> Result<()> { + let value = "abcdefg"; + let another = "abcdefgabc"; + let fixture = TestProjectionParameter::new( 
Transform::Bucket(10), "name", @@ -3066,37 +3265,37 @@ mod tests { ); assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::string("abcdefg")), + &fixture.binary_predicate(PredicateOperator::Eq, Datum::string(value)), &fixture, Some("name = 4"), )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::string("abcdefg")), + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::string(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::string("abcdefg")), + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::string(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::string("abcdefg")), + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::string(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::string("abcdefg")), + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::string(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::string("abcdefg")), + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::string(value)), &fixture, None, )?; @@ -3104,7 +3303,7 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::In, - vec![Datum::string("abcdefg"), Datum::string("abcdefgabc")], + vec![Datum::string(value), Datum::string(another)], ), &fixture, Some("name IN (9, 4)"), @@ -3113,7 +3312,7 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::NotIn, - vec![Datum::string("abcdefg"), Datum::string("abcdefgabc")], + vec![Datum::string(value), Datum::string(another)], ), &fixture, None, @@ -3124,6 +3323,10 @@ mod tests { #[test] fn test_projection_bucket_decimal() -> Result<()> { + let prev = "99.00"; + let curr = "100.00"; + let next = "101.00"; + let fixture = 
TestProjectionParameter::new( Transform::Bucket(10), "name", @@ -3138,22 +3341,19 @@ mod tests { ); assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str("100.00")?), + &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(curr)?), &fixture, Some("name = 2"), )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str("100.00")?), + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str(curr)?), &fixture, None, )?; assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::decimal_from_str("100.00")?, - ), + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::decimal_from_str(curr)?), &fixture, None, )?; @@ -3161,7 +3361,7 @@ mod tests { assert_projection( &fixture.binary_predicate( PredicateOperator::LessThanOrEq, - Datum::decimal_from_str("100.00")?, + Datum::decimal_from_str(curr)?, ), &fixture, None, @@ -3170,7 +3370,7 @@ mod tests { assert_projection( &fixture.binary_predicate( PredicateOperator::GreaterThan, - Datum::decimal_from_str("100.00")?, + Datum::decimal_from_str(curr)?, ), &fixture, None, @@ -3179,7 +3379,7 @@ mod tests { assert_projection( &fixture.binary_predicate( PredicateOperator::GreaterThanOrEq, - Datum::decimal_from_str("100.00")?, + Datum::decimal_from_str(curr)?, ), &fixture, None, @@ -3189,9 +3389,9 @@ mod tests { &fixture.set_predicate( PredicateOperator::In, vec![ - Datum::decimal_from_str("101.00")?, - Datum::decimal_from_str("100.00")?, - Datum::decimal_from_str("99.00")?, + Datum::decimal_from_str(next)?, + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(prev)?, ], ), &fixture, @@ -3202,8 +3402,8 @@ mod tests { &fixture.set_predicate( PredicateOperator::NotIn, vec![ - Datum::decimal_from_str("100.00")?, - Datum::decimal_from_str("101.00")?, + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, ], ), &fixture, @@ -3215,6 +3415,7 @@ mod tests { #[test] fn 
test_projection_bucket_long() -> Result<()> { + let value = 100; let fixture = TestProjectionParameter::new( Transform::Bucket(10), "name", @@ -3222,37 +3423,37 @@ mod tests { ); assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(100)), + &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), &fixture, Some("name = 6"), )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(100)), + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(100)), + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(100)), + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::long(100)), + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::long(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(100)), + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), &fixture, None, )?; @@ -3260,7 +3461,11 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::In, - vec![Datum::long(99), Datum::long(100), Datum::long(101)], + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value + 1), + ], ), &fixture, Some("name IN (8, 7, 6)"), @@ -3269,7 +3474,7 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::NotIn, - vec![Datum::long(100), Datum::long(101)], + vec![Datum::long(value), Datum::long(value + 1)], ), &fixture, None, @@ -3280,6 +3485,7 @@ mod tests { #[test] fn test_projection_bucket_integer() -> Result<()> { + let value = 100; let fixture = 
TestProjectionParameter::new( Transform::Bucket(10), "name", @@ -3287,37 +3493,37 @@ mod tests { ); assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(100)), + &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), &fixture, Some("name = 6"), )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(100)), + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(100)), + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(100)), + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::int(100)), + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::int(value)), &fixture, None, )?; assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(100)), + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), &fixture, None, )?; @@ -3325,7 +3531,11 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::In, - vec![Datum::int(99), Datum::int(100), Datum::int(101)], + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], ), &fixture, Some("name IN (8, 7, 6)"), @@ -3334,7 +3544,7 @@ mod tests { assert_projection( &fixture.set_predicate( PredicateOperator::NotIn, - vec![Datum::int(100), Datum::int(101)], + vec![Datum::int(value), Datum::int(value + 1)], ), &fixture, None, From 18a6db2b0069a7b4f22106ca41e8a0629e7a3045 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Sun, 31 Mar 2024 22:21:02 +0200 Subject: [PATCH 34/46] fix: timestamp conversion --- crates/iceberg/src/spec/transform.rs | 72 
+++++++++++------------- crates/iceberg/src/transform/temporal.rs | 35 +++++++++++- 2 files changed, 66 insertions(+), 41 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index c46fc6ed1..d519aa573 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -32,9 +32,6 @@ use std::str::FromStr; use super::{Datum, PrimitiveLiteral}; -/// A `Day` in microseconds -const DAY_IN_MICROS: i64 = 86_400_000_000; - /// Transform is used to transform predicates to partition predicates, /// in addition to transforming data values. /// @@ -459,11 +456,7 @@ impl Transform { PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)), PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v - 1)?), PrimitiveLiteral::Date(v) => Some(Datum::date(v - 1)), - PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v - DAY_IN_MICROS)?), - PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v - DAY_IN_MICROS)), - PrimitiveLiteral::TimestampTZ(v) => { - Some(Datum::timestamptz_micros(v - DAY_IN_MICROS)) - } + PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v - 1)), _ => Some(datum.to_owned()), }, PredicateOperator::GreaterThan => match literal { @@ -471,11 +464,7 @@ impl Transform { PrimitiveLiteral::Long(v) => Some(Datum::long(v + 1)), PrimitiveLiteral::Decimal(v) => Some(Datum::decimal(v + 1)?), PrimitiveLiteral::Date(v) => Some(Datum::date(v + 1)), - PrimitiveLiteral::Time(v) => Some(Datum::time_micros(v + DAY_IN_MICROS)?), - PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v + DAY_IN_MICROS)), - PrimitiveLiteral::TimestampTZ(v) => { - Some(Datum::timestamptz_micros(v + DAY_IN_MICROS)) - } + PrimitiveLiteral::Timestamp(v) => Some(Datum::timestamp_micros(v + 1)), _ => Some(datum.to_owned()), }, PredicateOperator::Eq @@ -780,6 +769,30 @@ mod tests { assert_eq!(result_type, trans.result_type(&input_type).ok()); } } + #[test] + fn 
test_projection_timestamp_hour_lower_bound() -> Result<()> { + // 420034 //420010 + let value = "2017-12-01T10:00:00.000000"; + // 411288 + let _another = "2016-12-02T00:00:00.000000"; + + let fixture = TestProjectionParameter::new( + Transform::Hour, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 420033"), + )?; + + Ok(()) + } #[test] fn test_projection_timestamp_year_upper_bound() -> Result<()> { @@ -1049,14 +1062,13 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), ); - // TODO: Differs from Java 575 assert_projection( &fixture.binary_predicate( PredicateOperator::LessThan, Datum::timestamp_from_str(value)?, ), &fixture, - Some("name <= 574"), + Some("name <= 575"), )?; assert_projection( @@ -1336,25 +1348,19 @@ mod tests { Some("name >= 0"), )?; - // TODO: Differs from Java Test due to - // Timestamp conversion as i32? - // According to Java should be `name >= -1` assert_projection( &fixture.binary_predicate( PredicateOperator::GreaterThanOrEq, Datum::timestamp_from_str(value)?, ), &fixture, - Some("name >= 0"), + Some("name >= -1"), )?; - // TODO: Differs from Java Test due to - // Timestamp conversion as i32? - // According to Java should be `name IN (0, -1)` assert_projection( &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), &fixture, - Some("name = 0"), + Some("name IN (-1, 0)"), )?; assert_projection( @@ -1363,9 +1369,6 @@ mod tests { None, )?; - // TODO: Differs from Java Test due to - // Timestamp conversion as i32? 
- // According to Java should be `name IN (0, -1)` assert_projection( &fixture.set_predicate( PredicateOperator::In, @@ -1375,7 +1378,7 @@ mod tests { ], ), &fixture, - Some("name IN (0)"), + Some("name IN (0, -1)"), )?; assert_projection( @@ -1406,14 +1409,13 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), ); - // TODO: Differs from Java 17501 assert_projection( &fixture.binary_predicate( PredicateOperator::LessThan, Datum::timestamp_from_str(value)?, ), &fixture, - Some("name <= 17500"), + Some("name <= 17501"), )?; assert_projection( @@ -1513,14 +1515,13 @@ mod tests { Some("name <= -364"), )?; - // TODO: Differs from java -365 assert_projection( &fixture.binary_predicate( PredicateOperator::GreaterThan, Datum::timestamp_from_str(value)?, ), &fixture, - Some("name >= -364"), + Some("name >= -365"), )?; assert_projection( @@ -1602,14 +1603,13 @@ mod tests { Some("name <= 17501"), )?; - //TODO: Differs from Java 17501 assert_projection( &fixture.binary_predicate( PredicateOperator::GreaterThan, Datum::timestamp_from_str(value)?, ), &fixture, - Some("name >= 17502"), + Some("name >= 17501"), )?; assert_projection( @@ -1691,14 +1691,13 @@ mod tests { Some("name <= 0"), )?; - // TODO: Differs from Java 0 assert_projection( &fixture.binary_predicate( PredicateOperator::GreaterThan, Datum::timestamp_from_str(value)?, ), &fixture, - Some("name >= 1"), + Some("name >= 0"), )?; assert_projection( @@ -1990,7 +1989,6 @@ mod tests { NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), ); - // 574 = number of months -> "2017-11-01" assert_projection( &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), &fixture, @@ -2149,7 +2147,6 @@ mod tests { Some("name <= 574"), )?; - // 575 = number of months -> "2017-12-01" assert_projection( &fixture.binary_predicate( PredicateOperator::LessThanOrEq, @@ -2186,7 +2183,6 @@ mod tests { None, )?; - // 564 = number of months -> "2017-01-01" 
assert_projection( &fixture.set_predicate( PredicateOperator::In, diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index 2a79db300..22e52cc4e 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -24,15 +24,17 @@ use arrow_array::{ types::Date32Type, Array, ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray, }; use arrow_schema::{DataType, TimeUnit}; -use chrono::{DateTime, Datelike}; +use chrono::{DateTime, Datelike, Duration}; use std::sync::Arc; /// Hour in one second. const HOUR_PER_SECOND: f64 = 1.0_f64 / 3600.0_f64; /// Day in one second. -const DAY_PER_SECOND: f64 = 1.0_f64 / 24.0_f64 / 3600.0_f64; +const _DAY_PER_SECOND: f64 = 1.0_f64 / 24.0_f64 / 3600.0_f64; /// Year of unix epoch. const UNIX_EPOCH_YEAR: i32 = 1970; +/// One second in micros. +const MICROS_PER_SECOND: i64 = 1_000_000; /// Extract a date or timestamp year, as years from 1970 #[derive(Debug)] @@ -164,7 +166,34 @@ pub struct Day; impl Day { #[inline] fn day_timestamp_micro(v: i64) -> i32 { - (v as f64 / 1000.0 / 1000.0 * DAY_PER_SECOND) as i32 + // (v as f64 / 1000.0 / 1000.0 * DAY_PER_SECOND) as i32 + let secs = v / MICROS_PER_SECOND; + + let (nanos, offset) = if v >= 0 { + let nanos = (v.rem_euclid(MICROS_PER_SECOND) * 1_000) as u32; + let offset = 0i64; + (nanos, offset) + } else { + let v = v + 1; + let nanos = (v.rem_euclid(MICROS_PER_SECOND) * 1_000) as u32; + let offset = 1i64; + (nanos, offset) + }; + + // TODO: Handle unwrap, return Result + let delta = Duration::new(secs, nanos) + .ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create 'TimeDelta' from seconds {} and nanos {}", + secs, nanos + ), + ) + }) + .unwrap(); + + (delta.num_days() - offset) as i32 } } From b390d4b8942e2bc5c41d3f4d57fba98b6d14ed06 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Sun, 31 Mar 2024 22:56:43 +0200 Subject: [PATCH 35/46] fix: temporal test_result --- 
crates/iceberg/src/transform/temporal.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index 22e52cc4e..459d2430a 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -613,7 +613,7 @@ mod test { // Test TimestampMicrosecond test_timestamp_and_tz_transform_using_i64(1512151975038194, &day, Datum::int(17501)); - test_timestamp_and_tz_transform_using_i64(-115200000000, &day, Datum::int(-1)); + test_timestamp_and_tz_transform_using_i64(-115200000000, &day, Datum::int(-2)); test_timestamp_and_tz_transform("2017-12-01 10:30:42.123", &day, Datum::int(17501)); } From a123fc16873d64376ce7ad718d08d586a65d313f Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Mon, 1 Apr 2024 07:23:46 +0200 Subject: [PATCH 36/46] basic fix --- crates/iceberg/src/transform/temporal.rs | 36 ++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index 2a79db300..c1c1bb827 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -24,15 +24,15 @@ use arrow_array::{ types::Date32Type, Array, ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray, }; use arrow_schema::{DataType, TimeUnit}; -use chrono::{DateTime, Datelike}; +use chrono::{DateTime, Datelike, Duration}; use std::sync::Arc; /// Hour in one second. const HOUR_PER_SECOND: f64 = 1.0_f64 / 3600.0_f64; -/// Day in one second. -const DAY_PER_SECOND: f64 = 1.0_f64 / 24.0_f64 / 3600.0_f64; /// Year of unix epoch. const UNIX_EPOCH_YEAR: i32 = 1970; +/// One second in micros. 
+const MICROS_PER_SECOND: i64 = 1_000_000; /// Extract a date or timestamp year, as years from 1970 #[derive(Debug)] @@ -164,7 +164,33 @@ pub struct Day; impl Day { #[inline] fn day_timestamp_micro(v: i64) -> i32 { - (v as f64 / 1000.0 / 1000.0 * DAY_PER_SECOND) as i32 + let secs = v / MICROS_PER_SECOND; + + let (nanos, offset) = if v >= 0 { + let nanos = (v.rem_euclid(MICROS_PER_SECOND) * 1_000) as u32; + let offset = 0i64; + (nanos, offset) + } else { + let v = v + 1; + let nanos = (v.rem_euclid(MICROS_PER_SECOND) * 1_000) as u32; + let offset = 1i64; + (nanos, offset) + }; + + // TODO: Handle unwrap, return Result + let delta = Duration::new(secs, nanos) + .ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create 'TimeDelta' from seconds {} and nanos {}", + secs, nanos + ), + ) + }) + .unwrap(); + + (delta.num_days() - offset) as i32 } } @@ -584,7 +610,7 @@ mod test { // Test TimestampMicrosecond test_timestamp_and_tz_transform_using_i64(1512151975038194, &day, Datum::int(17501)); - test_timestamp_and_tz_transform_using_i64(-115200000000, &day, Datum::int(-1)); + test_timestamp_and_tz_transform_using_i64(-115200000000, &day, Datum::int(-2)); test_timestamp_and_tz_transform("2017-12-01 10:30:42.123", &day, Datum::int(17501)); } From 3483f331425215fbd2bac677971ddf5495fa9459 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Mon, 1 Apr 2024 07:33:18 +0200 Subject: [PATCH 37/46] change to Result --- crates/iceberg/src/transform/temporal.rs | 33 ++++++++++++------------ 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index c1c1bb827..593383799 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -163,7 +163,7 @@ pub struct Day; impl Day { #[inline] - fn day_timestamp_micro(v: i64) -> i32 { + fn day_timestamp_micro(v: i64) -> Result { let secs = v / MICROS_PER_SECOND; let (nanos, offset) = 
if v >= 0 { @@ -177,20 +177,19 @@ impl Day { (nanos, offset) }; - // TODO: Handle unwrap, return Result - let delta = Duration::new(secs, nanos) - .ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - format!( - "Failed to create 'TimeDelta' from seconds {} and nanos {}", - secs, nanos - ), - ) - }) - .unwrap(); + let delta = Duration::new(secs, nanos).ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create 'TimeDelta' from seconds {} and nanos {}", + secs, nanos + ), + ) + })?; + + let days = (delta.num_days() - offset) as i32; - (delta.num_days() - offset) as i32 + Ok(days) } } @@ -201,7 +200,7 @@ impl TransformFunction for Day { .as_any() .downcast_ref::() .unwrap() - .unary(|v| -> i32 { Self::day_timestamp_micro(v) }), + .unary(|v| -> i32 { Self::day_timestamp_micro(v).unwrap() }), DataType::Date32 => input .as_any() .downcast_ref::() @@ -223,8 +222,8 @@ impl TransformFunction for Day { fn transform_literal(&self, input: &crate::spec::Datum) -> Result> { let val = match input.literal() { PrimitiveLiteral::Date(v) => *v, - PrimitiveLiteral::Timestamp(v) => Self::day_timestamp_micro(*v), - PrimitiveLiteral::TimestampTZ(v) => Self::day_timestamp_micro(*v), + PrimitiveLiteral::Timestamp(v) => Self::day_timestamp_micro(*v)?, + PrimitiveLiteral::TimestampTZ(v) => Self::day_timestamp_micro(*v)?, _ => { return Err(crate::Error::new( crate::ErrorKind::FeatureUnsupported, From a55be8fbab81bcca364145fbd9879f9dee0280b4 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Mon, 1 Apr 2024 07:45:46 +0200 Subject: [PATCH 38/46] use try_unary --- crates/iceberg/src/transform/temporal.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index 593383799..9c6489e91 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -200,7 +200,7 @@ impl TransformFunction for Day { .as_any() .downcast_ref::() 
.unwrap() - .unary(|v| -> i32 { Self::day_timestamp_micro(v).unwrap() }), + .try_unary(|v| -> Result { Self::day_timestamp_micro(v) })?, DataType::Date32 => input .as_any() .downcast_ref::() From 014d793018b922f7353069844d2ad09a934c2a61 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Mon, 1 Apr 2024 10:47:19 +0200 Subject: [PATCH 39/46] add: java-testsuite Transform::Timestamp Hours --- crates/iceberg/src/spec/transform.rs | 160 ++++++++++++++++++++++++++- 1 file changed, 156 insertions(+), 4 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index d519aa573..117eee0f9 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -522,8 +522,8 @@ impl Transform { } } - /// Adjust time projection - ///https://github.com/apache/iceberg/blob/main/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java#L275 + /// Adjust projection for temporal transforms, align with Java + /// implementation: https://github.com/apache/iceberg/blob/main/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java#L275 fn adjust_projection( &self, op: &PredicateOperator, @@ -769,12 +769,101 @@ mod tests { assert_eq!(result_type, trans.result_type(&input_type).ok()); } } + + #[test] + fn test_projection_timestamp_hour_upper_bound() -> Result<()> { + // 420034 + let value = "2017-12-01T10:59:59.999999"; + // 412007 + let another = "2016-12-31T23:59:59.999999"; + + let fixture = TestProjectionParameter::new( + Transform::Hour, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 420034"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 420034"), + )?; + + assert_projection( + 
&fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 420035"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 420034"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name = 420034"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (420034, 412007)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + + Ok(()) + } + #[test] fn test_projection_timestamp_hour_lower_bound() -> Result<()> { - // 420034 //420010 + // 420034 let value = "2017-12-01T10:00:00.000000"; // 411288 - let _another = "2016-12-02T00:00:00.000000"; + let another = "2016-12-02T00:00:00.000000"; let fixture = TestProjectionParameter::new( Transform::Hour, @@ -791,6 +880,69 @@ mod tests { Some("name <= 420033"), )?; + assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name <= 420034"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 420034"), + )?; + + assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + &fixture, + Some("name >= 420034"), + )?; + + assert_projection( + 
&fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + &fixture, + Some("name = 420034"), + )?; + + assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + &fixture, + None, + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + Some("name IN (420034, 411288)"), + )?; + + assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + &fixture, + None, + )?; + Ok(()) } From ab060221e6233796d6228fbe0cf4fe55d0f3115e Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Mon, 1 Apr 2024 16:24:12 +0200 Subject: [PATCH 40/46] refactor: split and move tests --- crates/iceberg/src/spec/transform.rs | 2995 ---------------------- crates/iceberg/src/transform/bucket.rs | 404 ++- crates/iceberg/src/transform/mod.rs | 70 + crates/iceberg/src/transform/temporal.rs | 1780 ++++++++++++- crates/iceberg/src/transform/truncate.rs | 479 +++- 5 files changed, 2729 insertions(+), 2999 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 117eee0f9..20e9a08d8 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -666,11 +666,6 @@ enum AdjustedProjection { #[cfg(test)] mod tests { - use super::*; - use std::collections::HashSet; - use std::sync::Arc; - - use crate::expr::{BoundPredicate, BoundReference, PredicateOperator}; use crate::spec::datatypes::PrimitiveType::{ Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, Timestamptz, Uuid, @@ -678,7 +673,6 @@ mod tests { use crate::spec::datatypes::Type::{Primitive, Struct}; use crate::spec::datatypes::{NestedField, StructType, Type}; use crate::spec::transform::Transform; - use crate::spec::{Datum, 
NestedFieldRef, PrimitiveType}; struct TestParameter { display: String, @@ -689,64 +683,6 @@ mod tests { trans_types: Vec<(Type, Option)>, } - struct TestProjectionParameter { - transform: Transform, - name: String, - field: NestedFieldRef, - } - - impl TestProjectionParameter { - fn new(transform: Transform, name: impl Into, field: NestedField) -> Self { - TestProjectionParameter { - transform, - name: name.into(), - field: Arc::new(field), - } - } - fn name(&self) -> String { - self.name.clone() - } - fn field(&self) -> NestedFieldRef { - self.field.clone() - } - fn project(&self, predicate: &BoundPredicate) -> Result> { - self.transform.project(self.name(), predicate) - } - fn _unary_predicate(&self, op: PredicateOperator) -> BoundPredicate { - BoundPredicate::Unary(UnaryExpression::new( - op, - BoundReference::new(self.name(), self.field()), - )) - } - fn binary_predicate(&self, op: PredicateOperator, literal: Datum) -> BoundPredicate { - BoundPredicate::Binary(BinaryExpression::new( - op, - BoundReference::new(self.name(), self.field()), - literal, - )) - } - fn set_predicate(&self, op: PredicateOperator, literals: Vec) -> BoundPredicate { - BoundPredicate::Set(SetExpression::new( - op, - BoundReference::new(self.name(), self.field()), - HashSet::from_iter(literals), - )) - } - } - - fn assert_projection( - predicate: &BoundPredicate, - fixture: &TestProjectionParameter, - expected: Option<&str>, - ) -> Result<()> { - let result = fixture.project(predicate)?; - match expected { - Some(exp) => assert_eq!(format!("{}", result.unwrap()), exp), - None => assert!(result.is_none()), - } - Ok(()) - } - fn check_transform(trans: Transform, param: TestParameter) { assert_eq!(param.display, format!("{trans}")); assert_eq!(param.json, serde_json::to_string(&trans).unwrap()); @@ -770,2937 +706,6 @@ mod tests { } } - #[test] - fn test_projection_timestamp_hour_upper_bound() -> Result<()> { - // 420034 - let value = "2017-12-01T10:59:59.999999"; - // 412007 - let another = 
"2016-12-31T23:59:59.999999"; - - let fixture = TestProjectionParameter::new( - Transform::Hour, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 420034"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 420034"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 420035"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 420034"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name = 420034"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (420034, 412007)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_timestamp_hour_lower_bound() -> Result<()> { - // 420034 - let value = "2017-12-01T10:00:00.000000"; - // 411288 - let another = "2016-12-02T00:00:00.000000"; - - let fixture = TestProjectionParameter::new( - Transform::Hour, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - 
&fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 420033"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 420034"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 420034"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 420034"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name = 420034"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (420034, 411288)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_timestamp_year_upper_bound() -> Result<()> { - let value = "2017-12-31T23:59:59.999999"; - let another = "2016-12-31T23:59:59.999999"; - - let fixture = TestProjectionParameter::new( - Transform::Year, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 47"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - 
Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 47"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 48"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 47"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name = 47"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (47, 46)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - Ok(()) - } - - #[test] - fn test_projection_timestamp_year_lower_bound() -> Result<()> { - let value = "2017-01-01T00:00:00.000000"; - let another = "2016-12-02T00:00:00.000000"; - - let fixture = TestProjectionParameter::new( - Transform::Year, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 46"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 47"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 47"), - )?; - - assert_projection( - 
&fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 47"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name = 47"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (47, 46)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_timestamp_month_negative_upper_bound() -> Result<()> { - let value = "1969-12-31T23:59:59.999999"; - let another = "1970-01-01T00:00:00.000000"; - - let fixture = TestProjectionParameter::new( - Transform::Month, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= -1"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - 
Some("name IN (-1, 0)"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (0, -1)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_timestamp_month_upper_bound() -> Result<()> { - let value = "2017-12-01T23:59:59.999999"; - let another = "2017-11-02T00:00:00.000000"; - - let fixture = TestProjectionParameter::new( - Transform::Month, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 575"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 575"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 575"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 575"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name = 575"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - 
Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (575, 574)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - Ok(()) - } - - #[test] - fn test_projection_timestamp_month_negative_lower_bound() -> Result<()> { - let value = "1969-01-01T00:00:00.000000"; - let another = "1969-03-01T00:00:00.000000"; - - let fixture = TestProjectionParameter::new( - Transform::Month, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= -12"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= -11"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= -12"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= -12"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name IN (-12, -11)"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (-10, -9, -12, -11)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - 
Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_timestamp_month_lower_bound() -> Result<()> { - let value = "2017-12-01T00:00:00.000000"; - let another = "2017-12-02T00:00:00.000000"; - - let fixture = TestProjectionParameter::new( - Transform::Month, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 574"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 575"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 575"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 575"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name = 575"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (575)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_timestamp_day_negative_upper_bound() -> Result<()> { - // -1 - let value = "1969-12-31T23:59:59.999999"; - // 0 - let 
another = "1970-01-01T00:00:00.000000"; - - let fixture = TestProjectionParameter::new( - Transform::Day, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= -1"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name IN (-1, 0)"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (0, -1)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_timestamp_day_upper_bound() -> Result<()> { - // 17501 - let value = "2017-12-01T23:59:59.999999"; - // 17502 - let another = "2017-12-02T00:00:00.000000"; - - let fixture = TestProjectionParameter::new( - Transform::Day, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - 
PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 17501"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 17501"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 17502"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 17501"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name = 17501"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (17501, 17502)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_timestamp_day_negative_lower_bound() -> Result<()> { - // -365 - let value = "1969-01-01T00:00:00.000000"; - // -364 - let another = "1969-01-02T00:00:00.000000"; - - let fixture = TestProjectionParameter::new( - Transform::Day, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= -365"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - 
Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= -364"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= -365"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= -365"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name IN (-364, -365)"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (-363, -364, -365)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_timestamp_day_lower_bound() -> Result<()> { - // 17501 - let value = "2017-12-01T00:00:00.000000"; - // 17502 - let another = "2017-12-02T00:00:00.000000"; - - let fixture = TestProjectionParameter::new( - Transform::Day, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 17500"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 17501"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - 
Some("name >= 17501"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 17501"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name = 17501"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (17501, 17502)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_timestamp_day_epoch() -> Result<()> { - // 0 - let value = "1970-01-01T00:00:00.00000"; - // 1 - let another = "1970-01-02T00:00:00.00000"; - - let fixture = TestProjectionParameter::new( - Transform::Day, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), - ); - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::timestamp_from_str(value)?, - ), - &fixture, - Some("name >= 0"), - )?; - - assert_projection( - 
&fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), - &fixture, - Some("name = 0"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - Some("name IN (1, 0)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_date_day_negative() -> Result<()> { - // -2 - let value = "1969-12-30"; - // -4 - let another = "1969-12-28"; - - let fixture = TestProjectionParameter::new( - Transform::Day, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), - &fixture, - Some("name <= -3"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name <= -2"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), - &fixture, - Some("name >= -1"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name >= -2"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), - &fixture, - Some("name = -2"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - 
vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - Some("name IN (-2, -4)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_date_day() -> Result<()> { - // 17167 - let value = "2017-01-01"; - // 17531 - let another = "2017-12-31"; - - let fixture = TestProjectionParameter::new( - Transform::Day, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), - &fixture, - Some("name <= 17166"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name <= 17167"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), - &fixture, - Some("name >= 17168"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name >= 17167"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), - &fixture, - Some("name = 17167"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - Some("name IN (17531, 17167)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_date_month_negative_upper_bound() 
-> Result<()> { - // -1 => 1969-12 - let value = "1969-12-31"; - // -12 => 1969-01 - let another = "1969-01-01"; - - let fixture = TestProjectionParameter::new( - Transform::Month, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), - &fixture, - Some("name >= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name >= -1"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), - &fixture, - Some("name IN (-1, 0)"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - Some("name IN (-1, -12, -11, 0)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_date_month_upper_bound() -> Result<()> { - // 575 => 2017-12 - let value = "2017-12-31"; - // 564 => 2017-01 - let another = "2017-01-01"; - - let fixture = TestProjectionParameter::new( - Transform::Month, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, 
Datum::date_from_str(value)?), - &fixture, - Some("name <= 575"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name <= 575"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), - &fixture, - Some("name >= 576"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name >= 575"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), - &fixture, - Some("name = 575"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - Some("name IN (575, 564)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_date_month_negative_lower_bound() -> Result<()> { - // -12 => 1969-01 - let value = "1969-01-01"; - // -1 => 1969-12 - let another = "1969-12-31"; - - let fixture = TestProjectionParameter::new( - Transform::Month, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), - &fixture, - Some("name <= -12"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name <= -11"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), - 
&fixture, - Some("name >= -12"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name >= -12"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), - &fixture, - Some("name IN (-12, -11)"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - Some("name IN (-1, -12, -11, 0)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_date_month_lower_bound() -> Result<()> { - // 575 => 2017-12 - let value = "2017-12-01"; - // 564 => 2017-01 - let another = "2017-01-01"; - - let fixture = TestProjectionParameter::new( - Transform::Month, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), - &fixture, - Some("name <= 574"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name <= 575"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), - &fixture, - Some("name >= 575"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name >= 575"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), - &fixture, - Some("name = 575"), 
- )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - Some("name IN (575, 564)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_date_month_epoch() -> Result<()> { - // 0 => 1970-01 - let value = "1970-01-01"; - // -1 => 1969-12 - let another = "1969-12-31"; - - let fixture = TestProjectionParameter::new( - Transform::Month, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), - &fixture, - Some("name >= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name >= 0"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), - &fixture, - Some("name = 0"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - Some("name IN (0, -1)"), - )?; - - assert_projection( - 
&fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_date_year_negative_upper_bound() -> Result<()> { - // -1 => 1969 - let value = "1969-12-31"; - let another = "1969-01-01"; - - let fixture = TestProjectionParameter::new( - Transform::Year, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), - &fixture, - Some("name >= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name >= -1"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), - &fixture, - Some("name IN (-1, 0)"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - Some("name IN (0, -1)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_date_year_upper_bound() -> Result<()> { - // 47 => 2017 - let value = "2017-12-31"; - // 46 => 2016 - let another = "2016-01-01"; - - let fixture = TestProjectionParameter::new( - 
Transform::Year, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), - &fixture, - Some("name <= 47"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name <= 47"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), - &fixture, - Some("name >= 48"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name >= 47"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), - &fixture, - Some("name = 47"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - Some("name IN (47, 46)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_date_year_negative_lower_bound() -> Result<()> { - // 0 => 1970 - let value = "1970-01-01"; - // -1 => 1969 - let another = "1969-12-31"; - - let fixture = TestProjectionParameter::new( - Transform::Year, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - 
Datum::date_from_str(value)?, - ), - &fixture, - Some("name <= 0"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), - &fixture, - Some("name >= 0"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name >= 0"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), - &fixture, - Some("name = 0"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - Some("name IN (0, -1)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_date_year_lower_bound() -> Result<()> { - // 47 => 2017 - let value = "2017-01-01"; - // 46 => 2016 - let another = "2016-12-31"; - - let fixture = TestProjectionParameter::new( - Transform::Year, - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), - &fixture, - Some("name <= 46"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name <= 47"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), - &fixture, - Some("name >= 47"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::date_from_str(value)?, - ), - &fixture, - Some("name 
>= 47"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), - &fixture, - Some("name = 47"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - Some("name IN (47, 46)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_truncate_string_rewrite_op() -> Result<()> { - let fixture = TestProjectionParameter::new( - Transform::Truncate(5), - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), - ); - - let value = "abcde"; - assert_projection( - &fixture.binary_predicate(PredicateOperator::StartsWith, Datum::string(value)), - &fixture, - Some(r#"name = "abcde""#), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotStartsWith, Datum::string(value)), - &fixture, - Some(r#"name != "abcde""#), - )?; - - let value = "abcdefg"; - assert_projection( - &fixture.binary_predicate(PredicateOperator::StartsWith, Datum::string(value)), - &fixture, - Some(r#"name STARTS WITH "abcde""#), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotStartsWith, Datum::string(value)), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_truncate_string() -> Result<()> { - let value = "abcdefg"; - let fixture = TestProjectionParameter::new( - Transform::Truncate(5), - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::string(value)), - &fixture, - Some(r#"name <= "abcde""#), - )?; - 
- assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::string(value)), - &fixture, - Some(r#"name <= "abcde""#), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::string(value)), - &fixture, - Some(r#"name >= "abcde""#), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::string(value)), - &fixture, - Some(r#"name >= "abcde""#), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::string(value)), - &fixture, - Some(r#"name = "abcde""#), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::string(value), Datum::string(format!("{}abc", value))], - ), - &fixture, - Some(r#"name IN ("abcde")"#), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::string(value), Datum::string(format!("{}abc", value))], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_truncate_upper_bound_decimal() -> Result<()> { - let prev = "98.99"; - let curr = "99.99"; - let next = "100.99"; - - let fixture = TestProjectionParameter::new( - Transform::Truncate(10), - "name", - NestedField::required( - 1, - "value", - Type::Primitive(PrimitiveType::Decimal { - precision: 9, - scale: 2, - }), - ), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::decimal_from_str(curr)?), - &fixture, - Some("name <= 9990"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::decimal_from_str(curr)?, - ), - &fixture, - Some("name <= 9990"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::decimal_from_str(curr)?, - ), - &fixture, - Some("name >= 9990"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(curr)?), - &fixture, - Some("name = 9990"), - 
)?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str(curr)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::decimal_from_str(prev)?, - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(next)?, - ], - ), - &fixture, - Some("name IN (10090, 9990, 9890)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(next)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_truncate_lower_bound_decimal() -> Result<()> { - let prev = "99.00"; - let curr = "100.00"; - let next = "101.00"; - - let fixture = TestProjectionParameter::new( - Transform::Truncate(10), - "name", - NestedField::required( - 1, - "value", - Type::Primitive(PrimitiveType::Decimal { - precision: 9, - scale: 2, - }), - ), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::decimal_from_str(curr)?), - &fixture, - Some("name <= 9990"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::decimal_from_str(curr)?, - ), - &fixture, - Some("name <= 10000"), - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::decimal_from_str(curr)?, - ), - &fixture, - Some("name >= 10000"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(curr)?), - &fixture, - Some("name = 10000"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str(curr)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::decimal_from_str(prev)?, - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(next)?, - ], - ), - &fixture, - Some("name IN (9900, 10000, 10100)"), - )?; - - 
assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(next)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_truncate_upper_bound_long() -> Result<()> { - let value = 99i64; - - let fixture = TestProjectionParameter::new( - Transform::Truncate(10), - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), - &fixture, - Some("name <= 90"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), - &fixture, - Some("name <= 90"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), - &fixture, - Some("name >= 90"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), - &fixture, - Some("name = 90"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::long(value - 1), - Datum::long(value), - Datum::long(value + 1), - ], - ), - &fixture, - Some("name IN (100, 90)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::long(value), Datum::long(value + 1)], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_truncate_lower_bound_long() -> Result<()> { - let value = 100i64; - - let fixture = TestProjectionParameter::new( - Transform::Truncate(10), - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), - &fixture, - Some("name <= 90"), - )?; - - assert_projection( - 
&fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), - &fixture, - Some("name <= 100"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), - &fixture, - Some("name >= 100"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), - &fixture, - Some("name = 100"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::long(value - 1), - Datum::long(value), - Datum::long(value + 1), - ], - ), - &fixture, - Some("name IN (100, 90)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::long(value), Datum::long(value + 1)], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_truncate_upper_bound_integer() -> Result<()> { - let value = 99; - - let fixture = TestProjectionParameter::new( - Transform::Truncate(10), - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), - &fixture, - Some("name <= 90"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), - &fixture, - Some("name <= 90"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), - &fixture, - Some("name >= 90"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), - &fixture, - Some("name = 90"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::int(value - 1), - Datum::int(value), - 
Datum::int(value + 1), - ], - ), - &fixture, - Some("name IN (100, 90)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::int(value), Datum::int(value + 1)], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_truncate_lower_bound_integer() -> Result<()> { - let value = 100; - - let fixture = TestProjectionParameter::new( - Transform::Truncate(10), - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), - &fixture, - Some("name <= 90"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), - &fixture, - Some("name <= 100"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), - &fixture, - Some("name >= 100"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), - &fixture, - Some("name = 100"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::int(value - 1), - Datum::int(value), - Datum::int(value + 1), - ], - ), - &fixture, - Some("name IN (100, 90)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::int(value), Datum::int(value + 1)], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_bucket_uuid() -> Result<()> { - let value = uuid::Uuid::from_u64_pair(123, 456); - let another = uuid::Uuid::from_u64_pair(456, 123); - - let fixture = TestProjectionParameter::new( - Transform::Bucket(10), - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Uuid)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, 
Datum::uuid(value)), - &fixture, - Some("name = 4"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::uuid(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::uuid(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::uuid(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::uuid(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::uuid(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::uuid(value), Datum::uuid(another)], - ), - &fixture, - Some("name IN (4, 6)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::uuid(value), Datum::uuid(another)], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_bucket_fixed() -> Result<()> { - let value = "abcdefg".as_bytes().to_vec(); - let another = "abcdehij".as_bytes().to_vec(); - - let fixture = TestProjectionParameter::new( - Transform::Bucket(10), - "name", - NestedField::required( - 1, - "value", - Type::Primitive(PrimitiveType::Fixed(value.len() as u64)), - ), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::fixed(value.clone())), - &fixture, - Some("name = 4"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::fixed(value.clone())), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::fixed(value.clone())), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::fixed(value.clone())), - &fixture, - None, - )?; - - assert_projection( - 
&fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::fixed(value.clone())), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::fixed(value.clone()), - ), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::fixed(value.clone()), Datum::fixed(another.clone())], - ), - &fixture, - Some("name IN (4, 6)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::fixed(value.clone()), Datum::fixed(another.clone())], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_bucket_string() -> Result<()> { - let value = "abcdefg"; - let another = "abcdefgabc"; - - let fixture = TestProjectionParameter::new( - Transform::Bucket(10), - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::string(value)), - &fixture, - Some("name = 4"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::string(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::string(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::string(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::string(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::string(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![Datum::string(value), Datum::string(another)], - ), - &fixture, - Some("name IN (9, 4)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - 
vec![Datum::string(value), Datum::string(another)], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_bucket_decimal() -> Result<()> { - let prev = "99.00"; - let curr = "100.00"; - let next = "101.00"; - - let fixture = TestProjectionParameter::new( - Transform::Bucket(10), - "name", - NestedField::required( - 1, - "value", - Type::Primitive(PrimitiveType::Decimal { - precision: 9, - scale: 2, - }), - ), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(curr)?), - &fixture, - Some("name = 2"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str(curr)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::decimal_from_str(curr)?), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::LessThanOrEq, - Datum::decimal_from_str(curr)?, - ), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThan, - Datum::decimal_from_str(curr)?, - ), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate( - PredicateOperator::GreaterThanOrEq, - Datum::decimal_from_str(curr)?, - ), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::decimal_from_str(next)?, - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(prev)?, - ], - ), - &fixture, - Some("name IN (6, 2)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![ - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(next)?, - ], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_bucket_long() -> Result<()> { - let value = 100; - let fixture = TestProjectionParameter::new( - Transform::Bucket(10), - "name", - NestedField::required(1, "value", 
Type::Primitive(PrimitiveType::Long)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), - &fixture, - Some("name = 6"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::long(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::long(value - 1), - Datum::long(value), - Datum::long(value + 1), - ], - ), - &fixture, - Some("name IN (8, 7, 6)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::long(value), Datum::long(value + 1)], - ), - &fixture, - None, - )?; - - Ok(()) - } - - #[test] - fn test_projection_bucket_integer() -> Result<()> { - let value = 100; - let fixture = TestProjectionParameter::new( - Transform::Bucket(10), - "name", - NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), - ); - - assert_projection( - &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), - &fixture, - Some("name = 6"), - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), - &fixture, - None, - )?; - - assert_projection( - 
&fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::int(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), - &fixture, - None, - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::In, - vec![ - Datum::int(value - 1), - Datum::int(value), - Datum::int(value + 1), - ], - ), - &fixture, - Some("name IN (8, 7, 6)"), - )?; - - assert_projection( - &fixture.set_predicate( - PredicateOperator::NotIn, - vec![Datum::int(value), Datum::int(value + 1)], - ), - &fixture, - None, - )?; - - Ok(()) - } - #[test] fn test_bucket_transform() { let trans = Transform::Bucket(8); diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index 015aceaf4..3e4268e35 100644 --- a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -251,9 +251,411 @@ impl TransformFunction for Bucket { mod test { use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime}; - use crate::{spec::Datum, transform::TransformFunction}; + use crate::{ + expr::PredicateOperator, + spec::{Datum, NestedField, PrimitiveType, Transform, Type}, + transform::{test::TestProjectionFixture, TransformFunction}, + Result, + }; use super::Bucket; + + #[test] + fn test_projection_bucket_uuid() -> Result<()> { + let value = uuid::Uuid::from_u64_pair(123, 456); + let another = uuid::Uuid::from_u64_pair(456, 123); + + let fixture = TestProjectionFixture::new( + Transform::Bucket(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Uuid)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::uuid(value)), + Some("name = 4"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::uuid(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::uuid(value)), + None, + )?; 
+ + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::uuid(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::uuid(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::uuid(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::uuid(value), Datum::uuid(another)], + ), + Some("name IN (4, 6)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::uuid(value), Datum::uuid(another)], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_bucket_fixed() -> Result<()> { + let value = "abcdefg".as_bytes().to_vec(); + let another = "abcdehij".as_bytes().to_vec(); + + let fixture = TestProjectionFixture::new( + Transform::Bucket(10), + "name", + NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Fixed(value.len() as u64)), + ), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::fixed(value.clone())), + Some("name = 4"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::fixed(value.clone())), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::fixed(value.clone())), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::fixed(value.clone())), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::fixed(value.clone())), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::fixed(value.clone()), + ), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + 
vec![Datum::fixed(value.clone()), Datum::fixed(another.clone())], + ), + Some("name IN (4, 6)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::fixed(value.clone()), Datum::fixed(another.clone())], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_bucket_string() -> Result<()> { + let value = "abcdefg"; + let another = "abcdefgabc"; + + let fixture = TestProjectionFixture::new( + Transform::Bucket(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::string(value)), + Some("name = 4"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::string(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::string(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::string(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::string(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::string(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::string(value), Datum::string(another)], + ), + Some("name IN (9, 4)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::string(value), Datum::string(another)], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_bucket_decimal() -> Result<()> { + let prev = "99.00"; + let curr = "100.00"; + let next = "101.00"; + + let fixture = TestProjectionFixture::new( + Transform::Bucket(10), + "name", + NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Decimal { + precision: 
9, + scale: 2, + }), + ), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(curr)?), + Some("name = 2"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str(curr)?), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::decimal_from_str(curr)?), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::decimal_from_str(curr)?, + ), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::decimal_from_str(curr)?, + ), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::decimal_from_str(curr)?, + ), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str(next)?, + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(prev)?, + ], + ), + Some("name IN (6, 2)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_bucket_long() -> Result<()> { + let value = 100; + let fixture = TestProjectionFixture::new( + Transform::Bucket(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), + Some("name = 6"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), + None, + )?; + + fixture.assert_projection( + 
&fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::long(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value + 1), + ], + ), + Some("name IN (8, 7, 6)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(value), Datum::long(value + 1)], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_bucket_integer() -> Result<()> { + let value = 100; + + let fixture = TestProjectionFixture::new( + Transform::Bucket(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), + Some("name = 6"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], + ), + Some("name IN (8, 7, 6)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + 
PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), + None, + )?; + + Ok(()) + } + #[test] fn test_hash() { // test int diff --git a/crates/iceberg/src/transform/mod.rs b/crates/iceberg/src/transform/mod.rs index b2ac67018..79a3fc594 100644 --- a/crates/iceberg/src/transform/mod.rs +++ b/crates/iceberg/src/transform/mod.rs @@ -16,6 +16,7 @@ // under the License. //! Transform function used to compute partition values. + use crate::{ spec::{Datum, Transform}, Error, ErrorKind, Result, @@ -68,3 +69,72 @@ pub fn create_transform_function(transform: &Transform) -> Result, + field: NestedField, + ) -> Self { + TestProjectionFixture { + transform, + name: name.into(), + field: Arc::new(field), + } + } + pub(crate) fn binary_predicate( + &self, + op: PredicateOperator, + literal: Datum, + ) -> BoundPredicate { + BoundPredicate::Binary(BinaryExpression::new( + op, + BoundReference::new(self.name.clone(), self.field.clone()), + literal, + )) + } + pub(crate) fn set_predicate( + &self, + op: PredicateOperator, + literals: Vec, + ) -> BoundPredicate { + BoundPredicate::Set(SetExpression::new( + op, + BoundReference::new(self.name.clone(), self.field.clone()), + HashSet::from_iter(literals), + )) + } + pub(crate) fn assert_projection( + &self, + predicate: &BoundPredicate, + expected: Option<&str>, + ) -> Result<()> { + let result = self.transform.project(self.name.clone(), predicate)?; + match expected { + Some(exp) => assert_eq!(format!("{}", result.unwrap()), exp), + None => assert!(result.is_none()), + } + Ok(()) + } + } +} diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index 9c6489e91..149343f4f 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -295,10 +295,1786 @@ mod test { use std::sync::Arc; use crate::{ - spec::Datum, - transform::{BoxedTransformFunction, TransformFunction}, + expr::PredicateOperator, + spec::{Datum, NestedField, 
PrimitiveType, Transform, Type}, + transform::{test::TestProjectionFixture, BoxedTransformFunction, TransformFunction}, + Result, }; + #[test] + fn test_projection_timestamp_hour_upper_bound() -> Result<()> { + // 420034 + let value = "2017-12-01T10:59:59.999999"; + // 412007 + let another = "2016-12-31T23:59:59.999999"; + + let fixture = TestProjectionFixture::new( + Transform::Hour, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 420034"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 420034"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 420035"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 420034"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name = 420034"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (420034, 412007)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_hour_lower_bound() -> Result<()> { + // 420034 + let value = 
"2017-12-01T10:00:00.000000"; + // 411288 + let another = "2016-12-02T00:00:00.000000"; + + let fixture = TestProjectionFixture::new( + Transform::Hour, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 420033"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 420034"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 420034"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 420034"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name = 420034"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (420034, 411288)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_year_upper_bound() -> Result<()> { + let value = "2017-12-31T23:59:59.999999"; + let another = "2016-12-31T23:59:59.999999"; + + let fixture = TestProjectionFixture::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + 
&fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 48"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name = 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (47, 46)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_year_lower_bound() -> Result<()> { + let value = "2017-01-01T00:00:00.000000"; + let another = "2016-12-02T00:00:00.000000"; + + let fixture = TestProjectionFixture::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 46"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 47"), + )?; + + 
fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name = 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (47, 46)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_month_negative_upper_bound() -> Result<()> { + let value = "1969-12-31T23:59:59.999999"; + let another = "1970-01-01T00:00:00.000000"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + 
Some("name >= -1"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name IN (-1, 0)"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (0, -1)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_month_upper_bound() -> Result<()> { + let value = "2017-12-01T23:59:59.999999"; + let another = "2017-11-02T00:00:00.000000"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name = 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + 
)?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (575, 574)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + Ok(()) + } + + #[test] + fn test_projection_timestamp_month_negative_lower_bound() -> Result<()> { + let value = "1969-01-01T00:00:00.000000"; + let another = "1969-03-01T00:00:00.000000"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= -12"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= -11"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= -12"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= -12"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name IN (-12, -11)"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (-10, -9, -12, -11)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + 
PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_month_lower_bound() -> Result<()> { + let value = "2017-12-01T00:00:00.000000"; + let another = "2017-12-02T00:00:00.000000"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 574"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name = 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (575)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_negative_upper_bound() -> Result<()> { + // -1 + let value = "1969-12-31T23:59:59.999999"; + // 0 + let another = 
"1970-01-01T00:00:00.000000"; + + let fixture = TestProjectionFixture::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= -1"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name IN (-1, 0)"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (0, -1)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_upper_bound() -> Result<()> { + // 17501 + let value = "2017-12-01T23:59:59.999999"; + // 17502 + let another = "2017-12-02T00:00:00.000000"; + + let fixture = TestProjectionFixture::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + 
Datum::timestamp_from_str(value)?, + ), + Some("name <= 17501"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 17501"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 17502"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 17501"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name = 17501"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (17501, 17502)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_negative_lower_bound() -> Result<()> { + // -365 + let value = "1969-01-01T00:00:00.000000"; + // -364 + let another = "1969-01-02T00:00:00.000000"; + + let fixture = TestProjectionFixture::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= -365"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= -364"), + )?; + + fixture.assert_projection( 
+ &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= -365"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= -365"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name IN (-364, -365)"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (-363, -364, -365)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_lower_bound() -> Result<()> { + // 17501 + let value = "2017-12-01T00:00:00.000000"; + // 17502 + let another = "2017-12-02T00:00:00.000000"; + + let fixture = TestProjectionFixture::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 17500"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 17501"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 17501"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + 
Datum::timestamp_from_str(value)?, + ), + Some("name >= 17501"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name = 17501"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (17501, 17502)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_timestamp_day_epoch() -> Result<()> { + // 0 + let value = "1970-01-01T00:00:00.00000"; + // 1 + let another = "1970-01-02T00:00:00.00000"; + + let fixture = TestProjectionFixture::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name = 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, 
Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (1, 0)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_day_negative() -> Result<()> { + // -2 + let value = "1969-12-30"; + // -4 + let another = "1969-12-28"; + + let fixture = TestProjectionFixture::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= -3"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= -2"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= -1"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= -2"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = -2"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (-2, -4)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], 
+ ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_day() -> Result<()> { + // 17167 + let value = "2017-01-01"; + // 17531 + let another = "2017-12-31"; + + let fixture = TestProjectionFixture::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 17166"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 17167"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 17168"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 17167"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 17167"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (17531, 17167)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_negative_upper_bound() -> Result<()> { + // -1 => 1969-12 + let value = "1969-12-31"; + // -12 => 1969-01 + let another = "1969-01-01"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + 
&fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= -1"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name IN (-1, 0)"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (-1, -12, -11, 0)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_upper_bound() -> Result<()> { + // 575 => 2017-12 + let value = "2017-12-31"; + // 564 => 2017-01 + let another = "2017-01-01"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, 
Datum::date_from_str(value)?), + Some("name >= 576"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (575, 564)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_negative_lower_bound() -> Result<()> { + // -12 => 1969-01 + let value = "1969-01-01"; + // -1 => 1969-12 + let another = "1969-12-31"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= -12"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= -11"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= -12"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= -12"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name IN (-12, -11)"), + )?; + + 
fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (-1, -12, -11, 0)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_lower_bound() -> Result<()> { + // 575 => 2017-12 + let value = "2017-12-01"; + // 564 => 2017-01 + let another = "2017-01-01"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 574"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (575, 564)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + 
PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_epoch() -> Result<()> { + // 0 => 1970-01 + let value = "1970-01-01"; + // -1 => 1969-12 + let another = "1969-12-31"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (0, -1)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_negative_upper_bound() -> Result<()> { + // -1 => 1969 + let value = "1969-12-31"; + let another = "1969-01-01"; + + let fixture = TestProjectionFixture::new( + Transform::Year, + "name", + NestedField::required(1, "value", 
Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= -1"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name IN (-1, 0)"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (0, -1)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_upper_bound() -> Result<()> { + // 47 => 2017 + let value = "2017-12-31"; + // 46 => 2016 + let another = "2016-01-01"; + + let fixture = TestProjectionFixture::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 47"), + )?; + + fixture.assert_projection( + 
&fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 48"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (47, 46)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_negative_lower_bound() -> Result<()> { + // 0 => 1970 + let value = "1970-01-01"; + // -1 => 1969 + let another = "1969-12-31"; + + let fixture = TestProjectionFixture::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 
0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (0, -1)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_lower_bound() -> Result<()> { + // 47 => 2017 + let value = "2017-01-01"; + // 46 => 2016 + let another = "2016-12-31"; + + let fixture = TestProjectionFixture::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 46"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (47, 46)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + 
PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + #[test] fn test_transform_years() { let year = super::Year; diff --git a/crates/iceberg/src/transform/truncate.rs b/crates/iceberg/src/transform/truncate.rs index 767ca0036..55e1dedc7 100644 --- a/crates/iceberg/src/transform/truncate.rs +++ b/crates/iceberg/src/transform/truncate.rs @@ -174,7 +174,484 @@ mod test { builder::PrimitiveBuilder, types::Decimal128Type, Decimal128Array, Int32Array, Int64Array, }; - use crate::{spec::Datum, transform::TransformFunction}; + use crate::{ + expr::PredicateOperator, + spec::{Datum, NestedField, PrimitiveType, Transform, Type}, + transform::{test::TestProjectionFixture, TransformFunction}, + Result, + }; + + #[test] + fn test_projection_truncate_string_rewrite_op() -> Result<()> { + let value = "abcde"; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(5), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::StartsWith, Datum::string(value)), + Some(r#"name = "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotStartsWith, Datum::string(value)), + Some(r#"name != "abcde""#), + )?; + + let value = "abcdefg"; + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::StartsWith, Datum::string(value)), + Some(r#"name STARTS WITH "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotStartsWith, Datum::string(value)), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_string() -> Result<()> { + let value = "abcdefg"; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(5), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), + ); + + fixture.assert_projection( + 
&fixture.binary_predicate(PredicateOperator::LessThan, Datum::string(value)), + Some(r#"name <= "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::string(value)), + Some(r#"name <= "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::string(value)), + Some(r#"name >= "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::string(value)), + Some(r#"name >= "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::string(value)), + Some(r#"name = "abcde""#), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::string(value), Datum::string(format!("{}abc", value))], + ), + Some(r#"name IN ("abcde")"#), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::string(value), Datum::string(format!("{}abc", value))], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_upper_bound_decimal() -> Result<()> { + let prev = "98.99"; + let curr = "99.99"; + let next = "100.99"; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", + NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Decimal { + precision: 9, + scale: 2, + }), + ), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::decimal_from_str(curr)?), + Some("name <= 9990"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::decimal_from_str(curr)?, + ), + Some("name <= 9990"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::decimal_from_str(curr)?, + ), + Some("name >= 9990"), + )?; + + fixture.assert_projection( + 
&fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(curr)?), + Some("name = 9990"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str(curr)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str(prev)?, + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + Some("name IN (10090, 9990, 9890)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_lower_bound_decimal() -> Result<()> { + let prev = "99.00"; + let curr = "100.00"; + let next = "101.00"; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", + NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Decimal { + precision: 9, + scale: 2, + }), + ), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::decimal_from_str(curr)?), + Some("name <= 9990"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::decimal_from_str(curr)?, + ), + Some("name <= 10000"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::decimal_from_str(curr)?, + ), + Some("name >= 10000"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(curr)?), + Some("name = 10000"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str(curr)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str(prev)?, + Datum::decimal_from_str(curr)?, + 
Datum::decimal_from_str(next)?, + ], + ), + Some("name IN (9900, 10000, 10100)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_upper_bound_long() -> Result<()> { + let value = 99i64; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), + Some("name <= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), + Some("name <= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), + Some("name >= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), + Some("name = 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value + 1), + ], + ), + Some("name IN (100, 90)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(value), Datum::long(value + 1)], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_lower_bound_long() -> Result<()> { + let value = 100i64; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), + Some("name <= 90"), + )?; 
+ + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), + Some("name <= 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), + Some("name >= 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), + Some("name = 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value + 1), + ], + ), + Some("name IN (100, 90)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(value), Datum::long(value + 1)], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_upper_bound_integer() -> Result<()> { + let value = 99; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), + Some("name <= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), + Some("name <= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), + Some("name >= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), + Some("name = 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value 
+ 1), + ], + ), + Some("name IN (100, 90)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_lower_bound_integer() -> Result<()> { + let value = 100; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), + Some("name <= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), + Some("name <= 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), + Some("name >= 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), + Some("name = 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], + ), + Some("name IN (100, 90)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), + None, + )?; + + Ok(()) + } // Test case ref from: https://iceberg.apache.org/spec/#truncate-transform-details #[test] From d78e269eac833147fd41fe2849050a0db02202c4 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Mon, 1 Apr 2024 17:00:19 +0200 Subject: [PATCH 41/46] refactor: move transform tests --- crates/iceberg/src/spec/transform.rs | 506 +---------------------- crates/iceberg/src/transform/bucket.rs | 59 ++- crates/iceberg/src/transform/identity.rs | 65 +++ 
crates/iceberg/src/transform/mod.rs | 39 +- crates/iceberg/src/transform/temporal.rs | 208 ++++++++++ crates/iceberg/src/transform/truncate.rs | 59 +++ crates/iceberg/src/transform/void.rs | 122 ++++++ 7 files changed, 551 insertions(+), 507 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 20e9a08d8..f1641bcf8 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -658,512 +658,10 @@ impl<'de> Deserialize<'de> for Transform { } } +/// An enum representing the result of the adjusted projection. +/// Either being a single adjusted datum or a set. #[derive(Debug)] enum AdjustedProjection { Single(Datum), Set(FnvHashSet<Datum>), } - -#[cfg(test)] -mod tests { - use crate::spec::datatypes::PrimitiveType::{ - Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, - Timestamptz, Uuid, - }; - use crate::spec::datatypes::Type::{Primitive, Struct}; - use crate::spec::datatypes::{NestedField, StructType, Type}; - use crate::spec::transform::Transform; - - struct TestParameter { - display: String, - json: String, - dedup_name: String, - preserves_order: bool, - satisfies_order_of: Vec<(Transform, bool)>, - trans_types: Vec<(Type, Option<Type>)>, - } - - fn check_transform(trans: Transform, param: TestParameter) { - assert_eq!(param.display, format!("{trans}")); - assert_eq!(param.json, serde_json::to_string(&trans).unwrap()); - assert_eq!(trans, serde_json::from_str(param.json.as_str()).unwrap()); - assert_eq!(param.dedup_name, trans.dedup_name()); - assert_eq!(param.preserves_order, trans.preserves_order()); - - for (other_trans, satisfies_order_of) in param.satisfies_order_of { - assert_eq!( - satisfies_order_of, - trans.satisfies_order_of(&other_trans), - "Failed to check satisfies order {}, {}, {}", - trans, - other_trans, - satisfies_order_of - ); - } - - for (input_type, result_type) in param.trans_types { - assert_eq!(result_type, 
trans.result_type(&input_type).ok()); - } - } - - #[test] - fn test_bucket_transform() { - let trans = Transform::Bucket(8); - - let test_param = TestParameter { - display: "bucket[8]".to_string(), - json: r#""bucket[8]""#.to_string(), - dedup_name: "bucket[8]".to_string(), - preserves_order: false, - satisfies_order_of: vec![ - (Transform::Bucket(8), true), - (Transform::Bucket(4), false), - (Transform::Void, false), - (Transform::Day, false), - ], - trans_types: vec![ - (Primitive(Binary), Some(Primitive(Int))), - (Primitive(Date), Some(Primitive(Int))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - Some(Primitive(Int)), - ), - (Primitive(Fixed(8)), Some(Primitive(Int))), - (Primitive(Int), Some(Primitive(Int))), - (Primitive(Long), Some(Primitive(Int))), - (Primitive(StringType), Some(Primitive(Int))), - (Primitive(Uuid), Some(Primitive(Int))), - (Primitive(Time), Some(Primitive(Int))), - (Primitive(Timestamp), Some(Primitive(Int))), - (Primitive(Timestamptz), Some(Primitive(Int))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_truncate_transform() { - let trans = Transform::Truncate(4); - - let test_param = TestParameter { - display: "truncate[4]".to_string(), - json: r#""truncate[4]""#.to_string(), - dedup_name: "truncate[4]".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Truncate(4), true), - (Transform::Truncate(2), false), - (Transform::Bucket(4), false), - (Transform::Void, false), - (Transform::Day, false), - ], - trans_types: vec![ - (Primitive(Binary), Some(Primitive(Binary))), - (Primitive(Date), None), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - Some(Primitive(Decimal { - precision: 8, - scale: 5, - })), - ), - (Primitive(Fixed(8)), None), - (Primitive(Int), Some(Primitive(Int))), - (Primitive(Long), Some(Primitive(Long))), - 
(Primitive(StringType), Some(Primitive(StringType))), - (Primitive(Uuid), None), - (Primitive(Time), None), - (Primitive(Timestamp), None), - (Primitive(Timestamptz), None), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_identity_transform() { - let trans = Transform::Identity; - - let test_param = TestParameter { - display: "identity".to_string(), - json: r#""identity""#.to_string(), - dedup_name: "identity".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Truncate(4), true), - (Transform::Truncate(2), true), - (Transform::Bucket(4), false), - (Transform::Void, false), - (Transform::Day, true), - ], - trans_types: vec![ - (Primitive(Binary), Some(Primitive(Binary))), - (Primitive(Date), Some(Primitive(Date))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - Some(Primitive(Decimal { - precision: 8, - scale: 5, - })), - ), - (Primitive(Fixed(8)), Some(Primitive(Fixed(8)))), - (Primitive(Int), Some(Primitive(Int))), - (Primitive(Long), Some(Primitive(Long))), - (Primitive(StringType), Some(Primitive(StringType))), - (Primitive(Uuid), Some(Primitive(Uuid))), - (Primitive(Time), Some(Primitive(Time))), - (Primitive(Timestamp), Some(Primitive(Timestamp))), - (Primitive(Timestamptz), Some(Primitive(Timestamptz))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_year_transform() { - let trans = Transform::Year; - - let test_param = TestParameter { - display: "year".to_string(), - json: r#""year""#.to_string(), - dedup_name: "time".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Year, true), - (Transform::Month, false), - (Transform::Day, false), - (Transform::Hour, false), - (Transform::Void, 
false), - (Transform::Identity, false), - ], - trans_types: vec![ - (Primitive(Binary), None), - (Primitive(Date), Some(Primitive(Int))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - None, - ), - (Primitive(Fixed(8)), None), - (Primitive(Int), None), - (Primitive(Long), None), - (Primitive(StringType), None), - (Primitive(Uuid), None), - (Primitive(Time), None), - (Primitive(Timestamp), Some(Primitive(Int))), - (Primitive(Timestamptz), Some(Primitive(Int))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_month_transform() { - let trans = Transform::Month; - - let test_param = TestParameter { - display: "month".to_string(), - json: r#""month""#.to_string(), - dedup_name: "time".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Year, true), - (Transform::Month, true), - (Transform::Day, false), - (Transform::Hour, false), - (Transform::Void, false), - (Transform::Identity, false), - ], - trans_types: vec![ - (Primitive(Binary), None), - (Primitive(Date), Some(Primitive(Int))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - None, - ), - (Primitive(Fixed(8)), None), - (Primitive(Int), None), - (Primitive(Long), None), - (Primitive(StringType), None), - (Primitive(Uuid), None), - (Primitive(Time), None), - (Primitive(Timestamp), Some(Primitive(Int))), - (Primitive(Timestamptz), Some(Primitive(Int))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_day_transform() { - let trans = Transform::Day; - - let test_param = TestParameter { - display: "day".to_string(), - json: r#""day""#.to_string(), - dedup_name: "time".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Year, true), - 
(Transform::Month, true), - (Transform::Day, true), - (Transform::Hour, false), - (Transform::Void, false), - (Transform::Identity, false), - ], - trans_types: vec![ - (Primitive(Binary), None), - (Primitive(Date), Some(Primitive(Int))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - None, - ), - (Primitive(Fixed(8)), None), - (Primitive(Int), None), - (Primitive(Long), None), - (Primitive(StringType), None), - (Primitive(Uuid), None), - (Primitive(Time), None), - (Primitive(Timestamp), Some(Primitive(Int))), - (Primitive(Timestamptz), Some(Primitive(Int))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_hour_transform() { - let trans = Transform::Hour; - - let test_param = TestParameter { - display: "hour".to_string(), - json: r#""hour""#.to_string(), - dedup_name: "time".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Year, true), - (Transform::Month, true), - (Transform::Day, true), - (Transform::Hour, true), - (Transform::Void, false), - (Transform::Identity, false), - ], - trans_types: vec![ - (Primitive(Binary), None), - (Primitive(Date), None), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - None, - ), - (Primitive(Fixed(8)), None), - (Primitive(Int), None), - (Primitive(Long), None), - (Primitive(StringType), None), - (Primitive(Uuid), None), - (Primitive(Time), None), - (Primitive(Timestamp), Some(Primitive(Int))), - (Primitive(Timestamptz), Some(Primitive(Int))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_void_transform() { - let trans = Transform::Void; - - let test_param = TestParameter { - display: "void".to_string(), - json: r#""void""#.to_string(), - dedup_name: "void".to_string(), - 
preserves_order: false, - satisfies_order_of: vec![ - (Transform::Year, false), - (Transform::Month, false), - (Transform::Day, false), - (Transform::Hour, false), - (Transform::Void, true), - (Transform::Identity, false), - ], - trans_types: vec![ - (Primitive(Binary), Some(Primitive(Binary))), - (Primitive(Date), Some(Primitive(Date))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - Some(Primitive(Decimal { - precision: 8, - scale: 5, - })), - ), - (Primitive(Fixed(8)), Some(Primitive(Fixed(8)))), - (Primitive(Int), Some(Primitive(Int))), - (Primitive(Long), Some(Primitive(Long))), - (Primitive(StringType), Some(Primitive(StringType))), - (Primitive(Uuid), Some(Primitive(Uuid))), - (Primitive(Time), Some(Primitive(Time))), - (Primitive(Timestamp), Some(Primitive(Timestamp))), - (Primitive(Timestamptz), Some(Primitive(Timestamptz))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - Some(Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()]))), - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_known_transform() { - let trans = Transform::Unknown; - - let test_param = TestParameter { - display: "unknown".to_string(), - json: r#""unknown""#.to_string(), - dedup_name: "unknown".to_string(), - preserves_order: false, - satisfies_order_of: vec![ - (Transform::Year, false), - (Transform::Month, false), - (Transform::Day, false), - (Transform::Hour, false), - (Transform::Void, false), - (Transform::Identity, false), - (Transform::Unknown, true), - ], - trans_types: vec![ - (Primitive(Binary), Some(Primitive(StringType))), - (Primitive(Date), Some(Primitive(StringType))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - Some(Primitive(StringType)), - ), - (Primitive(Fixed(8)), Some(Primitive(StringType))), - (Primitive(Int), Some(Primitive(StringType))), - (Primitive(Long), Some(Primitive(StringType))), - 
(Primitive(StringType), Some(Primitive(StringType))), - (Primitive(Uuid), Some(Primitive(StringType))), - (Primitive(Time), Some(Primitive(StringType))), - (Primitive(Timestamp), Some(Primitive(StringType))), - (Primitive(Timestamptz), Some(Primitive(StringType))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - Some(Primitive(StringType)), - ), - ], - }; - - check_transform(trans, test_param); - } -} diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index 3e4268e35..d454c697f 100644 --- a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -251,15 +251,72 @@ impl TransformFunction for Bucket { mod test { use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime}; + use crate::spec::PrimitiveType::{ + Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, + Timestamptz, Uuid, + }; + use crate::spec::StructType; + use crate::spec::Type::{Primitive, Struct}; use crate::{ expr::PredicateOperator, spec::{Datum, NestedField, PrimitiveType, Transform, Type}, - transform::{test::TestProjectionFixture, TransformFunction}, + transform::{ + test::{TestProjectionFixture, TestTransformFixture}, + TransformFunction, + }, Result, }; use super::Bucket; + #[test] + fn test_bucket_transform() { + let trans = Transform::Bucket(8); + + let fixture = TestTransformFixture { + display: "bucket[8]".to_string(), + json: r#""bucket[8]""#.to_string(), + dedup_name: "bucket[8]".to_string(), + preserves_order: false, + satisfies_order_of: vec![ + (Transform::Bucket(8), true), + (Transform::Bucket(4), false), + (Transform::Void, false), + (Transform::Day, false), + ], + trans_types: vec![ + (Primitive(Binary), Some(Primitive(Int))), + (Primitive(Date), Some(Primitive(Int))), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + Some(Primitive(Int)), + ), + (Primitive(Fixed(8)), Some(Primitive(Int))), + (Primitive(Int), 
Some(Primitive(Int))), + (Primitive(Long), Some(Primitive(Int))), + (Primitive(StringType), Some(Primitive(Int))), + (Primitive(Uuid), Some(Primitive(Int))), + (Primitive(Time), Some(Primitive(Int))), + (Primitive(Timestamp), Some(Primitive(Int))), + (Primitive(Timestamptz), Some(Primitive(Int))), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + None, + ), + ], + }; + + fixture.assert_transform(trans); + } + #[test] fn test_projection_bucket_uuid() -> Result<()> { let value = uuid::Uuid::from_u64_pair(123, 456); diff --git a/crates/iceberg/src/transform/identity.rs b/crates/iceberg/src/transform/identity.rs index 49ab612aa..0f6f234c8 100644 --- a/crates/iceberg/src/transform/identity.rs +++ b/crates/iceberg/src/transform/identity.rs @@ -33,3 +33,68 @@ impl TransformFunction for Identity { Ok(Some(input.clone())) } } + +#[cfg(test)] +mod test { + use crate::spec::PrimitiveType::{ + Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, + Timestamptz, Uuid, + }; + use crate::spec::StructType; + use crate::spec::Type::{Primitive, Struct}; + use crate::transform::test::TestTransformFixture; + + use crate::spec::{NestedField, Transform}; + + #[test] + fn test_identity_transform() { + let trans = Transform::Identity; + + let fixture = TestTransformFixture { + display: "identity".to_string(), + json: r#""identity""#.to_string(), + dedup_name: "identity".to_string(), + preserves_order: true, + satisfies_order_of: vec![ + (Transform::Truncate(4), true), + (Transform::Truncate(2), true), + (Transform::Bucket(4), false), + (Transform::Void, false), + (Transform::Day, true), + ], + trans_types: vec![ + (Primitive(Binary), Some(Primitive(Binary))), + (Primitive(Date), Some(Primitive(Date))), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + Some(Primitive(Decimal { + precision: 8, + scale: 5, + })), + ), + (Primitive(Fixed(8)), Some(Primitive(Fixed(8)))), + (Primitive(Int), 
Some(Primitive(Int))), + (Primitive(Long), Some(Primitive(Long))), + (Primitive(StringType), Some(Primitive(StringType))), + (Primitive(Uuid), Some(Primitive(Uuid))), + (Primitive(Time), Some(Primitive(Time))), + (Primitive(Timestamp), Some(Primitive(Timestamp))), + (Primitive(Timestamptz), Some(Primitive(Timestamptz))), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + None, + ), + ], + }; + + fixture.assert_transform(trans); + } +} diff --git a/crates/iceberg/src/transform/mod.rs b/crates/iceberg/src/transform/mod.rs index 79a3fc594..9fc7e1050 100644 --- a/crates/iceberg/src/transform/mod.rs +++ b/crates/iceberg/src/transform/mod.rs @@ -76,14 +76,13 @@ mod test { expr::{ BinaryExpression, BoundPredicate, BoundReference, PredicateOperator, SetExpression, }, - spec::{Datum, NestedField, NestedFieldRef, Transform}, + spec::{Datum, NestedField, NestedFieldRef, Transform, Type}, Result, }; use std::{collections::HashSet, sync::Arc}; /// A utitily struct, test fixture /// used for testing the projection on `Transform` - #[derive(Debug)] pub(crate) struct TestProjectionFixture { transform: Transform, name: String, @@ -137,4 +136,40 @@ mod test { Ok(()) } } + + /// A utility struct, test fixture + /// used for testing the transform on `Transform` + pub(crate) struct TestTransformFixture { + pub display: String, + pub json: String, + pub dedup_name: String, + pub preserves_order: bool, + pub satisfies_order_of: Vec<(Transform, bool)>, + pub trans_types: Vec<(Type, Option<Type>)>, + } + + impl TestTransformFixture { + pub(crate) fn assert_transform(&self, trans: Transform) { + assert_eq!(self.display, format!("{trans}")); + assert_eq!(self.json, serde_json::to_string(&trans).unwrap()); + assert_eq!(trans, serde_json::from_str(self.json.as_str()).unwrap()); + assert_eq!(self.dedup_name, trans.dedup_name()); + assert_eq!(self.preserves_order, trans.preserves_order()); + + for (other_trans, satisfies_order_of) in 
&self.satisfies_order_of { + assert_eq!( + satisfies_order_of, + &trans.satisfies_order_of(other_trans), + "Failed to check satisfies order {}, {}, {}", + trans, + other_trans, + satisfies_order_of + ); + } + + for (input_type, result_type) in &self.trans_types { + assert_eq!(result_type, &trans.result_type(input_type).ok()); + } + } + } } diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index 149343f4f..0cbdde076 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -294,6 +294,14 @@ mod test { use chrono::{NaiveDate, NaiveDateTime}; use std::sync::Arc; + use crate::spec::PrimitiveType::{ + Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, + Timestamptz, Uuid, + }; + use crate::spec::StructType; + use crate::spec::Type::{Primitive, Struct}; + + use crate::transform::test::TestTransformFixture; use crate::{ expr::PredicateOperator, spec::{Datum, NestedField, PrimitiveType, Transform, Type}, @@ -301,6 +309,206 @@ mod test { Result, }; + #[test] + fn test_year_transform() { + let trans = Transform::Year; + + let fixture = TestTransformFixture { + display: "year".to_string(), + json: r#""year""#.to_string(), + dedup_name: "time".to_string(), + preserves_order: true, + satisfies_order_of: vec![ + (Transform::Year, true), + (Transform::Month, false), + (Transform::Day, false), + (Transform::Hour, false), + (Transform::Void, false), + (Transform::Identity, false), + ], + trans_types: vec![ + (Primitive(Binary), None), + (Primitive(Date), Some(Primitive(Int))), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + None, + ), + (Primitive(Fixed(8)), None), + (Primitive(Int), None), + (Primitive(Long), None), + (Primitive(StringType), None), + (Primitive(Uuid), None), + (Primitive(Time), None), + (Primitive(Timestamp), Some(Primitive(Int))), + (Primitive(Timestamptz), Some(Primitive(Int))), + ( + 
Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + None, + ), + ], + }; + + fixture.assert_transform(trans); + } + + #[test] + fn test_month_transform() { + let trans = Transform::Month; + + let fixture = TestTransformFixture { + display: "month".to_string(), + json: r#""month""#.to_string(), + dedup_name: "time".to_string(), + preserves_order: true, + satisfies_order_of: vec![ + (Transform::Year, true), + (Transform::Month, true), + (Transform::Day, false), + (Transform::Hour, false), + (Transform::Void, false), + (Transform::Identity, false), + ], + trans_types: vec![ + (Primitive(Binary), None), + (Primitive(Date), Some(Primitive(Int))), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + None, + ), + (Primitive(Fixed(8)), None), + (Primitive(Int), None), + (Primitive(Long), None), + (Primitive(StringType), None), + (Primitive(Uuid), None), + (Primitive(Time), None), + (Primitive(Timestamp), Some(Primitive(Int))), + (Primitive(Timestamptz), Some(Primitive(Int))), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + None, + ), + ], + }; + + fixture.assert_transform(trans); + } + + #[test] + fn test_day_transform() { + let trans = Transform::Day; + + let fixture = TestTransformFixture { + display: "day".to_string(), + json: r#""day""#.to_string(), + dedup_name: "time".to_string(), + preserves_order: true, + satisfies_order_of: vec![ + (Transform::Year, true), + (Transform::Month, true), + (Transform::Day, true), + (Transform::Hour, false), + (Transform::Void, false), + (Transform::Identity, false), + ], + trans_types: vec![ + (Primitive(Binary), None), + (Primitive(Date), Some(Primitive(Int))), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + None, + ), + (Primitive(Fixed(8)), None), + (Primitive(Int), None), + (Primitive(Long), None), + (Primitive(StringType), None), + (Primitive(Uuid), None), + (Primitive(Time), None), + 
(Primitive(Timestamp), Some(Primitive(Int))), + (Primitive(Timestamptz), Some(Primitive(Int))), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + None, + ), + ], + }; + + fixture.assert_transform(trans); + } + + #[test] + fn test_hour_transform() { + let trans = Transform::Hour; + + let fixture = TestTransformFixture { + display: "hour".to_string(), + json: r#""hour""#.to_string(), + dedup_name: "time".to_string(), + preserves_order: true, + satisfies_order_of: vec![ + (Transform::Year, true), + (Transform::Month, true), + (Transform::Day, true), + (Transform::Hour, true), + (Transform::Void, false), + (Transform::Identity, false), + ], + trans_types: vec![ + (Primitive(Binary), None), + (Primitive(Date), None), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + None, + ), + (Primitive(Fixed(8)), None), + (Primitive(Int), None), + (Primitive(Long), None), + (Primitive(StringType), None), + (Primitive(Uuid), None), + (Primitive(Time), None), + (Primitive(Timestamp), Some(Primitive(Int))), + (Primitive(Timestamptz), Some(Primitive(Int))), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + None, + ), + ], + }; + + fixture.assert_transform(trans); + } + #[test] fn test_projection_timestamp_hour_upper_bound() -> Result<()> { // 420034 diff --git a/crates/iceberg/src/transform/truncate.rs b/crates/iceberg/src/transform/truncate.rs index 55e1dedc7..4d163b11f 100644 --- a/crates/iceberg/src/transform/truncate.rs +++ b/crates/iceberg/src/transform/truncate.rs @@ -170,6 +170,13 @@ impl TransformFunction for Truncate { mod test { use std::sync::Arc; + use crate::spec::PrimitiveType::{ + Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, + Timestamptz, Uuid, + }; + use crate::spec::StructType; + use crate::spec::Type::{Primitive, Struct}; + use crate::transform::test::TestTransformFixture; use arrow_array::{ 
builder::PrimitiveBuilder, types::Decimal128Type, Decimal128Array, Int32Array, Int64Array, }; @@ -181,6 +188,58 @@ mod test { Result, }; + #[test] + fn test_truncate_transform() { + let trans = Transform::Truncate(4); + + let fixture = TestTransformFixture { + display: "truncate[4]".to_string(), + json: r#""truncate[4]""#.to_string(), + dedup_name: "truncate[4]".to_string(), + preserves_order: true, + satisfies_order_of: vec![ + (Transform::Truncate(4), true), + (Transform::Truncate(2), false), + (Transform::Bucket(4), false), + (Transform::Void, false), + (Transform::Day, false), + ], + trans_types: vec![ + (Primitive(Binary), Some(Primitive(Binary))), + (Primitive(Date), None), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + Some(Primitive(Decimal { + precision: 8, + scale: 5, + })), + ), + (Primitive(Fixed(8)), None), + (Primitive(Int), Some(Primitive(Int))), + (Primitive(Long), Some(Primitive(Long))), + (Primitive(StringType), Some(Primitive(StringType))), + (Primitive(Uuid), None), + (Primitive(Time), None), + (Primitive(Timestamp), None), + (Primitive(Timestamptz), None), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + None, + ), + ], + }; + + fixture.assert_transform(trans); + } + #[test] fn test_projection_truncate_string_rewrite_op() -> Result<()> { let value = "abcde"; diff --git a/crates/iceberg/src/transform/void.rs b/crates/iceberg/src/transform/void.rs index 7cbee27ca..5a631cdc4 100644 --- a/crates/iceberg/src/transform/void.rs +++ b/crates/iceberg/src/transform/void.rs @@ -32,3 +32,125 @@ impl TransformFunction for Void { Ok(None) } } + +#[cfg(test)] +mod test { + use crate::spec::PrimitiveType::{ + Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, + Timestamptz, Uuid, + }; + use crate::spec::StructType; + use crate::spec::Type::{Primitive, Struct}; + use crate::transform::test::TestTransformFixture; + + use crate::spec::{NestedField, 
Transform}; + + #[test] + fn test_void_transform() { + let trans = Transform::Void; + + let fixture = TestTransformFixture { + display: "void".to_string(), + json: r#""void""#.to_string(), + dedup_name: "void".to_string(), + preserves_order: false, + satisfies_order_of: vec![ + (Transform::Year, false), + (Transform::Month, false), + (Transform::Day, false), + (Transform::Hour, false), + (Transform::Void, true), + (Transform::Identity, false), + ], + trans_types: vec![ + (Primitive(Binary), Some(Primitive(Binary))), + (Primitive(Date), Some(Primitive(Date))), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + Some(Primitive(Decimal { + precision: 8, + scale: 5, + })), + ), + (Primitive(Fixed(8)), Some(Primitive(Fixed(8)))), + (Primitive(Int), Some(Primitive(Int))), + (Primitive(Long), Some(Primitive(Long))), + (Primitive(StringType), Some(Primitive(StringType))), + (Primitive(Uuid), Some(Primitive(Uuid))), + (Primitive(Time), Some(Primitive(Time))), + (Primitive(Timestamp), Some(Primitive(Timestamp))), + (Primitive(Timestamptz), Some(Primitive(Timestamptz))), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + Some(Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()]))), + ), + ], + }; + + fixture.assert_transform(trans); + } + + #[test] + fn test_known_transform() { + let trans = Transform::Unknown; + + let fixture = TestTransformFixture { + display: "unknown".to_string(), + json: r#""unknown""#.to_string(), + dedup_name: "unknown".to_string(), + preserves_order: false, + satisfies_order_of: vec![ + (Transform::Year, false), + (Transform::Month, false), + (Transform::Day, false), + (Transform::Hour, false), + (Transform::Void, false), + (Transform::Identity, false), + (Transform::Unknown, true), + ], + trans_types: vec![ + (Primitive(Binary), Some(Primitive(StringType))), + (Primitive(Date), Some(Primitive(StringType))), + ( + Primitive(Decimal 
{ + precision: 8, + scale: 5, + }), + Some(Primitive(StringType)), + ), + (Primitive(Fixed(8)), Some(Primitive(StringType))), + (Primitive(Int), Some(Primitive(StringType))), + (Primitive(Long), Some(Primitive(StringType))), + (Primitive(StringType), Some(Primitive(StringType))), + (Primitive(Uuid), Some(Primitive(StringType))), + (Primitive(Time), Some(Primitive(StringType))), + (Primitive(Timestamp), Some(Primitive(StringType))), + (Primitive(Timestamptz), Some(Primitive(StringType))), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + Some(Primitive(StringType)), + ), + ], + }; + + fixture.assert_transform(trans); + } +} From 4f84a0ee047648c81eab125f0c49af01c7991ccf Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Tue, 2 Apr 2024 13:14:05 +0200 Subject: [PATCH 42/46] remove self --- crates/iceberg/src/spec/transform.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index f1641bcf8..e13298a34 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -416,10 +416,10 @@ impl Transform { func: &BoxedTransformFunction, width: Option, ) -> Result> { - if let Some(boundary) = self.projected_boundary(op, datum)? { + if let Some(boundary) = Self::projected_boundary(op, datum)? { let transformed = func.transform_literal_result(&boundary)?; let adjusted = self.adjust_projection(op, datum, &transformed); - let op = self.projected_operator(op, datum, width); + let op = Self::projected_operator(op, datum, width); if let Some(op) = op { let predicate = match adjusted { @@ -447,7 +447,7 @@ impl Transform { /// Create a new `Datum` with adjusted projection boundary. 
/// Returns `None` if `PredicateOperator` and `PrimitiveLiteral` /// can not be projected - fn projected_boundary(&self, op: &PredicateOperator, datum: &Datum) -> Result> { + fn projected_boundary(op: &PredicateOperator, datum: &Datum) -> Result> { let literal = datum.literal(); let projected_boundary = match op { @@ -480,7 +480,6 @@ impl Transform { /// Create a new `PredicateOperator`, rewritten for projection fn projected_operator( - &self, op: &PredicateOperator, datum: &Datum, width: Option, From eaacaa8fc918bc95eabaecb7bba3f7e978722fae Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Tue, 2 Apr 2024 14:26:52 +0200 Subject: [PATCH 43/46] refactor: structure fn project + helpers --- crates/iceberg/src/spec/transform.rs | 264 +++++++++++++++------------ 1 file changed, 144 insertions(+), 120 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index e13298a34..2a86ca9fd 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -285,95 +285,47 @@ impl Transform { pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result> { let func = create_transform_function(self)?; - let projection = match predicate { - BoundPredicate::Unary(expr) => match self { - Transform::Identity - | Transform::Bucket(_) - | Transform::Truncate(_) - | Transform::Year - | Transform::Month - | Transform::Day - | Transform::Hour => Some(Predicate::Unary(UnaryExpression::new( - expr.op(), - Reference::new(name), - ))), - _ => None, - }, - BoundPredicate::Binary(expr) => match self { - Transform::Identity => Some(Predicate::Binary(BinaryExpression::new( + match self { + Transform::Identity => match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => Ok(Some(Predicate::Binary(BinaryExpression::new( expr.op(), Reference::new(name), expr.literal().to_owned(), - ))), - Transform::Bucket(_) => { - if expr.op() != 
PredicateOperator::Eq || !self.can_transform(expr.literal()) { - return Ok(None); - } - - Some(Predicate::Binary(BinaryExpression::new( - expr.op(), - Reference::new(name), - func.transform_literal_result(expr.literal())?, - ))) - } - Transform::Truncate(width) => { - if !self.can_transform(expr.literal()) { - return Ok(None); - } - - self.transform_projected_boundary( - name, - expr.literal(), - &expr.op(), - &func, - Some(*width), - )? - } - Transform::Year | Transform::Month | Transform::Day | Transform::Hour => { - if !self.can_transform(expr.literal()) { - return Ok(None); - } - - self.transform_projected_boundary( - name, - expr.literal(), - &expr.op(), - &func, - None, - )? - } - _ => None, - }, - BoundPredicate::Set(expr) => match self { - Transform::Identity => Some(Predicate::Set(SetExpression::new( + )))), + BoundPredicate::Set(expr) => Ok(Some(Predicate::Set(SetExpression::new( expr.op(), Reference::new(name), expr.literals().to_owned(), - ))), - Transform::Bucket(_) - | Transform::Truncate(_) - | Transform::Year - | Transform::Month - | Transform::Day - | Transform::Hour => { - if expr.op() != PredicateOperator::In - || expr.literals().iter().any(|d| !self.can_transform(d)) - { - return Ok(None); - } - - Some(Predicate::Set(SetExpression::new( - expr.op(), - Reference::new(name), - self.transform_set(expr.literals(), &func)?, - ))) + )))), + _ => Ok(None), + }, + Transform::Bucket(_) => match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => self.project_binary(name, expr, &func), + BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + _ => Ok(None), + }, + Transform::Truncate(width) => match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => { + self.project_binary_with_adjusted_boundary(name, &expr, &func, Some(*width)) } - _ => None, + BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + _ 
=> Ok(None), }, - _ => None, - }; - - Ok(projection) + Transform::Year | Transform::Month | Transform::Day | Transform::Hour => { + match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => { + self.project_binary_with_adjusted_boundary(name, &expr, &func, None) + } + BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + _ => Ok(None), + } + } + _ => Ok(None), + } } /// Check if `Transform` is applicable on datum's `PrimitiveType` @@ -382,51 +334,73 @@ impl Transform { self.result_type(&Type::Primitive(input_type)).is_ok() } - /// Transform each literal value of `FnvHashSet` - fn transform_set( + /// Creates a unary predicate from a given operator and a reference name. + fn project_unary(op: PredicateOperator, name: String) -> Result> { + Ok(Some(Predicate::Unary(UnaryExpression::new( + op, + Reference::new(name), + )))) + } + + /// Attempts to create a binary predicate based on a binary expression, + /// if applicable. + /// + /// This method evaluates a given binary expression and, if the operation + /// is equality (`Eq`) and the literal can be transformed, constructs a + /// `Predicate::Binary`variant representing the binary operation. 
+ fn project_binary( &self, - literals: &FnvHashSet, + name: String, + expr: &BinaryExpression, func: &BoxedTransformFunction, - ) -> Result> { - let mut new_set = FnvHashSet::default(); - - for lit in literals { - let datum = func.transform_literal_result(lit)?; - - if let Some(AdjustedProjection::Single(d)) = - self.adjust_projection(&PredicateOperator::In, lit, &datum) - { - new_set.insert(d); - }; - - new_set.insert(datum); + ) -> Result> { + if expr.op() != PredicateOperator::Eq || !self.can_transform(expr.literal()) { + return Ok(None); } - Ok(new_set) + Ok(Some(Predicate::Binary(BinaryExpression::new( + expr.op(), + Reference::new(name), + func.transform_literal_result(expr.literal())?, + )))) } - /// Apply transform on `Datum` with adjusted boundaries. - /// Returns Predicate with projection and possibly - /// rewritten `PredicateOperator` - fn transform_projected_boundary( + /// Projects a binary expression to a predicate with an adjusted boundary. + /// + /// Checks if the literal within the given binary expression is + /// transformable. If transformable, it proceeds to potentially adjust + /// the boundary of the expression based on the comparison operator (`op`). + /// The potential adjustements involve incrementing or decrementing the + /// literal value and changing the `PredicateOperator` itself to its + /// inclusive variant. + fn project_binary_with_adjusted_boundary( &self, name: String, - datum: &Datum, - op: &PredicateOperator, + expr: &BinaryExpression, func: &BoxedTransformFunction, width: Option, ) -> Result> { - if let Some(boundary) = Self::projected_boundary(op, datum)? { - let transformed = func.transform_literal_result(&boundary)?; - let adjusted = self.adjust_projection(op, datum, &transformed); - let op = Self::projected_operator(op, datum, width); + if !self.can_transform(expr.literal()) { + return Ok(None); + } + + let op = &expr.op(); + let datum = &expr.literal(); + + if let Some(boundary) = Self::adjust_boundary(op, datum)? 
{ + let transformed_projection = func.transform_literal_result(&boundary)?; + + let adjusted_projection = + self.adjust_time_projection(op, datum, &transformed_projection); + + let adjusted_operator = Self::adjust_operator(op, datum, width); - if let Some(op) = op { - let predicate = match adjusted { + if let Some(op) = adjusted_operator { + let predicate = match adjusted_projection { None => Predicate::Binary(BinaryExpression::new( op, Reference::new(name), - transformed, + transformed_projection, )), Some(AdjustedProjection::Single(d)) => { Predicate::Binary(BinaryExpression::new(op, Reference::new(name), d)) @@ -444,13 +418,54 @@ impl Transform { Ok(None) } - /// Create a new `Datum` with adjusted projection boundary. - /// Returns `None` if `PredicateOperator` and `PrimitiveLiteral` - /// can not be projected - fn projected_boundary(op: &PredicateOperator, datum: &Datum) -> Result> { + /// Projects a set expression to a predicate, + /// applying a transformation to each literal in the set. + fn project_set( + &self, + expr: &SetExpression, + name: String, + func: &BoxedTransformFunction, + ) -> Result> { + if expr.op() != PredicateOperator::In + || expr.literals().iter().any(|d| !self.can_transform(d)) + { + return Ok(None); + } + + let mut new_set = FnvHashSet::default(); + + for lit in expr.literals() { + let datum = func.transform_literal_result(lit)?; + + if let Some(AdjustedProjection::Single(d)) = + self.adjust_time_projection(&PredicateOperator::In, lit, &datum) + { + new_set.insert(d); + }; + + new_set.insert(datum); + } + + Ok(Some(Predicate::Set(SetExpression::new( + expr.op(), + Reference::new(name), + new_set, + )))) + } + + /// Adjusts the boundary value for comparison operations + /// based on the specified `PredicateOperator` and `Datum`. + /// + /// This function modifies the boundary value for certain comparison + /// operators (`LessThan`, `GreaterThan`) by incrementing or decrementing + /// the literal value within the given `Datum`. 
For operators that do not + /// imply a boundary shift (`Eq`, `LessThanOrEq`, `GreaterThanOrEq`, + /// `StartsWith`, `NotStartsWith`), the original datum is returned + /// unmodified. + fn adjust_boundary(op: &PredicateOperator, datum: &Datum) -> Result> { let literal = datum.literal(); - let projected_boundary = match op { + let adjusted_boundary = match op { PredicateOperator::LessThan => match literal { PrimitiveLiteral::Int(v) => Some(Datum::int(v - 1)), PrimitiveLiteral::Long(v) => Some(Datum::long(v - 1)), @@ -475,11 +490,20 @@ impl Transform { _ => None, }; - Ok(projected_boundary) + Ok(adjusted_boundary) } - /// Create a new `PredicateOperator`, rewritten for projection - fn projected_operator( + /// Adjusts the comparison operator based on the specified datum and an + /// optional width constraint. + /// + /// This function modifies the comparison operator for `LessThan` and + /// `GreaterThan` cases to their inclusive counterparts (`LessThanOrEq`, + /// `GreaterThanOrEq`) unconditionally. For `StartsWith` and + /// `NotStartsWith` operators acting on string literals, the operator may + /// be adjusted to `Eq` or `NotEq` if the string length matches the + /// specified width, indicating a precise match rather than a prefix + /// condition. 
+ fn adjust_operator( op: &PredicateOperator, datum: &Datum, width: Option, @@ -523,7 +547,7 @@ impl Transform { /// Adjust projection for temporal transforms, align with Java /// implementation: https://github.com/apache/iceberg/blob/main/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java#L275 - fn adjust_projection( + fn adjust_time_projection( &self, op: &PredicateOperator, original: &Datum, From 2bb2f95ee63e78c76943ce1c5075c77027156b07 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Tue, 2 Apr 2024 14:27:56 +0200 Subject: [PATCH 44/46] fix: clippy --- crates/iceberg/src/spec/transform.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 2a86ca9fd..c3675a98e 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -309,7 +309,7 @@ impl Transform { Transform::Truncate(width) => match predicate { BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), BoundPredicate::Binary(expr) => { - self.project_binary_with_adjusted_boundary(name, &expr, &func, Some(*width)) + self.project_binary_with_adjusted_boundary(name, expr, &func, Some(*width)) } BoundPredicate::Set(expr) => self.project_set(expr, name, &func), _ => Ok(None), @@ -318,7 +318,7 @@ impl Transform { match predicate { BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), BoundPredicate::Binary(expr) => { - self.project_binary_with_adjusted_boundary(name, &expr, &func, None) + self.project_binary_with_adjusted_boundary(name, expr, &func, None) } BoundPredicate::Set(expr) => self.project_set(expr, name, &func), _ => Ok(None), From 976d8c932ca78995ecf2fa0526598ef5a65e1016 Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Tue, 2 Apr 2024 14:31:19 +0200 Subject: [PATCH 45/46] fix: typo --- crates/iceberg/src/spec/transform.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index c3675a98e..724f012ef 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -370,7 +370,7 @@ impl Transform { /// Checks if the literal within the given binary expression is /// transformable. If transformable, it proceeds to potentially adjust /// the boundary of the expression based on the comparison operator (`op`). - /// The potential adjustements involve incrementing or decrementing the + /// The potential adjustments involve incrementing or decrementing the /// literal value and changing the `PredicateOperator` itself to its /// inclusive variant. fn project_binary_with_adjusted_boundary( From 82e62440f40fa6b056279655181e61ed727147bf Mon Sep 17 00:00:00 2001 From: marvinlanhenke Date: Fri, 5 Apr 2024 11:39:48 +0200 Subject: [PATCH 46/46] fix: naming + generics --- crates/iceberg/src/spec/transform.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 724f012ef..bdddd326b 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -19,8 +19,8 @@ use crate::error::{Error, Result}; use crate::expr::{ - BinaryExpression, BoundPredicate, Predicate, PredicateOperator, Reference, SetExpression, - UnaryExpression, + BinaryExpression, BoundPredicate, BoundReference, Predicate, PredicateOperator, Reference, + SetExpression, UnaryExpression, }; use crate::spec::datatypes::{PrimitiveType, Type}; use crate::transform::{create_transform_function, BoxedTransformFunction}; @@ -302,8 +302,8 @@ impl Transform { }, Transform::Bucket(_) => match predicate { BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), - BoundPredicate::Binary(expr) => self.project_binary(name, expr, &func), - BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + BoundPredicate::Binary(expr) => 
self.project_eq_operator(name, expr, &func), + BoundPredicate::Set(expr) => self.project_in_operator(expr, name, &func), _ => Ok(None), }, Transform::Truncate(width) => match predicate { @@ -311,7 +311,7 @@ impl Transform { BoundPredicate::Binary(expr) => { self.project_binary_with_adjusted_boundary(name, expr, &func, Some(*width)) } - BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + BoundPredicate::Set(expr) => self.project_in_operator(expr, name, &func), _ => Ok(None), }, Transform::Year | Transform::Month | Transform::Day | Transform::Hour => { @@ -320,7 +320,7 @@ impl Transform { BoundPredicate::Binary(expr) => { self.project_binary_with_adjusted_boundary(name, expr, &func, None) } - BoundPredicate::Set(expr) => self.project_set(expr, name, &func), + BoundPredicate::Set(expr) => self.project_in_operator(expr, name, &func), _ => Ok(None), } } @@ -348,10 +348,10 @@ impl Transform { /// This method evaluates a given binary expression and, if the operation /// is equality (`Eq`) and the literal can be transformed, constructs a /// `Predicate::Binary`variant representing the binary operation. - fn project_binary( + fn project_eq_operator( &self, name: String, - expr: &BinaryExpression, + expr: &BinaryExpression, func: &BoxedTransformFunction, ) -> Result> { if expr.op() != PredicateOperator::Eq || !self.can_transform(expr.literal()) { @@ -373,10 +373,10 @@ impl Transform { /// The potential adjustments involve incrementing or decrementing the /// literal value and changing the `PredicateOperator` itself to its /// inclusive variant. - fn project_binary_with_adjusted_boundary( + fn project_binary_with_adjusted_boundary( &self, name: String, - expr: &BinaryExpression, + expr: &BinaryExpression, func: &BoxedTransformFunction, width: Option, ) -> Result> { @@ -420,9 +420,9 @@ impl Transform { /// Projects a set expression to a predicate, /// applying a transformation to each literal in the set. 
- fn project_set( + fn project_in_operator( &self, - expr: &SetExpression, + expr: &SetExpression, name: String, func: &BoxedTransformFunction, ) -> Result> {