diff --git a/crates/iceberg/src/expr/predicate.rs b/crates/iceberg/src/expr/predicate.rs index da8a863d2..530923f15 100644 --- a/crates/iceberg/src/expr/predicate.rs +++ b/crates/iceberg/src/expr/predicate.rs @@ -116,6 +116,9 @@ impl UnaryExpression { debug_assert!(op.is_unary()); Self { op, term } } + pub(crate) fn op(&self) -> PredicateOperator { + self.op + } } /// Binary predicate, for example, `a > 10`. @@ -144,6 +147,12 @@ impl BinaryExpression { debug_assert!(op.is_binary()); Self { op, term, literal } } + pub(crate) fn op(&self) -> PredicateOperator { + self.op + } + pub(crate) fn literal(&self) -> &Datum { + &self.literal + } } impl Display for BinaryExpression { @@ -191,6 +200,12 @@ impl SetExpression { debug_assert!(op.is_set()); Self { op, term, literals } } + pub(crate) fn op(&self) -> PredicateOperator { + self.op + } + pub(crate) fn literals(&self) -> &FnvHashSet { + &self.literals + } } impl Bind for SetExpression { diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 839d582dc..bdddd326b 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -18,12 +18,20 @@ //! Transforms in iceberg. use crate::error::{Error, Result}; +use crate::expr::{ + BinaryExpression, BoundPredicate, BoundReference, Predicate, PredicateOperator, Reference, + SetExpression, UnaryExpression, +}; use crate::spec::datatypes::{PrimitiveType, Type}; +use crate::transform::{create_transform_function, BoxedTransformFunction}; use crate::ErrorKind; +use fnv::FnvHashSet; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt::{Display, Formatter}; use std::str::FromStr; +use super::{Datum, PrimitiveLiteral}; + /// Transform is used to transform predicates to partition predicates, /// in addition to transforming data values. /// @@ -248,7 +256,7 @@ impl Transform { /// result. /// /// For example, sorting by day(ts) will produce an ordering that is also by month(ts) or - // year(ts). 
However, sorting by day(ts) will not satisfy the order of hour(ts) or identity(ts).
+    /// year(ts). However, sorting by day(ts) will not satisfy the order of hour(ts) or identity(ts).
     pub fn satisfies_order_of(&self, other: &Self) -> bool {
         match self {
             Transform::Identity => other.preserves_order(),
@@ -261,6 +269,323 @@ impl Transform {
             _ => self == other,
         }
     }
+
+    /// Projects a given predicate according to the transformation
+    /// specified by the `Transform` instance.
+    ///
+    /// This allows predicates to be effectively applied to data
+    /// that has undergone transformation, enabling efficient querying
+    /// and filtering based on the original, untransformed data.
+    ///
+    /// # Example
+    /// Suppose, we have row filter `a = 10`, and a partition spec
+    /// `bucket(a, 37) as bs`, if one row matches `a = 10`, then its partition
+    /// value should match `bucket(10, 37) as bs`, and we project `a = 10` to
+    /// `bs = bucket(10, 37)`
+    ///
+    /// # Returns
+    /// `Ok(Some(_))` with the predicate rewritten against the partition
+    /// field `name`, or `Ok(None)` when this transform/predicate pair has
+    /// no projection.
+    ///
+    /// # Errors
+    /// Propagates failures from `create_transform_function` and from the
+    /// projection helpers called below.
+    pub fn project(&self, name: String, predicate: &BoundPredicate) -> Result<Option<Predicate>> {
+        // Built before dispatching, so a failure here is returned even for
+        // arms (e.g. `Identity`) that never use `func`.
+        let func = create_transform_function(self)?;
+
+        match self {
+            // Identity keeps operator and literal(s); only the reference is renamed.
+            Transform::Identity => match predicate {
+                BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name),
+                BoundPredicate::Binary(expr) => Ok(Some(Predicate::Binary(BinaryExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literal().to_owned(),
+                )))),
+                BoundPredicate::Set(expr) => Ok(Some(Predicate::Set(SetExpression::new(
+                    expr.op(),
+                    Reference::new(name),
+                    expr.literals().to_owned(),
+                )))),
+                _ => Ok(None),
+            },
+            // Bucket projects only `Eq` and `In` (see the helpers).
+            Transform::Bucket(_) => match predicate {
+                BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name),
+                BoundPredicate::Binary(expr) => self.project_eq_operator(name, expr, &func),
+                BoundPredicate::Set(expr) => self.project_in_operator(expr, name, &func),
+                _ => Ok(None),
+            },
+            Transform::Truncate(width) => match predicate {
+                BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name),
+                BoundPredicate::Binary(expr) => {
+                    self.project_binary_with_adjusted_boundary(name, expr, &func, Some(*width))
+                }
+                BoundPredicate::Set(expr) => self.project_in_operator(expr, name, &func),
+                _ => Ok(None),
+            },
+            // Temporal transforms carry no width.
+            Transform::Year | Transform::Month | Transform::Day | Transform::Hour => {
+                match predicate {
+                    BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name),
+                    BoundPredicate::Binary(expr) => {
+                        self.project_binary_with_adjusted_boundary(name, expr, &func, None)
+                    }
+                    BoundPredicate::Set(expr) => self.project_in_operator(expr, name, &func),
+                    _ => Ok(None),
+                }
+            }
+            _ => Ok(None),
+        }
+    }
+
+    /// Check if `Transform` is applicable on datum's `PrimitiveType`
+    ///
+    /// Applicability is decided by whether `result_type` succeeds for the
+    /// datum's primitive type.
+    fn can_transform(&self, datum: &Datum) -> bool {
+        let input_type = datum.data_type().clone();
+        self.result_type(&Type::Primitive(input_type)).is_ok()
+    }
+
+    /// Creates a unary predicate from a given operator and a reference name.
+    ///
+    /// Always returns `Ok(Some(_))`; the `Result<Option<_>>` shape only
+    /// matches the other projection helpers.
+    fn project_unary(op: PredicateOperator, name: String) -> Result<Option<Predicate>> {
+        Ok(Some(Predicate::Unary(UnaryExpression::new(
+            op,
+            Reference::new(name),
+        ))))
+    }
+
+    /// Attempts to create a binary predicate based on a binary expression,
+    /// if applicable.
+    ///
+    /// This method evaluates a given binary expression and, if the operation
+    /// is equality (`Eq`) and the literal can be transformed, constructs a
+    /// `Predicate::Binary` variant representing the binary operation.
+    /// Any other operator, or a literal the transform cannot be applied to,
+    /// yields `Ok(None)`.
+    fn project_eq_operator(
+        &self,
+        name: String,
+        expr: &BinaryExpression<BoundReference>,
+        func: &BoxedTransformFunction,
+    ) -> Result<Option<Predicate>> {
+        if expr.op() != PredicateOperator::Eq || !self.can_transform(expr.literal()) {
+            return Ok(None);
+        }
+
+        Ok(Some(Predicate::Binary(BinaryExpression::new(
+            expr.op(),
+            Reference::new(name),
+            func.transform_literal_result(expr.literal())?,
+        ))))
+    }
+
+    /// Projects a binary expression to a predicate with an adjusted boundary.
+    ///
+    /// Checks if the literal within the given binary expression is
+    /// transformable. If transformable, it proceeds to potentially adjust
+    /// the boundary of the expression based on the comparison operator (`op`).
+    /// The potential adjustments involve incrementing or decrementing the
+    /// literal value and changing the `PredicateOperator` itself to its
+    /// inclusive variant.
+    ///
+    /// `width` is `Some` only when called for `Truncate`; temporal
+    /// transforms pass `None`.
+    // NOTE(review): `u32` is assumed to be the payload type of
+    // `Transform::Truncate` — confirm against the enum definition.
+    fn project_binary_with_adjusted_boundary(
+        &self,
+        name: String,
+        expr: &BinaryExpression<BoundReference>,
+        func: &BoxedTransformFunction,
+        width: Option<u32>,
+    ) -> Result<Option<Predicate>> {
+        if !self.can_transform(expr.literal()) {
+            return Ok(None);
+        }
+
+        let op = &expr.op();
+        let datum = &expr.literal();
+
+        // `None` from `adjust_boundary` means the operator has no projection.
+        if let Some(boundary) = Self::adjust_boundary(op, datum)? {
+            let transformed_projection = func.transform_literal_result(&boundary)?;
+
+            let adjusted_projection =
+                self.adjust_time_projection(op, datum, &transformed_projection);
+
+            let adjusted_operator = Self::adjust_operator(op, datum, width);
+
+            if let Some(op) = adjusted_operator {
+                let predicate = match adjusted_projection {
+                    // No temporal correction: use the transformed boundary as-is.
+                    None => Predicate::Binary(BinaryExpression::new(
+                        op,
+                        Reference::new(name),
+                        transformed_projection,
+                    )),
+                    Some(AdjustedProjection::Single(d)) => {
+                        Predicate::Binary(BinaryExpression::new(op, Reference::new(name), d))
+                    }
+                    // A corrected equality widens into an `In` over the returned set.
+                    Some(AdjustedProjection::Set(d)) => Predicate::Set(SetExpression::new(
+                        PredicateOperator::In,
+                        Reference::new(name),
+                        d,
+                    )),
+                };
+                return Ok(Some(predicate));
+            }
+        };
+
+        Ok(None)
+    }
+
+    /// Projects a set expression to a predicate,
+    /// applying a transformation to each literal in the set.
+    ///
+    /// Only `In` is projected; any other set operator, or a set containing a
+    /// literal this transform cannot be applied to, yields `Ok(None)`.
+    fn project_in_operator(
+        &self,
+        expr: &SetExpression<BoundReference>,
+        name: String,
+        func: &BoxedTransformFunction,
+    ) -> Result<Option<Predicate>> {
+        if expr.op() != PredicateOperator::In
+            || expr.literals().iter().any(|d| !self.can_transform(d))
+        {
+            return Ok(None);
+        }
+
+        let mut new_set = FnvHashSet::default();
+
+        for lit in expr.literals() {
+            let datum = func.transform_literal_result(lit)?;
+
+            // When `adjust_time_projection` reports a corrected value for this
+            // literal, keep it alongside the transformed one.
+            if let Some(AdjustedProjection::Single(d)) =
+                self.adjust_time_projection(&PredicateOperator::In, lit, &datum)
+            {
+                new_set.insert(d);
+            };
+
+            new_set.insert(datum);
+        }
+
+        Ok(Some(Predicate::Set(SetExpression::new(
+            expr.op(),
+            Reference::new(name),
+            new_set,
+        ))))
+    }
+
+    /// Adjusts the boundary value for comparison operations
+    /// based on the specified `PredicateOperator` and `Datum`.
+    ///
+    /// This function modifies the boundary value for certain comparison
+    /// operators (`LessThan`, `GreaterThan`) by incrementing or decrementing
+    /// the literal value within the given `Datum`. For operators that do not
+    /// imply a boundary shift (`Eq`, `LessThanOrEq`, `GreaterThanOrEq`,
+    /// `StartsWith`, `NotStartsWith`), the original datum is returned
+    /// unmodified.
+    ///
+    /// Returns `Ok(None)` for any operator not listed above, and also when a
+    /// strict bound cannot be shifted by one without overflowing its integer
+    /// type; in both cases the caller produces no projection.
+    fn adjust_boundary(op: &PredicateOperator, datum: &Datum) -> Result<Option<Datum>> {
+        let literal = datum.literal();
+
+        let adjusted_boundary = match op {
+            // `x < v` becomes an inclusive bound on `v - 1`. Checked arithmetic
+            // avoids a debug-build panic / release-build wrap at the type's MIN.
+            PredicateOperator::LessThan => match literal {
+                PrimitiveLiteral::Int(v) => v.checked_sub(1).map(Datum::int),
+                PrimitiveLiteral::Long(v) => v.checked_sub(1).map(Datum::long),
+                PrimitiveLiteral::Decimal(v) => match v.checked_sub(1) {
+                    Some(shifted) => Some(Datum::decimal(shifted)?),
+                    None => None,
+                },
+                PrimitiveLiteral::Date(v) => v.checked_sub(1).map(Datum::date),
+                PrimitiveLiteral::Timestamp(v) => v.checked_sub(1).map(Datum::timestamp_micros),
+                // Other literal kinds are passed through unshifted.
+                _ => Some(datum.to_owned()),
+            },
+            // `x > v` becomes an inclusive bound on `v + 1`, guarded at MAX.
+            PredicateOperator::GreaterThan => match literal {
+                PrimitiveLiteral::Int(v) => v.checked_add(1).map(Datum::int),
+                PrimitiveLiteral::Long(v) => v.checked_add(1).map(Datum::long),
+                PrimitiveLiteral::Decimal(v) => match v.checked_add(1) {
+                    Some(shifted) => Some(Datum::decimal(shifted)?),
+                    None => None,
+                },
+                PrimitiveLiteral::Date(v) => v.checked_add(1).map(Datum::date),
+                PrimitiveLiteral::Timestamp(v) => v.checked_add(1).map(Datum::timestamp_micros),
+                _ => Some(datum.to_owned()),
+            },
+            PredicateOperator::Eq
+            | PredicateOperator::LessThanOrEq
+            | PredicateOperator::GreaterThanOrEq
+            | PredicateOperator::StartsWith
+            | PredicateOperator::NotStartsWith => Some(datum.to_owned()),
+            _ => None,
+        };
+
+        Ok(adjusted_boundary)
+    }
+
+    /// Adjusts the comparison operator based on the specified datum and an
+    /// optional width constraint.
+    ///
+    /// This function modifies the comparison operator for `LessThan` and
+    /// `GreaterThan` cases to their inclusive counterparts (`LessThanOrEq`,
+    /// `GreaterThanOrEq`) unconditionally. For `StartsWith` and
+    /// `NotStartsWith` operators acting on string literals, the operator may
+    /// be adjusted to `Eq` or `NotEq` if the string length matches the
+    /// specified width, indicating a precise match rather than a prefix
+    /// condition.
+    ///
+    /// String length is measured in characters (`chars().count()`), not
+    /// bytes, so multi-byte prefixes are compared against the width correctly.
+    /// Returns `None` for `NotStartsWith` when a width is given and the
+    /// prefix is longer than it.
+    // NOTE(review): `u32` is assumed to be the payload type of
+    // `Transform::Truncate` — confirm against the enum definition.
+    fn adjust_operator(
+        op: &PredicateOperator,
+        datum: &Datum,
+        width: Option<u32>,
+    ) -> Option<PredicateOperator> {
+        match op {
+            PredicateOperator::LessThan => Some(PredicateOperator::LessThanOrEq),
+            PredicateOperator::GreaterThan => Some(PredicateOperator::GreaterThanOrEq),
+            PredicateOperator::StartsWith => match datum.literal() {
+                PrimitiveLiteral::String(s) => {
+                    if let Some(w) = width {
+                        // A prefix exactly `w` characters long becomes equality.
+                        if s.chars().count() == w as usize {
+                            return Some(PredicateOperator::Eq);
+                        };
+                    };
+                    Some(*op)
+                }
+                _ => Some(*op),
+            },
+            PredicateOperator::NotStartsWith => match datum.literal() {
+                PrimitiveLiteral::String(s) => {
+                    if let Some(w) = width {
+                        let w = w as usize;
+                        let len = s.chars().count();
+
+                        if len == w {
+                            return Some(PredicateOperator::NotEq);
+                        }
+
+                        if len < w {
+                            return Some(*op);
+                        }
+
+                        // Prefix longer than the width: no projection.
+                        return None;
+                    };
+                    Some(*op)
+                }
+                _ => Some(*op),
+            },
+            _ => Some(*op),
+        }
+    }
+
+    /// Adjust projection for temporal transforms, align with Java
+    /// implementation: https://github.com/apache/iceberg/blob/main/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java#L275
+    ///
+    /// Applies to `Year` and `Month` always, and to `Day` only when the
+    /// original literal is a `Timestamp`. A correction is produced only when
+    /// the transformed value is a negative `Int`:
+    /// - `LessThan` / `LessThanOrEq` / `In`: `Single(value + 1)`
+    /// - `Eq`: `Set({value, value + 1})`
+    ///
+    /// Every other case returns `None`.
+    fn adjust_time_projection(
+        &self,
+        op: &PredicateOperator,
+        original: &Datum,
+        transformed: &Datum,
+    ) -> Option<AdjustedProjection> {
+        let should_adjust = match self {
+            Transform::Day => matches!(original.literal(), PrimitiveLiteral::Timestamp(_)),
+            Transform::Year | Transform::Month => true,
+            _ => false,
+        };
+
+        if should_adjust {
+            if let &PrimitiveLiteral::Int(v) = transformed.literal() {
+                match op {
+                    PredicateOperator::LessThan
+                    | PredicateOperator::LessThanOrEq
+                    | PredicateOperator::In => {
+                        if v < 0 {
+                            return Some(AdjustedProjection::Single(Datum::int(v + 1)));
+                        };
+                    }
+                    PredicateOperator::Eq => {
+                        if v < 0 {
+                            let new_set = FnvHashSet::from_iter(vec![
+                                transformed.to_owned(),
+                                Datum::int(v + 1),
+                            ]);
+                            return Some(AdjustedProjection::Set(new_set));
+                        }
+                    }
+                    _ => {
+                        return None;
+                    }
+                }
+            };
+        }
+        None
+    }
 }
 
 impl Display for Transform {
@@ -356,506 +681,10 @@ impl<'de> Deserialize<'de> for Transform {
     }
 }
 
-#[cfg(test)]
-mod
tests { - use crate::spec::datatypes::PrimitiveType::{ - Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, - Timestamptz, Uuid, - }; - use crate::spec::datatypes::Type::{Primitive, Struct}; - use crate::spec::datatypes::{NestedField, StructType, Type}; - use crate::spec::transform::Transform; - - struct TestParameter { - display: String, - json: String, - dedup_name: String, - preserves_order: bool, - satisfies_order_of: Vec<(Transform, bool)>, - trans_types: Vec<(Type, Option)>, - } - - fn check_transform(trans: Transform, param: TestParameter) { - assert_eq!(param.display, format!("{trans}")); - assert_eq!(param.json, serde_json::to_string(&trans).unwrap()); - assert_eq!(trans, serde_json::from_str(param.json.as_str()).unwrap()); - assert_eq!(param.dedup_name, trans.dedup_name()); - assert_eq!(param.preserves_order, trans.preserves_order()); - - for (other_trans, satisfies_order_of) in param.satisfies_order_of { - assert_eq!( - satisfies_order_of, - trans.satisfies_order_of(&other_trans), - "Failed to check satisfies order {}, {}, {}", - trans, - other_trans, - satisfies_order_of - ); - } - - for (input_type, result_type) in param.trans_types { - assert_eq!(result_type, trans.result_type(&input_type).ok()); - } - } - - #[test] - fn test_bucket_transform() { - let trans = Transform::Bucket(8); - - let test_param = TestParameter { - display: "bucket[8]".to_string(), - json: r#""bucket[8]""#.to_string(), - dedup_name: "bucket[8]".to_string(), - preserves_order: false, - satisfies_order_of: vec![ - (Transform::Bucket(8), true), - (Transform::Bucket(4), false), - (Transform::Void, false), - (Transform::Day, false), - ], - trans_types: vec![ - (Primitive(Binary), Some(Primitive(Int))), - (Primitive(Date), Some(Primitive(Int))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - Some(Primitive(Int)), - ), - (Primitive(Fixed(8)), Some(Primitive(Int))), - (Primitive(Int), Some(Primitive(Int))), - (Primitive(Long), 
Some(Primitive(Int))), - (Primitive(StringType), Some(Primitive(Int))), - (Primitive(Uuid), Some(Primitive(Int))), - (Primitive(Time), Some(Primitive(Int))), - (Primitive(Timestamp), Some(Primitive(Int))), - (Primitive(Timestamptz), Some(Primitive(Int))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_truncate_transform() { - let trans = Transform::Truncate(4); - - let test_param = TestParameter { - display: "truncate[4]".to_string(), - json: r#""truncate[4]""#.to_string(), - dedup_name: "truncate[4]".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Truncate(4), true), - (Transform::Truncate(2), false), - (Transform::Bucket(4), false), - (Transform::Void, false), - (Transform::Day, false), - ], - trans_types: vec![ - (Primitive(Binary), Some(Primitive(Binary))), - (Primitive(Date), None), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - Some(Primitive(Decimal { - precision: 8, - scale: 5, - })), - ), - (Primitive(Fixed(8)), None), - (Primitive(Int), Some(Primitive(Int))), - (Primitive(Long), Some(Primitive(Long))), - (Primitive(StringType), Some(Primitive(StringType))), - (Primitive(Uuid), None), - (Primitive(Time), None), - (Primitive(Timestamp), None), - (Primitive(Timestamptz), None), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_identity_transform() { - let trans = Transform::Identity; - - let test_param = TestParameter { - display: "identity".to_string(), - json: r#""identity""#.to_string(), - dedup_name: "identity".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Truncate(4), true), - (Transform::Truncate(2), true), - (Transform::Bucket(4), false), - (Transform::Void, false), - 
(Transform::Day, true), - ], - trans_types: vec![ - (Primitive(Binary), Some(Primitive(Binary))), - (Primitive(Date), Some(Primitive(Date))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - Some(Primitive(Decimal { - precision: 8, - scale: 5, - })), - ), - (Primitive(Fixed(8)), Some(Primitive(Fixed(8)))), - (Primitive(Int), Some(Primitive(Int))), - (Primitive(Long), Some(Primitive(Long))), - (Primitive(StringType), Some(Primitive(StringType))), - (Primitive(Uuid), Some(Primitive(Uuid))), - (Primitive(Time), Some(Primitive(Time))), - (Primitive(Timestamp), Some(Primitive(Timestamp))), - (Primitive(Timestamptz), Some(Primitive(Timestamptz))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_year_transform() { - let trans = Transform::Year; - - let test_param = TestParameter { - display: "year".to_string(), - json: r#""year""#.to_string(), - dedup_name: "time".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Year, true), - (Transform::Month, false), - (Transform::Day, false), - (Transform::Hour, false), - (Transform::Void, false), - (Transform::Identity, false), - ], - trans_types: vec![ - (Primitive(Binary), None), - (Primitive(Date), Some(Primitive(Int))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - None, - ), - (Primitive(Fixed(8)), None), - (Primitive(Int), None), - (Primitive(Long), None), - (Primitive(StringType), None), - (Primitive(Uuid), None), - (Primitive(Time), None), - (Primitive(Timestamp), Some(Primitive(Int))), - (Primitive(Timestamptz), Some(Primitive(Int))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_month_transform() { - let trans = Transform::Month; - - let test_param = TestParameter 
{ - display: "month".to_string(), - json: r#""month""#.to_string(), - dedup_name: "time".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Year, true), - (Transform::Month, true), - (Transform::Day, false), - (Transform::Hour, false), - (Transform::Void, false), - (Transform::Identity, false), - ], - trans_types: vec![ - (Primitive(Binary), None), - (Primitive(Date), Some(Primitive(Int))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - None, - ), - (Primitive(Fixed(8)), None), - (Primitive(Int), None), - (Primitive(Long), None), - (Primitive(StringType), None), - (Primitive(Uuid), None), - (Primitive(Time), None), - (Primitive(Timestamp), Some(Primitive(Int))), - (Primitive(Timestamptz), Some(Primitive(Int))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_day_transform() { - let trans = Transform::Day; - - let test_param = TestParameter { - display: "day".to_string(), - json: r#""day""#.to_string(), - dedup_name: "time".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Year, true), - (Transform::Month, true), - (Transform::Day, true), - (Transform::Hour, false), - (Transform::Void, false), - (Transform::Identity, false), - ], - trans_types: vec![ - (Primitive(Binary), None), - (Primitive(Date), Some(Primitive(Int))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - None, - ), - (Primitive(Fixed(8)), None), - (Primitive(Int), None), - (Primitive(Long), None), - (Primitive(StringType), None), - (Primitive(Uuid), None), - (Primitive(Time), None), - (Primitive(Timestamp), Some(Primitive(Int))), - (Primitive(Timestamptz), Some(Primitive(Int))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - 
fn test_hour_transform() { - let trans = Transform::Hour; - - let test_param = TestParameter { - display: "hour".to_string(), - json: r#""hour""#.to_string(), - dedup_name: "time".to_string(), - preserves_order: true, - satisfies_order_of: vec![ - (Transform::Year, true), - (Transform::Month, true), - (Transform::Day, true), - (Transform::Hour, true), - (Transform::Void, false), - (Transform::Identity, false), - ], - trans_types: vec![ - (Primitive(Binary), None), - (Primitive(Date), None), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - None, - ), - (Primitive(Fixed(8)), None), - (Primitive(Int), None), - (Primitive(Long), None), - (Primitive(StringType), None), - (Primitive(Uuid), None), - (Primitive(Time), None), - (Primitive(Timestamp), Some(Primitive(Int))), - (Primitive(Timestamptz), Some(Primitive(Int))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - None, - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_void_transform() { - let trans = Transform::Void; - - let test_param = TestParameter { - display: "void".to_string(), - json: r#""void""#.to_string(), - dedup_name: "void".to_string(), - preserves_order: false, - satisfies_order_of: vec![ - (Transform::Year, false), - (Transform::Month, false), - (Transform::Day, false), - (Transform::Hour, false), - (Transform::Void, true), - (Transform::Identity, false), - ], - trans_types: vec![ - (Primitive(Binary), Some(Primitive(Binary))), - (Primitive(Date), Some(Primitive(Date))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - Some(Primitive(Decimal { - precision: 8, - scale: 5, - })), - ), - (Primitive(Fixed(8)), Some(Primitive(Fixed(8)))), - (Primitive(Int), Some(Primitive(Int))), - (Primitive(Long), Some(Primitive(Long))), - (Primitive(StringType), Some(Primitive(StringType))), - (Primitive(Uuid), Some(Primitive(Uuid))), - (Primitive(Time), Some(Primitive(Time))), - (Primitive(Timestamp), 
Some(Primitive(Timestamp))), - (Primitive(Timestamptz), Some(Primitive(Timestamptz))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - Some(Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()]))), - ), - ], - }; - - check_transform(trans, test_param); - } - - #[test] - fn test_known_transform() { - let trans = Transform::Unknown; - - let test_param = TestParameter { - display: "unknown".to_string(), - json: r#""unknown""#.to_string(), - dedup_name: "unknown".to_string(), - preserves_order: false, - satisfies_order_of: vec![ - (Transform::Year, false), - (Transform::Month, false), - (Transform::Day, false), - (Transform::Hour, false), - (Transform::Void, false), - (Transform::Identity, false), - (Transform::Unknown, true), - ], - trans_types: vec![ - (Primitive(Binary), Some(Primitive(StringType))), - (Primitive(Date), Some(Primitive(StringType))), - ( - Primitive(Decimal { - precision: 8, - scale: 5, - }), - Some(Primitive(StringType)), - ), - (Primitive(Fixed(8)), Some(Primitive(StringType))), - (Primitive(Int), Some(Primitive(StringType))), - (Primitive(Long), Some(Primitive(StringType))), - (Primitive(StringType), Some(Primitive(StringType))), - (Primitive(Uuid), Some(Primitive(StringType))), - (Primitive(Time), Some(Primitive(StringType))), - (Primitive(Timestamp), Some(Primitive(StringType))), - (Primitive(Timestamptz), Some(Primitive(StringType))), - ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - Some(Primitive(StringType)), - ), - ], - }; - - check_transform(trans, test_param); - } +/// An enum representing the result of the adjusted projection. +/// Either being a single adjusted datum or a set. 
+#[derive(Debug)] +enum AdjustedProjection { + Single(Datum), + Set(FnvHashSet), } diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index 015aceaf4..d454c697f 100644 --- a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -251,9 +251,468 @@ impl TransformFunction for Bucket { mod test { use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime}; - use crate::{spec::Datum, transform::TransformFunction}; + use crate::spec::PrimitiveType::{ + Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, + Timestamptz, Uuid, + }; + use crate::spec::StructType; + use crate::spec::Type::{Primitive, Struct}; + use crate::{ + expr::PredicateOperator, + spec::{Datum, NestedField, PrimitiveType, Transform, Type}, + transform::{ + test::{TestProjectionFixture, TestTransformFixture}, + TransformFunction, + }, + Result, + }; use super::Bucket; + + #[test] + fn test_bucket_transform() { + let trans = Transform::Bucket(8); + + let fixture = TestTransformFixture { + display: "bucket[8]".to_string(), + json: r#""bucket[8]""#.to_string(), + dedup_name: "bucket[8]".to_string(), + preserves_order: false, + satisfies_order_of: vec![ + (Transform::Bucket(8), true), + (Transform::Bucket(4), false), + (Transform::Void, false), + (Transform::Day, false), + ], + trans_types: vec![ + (Primitive(Binary), Some(Primitive(Int))), + (Primitive(Date), Some(Primitive(Int))), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + Some(Primitive(Int)), + ), + (Primitive(Fixed(8)), Some(Primitive(Int))), + (Primitive(Int), Some(Primitive(Int))), + (Primitive(Long), Some(Primitive(Int))), + (Primitive(StringType), Some(Primitive(Int))), + (Primitive(Uuid), Some(Primitive(Int))), + (Primitive(Time), Some(Primitive(Int))), + (Primitive(Timestamp), Some(Primitive(Int))), + (Primitive(Timestamptz), Some(Primitive(Int))), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + 
Primitive(Timestamp), + ) + .into()])), + None, + ), + ], + }; + + fixture.assert_transform(trans); + } + + #[test] + fn test_projection_bucket_uuid() -> Result<()> { + let value = uuid::Uuid::from_u64_pair(123, 456); + let another = uuid::Uuid::from_u64_pair(456, 123); + + let fixture = TestProjectionFixture::new( + Transform::Bucket(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Uuid)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::uuid(value)), + Some("name = 4"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::uuid(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::uuid(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::uuid(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::uuid(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::uuid(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::uuid(value), Datum::uuid(another)], + ), + Some("name IN (4, 6)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::uuid(value), Datum::uuid(another)], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_bucket_fixed() -> Result<()> { + let value = "abcdefg".as_bytes().to_vec(); + let another = "abcdehij".as_bytes().to_vec(); + + let fixture = TestProjectionFixture::new( + Transform::Bucket(10), + "name", + NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Fixed(value.len() as u64)), + ), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::fixed(value.clone())), + Some("name 
= 4"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::fixed(value.clone())), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::fixed(value.clone())), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::fixed(value.clone())), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::fixed(value.clone())), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::fixed(value.clone()), + ), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::fixed(value.clone()), Datum::fixed(another.clone())], + ), + Some("name IN (4, 6)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::fixed(value.clone()), Datum::fixed(another.clone())], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_bucket_string() -> Result<()> { + let value = "abcdefg"; + let another = "abcdefgabc"; + + let fixture = TestProjectionFixture::new( + Transform::Bucket(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::string(value)), + Some("name = 4"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::string(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::string(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::string(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::string(value)), + None, + )?; + + 
/// Checks `bucket[10]` projection of predicates over a `decimal(9, 2)` column.
///
/// The expectations pin that only `Eq` and `In` yield a projected predicate;
/// every other operator must project to `None`.
#[test]
fn test_projection_bucket_decimal() -> Result<()> {
    let (prev, curr, next) = ("99.00", "100.00", "101.00");

    let column_type = Type::Primitive(PrimitiveType::Decimal {
        precision: 9,
        scale: 2,
    });
    let fixture = TestProjectionFixture::new(
        Transform::Bucket(10),
        "name",
        NestedField::required(1, "value", column_type),
    );

    // Every binary predicate compares the column against `curr`.
    let binary_cases = [
        (PredicateOperator::Eq, Some("name = 2")),
        (PredicateOperator::NotEq, None),
        (PredicateOperator::LessThan, None),
        (PredicateOperator::LessThanOrEq, None),
        (PredicateOperator::GreaterThan, None),
        (PredicateOperator::GreaterThanOrEq, None),
    ];
    for (op, expected) in binary_cases {
        let predicate = fixture.binary_predicate(op, Datum::decimal_from_str(curr)?);
        fixture.assert_projection(&predicate, expected)?;
    }

    // Three literals go in; the expected projection lists two bucket values.
    let included = vec![
        Datum::decimal_from_str(next)?,
        Datum::decimal_from_str(curr)?,
        Datum::decimal_from_str(prev)?,
    ];
    let in_predicate = fixture.set_predicate(PredicateOperator::In, included);
    fixture.assert_projection(&in_predicate, Some("name IN (6, 2)"))?;

    let excluded = vec![
        Datum::decimal_from_str(curr)?,
        Datum::decimal_from_str(next)?,
    ];
    let not_in_predicate = fixture.set_predicate(PredicateOperator::NotIn, excluded);
    fixture.assert_projection(&not_in_predicate, None)?;

    Ok(())
}
fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], + ), + Some("name IN (8, 7, 6)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), + None, + )?; + + Ok(()) + } + #[test] fn test_hash() { // test int diff --git a/crates/iceberg/src/transform/identity.rs b/crates/iceberg/src/transform/identity.rs index 49ab612aa..0f6f234c8 100644 --- a/crates/iceberg/src/transform/identity.rs +++ b/crates/iceberg/src/transform/identity.rs @@ -33,3 +33,68 @@ impl TransformFunction for Identity { Ok(Some(input.clone())) } } + +#[cfg(test)] +mod test { + use crate::spec::PrimitiveType::{ + Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, + Timestamptz, Uuid, + }; + use crate::spec::StructType; + use crate::spec::Type::{Primitive, Struct}; + use crate::transform::test::TestTransformFixture; + + use crate::spec::{NestedField, Transform}; + + #[test] + fn test_identity_transform() { + let trans = Transform::Identity; + + let fixture = TestTransformFixture { + display: "identity".to_string(), + json: r#""identity""#.to_string(), + dedup_name: "identity".to_string(), + preserves_order: true, + satisfies_order_of: vec![ + (Transform::Truncate(4), true), + (Transform::Truncate(2), true), + (Transform::Bucket(4), false), + (Transform::Void, false), + 
(Transform::Day, true), + ], + trans_types: vec![ + (Primitive(Binary), Some(Primitive(Binary))), + (Primitive(Date), Some(Primitive(Date))), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + Some(Primitive(Decimal { + precision: 8, + scale: 5, + })), + ), + (Primitive(Fixed(8)), Some(Primitive(Fixed(8)))), + (Primitive(Int), Some(Primitive(Int))), + (Primitive(Long), Some(Primitive(Long))), + (Primitive(StringType), Some(Primitive(StringType))), + (Primitive(Uuid), Some(Primitive(Uuid))), + (Primitive(Time), Some(Primitive(Time))), + (Primitive(Timestamp), Some(Primitive(Timestamp))), + (Primitive(Timestamptz), Some(Primitive(Timestamptz))), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + None, + ), + ], + }; + + fixture.assert_transform(trans); + } +} diff --git a/crates/iceberg/src/transform/mod.rs b/crates/iceberg/src/transform/mod.rs index 7effdbec3..9fc7e1050 100644 --- a/crates/iceberg/src/transform/mod.rs +++ b/crates/iceberg/src/transform/mod.rs @@ -16,9 +16,10 @@ // under the License. //! Transform function used to compute partition values. + use crate::{ spec::{Datum, Transform}, - Result, + Error, ErrorKind, Result, }; use arrow_array::ArrayRef; @@ -36,6 +37,16 @@ pub trait TransformFunction: Send { fn transform(&self, input: ArrayRef) -> Result; /// transform_literal will take an input literal and transform it into a new literal. fn transform_literal(&self, input: &Datum) -> Result>; + /// A thin wrapper around `transform_literal` + /// to return an error even when it's `None`. + fn transform_literal_result(&self, input: &Datum) -> Result { + self.transform_literal(input)?.ok_or_else(|| { + Error::new( + ErrorKind::Unexpected, + format!("Returns 'None' for literal {}", input), + ) + }) + } } /// BoxedTransformFunction is a boxed trait object of TransformFunction. 
@@ -58,3 +69,107 @@ pub fn create_transform_function(transform: &Transform) -> Result, + field: NestedField, + ) -> Self { + TestProjectionFixture { + transform, + name: name.into(), + field: Arc::new(field), + } + } + pub(crate) fn binary_predicate( + &self, + op: PredicateOperator, + literal: Datum, + ) -> BoundPredicate { + BoundPredicate::Binary(BinaryExpression::new( + op, + BoundReference::new(self.name.clone(), self.field.clone()), + literal, + )) + } + pub(crate) fn set_predicate( + &self, + op: PredicateOperator, + literals: Vec, + ) -> BoundPredicate { + BoundPredicate::Set(SetExpression::new( + op, + BoundReference::new(self.name.clone(), self.field.clone()), + HashSet::from_iter(literals), + )) + } + pub(crate) fn assert_projection( + &self, + predicate: &BoundPredicate, + expected: Option<&str>, + ) -> Result<()> { + let result = self.transform.project(self.name.clone(), predicate)?; + match expected { + Some(exp) => assert_eq!(format!("{}", result.unwrap()), exp), + None => assert!(result.is_none()), + } + Ok(()) + } + } + + /// A utitily struct, test fixture + /// used for testing the transform on `Transform` + pub(crate) struct TestTransformFixture { + pub display: String, + pub json: String, + pub dedup_name: String, + pub preserves_order: bool, + pub satisfies_order_of: Vec<(Transform, bool)>, + pub trans_types: Vec<(Type, Option)>, + } + + impl TestTransformFixture { + pub(crate) fn assert_transform(&self, trans: Transform) { + assert_eq!(self.display, format!("{trans}")); + assert_eq!(self.json, serde_json::to_string(&trans).unwrap()); + assert_eq!(trans, serde_json::from_str(self.json.as_str()).unwrap()); + assert_eq!(self.dedup_name, trans.dedup_name()); + assert_eq!(self.preserves_order, trans.preserves_order()); + + for (other_trans, satisfies_order_of) in &self.satisfies_order_of { + assert_eq!( + satisfies_order_of, + &trans.satisfies_order_of(other_trans), + "Failed to check satisfies order {}, {}, {}", + trans, + other_trans, + 
satisfies_order_of + ); + } + + for (input_type, result_type) in &self.trans_types { + assert_eq!(result_type, &trans.result_type(input_type).ok()); + } + } + } +} diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index 9c6489e91..0cbdde076 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -294,11 +294,1995 @@ mod test { use chrono::{NaiveDate, NaiveDateTime}; use std::sync::Arc; + use crate::spec::PrimitiveType::{ + Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, + Timestamptz, Uuid, + }; + use crate::spec::StructType; + use crate::spec::Type::{Primitive, Struct}; + + use crate::transform::test::TestTransformFixture; use crate::{ - spec::Datum, - transform::{BoxedTransformFunction, TransformFunction}, + expr::PredicateOperator, + spec::{Datum, NestedField, PrimitiveType, Transform, Type}, + transform::{test::TestProjectionFixture, BoxedTransformFunction, TransformFunction}, + Result, }; + #[test] + fn test_year_transform() { + let trans = Transform::Year; + + let fixture = TestTransformFixture { + display: "year".to_string(), + json: r#""year""#.to_string(), + dedup_name: "time".to_string(), + preserves_order: true, + satisfies_order_of: vec![ + (Transform::Year, true), + (Transform::Month, false), + (Transform::Day, false), + (Transform::Hour, false), + (Transform::Void, false), + (Transform::Identity, false), + ], + trans_types: vec![ + (Primitive(Binary), None), + (Primitive(Date), Some(Primitive(Int))), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + None, + ), + (Primitive(Fixed(8)), None), + (Primitive(Int), None), + (Primitive(Long), None), + (Primitive(StringType), None), + (Primitive(Uuid), None), + (Primitive(Time), None), + (Primitive(Timestamp), Some(Primitive(Int))), + (Primitive(Timestamptz), Some(Primitive(Int))), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), 
/// Verifies the metadata of `Transform::Month`: display/JSON name, dedup
/// name, order preservation, which transforms' ordering it satisfies, and
/// the result type for each input type.
#[test]
fn test_month_transform() {
    let decimal = Primitive(Decimal {
        precision: 8,
        scale: 5,
    });
    let nested = Struct(StructType::new(vec![NestedField::optional(
        1,
        "a",
        Primitive(Timestamp),
    )
    .into()]));

    let satisfies_order_of = vec![
        (Transform::Year, true),
        (Transform::Month, true),
        (Transform::Day, false),
        (Transform::Hour, false),
        (Transform::Void, false),
        (Transform::Identity, false),
    ];

    // `None` marks input types for which `result_type` is expected to fail.
    let trans_types = vec![
        (Primitive(Binary), None),
        (Primitive(Date), Some(Primitive(Int))),
        (decimal, None),
        (Primitive(Fixed(8)), None),
        (Primitive(Int), None),
        (Primitive(Long), None),
        (Primitive(StringType), None),
        (Primitive(Uuid), None),
        (Primitive(Time), None),
        (Primitive(Timestamp), Some(Primitive(Int))),
        (Primitive(Timestamptz), Some(Primitive(Int))),
        (nested, None),
    ];

    let expectations = TestTransformFixture {
        display: String::from("month"),
        json: String::from(r#""month""#),
        dedup_name: String::from("time"),
        preserves_order: true,
        satisfies_order_of,
        trans_types,
    };

    expectations.assert_transform(Transform::Month);
}
/// Verifies the metadata of `Transform::Hour`: display/JSON name, dedup
/// name, order preservation, which transforms' ordering it satisfies, and
/// the result type for each input type.
#[test]
fn test_hour_transform() {
    let decimal = Primitive(Decimal {
        precision: 8,
        scale: 5,
    });
    let nested = Struct(StructType::new(vec![NestedField::optional(
        1,
        "a",
        Primitive(Timestamp),
    )
    .into()]));

    let satisfies_order_of = vec![
        (Transform::Year, true),
        (Transform::Month, true),
        (Transform::Day, true),
        (Transform::Hour, true),
        (Transform::Void, false),
        (Transform::Identity, false),
    ];

    // `None` marks input types for which `result_type` is expected to fail;
    // note that `Date` is among them for the hour transform.
    let trans_types = vec![
        (Primitive(Binary), None),
        (Primitive(Date), None),
        (decimal, None),
        (Primitive(Fixed(8)), None),
        (Primitive(Int), None),
        (Primitive(Long), None),
        (Primitive(StringType), None),
        (Primitive(Uuid), None),
        (Primitive(Time), None),
        (Primitive(Timestamp), Some(Primitive(Int))),
        (Primitive(Timestamptz), Some(Primitive(Int))),
        (nested, None),
    ];

    let expectations = TestTransformFixture {
        display: String::from("hour"),
        json: String::from(r#""hour""#),
        dedup_name: String::from("time"),
        preserves_order: true,
        satisfies_order_of,
        trans_types,
    };

    expectations.assert_transform(Transform::Hour);
}
/// Checks `hour` projection of timestamp predicates when the literal is the
/// first microsecond of its hour.
#[test]
fn test_projection_timestamp_hour_lower_bound() -> Result<()> {
    // 420034
    let value = "2017-12-01T10:00:00.000000";
    // 411288
    let another = "2016-12-02T00:00:00.000000";

    let column = NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp));
    let fixture = TestProjectionFixture::new(Transform::Hour, "name", column);

    // Binary predicates against `value`, in the order they are asserted.
    let binary_cases = [
        (PredicateOperator::LessThan, Some("name <= 420033")),
        (PredicateOperator::LessThanOrEq, Some("name <= 420034")),
        (PredicateOperator::GreaterThan, Some("name >= 420034")),
        (PredicateOperator::GreaterThanOrEq, Some("name >= 420034")),
        (PredicateOperator::Eq, Some("name = 420034")),
        (PredicateOperator::NotEq, None),
    ];
    for (op, expected) in binary_cases {
        let predicate = fixture.binary_predicate(op, Datum::timestamp_from_str(value)?);
        fixture.assert_projection(&predicate, expected)?;
    }

    // Set predicates over `{value, another}`.
    let set_cases = [
        (PredicateOperator::In, Some("name IN (420034, 411288)")),
        (PredicateOperator::NotIn, None),
    ];
    for (op, expected) in set_cases {
        let literals = vec![
            Datum::timestamp_from_str(value)?,
            Datum::timestamp_from_str(another)?,
        ];
        let predicate = fixture.set_predicate(op, literals);
        fixture.assert_projection(&predicate, expected)?;
    }

    Ok(())
}
/// Checks `year` projection of timestamp predicates when the literal is the
/// first microsecond of its year.
#[test]
fn test_projection_timestamp_year_lower_bound() -> Result<()> {
    let value = "2017-01-01T00:00:00.000000";
    let another = "2016-12-02T00:00:00.000000";

    let column = NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp));
    let fixture = TestProjectionFixture::new(Transform::Year, "name", column);

    // Binary predicates against `value`, in the order they are asserted.
    let binary_cases = [
        (PredicateOperator::LessThan, Some("name <= 46")),
        (PredicateOperator::LessThanOrEq, Some("name <= 47")),
        (PredicateOperator::GreaterThan, Some("name >= 47")),
        (PredicateOperator::GreaterThanOrEq, Some("name >= 47")),
        (PredicateOperator::Eq, Some("name = 47")),
        (PredicateOperator::NotEq, None),
    ];
    for (op, expected) in binary_cases {
        let predicate = fixture.binary_predicate(op, Datum::timestamp_from_str(value)?);
        fixture.assert_projection(&predicate, expected)?;
    }

    // Set predicates over `{value, another}`.
    let set_cases = [
        (PredicateOperator::In, Some("name IN (47, 46)")),
        (PredicateOperator::NotIn, None),
    ];
    for (op, expected) in set_cases {
        let literals = vec![
            Datum::timestamp_from_str(value)?,
            Datum::timestamp_from_str(another)?,
        ];
        let predicate = fixture.set_predicate(op, literals);
        fixture.assert_projection(&predicate, expected)?;
    }

    Ok(())
}
/// Checks `month` projection of timestamp predicates for a pre-1970 literal
/// that is the first microsecond of its month.
///
/// The expected strings pin the current output, including the cases where
/// `Eq` and `In` project to a set wider than the number of input literals.
#[test]
fn test_projection_timestamp_month_negative_lower_bound() -> Result<()> {
    let value = "1969-01-01T00:00:00.000000";
    let another = "1969-03-01T00:00:00.000000";

    let column = NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp));
    let fixture = TestProjectionFixture::new(Transform::Month, "name", column);

    // Binary predicates against `value`, in the order they are asserted.
    let binary_cases = [
        (PredicateOperator::LessThan, Some("name <= -12")),
        (PredicateOperator::LessThanOrEq, Some("name <= -11")),
        (PredicateOperator::GreaterThan, Some("name >= -12")),
        (PredicateOperator::GreaterThanOrEq, Some("name >= -12")),
        (PredicateOperator::Eq, Some("name IN (-12, -11)")),
        (PredicateOperator::NotEq, None),
    ];
    for (op, expected) in binary_cases {
        let predicate = fixture.binary_predicate(op, Datum::timestamp_from_str(value)?);
        fixture.assert_projection(&predicate, expected)?;
    }

    // Set predicates over `{value, another}`.
    let set_cases = [
        (PredicateOperator::In, Some("name IN (-10, -9, -12, -11)")),
        (PredicateOperator::NotIn, None),
    ];
    for (op, expected) in set_cases {
        let literals = vec![
            Datum::timestamp_from_str(value)?,
            Datum::timestamp_from_str(another)?,
        ];
        let predicate = fixture.set_predicate(op, literals);
        fixture.assert_projection(&predicate, expected)?;
    }

    Ok(())
}
/// Checks `day` projection of timestamp predicates for a pre-1970 literal
/// that is the last microsecond of its day.
///
/// The expected strings pin the current output, including `Eq` projecting
/// to a two-element `IN` set.
#[test]
fn test_projection_timestamp_day_negative_upper_bound() -> Result<()> {
    // -1
    let value = "1969-12-31T23:59:59.999999";
    // 0
    let another = "1970-01-01T00:00:00.000000";

    let column = NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp));
    let fixture = TestProjectionFixture::new(Transform::Day, "name", column);

    // Binary predicates against `value`, in the order they are asserted.
    let binary_cases = [
        (PredicateOperator::LessThan, Some("name <= 0")),
        (PredicateOperator::LessThanOrEq, Some("name <= 0")),
        (PredicateOperator::GreaterThan, Some("name >= 0")),
        (PredicateOperator::GreaterThanOrEq, Some("name >= -1")),
        (PredicateOperator::Eq, Some("name IN (-1, 0)")),
        (PredicateOperator::NotEq, None),
    ];
    for (op, expected) in binary_cases {
        let predicate = fixture.binary_predicate(op, Datum::timestamp_from_str(value)?);
        fixture.assert_projection(&predicate, expected)?;
    }

    // Set predicates over `{value, another}`.
    let set_cases = [
        (PredicateOperator::In, Some("name IN (0, -1)")),
        (PredicateOperator::NotIn, None),
    ];
    for (op, expected) in set_cases {
        let literals = vec![
            Datum::timestamp_from_str(value)?,
            Datum::timestamp_from_str(another)?,
        ];
        let predicate = fixture.set_predicate(op, literals);
        fixture.assert_projection(&predicate, expected)?;
    }

    Ok(())
}
/// Checks `day` projection of timestamp predicates for a pre-1970 literal
/// that is the first microsecond of its day.
///
/// The expected strings pin the current output, including the cases where
/// `Eq` and `In` project to a set wider than the number of input literals.
#[test]
fn test_projection_timestamp_day_negative_lower_bound() -> Result<()> {
    // -365
    let value = "1969-01-01T00:00:00.000000";
    // -364
    let another = "1969-01-02T00:00:00.000000";

    let column = NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp));
    let fixture = TestProjectionFixture::new(Transform::Day, "name", column);

    // Binary predicates against `value`, in the order they are asserted.
    let binary_cases = [
        (PredicateOperator::LessThan, Some("name <= -365")),
        (PredicateOperator::LessThanOrEq, Some("name <= -364")),
        (PredicateOperator::GreaterThan, Some("name >= -365")),
        (PredicateOperator::GreaterThanOrEq, Some("name >= -365")),
        (PredicateOperator::Eq, Some("name IN (-364, -365)")),
        (PredicateOperator::NotEq, None),
    ];
    for (op, expected) in binary_cases {
        let predicate = fixture.binary_predicate(op, Datum::timestamp_from_str(value)?);
        fixture.assert_projection(&predicate, expected)?;
    }

    // Set predicates over `{value, another}`.
    let set_cases = [
        (PredicateOperator::In, Some("name IN (-363, -364, -365)")),
        (PredicateOperator::NotIn, None),
    ];
    for (op, expected) in set_cases {
        let literals = vec![
            Datum::timestamp_from_str(value)?,
            Datum::timestamp_from_str(another)?,
        ];
        let predicate = fixture.set_predicate(op, literals);
        fixture.assert_projection(&predicate, expected)?;
    }

    Ok(())
}
"1970-01-01T00:00:00.00000"; + // 1 + let another = "1970-01-02T00:00:00.00000"; + + let fixture = TestProjectionFixture::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Timestamp)), + ); + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThan, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThan, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::timestamp_from_str(value)?, + ), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::timestamp_from_str(value)?), + Some("name = 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::timestamp_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + Some("name IN (1, 0)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_day_negative() -> Result<()> { + // -2 + let value = "1969-12-30"; + // -4 + let another = "1969-12-28"; + + let fixture = TestProjectionFixture::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, 
Datum::date_from_str(value)?), + Some("name <= -3"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= -2"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= -1"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= -2"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = -2"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (-2, -4)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_day() -> Result<()> { + // 17167 + let value = "2017-01-01"; + // 17531 + let another = "2017-12-31"; + + let fixture = TestProjectionFixture::new( + Transform::Day, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 17166"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 17167"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 17168"), + )?; + + fixture.assert_projection( + 
&fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 17167"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 17167"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (17531, 17167)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_negative_upper_bound() -> Result<()> { + // -1 => 1969-12 + let value = "1969-12-31"; + // -12 => 1969-01 + let another = "1969-01-01"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= -1"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name IN (-1, 0)"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, 
Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (-1, -12, -11, 0)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_upper_bound() -> Result<()> { + // 575 => 2017-12 + let value = "2017-12-31"; + // 564 => 2017-01 + let another = "2017-01-01"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 576"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (575, 564)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, 
Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_negative_lower_bound() -> Result<()> { + // -12 => 1969-01 + let value = "1969-01-01"; + // -1 => 1969-12 + let another = "1969-12-31"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= -12"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= -11"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= -12"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= -12"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name IN (-12, -11)"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (-1, -12, -11, 0)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_lower_bound() -> Result<()> { + // 575 => 2017-12 + let value = "2017-12-01"; + // 564 => 2017-01 + let another = "2017-01-01"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", 
Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 574"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 575"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (575, 564)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_month_epoch() -> Result<()> { + // 0 => 1970-01 + let value = "1970-01-01"; + // -1 => 1969-12 + let another = "1969-12-31"; + + let fixture = TestProjectionFixture::new( + Transform::Month, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + 
&fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (0, -1)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_negative_upper_bound() -> Result<()> { + // -1 => 1969 + let value = "1969-12-31"; + let another = "1969-01-01"; + + let fixture = TestProjectionFixture::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= -1"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name IN (-1, 0)"), + )?; 
+ + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (0, -1)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_upper_bound() -> Result<()> { + // 47 => 2017 + let value = "2017-12-31"; + // 46 => 2016 + let another = "2016-01-01"; + + let fixture = TestProjectionFixture::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 48"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (47, 46)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + 
vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_negative_lower_bound() -> Result<()> { + // 0 => 1970 + let value = "1970-01-01"; + // -1 => 1969 + let another = "1969-12-31"; + + let fixture = TestProjectionFixture::new( + Transform::Year, + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 0"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (0, -1)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_date_year_lower_bound() -> Result<()> { + // 47 => 2017 + let value = "2017-01-01"; + // 46 => 2016 + let another = "2016-12-31"; + + let fixture = TestProjectionFixture::new( + Transform::Year, + "name", + NestedField::required(1, "value", 
Type::Primitive(PrimitiveType::Date)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::date_from_str(value)?), + Some("name <= 46"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name <= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::date_from_str(value)?), + Some("name >= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::date_from_str(value)?, + ), + Some("name >= 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::date_from_str(value)?), + Some("name = 47"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::date_from_str(value)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + Some("name IN (47, 46)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), + None, + )?; + + Ok(()) + } + #[test] fn test_transform_years() { let year = super::Year; diff --git a/crates/iceberg/src/transform/truncate.rs b/crates/iceberg/src/transform/truncate.rs index 767ca0036..4d163b11f 100644 --- a/crates/iceberg/src/transform/truncate.rs +++ b/crates/iceberg/src/transform/truncate.rs @@ -170,11 +170,547 @@ impl TransformFunction for Truncate { mod test { use std::sync::Arc; + use crate::spec::PrimitiveType::{ + Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, + Timestamptz, Uuid, + }; + use crate::spec::StructType; + use crate::spec::Type::{Primitive, Struct}; + use crate::transform::test::TestTransformFixture; use arrow_array::{ 
builder::PrimitiveBuilder, types::Decimal128Type, Decimal128Array, Int32Array, Int64Array, }; - use crate::{spec::Datum, transform::TransformFunction}; + use crate::{ + expr::PredicateOperator, + spec::{Datum, NestedField, PrimitiveType, Transform, Type}, + transform::{test::TestProjectionFixture, TransformFunction}, + Result, + }; + + #[test] + fn test_truncate_transform() { + let trans = Transform::Truncate(4); + + let fixture = TestTransformFixture { + display: "truncate[4]".to_string(), + json: r#""truncate[4]""#.to_string(), + dedup_name: "truncate[4]".to_string(), + preserves_order: true, + satisfies_order_of: vec![ + (Transform::Truncate(4), true), + (Transform::Truncate(2), false), + (Transform::Bucket(4), false), + (Transform::Void, false), + (Transform::Day, false), + ], + trans_types: vec![ + (Primitive(Binary), Some(Primitive(Binary))), + (Primitive(Date), None), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + Some(Primitive(Decimal { + precision: 8, + scale: 5, + })), + ), + (Primitive(Fixed(8)), None), + (Primitive(Int), Some(Primitive(Int))), + (Primitive(Long), Some(Primitive(Long))), + (Primitive(StringType), Some(Primitive(StringType))), + (Primitive(Uuid), None), + (Primitive(Time), None), + (Primitive(Timestamp), None), + (Primitive(Timestamptz), None), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + None, + ), + ], + }; + + fixture.assert_transform(trans); + } + + #[test] + fn test_projection_truncate_string_rewrite_op() -> Result<()> { + let value = "abcde"; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(5), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::StartsWith, Datum::string(value)), + Some(r#"name = "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotStartsWith, 
Datum::string(value)), + Some(r#"name != "abcde""#), + )?; + + let value = "abcdefg"; + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::StartsWith, Datum::string(value)), + Some(r#"name STARTS WITH "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotStartsWith, Datum::string(value)), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_string() -> Result<()> { + let value = "abcdefg"; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(5), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::String)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::string(value)), + Some(r#"name <= "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::string(value)), + Some(r#"name <= "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThan, Datum::string(value)), + Some(r#"name >= "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::string(value)), + Some(r#"name >= "abcde""#), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::string(value)), + Some(r#"name = "abcde""#), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::string(value), Datum::string(format!("{}abc", value))], + ), + Some(r#"name IN ("abcde")"#), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::string(value), Datum::string(format!("{}abc", value))], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_upper_bound_decimal() -> Result<()> { + let prev = "98.99"; + let curr = "99.99"; + let next = "100.99"; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", 
+ NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Decimal { + precision: 9, + scale: 2, + }), + ), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::decimal_from_str(curr)?), + Some("name <= 9990"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::decimal_from_str(curr)?, + ), + Some("name <= 9990"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::decimal_from_str(curr)?, + ), + Some("name >= 9990"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(curr)?), + Some("name = 9990"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str(curr)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str(prev)?, + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + Some("name IN (10090, 9990, 9890)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_lower_bound_decimal() -> Result<()> { + let prev = "99.00"; + let curr = "100.00"; + let next = "101.00"; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", + NestedField::required( + 1, + "value", + Type::Primitive(PrimitiveType::Decimal { + precision: 9, + scale: 2, + }), + ), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::decimal_from_str(curr)?), + Some("name <= 9990"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::LessThanOrEq, + Datum::decimal_from_str(curr)?, + ), + Some("name <= 
10000"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate( + PredicateOperator::GreaterThanOrEq, + Datum::decimal_from_str(curr)?, + ), + Some("name >= 10000"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::decimal_from_str(curr)?), + Some("name = 10000"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::decimal_from_str(curr)?), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str(prev)?, + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + Some("name IN (9900, 10000, 10100)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_upper_bound_long() -> Result<()> { + let value = 99i64; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), + Some("name <= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), + Some("name <= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), + Some("name >= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), + Some("name = 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value 
+ 1), + ], + ), + Some("name IN (100, 90)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(value), Datum::long(value + 1)], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_lower_bound_long() -> Result<()> { + let value = 100i64; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Long)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::long(value)), + Some("name <= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::long(value)), + Some("name <= 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::long(value)), + Some("name >= 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::long(value)), + Some("name = 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::long(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value + 1), + ], + ), + Some("name IN (100, 90)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(value), Datum::long(value + 1)], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_upper_bound_integer() -> Result<()> { + let value = 99; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), + Some("name <= 90"), + )?; + + fixture.assert_projection( + 
&fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), + Some("name <= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), + Some("name >= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), + Some("name = 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], + ), + Some("name IN (100, 90)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), + None, + )?; + + Ok(()) + } + + #[test] + fn test_projection_truncate_lower_bound_integer() -> Result<()> { + let value = 100; + + let fixture = TestProjectionFixture::new( + Transform::Truncate(10), + "name", + NestedField::required(1, "value", Type::Primitive(PrimitiveType::Int)), + ); + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThan, Datum::int(value)), + Some("name <= 90"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::LessThanOrEq, Datum::int(value)), + Some("name <= 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::GreaterThanOrEq, Datum::int(value)), + Some("name >= 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::Eq, Datum::int(value)), + Some("name = 100"), + )?; + + fixture.assert_projection( + &fixture.binary_predicate(PredicateOperator::NotEq, Datum::int(value)), + None, + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], + ), + Some("name IN (100, 
90)"), + )?; + + fixture.assert_projection( + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), + None, + )?; + + Ok(()) + } // Test case ref from: https://iceberg.apache.org/spec/#truncate-transform-details #[test] diff --git a/crates/iceberg/src/transform/void.rs b/crates/iceberg/src/transform/void.rs index 7cbee27ca..5a631cdc4 100644 --- a/crates/iceberg/src/transform/void.rs +++ b/crates/iceberg/src/transform/void.rs @@ -32,3 +32,125 @@ impl TransformFunction for Void { Ok(None) } } + +#[cfg(test)] +mod test { + use crate::spec::PrimitiveType::{ + Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, + Timestamptz, Uuid, + }; + use crate::spec::StructType; + use crate::spec::Type::{Primitive, Struct}; + use crate::transform::test::TestTransformFixture; + + use crate::spec::{NestedField, Transform}; + + #[test] + fn test_void_transform() { + let trans = Transform::Void; + + let fixture = TestTransformFixture { + display: "void".to_string(), + json: r#""void""#.to_string(), + dedup_name: "void".to_string(), + preserves_order: false, + satisfies_order_of: vec![ + (Transform::Year, false), + (Transform::Month, false), + (Transform::Day, false), + (Transform::Hour, false), + (Transform::Void, true), + (Transform::Identity, false), + ], + trans_types: vec![ + (Primitive(Binary), Some(Primitive(Binary))), + (Primitive(Date), Some(Primitive(Date))), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + Some(Primitive(Decimal { + precision: 8, + scale: 5, + })), + ), + (Primitive(Fixed(8)), Some(Primitive(Fixed(8)))), + (Primitive(Int), Some(Primitive(Int))), + (Primitive(Long), Some(Primitive(Long))), + (Primitive(StringType), Some(Primitive(StringType))), + (Primitive(Uuid), Some(Primitive(Uuid))), + (Primitive(Time), Some(Primitive(Time))), + (Primitive(Timestamp), Some(Primitive(Timestamp))), + (Primitive(Timestamptz), Some(Primitive(Timestamptz))), + ( + 
Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + Some(Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()]))), + ), + ], + }; + + fixture.assert_transform(trans); + } + + #[test] + fn test_known_transform() { + let trans = Transform::Unknown; + + let fixture = TestTransformFixture { + display: "unknown".to_string(), + json: r#""unknown""#.to_string(), + dedup_name: "unknown".to_string(), + preserves_order: false, + satisfies_order_of: vec![ + (Transform::Year, false), + (Transform::Month, false), + (Transform::Day, false), + (Transform::Hour, false), + (Transform::Void, false), + (Transform::Identity, false), + (Transform::Unknown, true), + ], + trans_types: vec![ + (Primitive(Binary), Some(Primitive(StringType))), + (Primitive(Date), Some(Primitive(StringType))), + ( + Primitive(Decimal { + precision: 8, + scale: 5, + }), + Some(Primitive(StringType)), + ), + (Primitive(Fixed(8)), Some(Primitive(StringType))), + (Primitive(Int), Some(Primitive(StringType))), + (Primitive(Long), Some(Primitive(StringType))), + (Primitive(StringType), Some(Primitive(StringType))), + (Primitive(Uuid), Some(Primitive(StringType))), + (Primitive(Time), Some(Primitive(StringType))), + (Primitive(Timestamp), Some(Primitive(StringType))), + (Primitive(Timestamptz), Some(Primitive(StringType))), + ( + Struct(StructType::new(vec![NestedField::optional( + 1, + "a", + Primitive(Timestamp), + ) + .into()])), + Some(Primitive(StringType)), + ), + ], + }; + + fixture.assert_transform(trans); + } +}