Skip to content

Commit

Permalink
refine
Browse files Browse the repository at this point in the history
  • Loading branch information
ZENOTME committed Mar 25, 2024
1 parent ccf2122 commit eda5787
Show file tree
Hide file tree
Showing 7 changed files with 267 additions and 459 deletions.
12 changes: 11 additions & 1 deletion crates/iceberg/src/spec/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ pub enum PrimitiveLiteral {
///
/// By default, we decouple the type and value of a literal, so we can avoid the cost of storing extra type info
/// for each literal. But associating a type with a literal can be useful in some cases, for example, in unbound expressions.
#[derive(Debug, PartialEq, Hash, Eq)]
#[derive(Debug, PartialEq, Hash, Eq, Clone)]
pub struct Datum {
r#type: PrimitiveType,
literal: PrimitiveLiteral,
Expand Down Expand Up @@ -673,6 +673,16 @@ impl Datum {
)),
}
}

/// Returns a reference to the [`PrimitiveLiteral`] value held by this datum.
///
/// The type information is not included; use [`Datum::data_type`] to obtain it.
pub fn literal(&self) -> &PrimitiveLiteral {
&self.literal
}

/// Returns a reference to the [`PrimitiveType`] associated with this datum.
pub fn data_type(&self) -> &PrimitiveType {
&self.r#type
}
}

/// Values present in iceberg type
Expand Down
144 changes: 55 additions & 89 deletions crates/iceberg/src/transform/bucket.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use std::sync::Arc;
use arrow_array::ArrayRef;
use arrow_schema::{DataType, TimeUnit};

use crate::spec::{Literal, PrimitiveLiteral};
use crate::spec::{Datum, PrimitiveLiteral};

use super::TransformFunction;

Expand Down Expand Up @@ -208,61 +208,53 @@ impl TransformFunction for Bucket {
.iter()
.map(|v| v.map(|v| self.bucket_bytes(v))),
),
_ => unreachable!("Unsupported data type: {:?}", input.data_type()),
_ => {
return Err(crate::Error::new(
crate::ErrorKind::FeatureUnsupported,
format!(
"Unsupported data type for bucket transform: {:?}",
input.data_type()
),
))
}
};
Ok(Arc::new(res))
}

fn transform_literal(&self, input: &Literal) -> crate::Result<Option<Literal>> {
match input {
Literal::Primitive(PrimitiveLiteral::Int(v)) => Ok(Some(Literal::Primitive(
PrimitiveLiteral::Int(self.bucket_int(*v)),
))),
Literal::Primitive(PrimitiveLiteral::Long(v)) => Ok(Some(Literal::Primitive(
PrimitiveLiteral::Int(self.bucket_long(*v)),
))),
Literal::Primitive(PrimitiveLiteral::Decimal(v)) => Ok(Some(Literal::Primitive(
PrimitiveLiteral::Int(self.bucket_decimal(*v)),
))),
Literal::Primitive(PrimitiveLiteral::Date(v)) => Ok(Some(Literal::Primitive(
PrimitiveLiteral::Int(self.bucket_date(*v)),
))),
Literal::Primitive(PrimitiveLiteral::Time(v)) => Ok(Some(Literal::Primitive(
PrimitiveLiteral::Int(self.bucket_time(*v)),
))),
Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive(
PrimitiveLiteral::Int(self.bucket_timestamp(*v)),
))),
Literal::Primitive(PrimitiveLiteral::String(v)) => Ok(Some(Literal::Primitive(
PrimitiveLiteral::Int(self.bucket_str(v)),
))),
Literal::Primitive(PrimitiveLiteral::UUID(v)) => Ok(Some(Literal::Primitive(
PrimitiveLiteral::Int(self.bucket_bytes(v.as_ref())),
))),
Literal::Primitive(PrimitiveLiteral::Binary(v)) => Ok(Some(Literal::Primitive(
PrimitiveLiteral::Int(self.bucket_bytes(v.as_ref())),
))),
Literal::Primitive(PrimitiveLiteral::Fixed(v)) => Ok(Some(Literal::Primitive(
PrimitiveLiteral::Int(self.bucket_bytes(v.as_ref())),
))),
_ => unreachable!("Unsupported literal: {:?}", input),
}
fn transform_literal(&self, input: &Datum) -> crate::Result<Option<Datum>> {
let val = match input.literal() {
PrimitiveLiteral::Int(v) => self.bucket_int(*v),
PrimitiveLiteral::Long(v) => self.bucket_long(*v),
PrimitiveLiteral::Decimal(v) => self.bucket_decimal(*v),
PrimitiveLiteral::Date(v) => self.bucket_date(*v),
PrimitiveLiteral::Time(v) => self.bucket_time(*v),
PrimitiveLiteral::Timestamp(v) => self.bucket_timestamp(*v),
PrimitiveLiteral::String(v) => self.bucket_str(v.as_str()),
PrimitiveLiteral::UUID(v) => self.bucket_bytes(v.as_ref()),
PrimitiveLiteral::Binary(v) => self.bucket_bytes(v.as_ref()),
PrimitiveLiteral::Fixed(v) => self.bucket_bytes(v.as_ref()),
_ => {
return Err(crate::Error::new(
crate::ErrorKind::FeatureUnsupported,
format!(
"Unsupported data type for bucket transform: {:?}",
input.data_type()
),
))
}
};
Ok(Some(Datum::int(val)))
}
}

#[cfg(test)]
mod test {
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime};

use crate::transform::TransformFunction;
use crate::{spec::Datum, transform::TransformFunction};

use super::Bucket;
// Ad-hoc smoke test: prints the result of `bucket_n(-653330422)` on `Bucket::new(10)`.
// It makes no assertions, so it can only fail if one of the calls panics.
#[test]
fn t() {
let bucket = Bucket::new(10);
println!("{}", bucket.bucket_n(-653330422));
}
#[test]
fn test_hash() {
// test int
assert_eq!(Bucket::hash_int(34), 2017239379);
Expand Down Expand Up @@ -341,27 +333,17 @@ mod test {
fn test_int_literal() {
let bucket = Bucket::new(10);
assert_eq!(
bucket
.transform_literal(&crate::spec::Literal::Primitive(
crate::spec::PrimitiveLiteral::Int(34)
))
.unwrap()
.unwrap(),
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(9))
bucket.transform_literal(&Datum::int(34)).unwrap().unwrap(),
Datum::int(9)
);
}

#[test]
fn test_long_literal() {
let bucket = Bucket::new(10);
assert_eq!(
bucket
.transform_literal(&crate::spec::Literal::Primitive(
crate::spec::PrimitiveLiteral::Long(34)
))
.unwrap()
.unwrap(),
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(9))
bucket.transform_literal(&Datum::long(34)).unwrap().unwrap(),
Datum::int(9)
);
}

Expand All @@ -370,12 +352,10 @@ mod test {
let bucket = Bucket::new(10);
assert_eq!(
bucket
.transform_literal(&crate::spec::Literal::Primitive(
crate::spec::PrimitiveLiteral::Decimal(1420)
))
.transform_literal(&Datum::decimal(1420).unwrap())
.unwrap()
.unwrap(),
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(9))
Datum::int(9)
);
}

Expand All @@ -384,12 +364,10 @@ mod test {
let bucket = Bucket::new(100);
assert_eq!(
bucket
.transform_literal(&crate::spec::Literal::Primitive(
crate::spec::PrimitiveLiteral::Date(17486)
))
.transform_literal(&Datum::date(17486))
.unwrap()
.unwrap(),
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(26))
Datum::int(26)
);
}

Expand All @@ -398,12 +376,10 @@ mod test {
let bucket = Bucket::new(100);
assert_eq!(
bucket
.transform_literal(&crate::spec::Literal::Primitive(
crate::spec::PrimitiveLiteral::Time(81068000000)
))
.transform_literal(&Datum::time_micros(81068000000).unwrap())
.unwrap()
.unwrap(),
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(59))
Datum::int(59)
);
}

Expand All @@ -412,12 +388,10 @@ mod test {
let bucket = Bucket::new(100);
assert_eq!(
bucket
.transform_literal(&crate::spec::Literal::Primitive(
crate::spec::PrimitiveLiteral::Timestamp(1510871468000000)
))
.transform_literal(&Datum::timestamp_micros(1510871468000000))
.unwrap()
.unwrap(),
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(7))
Datum::int(7)
);
}

Expand All @@ -426,12 +400,10 @@ mod test {
let bucket = Bucket::new(100);
assert_eq!(
bucket
.transform_literal(&crate::spec::Literal::Primitive(
crate::spec::PrimitiveLiteral::String("iceberg".to_string())
))
.transform_literal(&Datum::string("iceberg"))
.unwrap()
.unwrap(),
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(89))
Datum::int(89)
);
}

Expand All @@ -440,14 +412,12 @@ mod test {
let bucket = Bucket::new(100);
assert_eq!(
bucket
.transform_literal(&crate::spec::Literal::Primitive(
crate::spec::PrimitiveLiteral::UUID(
"F79C3E09-677C-4BBD-A479-3F349CB785E7".parse().unwrap()
)
.transform_literal(&Datum::uuid(
"F79C3E09-677C-4BBD-A479-3F349CB785E7".parse().unwrap()
))
.unwrap()
.unwrap(),
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(40))
Datum::int(40)
);
}

Expand All @@ -456,12 +426,10 @@ mod test {
let bucket = Bucket::new(128);
assert_eq!(
bucket
.transform_literal(&crate::spec::Literal::Primitive(
crate::spec::PrimitiveLiteral::Binary(b"\x00\x01\x02\x03".to_vec())
))
.transform_literal(&Datum::binary(b"\x00\x01\x02\x03".to_vec()))
.unwrap()
.unwrap(),
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(57))
Datum::int(57)
);
}

Expand All @@ -470,12 +438,10 @@ mod test {
let bucket = Bucket::new(128);
assert_eq!(
bucket
.transform_literal(&crate::spec::Literal::Primitive(
crate::spec::PrimitiveLiteral::Fixed(b"foo".to_vec())
))
.transform_literal(&Datum::fixed(b"foo".to_vec()))
.unwrap()
.unwrap(),
crate::spec::Literal::Primitive(crate::spec::PrimitiveLiteral::Int(32))
Datum::int(32)
);
}
}
5 changes: 1 addition & 4 deletions crates/iceberg/src/transform/identity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@ impl TransformFunction for Identity {
Ok(input)
}

fn transform_literal(
&self,
input: &crate::spec::Literal,
) -> Result<Option<crate::spec::Literal>> {
/// Returns a clone of `input` unchanged, wrapped in `Ok(Some(..))`.
fn transform_literal(&self, input: &crate::spec::Datum) -> Result<Option<crate::spec::Datum>> {
Ok(Some(input.clone()))
}
}
4 changes: 2 additions & 2 deletions crates/iceberg/src/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! Transform function used to compute partition values.
use crate::{
spec::{Literal, Transform},
spec::{Datum, Transform},
Result,
};
use arrow_array::ArrayRef;
Expand All @@ -35,7 +35,7 @@ pub trait TransformFunction: Send {
/// type.
fn transform(&self, input: ArrayRef) -> Result<ArrayRef>;
/// transform_literal will take an input literal and transform it into a new literal.
fn transform_literal(&self, input: &Literal) -> Result<Option<Literal>>;
fn transform_literal(&self, input: &Datum) -> Result<Option<Datum>>;
}

/// BoxedTransformFunction is a boxed trait object of TransformFunction.
Expand Down
Loading

0 comments on commit eda5787

Please sign in to comment.