Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions crates/catalog/glue/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,13 @@ impl SchemaVisitor for GlueSchemaBuilder {

Ok(glue_type)
}

fn variant(&mut self, _v: &iceberg::spec::VariantType) -> iceberg::Result<String> {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will work on this in follow-up PRs.

Err(Error::new(
ErrorKind::FeatureUnsupported,
"Conversion from Variant to Glue type is not supported",
))
}
}

#[cfg(test)]
Expand Down
7 changes: 7 additions & 0 deletions crates/catalog/hms/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,13 @@ impl SchemaVisitor for HiveSchemaBuilder {

Ok(hive_type)
}

fn variant(&mut self, _v: &iceberg::spec::VariantType) -> iceberg::Result<String> {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will work on this in follow-up PRs.

Err(Error::new(
ErrorKind::FeatureUnsupported,
"Conversion from Variant to Hive type is not supported",
))
}
}

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion crates/iceberg/src/arrow/reader/projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ impl ArrowReader {
/// Nested types (struct/list/map) are flattened in Parquet's columnar format.
fn include_leaf_field_id(field: &NestedField, field_ids: &mut Vec<i32>) {
match field.field_type.as_ref() {
Type::Primitive(_) => {
Type::Primitive(_) | Type::Variant(_) => {
field_ids.push(field.id);
}
Type::Struct(struct_type) => {
Expand Down
10 changes: 10 additions & 0 deletions crates/iceberg/src/arrow/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,16 @@ impl SchemaVisitor for ToArrowSchemaConverter {
}
}
}

fn variant(
&mut self,
_v: &crate::spec::VariantType,
) -> crate::Result<ArrowSchemaOrFieldOrType> {
Err(crate::Error::new(
crate::ErrorKind::FeatureUnsupported,
"Arrow schema conversion for Variant is not yet implemented",
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will work on this in follow-up PRs.

))
}
}

/// Convert iceberg schema to an arrow schema.
Expand Down
9 changes: 8 additions & 1 deletion crates/iceberg/src/arrow/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use uuid::Uuid;
use super::get_field_id_from_metadata;
use crate::spec::{
ListType, Literal, Map, MapType, NestedField, PartnerAccessor, PrimitiveLiteral, PrimitiveType,
SchemaWithPartnerVisitor, Struct, StructType, Type, visit_struct_with_partner,
SchemaWithPartnerVisitor, Struct, StructType, Type, VariantType, visit_struct_with_partner,
visit_type_with_partner,
};
use crate::{Error, ErrorKind, Result};
Expand Down Expand Up @@ -426,6 +426,13 @@ impl SchemaWithPartnerVisitor<ArrayRef> for ArrowArrayToIcebergStructConverter {
}
}
}

fn variant(&mut self, _v: &VariantType, _partner: &ArrayRef) -> Result<Vec<Option<Literal>>> {
Err(Error::new(
ErrorKind::FeatureUnsupported,
"Arrow value extraction for Variant is not yet implemented",
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will work on this in follow-up PRs.

))
}
}

/// Defines how Arrow fields are matched with Iceberg fields when converting data.
Expand Down
9 changes: 8 additions & 1 deletion crates/iceberg/src/avro/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use serde_json::{Number, Value};

use crate::spec::{
ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, Schema, SchemaVisitor,
StructType, Type, visit_schema,
StructType, Type, VariantType, visit_schema,
};
use crate::{Error, ErrorKind, Result, ensure_data_valid};

Expand Down Expand Up @@ -243,6 +243,13 @@ impl SchemaVisitor for SchemaToAvroSchema {
};
Ok(Either::Left(avro_schema))
}

fn variant(&mut self, _v: &VariantType) -> Result<AvroSchemaOrField> {
Err(Error::new(
ErrorKind::FeatureUnsupported,
"Avro schema conversion for Variant is not yet implemented",
))
}
}

/// Converting iceberg schema to avro schema.
Expand Down
104 changes: 102 additions & 2 deletions crates/iceberg/src/spec/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ mod _decimal {
}

#[derive(Debug, PartialEq, Eq, Clone)]
/// All data types are either primitives or nested types, which are maps, lists, or structs.
/// All data types are either primitives, nested types (maps, lists, structs), or variant.
pub enum Type {
/// Primitive types
Primitive(PrimitiveType),
Expand All @@ -90,6 +90,8 @@ pub enum Type {
List(ListType),
/// Map type
Map(MapType),
/// Variant type (Iceberg v3): semi-structured data carried as a pair of binary blobs.
Variant(VariantType),
}

impl fmt::Display for Type {
Expand All @@ -99,6 +101,7 @@ impl fmt::Display for Type {
Type::Struct(s) => write!(f, "{s}"),
Type::List(_) => write!(f, "list"),
Type::Map(_) => write!(f, "map"),
Type::Variant(v) => write!(f, "{v}"),
}
}
}
Expand All @@ -122,6 +125,12 @@ impl Type {
matches!(self, Type::Struct(_) | Type::List(_) | Type::Map(_))
}

/// Whether the type is variant type.
#[inline(always)]
pub fn is_variant(&self) -> bool {
matches!(self, Type::Variant(_))
}

/// Convert Type to reference of PrimitiveType
pub fn as_primitive_type(&self) -> Option<&PrimitiveType> {
if let Type::Primitive(primitive_type) = self {
Expand All @@ -131,6 +140,15 @@ impl Type {
}
}

/// Borrows the inner [`VariantType`] when this is `Type::Variant`, otherwise `None`.
pub fn as_variant_type(&self) -> Option<&VariantType> {
    match self {
        Type::Variant(variant) => Some(variant),
        _ => None,
    }
}

/// Convert Type to StructType
pub fn to_struct_type(self) -> Option<StructType> {
if let Type::Struct(struct_type) = self {
Expand Down Expand Up @@ -710,6 +728,7 @@ pub(super) mod _serde {
use crate::spec::datatypes::Type::Map;
use crate::spec::datatypes::{
ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type,
VariantType,
};

/// List type for serialization and deserialization
Expand Down Expand Up @@ -737,6 +756,7 @@ pub(super) mod _serde {
value: Cow<'a, Type>,
},
Primitive(PrimitiveType),
Variant(VariantType),
}

impl From<SerdeType<'_>> for Type {
Expand Down Expand Up @@ -775,6 +795,7 @@ pub(super) mod _serde {
Self::Struct(StructType::new(fields.into_owned()))
}
SerdeType::Primitive(p) => Self::Primitive(p),
SerdeType::Variant(v) => Self::Variant(v),
}
}
}
Expand All @@ -801,6 +822,7 @@ pub(super) mod _serde {
fields: Cow::Borrowed(&s.fields),
},
Type::Primitive(p) => SerdeType::Primitive(p.clone()),
Type::Variant(v) => SerdeType::Variant(*v),
}
}
}
Expand Down Expand Up @@ -844,6 +866,49 @@ impl MapType {
}
}

/// The `variant` type from Iceberg spec v3.
///
/// This is a field-less marker: every value of the struct is equal to every other.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct VariantType;

impl VariantType {
    /// The type's name as a string: `"variant"`.
    pub const NAME: &'static str = "variant";
}

/// Renders the type as its name, `VariantType::NAME`.
impl fmt::Display for VariantType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Emit the name verbatim; width/fill flags on the formatter are not applied.
        f.write_str(Self::NAME)
    }
}

impl From<VariantType> for Type {
fn from(_: VariantType) -> Self {
Type::Variant(VariantType)
}
}

impl Serialize for VariantType {
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
where S: Serializer {
serializer.serialize_str(Self::NAME)
}
}

impl<'de> Deserialize<'de> for VariantType {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where D: Deserializer<'de> {
let s = String::deserialize(deserializer)?;
if s == Self::NAME {
Ok(VariantType)
} else {
Err(serde::de::Error::custom(format!(
"expected type '{}', got '{s}'",
Self::NAME
)))
}
}
}

#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
Expand Down Expand Up @@ -884,7 +949,8 @@ mod tests {
{"id": 13, "name": "uuid_field", "required": true, "type": "uuid"},
{"id": 14, "name": "fixed_field", "required": true, "type": "fixed[10]"},
{"id": 15, "name": "binary_field", "required": true, "type": "binary"},
{"id": 16, "name": "string_field", "required": true, "type": "string"}
{"id": 16, "name": "string_field", "required": true, "type": "string"},
{"id": 17, "name": "variant_field", "required": false, "type": "variant"}
]
}
"#;
Expand Down Expand Up @@ -964,6 +1030,7 @@ mod tests {
Type::Primitive(PrimitiveType::String),
)
.into(),
NestedField::optional(17, "variant_field", Type::Variant(VariantType)).into(),
],
id_lookup: OnceLock::default(),
name_lookup: OnceLock::default(),
Expand Down Expand Up @@ -1320,4 +1387,37 @@ mod tests {
.contains("expected type 'struct'")
);
}

#[test]
fn variant_type_display() {
    // The bare marker and the wrapping `Type` must render the same way.
    let rendered = [
        VariantType.to_string(),
        Type::Variant(VariantType).to_string(),
    ];
    for text in rendered {
        assert_eq!(text, "variant");
    }
}

#[test]
fn variant_type_categories() {
    let variant = Type::Variant(VariantType);

    // A variant belongs to none of the pre-existing categories.
    let other_categories = [
        variant.is_primitive(),
        variant.is_nested(),
        variant.is_struct(),
    ];
    for matched in other_categories {
        assert!(!matched);
    }

    // It is recognised only through the variant-specific accessors.
    assert!(variant.is_variant());
    assert!(variant.as_primitive_type().is_none());
    assert_eq!(variant.as_variant_type(), Some(&VariantType));
}

#[test]
fn variant_type_field_serde_round_trip() {
    // Parse a field whose declared type is the string `variant`.
    let parsed: NestedField =
        serde_json::from_str(r#"{"id":17,"name":"v","required":false,"type":"variant"}"#).unwrap();
    assert_eq!(*parsed.field_type, Type::Variant(VariantType));

    // Writing the field out and reading it back must give an equal field.
    let round_tripped: NestedField =
        serde_json::from_str(&serde_json::to_string(&parsed).unwrap()).unwrap();
    assert_eq!(parsed, round_tripped);
}

#[test]
fn variant_type_rejects_other_strings() {
    // A different type name must fail and the error must name the expected type.
    let outcome = serde_json::from_str::<VariantType>("\"binary\"");
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("expected type 'variant'"));
}
}
1 change: 1 addition & 0 deletions crates/iceberg/src/spec/schema/id_reassigner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ impl ReassignFieldIds {
fn reassign_ids_visit_type(&mut self, field_type: Type) -> Result<Type> {
match field_type {
Type::Primitive(s) => Ok(Type::Primitive(s)),
Type::Variant(v) => Ok(Type::Variant(v)),
Type::Struct(s) => {
let new_fields = self.reassign_field_ids(s.fields().to_vec())?;
Ok(Type::Struct(StructType::new(new_fields)))
Expand Down
12 changes: 12 additions & 0 deletions crates/iceberg/src/spec/schema/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ pub fn index_by_id(r#struct: &StructType) -> Result<HashMap<i32, NestedFieldRef>
fn primitive(&mut self, _: &PrimitiveType) -> Result<Self::T> {
Ok(())
}

/// Visits a variant type. Does nothing and returns `Ok(())`, the same as
/// `primitive` in this visitor.
fn variant(&mut self, _: &VariantType) -> Result<Self::T> {
Ok(())
}
}

let mut index = IndexById(HashMap::new());
Expand Down Expand Up @@ -145,6 +149,10 @@ pub fn index_parents(r#struct: &StructType) -> Result<HashMap<i32, i32>> {
fn primitive(&mut self, _p: &PrimitiveType) -> Result<Self::T> {
Ok(())
}

/// Visits a variant type. Does nothing and returns `Ok(())`, the same as
/// `primitive` in this visitor.
fn variant(&mut self, _v: &VariantType) -> Result<Self::T> {
Ok(())
}
}

let mut index = IndexByParent {
Expand Down Expand Up @@ -293,6 +301,10 @@ impl SchemaVisitor for IndexByName {
fn primitive(&mut self, _p: &PrimitiveType) -> Result<Self::T> {
Ok(())
}

/// Visits a variant type. Does nothing and returns `Ok(())`, the same as
/// `primitive` in this visitor.
fn variant(&mut self, _v: &VariantType) -> Result<Self::T> {
Ok(())
}
}

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion crates/iceberg/src/spec/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ use crate::error::Result;
use crate::expr::accessor::StructAccessor;
use crate::spec::datatypes::{
LIST_FIELD_NAME, ListType, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, MapType, NestedFieldRef,
PrimitiveType, StructType, Type,
PrimitiveType, StructType, Type, VariantType,
};
use crate::{Error, ErrorKind, ensure_data_valid};

Expand Down
4 changes: 4 additions & 0 deletions crates/iceberg/src/spec/schema/prune_columns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,10 @@ impl SchemaVisitor for PruneColumn {
fn primitive(&mut self, _p: &PrimitiveType) -> Result<Option<Type>> {
Ok(None)
}

/// Visits a variant type. Always returns `Ok(None)`, the same result this
/// visitor's `primitive` gives.
fn variant(&mut self, _v: &VariantType) -> Result<Option<Type>> {
Ok(None)
}
}

#[cfg(test)]
Expand Down
16 changes: 16 additions & 0 deletions crates/iceberg/src/spec/schema/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,20 @@ pub trait SchemaVisitor {
fn map(&mut self, map: &MapType, key_value: Self::T, value: Self::T) -> Result<Self::T>;
/// Called when see a primitive type.
fn primitive(&mut self, p: &PrimitiveType) -> Result<Self::T>;
/// Called when see a variant type.
fn variant(&mut self, _v: &VariantType) -> Result<Self::T> {
Err(Error::new(
ErrorKind::FeatureUnsupported,
"Variant type is not supported by this visitor",
))
}
}

/// Visiting a type in post order.
pub(crate) fn visit_type<V: SchemaVisitor>(r#type: &Type, visitor: &mut V) -> Result<V::T> {
match r#type {
Type::Primitive(p) => visitor.primitive(p),
Type::Variant(v) => visitor.variant(v),
Type::List(list) => {
visitor.before_list_element(&list.element_field)?;
let value = visit_type(&list.element_field.field_type, visitor)?;
Expand Down Expand Up @@ -185,6 +193,13 @@ pub trait SchemaWithPartnerVisitor<P> {
) -> Result<Self::T>;
/// Called when see a primitive type.
fn primitive(&mut self, p: &PrimitiveType, partner: &P) -> Result<Self::T>;
/// Called when see a variant type.
fn variant(&mut self, _v: &VariantType, _partner: &P) -> Result<Self::T> {
Err(Error::new(
ErrorKind::FeatureUnsupported,
"Variant type is not supported by this visitor",
))
}
}

/// Accessor used to get child partner from parent partner.
Expand All @@ -210,6 +225,7 @@ pub(crate) fn visit_type_with_partner<P, V: SchemaWithPartnerVisitor<P>, A: Part
) -> Result<V::T> {
match r#type {
Type::Primitive(p) => visitor.primitive(p, partner),
Type::Variant(v) => visitor.variant(v, partner),
Type::List(list) => {
let list_element_partner = accessor.list_element_partner(partner)?;
visitor.before_list_element(&list.element_field, list_element_partner)?;
Expand Down
Loading
Loading