From 3c86956a1f08ffea2570918844dd2671ce4faa57 Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Sun, 10 May 2026 11:14:07 +0530 Subject: [PATCH 1/8] feat: add PrimitiveType::Variant to iceberg spec --- crates/iceberg/src/arrow/schema.rs | 5 ++++ crates/iceberg/src/arrow/value.rs | 4 +++ crates/iceberg/src/avro/schema.rs | 1 + crates/iceberg/src/spec/datatypes.rs | 33 ++++++++++++++++++++++++- crates/iceberg/src/spec/values/datum.rs | 1 + 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs index 9b504421ae..68fc288ce1 100644 --- a/crates/iceberg/src/arrow/schema.rs +++ b/crates/iceberg/src/arrow/schema.rs @@ -690,6 +690,10 @@ impl SchemaVisitor for ToArrowSchemaConverter { crate::spec::PrimitiveType::Binary => { Ok(ArrowSchemaOrFieldOrType::Type(DataType::LargeBinary)) } + crate::spec::PrimitiveType::Variant => Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, + "Arrow schema conversion for Variant is not yet implemented", + )), } } } @@ -1131,6 +1135,7 @@ pub fn datum_to_arrow_type_with_ree(datum: &Datum) -> DataType { PrimitiveType::Uuid => make_ree(DataType::Binary), PrimitiveType::Fixed(_) => make_ree(DataType::Binary), PrimitiveType::Binary => make_ree(DataType::Binary), + PrimitiveType::Variant => make_ree(DataType::Binary), PrimitiveType::Decimal { precision, scale } => { make_ree(DataType::Decimal128(*precision as u8, *scale as i8)) } diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index d07233c420..e349af2392 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -424,6 +424,10 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { )) } } + PrimitiveType::Variant => Err(Error::new( + ErrorKind::FeatureUnsupported, + "Arrow value extraction for Variant is not yet implemented", + )), } } } diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index fdbc680977..dbe70a482f 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -237,6 +237,7 @@ impl SchemaVisitor for SchemaToAvroSchema { PrimitiveType::Uuid => AvroSchema::Uuid, PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize)?, PrimitiveType::Binary => AvroSchema::Bytes, + PrimitiveType::Variant => AvroSchema::Bytes, PrimitiveType::Decimal { precision, scale } => { avro_decimal_schema(*precision as usize, *scale as usize)? } diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index ad4aea758f..ecf8ceb0a9 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -247,6 +247,8 @@ pub enum PrimitiveType { Fixed(u64), /// Arbitrary-length byte array. Binary, + /// Semi-structured data type (Iceberg spec v3). Stored in Parquet as `LogicalType::Variant`. + Variant, } impl PrimitiveType { @@ -382,6 +384,7 @@ impl fmt::Display for PrimitiveType { PrimitiveType::Uuid => write!(f, "uuid"), PrimitiveType::Fixed(size) => write!(f, "fixed({size})"), PrimitiveType::Binary => write!(f, "binary"), + PrimitiveType::Variant => write!(f, "variant"), } } } @@ -884,7 +887,8 @@ mod tests { {"id": 13, "name": "uuid_field", "required": true, "type": "uuid"}, {"id": 14, "name": "fixed_field", "required": true, "type": "fixed[10]"}, {"id": 15, "name": "binary_field", "required": true, "type": "binary"}, - {"id": 16, "name": "string_field", "required": true, "type": "string"} + {"id": 16, "name": "string_field", "required": true, "type": "string"}, + {"id": 17, "name": "variant_field", "required": false, "type": "variant"} ] } "#; @@ -964,6 +968,12 @@ mod tests { Type::Primitive(PrimitiveType::String), ) .into(), + NestedField::optional( + 17, + "variant_field", + Type::Primitive(PrimitiveType::Variant), + ) + .into(), ], id_lookup: OnceLock::default(), name_lookup: OnceLock::default(), @@ -1320,4 +1330,25 @@ mod tests { .contains("expected type 'struct'") ); } + + #[test] + fn variant_type_display() { + assert_eq!(PrimitiveType::Variant.to_string(), "variant"); + } + + #[test] + fn variant_type_serde() { + let json = r#"{"id": 1, "name": "v", "required": false, "type": "variant"}"#; + let field: NestedField = serde_json::from_str(json).unwrap(); + assert_eq!(*field.field_type, Type::Primitive(PrimitiveType::Variant)); + let serialized = serde_json::to_string(&field).unwrap(); + assert!(serialized.contains("\"variant\"")); + } + + #[test] + fn variant_type_not_compatible_with_literals() { + assert!(!PrimitiveType::Variant.compatible(&PrimitiveLiteral::Boolean(true))); + assert!(!PrimitiveType::Variant.compatible(&PrimitiveLiteral::Int(0))); + assert!(!PrimitiveType::Variant.compatible(&PrimitiveLiteral::Binary(vec![]))); + } } diff --git a/crates/iceberg/src/spec/values/datum.rs b/crates/iceberg/src/spec/values/datum.rs index 68ea6b3d46..46a783a770 100644 --- a/crates/iceberg/src/spec/values/datum.rs +++ b/crates/iceberg/src/spec/values/datum.rs @@ -419,6 +419,7 @@ impl Datum { } PrimitiveType::Fixed(_) => PrimitiveLiteral::Binary(Vec::from(bytes)), PrimitiveType::Binary => PrimitiveLiteral::Binary(Vec::from(bytes)), + PrimitiveType::Variant => PrimitiveLiteral::Binary(Vec::from(bytes)), PrimitiveType::Decimal { .. } => { PrimitiveLiteral::Int128(i128_from_be_bytes(bytes).ok_or_else(|| { Error::new( From 87a93a241012f6ca797e7c6ca911e1aaebc29ed2 Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Sun, 10 May 2026 12:07:35 +0530 Subject: [PATCH 2/8] feat: add Variant to HMS and Glue schema converters --- crates/catalog/glue/src/schema.rs | 6 ++++++ crates/catalog/hms/src/schema.rs | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs index 864320dae4..4f5c1f664a 100644 --- a/crates/catalog/glue/src/schema.rs +++ b/crates/catalog/glue/src/schema.rs @@ -178,6 +178,12 @@ impl SchemaVisitor for GlueSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { format!("decimal({precision},{scale})") } + PrimitiveType::Variant => { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + "Conversion from Variant to Glue type is not supported", + )); + } }; Ok(glue_type) diff --git a/crates/catalog/hms/src/schema.rs b/crates/catalog/hms/src/schema.rs index c23b48719d..f48d163b30 100644 --- a/crates/catalog/hms/src/schema.rs +++ b/crates/catalog/hms/src/schema.rs @@ -135,6 +135,12 @@ impl SchemaVisitor for HiveSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { format!("decimal({precision},{scale})") } + PrimitiveType::Variant => { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + "Conversion from Variant to Hive type is not supported", + )); + } }; Ok(hive_type) From c668923e1b38ae1a784a269a5d1e3a8eead26b9d Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Mon, 18 May 2026 23:54:12 +0530 Subject: [PATCH 3/8] feat(spec): lift Variant to top-level Type::Variant(VariantType) --- crates/iceberg/src/spec/datatypes.rs | 101 +++++++++++++++++++-------- 1 file changed, 71 insertions(+), 30 deletions(-) diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index ecf8ceb0a9..13f056c8c0 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -80,7 +80,7 @@ mod _decimal { } #[derive(Debug, PartialEq, Eq, Clone)] -/// All data types are either primitives or nested types, which are maps, lists, or structs. +/// All data types are either primitives, nested types (maps, lists, structs), or variant. pub enum Type { /// Primitive types Primitive(PrimitiveType), @@ -90,6 +90,8 @@ pub enum Type { List(ListType), /// Map type Map(MapType), + /// Variant type (Iceberg v3): semi-structured data carried as a pair of binary blobs. + Variant(VariantType), } impl fmt::Display for Type { @@ -99,6 +101,7 @@ impl fmt::Display for Type { Type::Struct(s) => write!(f, "{s}"), Type::List(_) => write!(f, "list"), Type::Map(_) => write!(f, "map"), + Type::Variant(v) => write!(f, "{v}"), } } } @@ -122,6 +125,12 @@ impl Type { matches!(self, Type::Struct(_) | Type::List(_) | Type::Map(_)) } + /// Whether the type is variant type. + #[inline(always)] + pub fn is_variant(&self) -> bool { + matches!(self, Type::Variant(_)) + } + /// Convert Type to reference of PrimitiveType pub fn as_primitive_type(&self) -> Option<&PrimitiveType> { if let Type::Primitive(primitive_type) = self { @@ -131,6 +140,15 @@ impl Type { } } + /// Convert Type to reference of VariantType. + pub fn as_variant_type(&self) -> Option<&VariantType> { + if let Type::Variant(v) = self { + Some(v) + } else { + None + } + } + /// Convert Type to StructType pub fn to_struct_type(self) -> Option { if let Type::Struct(struct_type) = self { @@ -247,8 +265,6 @@ pub enum PrimitiveType { Fixed(u64), /// Arbitrary-length byte array. Binary, - /// Semi-structured data type (Iceberg spec v3). Stored in Parquet as `LogicalType::Variant`. - Variant, } impl PrimitiveType { @@ -384,7 +400,6 @@ impl fmt::Display for PrimitiveType { PrimitiveType::Uuid => write!(f, "uuid"), PrimitiveType::Fixed(size) => write!(f, "fixed({size})"), PrimitiveType::Binary => write!(f, "binary"), - PrimitiveType::Variant => write!(f, "variant"), } } } @@ -713,6 +728,7 @@ pub(super) mod _serde { use crate::spec::datatypes::Type::Map; use crate::spec::datatypes::{ ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type, + VariantType, }; /// List type for serialization and deserialization @@ -740,6 +756,7 @@ pub(super) mod _serde { value: Cow<'a, Type>, }, Primitive(PrimitiveType), + Variant(VariantType), } impl From> for Type { @@ -778,6 +795,7 @@ pub(super) mod _serde { Self::Struct(StructType::new(fields.into_owned())) } SerdeType::Primitive(p) => Self::Primitive(p), + SerdeType::Variant(v) => Self::Variant(v), } } } @@ -804,6 +822,7 @@ pub(super) mod _serde { fields: Cow::Borrowed(&s.fields), }, Type::Primitive(p) => SerdeType::Primitive(p.clone()), + Type::Variant(v) => SerdeType::Variant(*v), } } } @@ -847,6 +866,53 @@ impl MapType { } } +/// Variant type (Iceberg spec v3). +/// +/// Semi-structured value carried as a pair of binary blobs (metadata + value) +/// per the parquet-format Variant encoding. Sits at the top level of `Type` +/// (not inside `PrimitiveType`), matching Java `Types.VariantType implements Type`. +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, Default)] +pub struct VariantType; + +impl VariantType { + /// Canonical spec name. + pub const NAME: &'static str = "variant"; +} + +impl fmt::Display for VariantType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", Self::NAME) + } +} + +impl From for Type { + fn from(_: VariantType) -> Self { + Type::Variant(VariantType) + } +} + +impl Serialize for VariantType { + fn serialize(&self, serializer: S) -> std::result::Result + where S: Serializer { + serializer.serialize_str(Self::NAME) + } +} + +impl<'de> Deserialize<'de> for VariantType { + fn deserialize(deserializer: D) -> std::result::Result + where D: Deserializer<'de> { + let s = String::deserialize(deserializer)?; + if s == Self::NAME { + Ok(VariantType) + } else { + Err(serde::de::Error::custom(format!( + "expected type '{}', got '{s}'", + Self::NAME + ))) + } + } +} + #[cfg(test)] mod tests { use pretty_assertions::assert_eq; @@ -968,12 +1034,7 @@ mod tests { Type::Primitive(PrimitiveType::String), ) .into(), - NestedField::optional( - 17, - "variant_field", - Type::Primitive(PrimitiveType::Variant), - ) - .into(), + NestedField::optional(17, "variant_field", Type::Variant(VariantType)).into(), ], id_lookup: OnceLock::default(), name_lookup: OnceLock::default(), @@ -1331,24 +1392,4 @@ mod tests { ); } - #[test] - fn variant_type_display() { - assert_eq!(PrimitiveType::Variant.to_string(), "variant"); - } - - #[test] - fn variant_type_serde() { - let json = r#"{"id": 1, "name": "v", "required": false, "type": "variant"}"#; - let field: NestedField = serde_json::from_str(json).unwrap(); - assert_eq!(*field.field_type, Type::Primitive(PrimitiveType::Variant)); - let serialized = serde_json::to_string(&field).unwrap(); - assert!(serialized.contains("\"variant\"")); - } - - #[test] - fn variant_type_not_compatible_with_literals() { - assert!(!PrimitiveType::Variant.compatible(&PrimitiveLiteral::Boolean(true))); - assert!(!PrimitiveType::Variant.compatible(&PrimitiveLiteral::Int(0))); - assert!(!PrimitiveType::Variant.compatible(&PrimitiveLiteral::Binary(vec![]))); - } } From 5aee5458d292700b2cb4ead5b9330d49818eee34 Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Mon, 18 May 2026 23:56:33 +0530 Subject: [PATCH 4/8] feat(spec/schema): add Variant arm to SchemaVisitor and built-in visitors --- crates/iceberg/src/spec/schema/id_reassigner.rs | 1 + crates/iceberg/src/spec/schema/index.rs | 12 ++++++++++++ crates/iceberg/src/spec/schema/mod.rs | 2 +- crates/iceberg/src/spec/schema/prune_columns.rs | 4 ++++ crates/iceberg/src/spec/schema/visitor.rs | 16 ++++++++++++++++ 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/crates/iceberg/src/spec/schema/id_reassigner.rs b/crates/iceberg/src/spec/schema/id_reassigner.rs index 5dbb370001..d2ec59b825 100644 --- a/crates/iceberg/src/spec/schema/id_reassigner.rs +++ b/crates/iceberg/src/spec/schema/id_reassigner.rs @@ -66,6 +66,7 @@ impl ReassignFieldIds { fn reassign_ids_visit_type(&mut self, field_type: Type) -> Result { match field_type { Type::Primitive(s) => Ok(Type::Primitive(s)), + Type::Variant(v) => Ok(Type::Variant(v)), Type::Struct(s) => { let new_fields = self.reassign_field_ids(s.fields().to_vec())?; Ok(Type::Struct(StructType::new(new_fields))) diff --git a/crates/iceberg/src/spec/schema/index.rs b/crates/iceberg/src/spec/schema/index.rs index d4e77ab2aa..026342325a 100644 --- a/crates/iceberg/src/spec/schema/index.rs +++ b/crates/iceberg/src/spec/schema/index.rs @@ -53,6 +53,10 @@ pub fn index_by_id(r#struct: &StructType) -> Result fn primitive(&mut self, _: &PrimitiveType) -> Result { Ok(()) } + + fn variant(&mut self, _: &VariantType) -> Result { + Ok(()) + } } let mut index = IndexById(HashMap::new()); @@ -145,6 +149,10 @@ pub fn index_parents(r#struct: &StructType) -> Result> { fn primitive(&mut self, _p: &PrimitiveType) -> Result { Ok(()) } + + fn variant(&mut self, _v: &VariantType) -> Result { + Ok(()) + } } let mut index = IndexByParent { @@ -293,6 +301,10 @@ impl SchemaVisitor for IndexByName { fn primitive(&mut self, _p: &PrimitiveType) -> Result { Ok(()) } + + fn variant(&mut self, _v: &VariantType) -> Result { + Ok(()) + } } #[cfg(test)] diff --git a/crates/iceberg/src/spec/schema/mod.rs b/crates/iceberg/src/spec/schema/mod.rs index 9109990e19..6dededc3cc 100644 --- a/crates/iceberg/src/spec/schema/mod.rs +++ b/crates/iceberg/src/spec/schema/mod.rs @@ -41,7 +41,7 @@ use crate::error::Result; use crate::expr::accessor::StructAccessor; use crate::spec::datatypes::{ LIST_FIELD_NAME, ListType, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, MapType, NestedFieldRef, - PrimitiveType, StructType, Type, + PrimitiveType, StructType, Type, VariantType, }; use crate::{Error, ErrorKind, ensure_data_valid}; diff --git a/crates/iceberg/src/spec/schema/prune_columns.rs b/crates/iceberg/src/spec/schema/prune_columns.rs index 14f1bfd25f..5ec8c07909 100644 --- a/crates/iceberg/src/spec/schema/prune_columns.rs +++ b/crates/iceberg/src/spec/schema/prune_columns.rs @@ -238,6 +238,10 @@ impl SchemaVisitor for PruneColumn { fn primitive(&mut self, _p: &PrimitiveType) -> Result> { Ok(None) } + + fn variant(&mut self, _v: &VariantType) -> Result> { + Ok(None) + } } #[cfg(test)] diff --git a/crates/iceberg/src/spec/schema/visitor.rs b/crates/iceberg/src/spec/schema/visitor.rs index 50f7c04caa..486b6f958e 100644 --- a/crates/iceberg/src/spec/schema/visitor.rs +++ b/crates/iceberg/src/spec/schema/visitor.rs @@ -69,12 +69,20 @@ pub trait SchemaVisitor { fn map(&mut self, map: &MapType, key_value: Self::T, value: Self::T) -> Result; /// Called when see a primitive type. fn primitive(&mut self, p: &PrimitiveType) -> Result; + /// Called when see a variant type. + fn variant(&mut self, _v: &VariantType) -> Result { + Err(Error::new( + ErrorKind::FeatureUnsupported, + "Variant type is not supported by this visitor", + )) + } } /// Visiting a type in post order. pub(crate) fn visit_type(r#type: &Type, visitor: &mut V) -> Result { match r#type { Type::Primitive(p) => visitor.primitive(p), + Type::Variant(v) => visitor.variant(v), Type::List(list) => { visitor.before_list_element(&list.element_field)?; let value = visit_type(&list.element_field.field_type, visitor)?; @@ -185,6 +193,13 @@ pub trait SchemaWithPartnerVisitor

{ ) -> Result; /// Called when see a primitive type. fn primitive(&mut self, p: &PrimitiveType, partner: &P) -> Result; + /// Called when see a variant type. + fn variant(&mut self, _v: &VariantType, _partner: &P) -> Result { + Err(Error::new( + ErrorKind::FeatureUnsupported, + "Variant type is not supported by this visitor", + )) + } } /// Accessor used to get child partner from parent partner. @@ -210,6 +225,7 @@ pub(crate) fn visit_type_with_partner, A: Part ) -> Result { match r#type { Type::Primitive(p) => visitor.primitive(p, partner), + Type::Variant(v) => visitor.variant(v, partner), Type::List(list) => { let list_element_partner = accessor.list_element_partner(partner)?; visitor.before_list_element(&list.element_field, list_element_partner)?; From 43e4ba99b1214b0de79231d9bec7d7249d104a21 Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Mon, 18 May 2026 23:57:12 +0530 Subject: [PATCH 5/8] feat(spec/values): drop Variant Datum arm, error on Variant JSON single-value --- crates/iceberg/src/spec/values/datum.rs | 1 - crates/iceberg/src/spec/values/literal.rs | 8 ++++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/iceberg/src/spec/values/datum.rs b/crates/iceberg/src/spec/values/datum.rs index 46a783a770..68ea6b3d46 100644 --- a/crates/iceberg/src/spec/values/datum.rs +++ b/crates/iceberg/src/spec/values/datum.rs @@ -419,7 +419,6 @@ impl Datum { } PrimitiveType::Fixed(_) => PrimitiveLiteral::Binary(Vec::from(bytes)), PrimitiveType::Binary => PrimitiveLiteral::Binary(Vec::from(bytes)), - PrimitiveType::Variant => PrimitiveLiteral::Binary(Vec::from(bytes)), PrimitiveType::Decimal { .. } => { PrimitiveLiteral::Int128(i128_from_be_bytes(bytes).ok_or_else(|| { Error::new( diff --git a/crates/iceberg/src/spec/values/literal.rs b/crates/iceberg/src/spec/values/literal.rs index e82fa197cd..a16c11d3da 100644 --- a/crates/iceberg/src/spec/values/literal.rs +++ b/crates/iceberg/src/spec/values/literal.rs @@ -560,6 +560,10 @@ impl Literal { )) } } + Type::Variant(_) => Err(Error::new( + ErrorKind::DataInvalid, + "Variant type is not supported for single-value JSON deserialization", + )), Type::Map(map) => { if let JsonValue::Object(mut object) = value { if let (Some(JsonValue::Array(keys)), Some(JsonValue::Array(values))) = @@ -718,6 +722,10 @@ impl Literal { object.insert("values".to_string(), JsonValue::Array(json_values)); Ok(JsonValue::Object(object)) } + (_, Type::Variant(_)) => Err(Error::new( + ErrorKind::DataInvalid, + "Variant type is not supported for single-value JSON serialization", + )), (value, r#type) => Err(Error::new( ErrorKind::DataInvalid, format!("The iceberg value {value:?} doesn't fit to the iceberg type {type}."), From 164e4f473d156e5d10d245d86ac19439029630cc Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Tue, 19 May 2026 00:03:37 +0530 Subject: [PATCH 6/8] feat(arrow/avro/parquet): handle Type::Variant in schema visitors and type walks --- crates/iceberg/src/arrow/reader/projection.rs | 2 +- crates/iceberg/src/arrow/schema.rs | 15 +++++--- crates/iceberg/src/arrow/value.rs | 17 +++++++--- crates/iceberg/src/avro/schema.rs | 10 ++++-- .../iceberg/src/transaction/update_schema.rs | 4 +-- .../src/writer/file_writer/parquet_writer.rs | 34 +++++++++++-------- 6 files changed, 53 insertions(+), 29 deletions(-) diff --git a/crates/iceberg/src/arrow/reader/projection.rs b/crates/iceberg/src/arrow/reader/projection.rs index 2589c78366..fedb263171 100644 --- a/crates/iceberg/src/arrow/reader/projection.rs +++ b/crates/iceberg/src/arrow/reader/projection.rs @@ -61,7 +61,7 @@ impl ArrowReader { /// Nested types (struct/list/map) are flattened in Parquet's columnar format. fn include_leaf_field_id(field: &NestedField, field_ids: &mut Vec) { match field.field_type.as_ref() { - Type::Primitive(_) => { + Type::Primitive(_) | Type::Variant(_) => { field_ids.push(field.id); } Type::Struct(struct_type) => { diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs index 68fc288ce1..6270c54969 100644 --- a/crates/iceberg/src/arrow/schema.rs +++ b/crates/iceberg/src/arrow/schema.rs @@ -690,12 +690,18 @@ impl SchemaVisitor for ToArrowSchemaConverter { crate::spec::PrimitiveType::Binary => { Ok(ArrowSchemaOrFieldOrType::Type(DataType::LargeBinary)) } - crate::spec::PrimitiveType::Variant => Err(crate::Error::new( - crate::ErrorKind::FeatureUnsupported, - "Arrow schema conversion for Variant is not yet implemented", - )), } } + + fn variant( + &mut self, + _v: &crate::spec::VariantType, + ) -> crate::Result { + Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, + "Arrow schema conversion for Variant is not yet implemented", + )) + } } /// Convert iceberg schema to an arrow schema. @@ -1135,7 +1141,6 @@ pub fn datum_to_arrow_type_with_ree(datum: &Datum) -> DataType { PrimitiveType::Uuid => make_ree(DataType::Binary), PrimitiveType::Fixed(_) => make_ree(DataType::Binary), PrimitiveType::Binary => make_ree(DataType::Binary), - PrimitiveType::Variant => make_ree(DataType::Binary), PrimitiveType::Decimal { precision, scale } => { make_ree(DataType::Decimal128(*precision as u8, *scale as i8)) } diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index e349af2392..798d2ec13c 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -30,7 +30,7 @@ use uuid::Uuid; use super::get_field_id_from_metadata; use crate::spec::{ ListType, Literal, Map, MapType, NestedField, PartnerAccessor, PrimitiveLiteral, PrimitiveType, - SchemaWithPartnerVisitor, Struct, StructType, Type, visit_struct_with_partner, + SchemaWithPartnerVisitor, Struct, StructType, Type, VariantType, visit_struct_with_partner, visit_type_with_partner, }; use crate::{Error, ErrorKind, Result}; @@ -424,12 +424,19 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { )) } } - PrimitiveType::Variant => Err(Error::new( - ErrorKind::FeatureUnsupported, - "Arrow value extraction for Variant is not yet implemented", - )), } } + + fn variant( + &mut self, + _v: &VariantType, + _partner: &ArrayRef, + ) -> Result>> { + Err(Error::new( + ErrorKind::FeatureUnsupported, + "Arrow value extraction for Variant is not yet implemented", + )) + } } /// Defines how Arrow fields are matched with Iceberg fields when converting data. diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index dbe70a482f..2e0aacf4f0 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -28,7 +28,7 @@ use serde_json::{Number, Value}; use crate::spec::{ ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, Schema, SchemaVisitor, - StructType, Type, visit_schema, + StructType, Type, VariantType, visit_schema, }; use crate::{Error, ErrorKind, Result, ensure_data_valid}; @@ -237,13 +237,19 @@ impl SchemaVisitor for SchemaToAvroSchema { PrimitiveType::Uuid => AvroSchema::Uuid, PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize)?, PrimitiveType::Binary => AvroSchema::Bytes, - PrimitiveType::Variant => AvroSchema::Bytes, PrimitiveType::Decimal { precision, scale } => { avro_decimal_schema(*precision as usize, *scale as usize)? } }; Ok(Either::Left(avro_schema)) } + + fn variant(&mut self, _v: &VariantType) -> Result { + Err(Error::new( + ErrorKind::FeatureUnsupported, + "Avro schema conversion for Variant is not yet implemented", + )) + } } /// Converting iceberg schema to avro schema. diff --git a/crates/iceberg/src/transaction/update_schema.rs b/crates/iceberg/src/transaction/update_schema.rs index da843d9b9b..222dc9f6a4 100644 --- a/crates/iceberg/src/transaction/update_schema.rs +++ b/crates/iceberg/src/transaction/update_schema.rs @@ -180,7 +180,7 @@ fn assign_fresh_ids(field: &NestedField, next_id: &mut i32) -> NestedFieldRef { /// Recursively assign fresh field IDs to all nested fields within a `Type`. fn assign_fresh_ids_to_type(field_type: &Type, next_id: &mut i32) -> Type { match field_type { - Type::Primitive(_) => field_type.clone(), + Type::Primitive(_) | Type::Variant(_) => field_type.clone(), Type::Struct(struct_type) => { let new_fields: Vec = struct_type .fields() @@ -279,7 +279,7 @@ fn rebuild_field( delete_ids: &HashSet, ) -> NestedFieldRef { match field.field_type.as_ref() { - Type::Primitive(_) => field.clone(), + Type::Primitive(_) | Type::Variant(_) => field.clone(), Type::Struct(s) => { let new_fields = rebuild_fields(s.fields(), adds, delete_ids, Some(field.id)); Arc::new(NestedField { diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index 840d1a5f16..3ec8b4e03b 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -40,7 +40,7 @@ use crate::io::{FileIO, FileWrite, OutputFile}; use crate::spec::{ DataContentType, DataFileBuilder, DataFileFormat, Datum, ListType, Literal, MapType, NestedFieldRef, PartitionSpec, PrimitiveType, Schema, SchemaRef, SchemaVisitor, Struct, - StructType, TableMetadata, Type, visit_schema, + StructType, TableMetadata, Type, VariantType, visit_schema, }; use crate::transform::create_transform_function; use crate::writer::{CurrentFileStatus, DataFile}; @@ -113,6 +113,21 @@ impl IndexByParquetPathName { pub fn get(&self, name: &str) -> Option<&i32> { self.name_to_id.get(name) } + + fn insert_current_path(&mut self) -> Result<()> { + let full_name = self.field_names.iter().map(String::as_str).join("."); + let field_id = self.field_id; + if let Some(existing_field_id) = self.name_to_id.get(full_name.as_str()) { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid schema: multiple fields for name {full_name}: {field_id} and {existing_field_id}" + ), + )); + } + self.name_to_id.insert(full_name, field_id); + Ok(()) + } } impl Default for IndexByParquetPathName { @@ -191,20 +206,11 @@ impl SchemaVisitor for IndexByParquetPathName { } fn primitive(&mut self, _p: &PrimitiveType) -> Result { - let full_name = self.field_names.iter().map(String::as_str).join("."); - let field_id = self.field_id; - if let Some(existing_field_id) = self.name_to_id.get(full_name.as_str()) { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Invalid schema: multiple fields for name {full_name}: {field_id} and {existing_field_id}" - ), - )); - } else { - self.name_to_id.insert(full_name, field_id); - } + self.insert_current_path() + } - Ok(()) + fn variant(&mut self, _v: &VariantType) -> Result { + self.insert_current_path() } } From aad72f4f7af510b43511ebbddf72574ecd59b539 Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Tue, 19 May 2026 00:04:59 +0530 Subject: [PATCH 7/8] feat(catalog): reject Variant in HMS and Glue schema converters via variant() --- crates/catalog/glue/src/schema.rs | 13 +++++++------ crates/catalog/hms/src/schema.rs | 13 +++++++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs index 4f5c1f664a..a03155514b 100644 --- a/crates/catalog/glue/src/schema.rs +++ b/crates/catalog/glue/src/schema.rs @@ -178,16 +178,17 @@ impl SchemaVisitor for GlueSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { format!("decimal({precision},{scale})") } - PrimitiveType::Variant => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - "Conversion from Variant to Glue type is not supported", - )); - } }; Ok(glue_type) } + + fn variant(&mut self, _v: &iceberg::spec::VariantType) -> iceberg::Result { + Err(Error::new( + ErrorKind::FeatureUnsupported, + "Conversion from Variant to Glue type is not supported", + )) + } } #[cfg(test)] diff --git a/crates/catalog/hms/src/schema.rs b/crates/catalog/hms/src/schema.rs index f48d163b30..81beaa8e16 100644 --- a/crates/catalog/hms/src/schema.rs +++ b/crates/catalog/hms/src/schema.rs @@ -135,16 +135,17 @@ impl SchemaVisitor for HiveSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { format!("decimal({precision},{scale})") } - PrimitiveType::Variant => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - "Conversion from Variant to Hive type is not supported", - )); - } }; Ok(hive_type) } + + fn variant(&mut self, _v: &iceberg::spec::VariantType) -> iceberg::Result { + Err(Error::new( + ErrorKind::FeatureUnsupported, + "Conversion from Variant to Hive type is not supported", + )) + } } #[cfg(test)] From 14e91d5775a13162673bcdb168cd657a1e2b86c9 Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Tue, 19 May 2026 00:06:21 +0530 Subject: [PATCH 8/8] test(spec): structural tests for top-level Type::Variant(VariantType) --- crates/iceberg/src/arrow/value.rs | 6 +---- crates/iceberg/src/spec/datatypes.rs | 36 ++++++++++++++++++++++++---- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index 798d2ec13c..9b3eed8c52 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -427,11 +427,7 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { } } - fn variant( - &mut self, - _v: &VariantType, - _partner: &ArrayRef, - ) -> Result>> { + fn variant(&mut self, _v: &VariantType, _partner: &ArrayRef) -> Result>> { Err(Error::new( ErrorKind::FeatureUnsupported, "Arrow value extraction for Variant is not yet implemented", diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index 13f056c8c0..cca4814d18 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -867,10 +867,6 @@ impl MapType { } /// Variant type (Iceberg spec v3). -/// -/// Semi-structured value carried as a pair of binary blobs (metadata + value) -/// per the parquet-format Variant encoding. Sits at the top level of `Type` -/// (not inside `PrimitiveType`), matching Java `Types.VariantType implements Type`. #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, Default)] pub struct VariantType; @@ -1392,4 +1388,36 @@ mod tests { ); } + #[test] + fn variant_type_display() { + assert_eq!(VariantType.to_string(), "variant"); + assert_eq!(Type::Variant(VariantType).to_string(), "variant"); + } + + #[test] + fn variant_type_categories() { + let t = Type::Variant(VariantType); + assert!(!t.is_primitive()); + assert!(!t.is_nested()); + assert!(!t.is_struct()); + assert!(t.is_variant()); + assert!(t.as_primitive_type().is_none()); + assert_eq!(t.as_variant_type(), Some(&VariantType)); + } + + #[test] + fn variant_type_field_serde_round_trip() { + let json = r#"{"id":17,"name":"v","required":false,"type":"variant"}"#; + let field: NestedField = serde_json::from_str(json).unwrap(); + assert_eq!(*field.field_type, Type::Variant(VariantType)); + let serialized = serde_json::to_string(&field).unwrap(); + let reparsed: NestedField = serde_json::from_str(&serialized).unwrap(); + assert_eq!(field, reparsed); + } + + #[test] + fn variant_type_rejects_other_strings() { + let err = serde_json::from_str::("\"binary\"").unwrap_err(); + assert!(err.to_string().contains("expected type 'variant'")); + } }