From 2d47696a5f65e9fce10960d80b8bcba6cadbcb5c Mon Sep 17 00:00:00 2001 From: theirix Date: Sun, 19 Apr 2026 19:27:23 +0100 Subject: [PATCH 1/7] Use NativeType in get_example_types and information schema --- datafusion/catalog/src/information_schema.rs | 172 +++++++++++--- datafusion/expr-common/src/signature.rs | 230 +++++++------------ 2 files changed, 221 insertions(+), 181 deletions(-) diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index 34c677c3dd43e..53b9609c42bad 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -22,16 +22,17 @@ use crate::streaming::StreamingTable; use crate::{CatalogProviderList, SchemaProvider, TableProvider}; use arrow::array::builder::{BooleanBuilder, UInt8Builder}; +use arrow::datatypes::{Fields, TimeUnit, UnionFields, UnionMode}; use arrow::{ array::{StringBuilder, UInt64Builder}, datatypes::{DataType, Field, FieldRef, Schema, SchemaRef}, record_batch::RecordBatch, }; use async_trait::async_trait; -use datafusion_common::DataFusionError; use datafusion_common::config::{ConfigEntry, ConfigOptions}; use datafusion_common::error::Result; use datafusion_common::types::NativeType; +use datafusion_common::{DataFusionError, not_impl_err}; use datafusion_execution::TaskContext; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::function::WindowUDFFieldArgs; @@ -411,6 +412,127 @@ impl InformationSchemaConfig { } } +/// Resolve a native type `NativeType` to `DataType` for use in the information schema +/// Since it is one-to-many, use the most representative type on tie +fn get_data_type_for_schema(native_type: &NativeType) -> Option { + match native_type { + NativeType::Null => Some(DataType::Null), + NativeType::Boolean => Some(DataType::Boolean), + NativeType::Int8 => Some(DataType::Int8), + NativeType::Int16 => Some(DataType::Int16), + NativeType::Int32 => Some(DataType::Int32), + NativeType::Int64 => Some(DataType::Int64), + NativeType::UInt8 => Some(DataType::UInt8), + NativeType::UInt16 => Some(DataType::UInt16), + NativeType::UInt32 => Some(DataType::UInt32), + NativeType::UInt64 => Some(DataType::UInt64), + NativeType::Float16 => Some(DataType::Float16), + NativeType::Float32 => Some(DataType::Float32), + NativeType::Float64 => Some(DataType::Float64), + NativeType::Date => Some(DataType::Date32), // A tie + NativeType::Binary => Some(DataType::Binary), // A tie + NativeType::String => Some(DataType::Utf8), // A tie + NativeType::Decimal(precision, scale) => { + Some(DataType::Decimal256(*precision, *scale)) // A tie, use the widest type + } + NativeType::Timestamp(time_unit, timezone) => { + Some(DataType::Timestamp(*time_unit, timezone.to_owned())) + } + NativeType::Time(TimeUnit::Second) => Some(DataType::Time32(TimeUnit::Second)), + NativeType::Time(TimeUnit::Millisecond) => { + Some(DataType::Time32(TimeUnit::Millisecond)) + } + NativeType::Time(TimeUnit::Microsecond) => { + Some(DataType::Time64(TimeUnit::Microsecond)) + } + NativeType::Time(TimeUnit::Nanosecond) => { + Some(DataType::Time64(TimeUnit::Nanosecond)) + } + NativeType::Duration(time_unit) => Some(DataType::Duration(*time_unit)), + NativeType::Interval(interval_unit) => Some(DataType::Interval(*interval_unit)), + NativeType::FixedSizeBinary(size) => Some(DataType::FixedSizeBinary(*size)), + NativeType::FixedSizeList(logical_field, size) => get_data_type_for_schema( + logical_field.logical_type.native(), + ) + .map(|child_dt| { + DataType::FixedSizeList( + Arc::new(Field::new( + logical_field.name.clone(), + child_dt, + logical_field.nullable, + )), + *size, + ) + }), + NativeType::List(logical_field) => get_data_type_for_schema( + logical_field.logical_type.native(), + ) + .map(|child_dt| { + // A tie, use List + DataType::List(Arc::new(Field::new( + logical_field.name.clone(), + child_dt, + logical_field.nullable, + ))) + }), + NativeType::Struct(logical_fields) => { + let fields = logical_fields + .iter() + .map(|logical_field| { + let dt = + get_data_type_for_schema(logical_field.logical_type.native())?; + Some(Arc::new(Field::new( + logical_field.name.clone(), + dt, + logical_field.nullable, + ))) + }) + .collect::>()?; + Some(DataType::Struct(fields)) + } + NativeType::Union(logical_fields) => { + let ids = logical_fields.iter().map(|(i, _)| *i).collect::>(); + let fields: Vec = logical_fields + .iter() + .map(|(_, logical_field)| { + let dt = + get_data_type_for_schema(logical_field.logical_type.native())?; + Some(Arc::new(Field::new( + logical_field.name.clone(), + dt, + logical_field.nullable, + ))) + }) + .collect::>>()?; + Some(DataType::Union( + UnionFields::try_new(ids, fields).ok()?, + UnionMode::Dense, + )) + } + NativeType::Map(logical_field) => get_data_type_for_schema( + logical_field.logical_type.native(), + ) + .map(|child_dt| { + DataType::Map( + Arc::new(Field::new( + logical_field.name.clone(), + child_dt, + logical_field.nullable, + )), + true, + ) + }), + } +} + +pub fn resolve_informational_field(idx: usize, t: &NativeType) -> Result { + if let Some(data_type) = get_data_type_for_schema(t) { + Ok(Arc::new(Field::new(format!("arg_{idx}"), data_type, true))) + } else { + not_impl_err!("No support in information schema for type: {}", t) + } +} + /// get the arguments and return types of a UDF /// returns a tuple of (arg_types, return_type) fn get_udf_args_and_return_types( @@ -421,16 +543,14 @@ fn get_udf_args_and_return_types( if arg_types.is_empty() { Ok(vec![(vec![], None)].into_iter().collect::>()) } else { - Ok(arg_types + arg_types .into_iter() .map(|arg_types| { - let arg_fields: Vec = arg_types + let arg_fields = arg_types .iter() .enumerate() - .map(|(i, t)| { - Arc::new(Field::new(format!("arg_{i}"), t.clone(), true)) - }) - .collect(); + .map(|(i, t)| resolve_informational_field(i, t)) + .collect::>>()?; let scalar_arguments = vec![None; arg_fields.len()]; let return_type = udf .return_field_from_args(ReturnFieldArgs { @@ -445,11 +565,11 @@ fn get_udf_args_and_return_types( .ok(); let arg_types = arg_types .into_iter() - .map(|t| remove_native_type_prefix(&NativeType::from(t))) + .map(|t| remove_native_type_prefix(&t)) .collect::>(); - (arg_types, return_type) + Ok((arg_types, return_type)) }) - .collect::>()) + .collect::>>() } } @@ -461,16 +581,14 @@ fn get_udaf_args_and_return_types( if arg_types.is_empty() { Ok(vec![(vec![], None)].into_iter().collect::>()) } else { - Ok(arg_types + arg_types .into_iter() .map(|arg_types| { - let arg_fields: Vec = arg_types + let arg_fields = arg_types .iter() .enumerate() - .map(|(i, t)| { - Arc::new(Field::new(format!("arg_{i}"), t.clone(), true)) - }) - .collect(); + .map(|(i, t)| resolve_informational_field(i, t)) + .collect::>>()?; let return_type = udaf .return_field(&arg_fields) .map(|f| { @@ -481,11 +599,11 @@ fn get_udaf_args_and_return_types( .ok(); let arg_types = arg_types .into_iter() - .map(|t| remove_native_type_prefix(&NativeType::from(t))) + .map(|t| remove_native_type_prefix(&t)) .collect::>(); - (arg_types, return_type) + Ok((arg_types, return_type)) }) - .collect::>()) + .collect::>>() } } @@ -497,16 +615,14 @@ fn get_udwf_args_and_return_types( if arg_types.is_empty() { Ok(vec![(vec![], None)].into_iter().collect::>()) } else { - Ok(arg_types + arg_types .into_iter() .map(|arg_types| { - let arg_fields: Vec = arg_types + let arg_fields = arg_types .iter() .enumerate() - .map(|(i, t)| { - Arc::new(Field::new(format!("arg_{i}"), t.clone(), true)) - }) - .collect(); + .map(|(i, t)| resolve_informational_field(i, t)) + .collect::>>()?; let return_type = udwf .field(WindowUDFFieldArgs::new(&arg_fields, udwf.name())) .map(|f| { @@ -517,11 +633,11 @@ fn get_udwf_args_and_return_types( .ok(); let arg_types = arg_types .into_iter() - .map(|t| remove_native_type_prefix(&NativeType::from(t))) + .map(|t| remove_native_type_prefix(&t)) .collect::>(); - (arg_types, return_type) + Ok((arg_types, return_type)) }) - .collect::>()) + .collect::>>() } } diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 3e941f00c2ee3..b89e37170ac64 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -19,11 +19,9 @@ use std::fmt::Display; use std::hash::Hash; -use std::sync::Arc; use arrow::datatypes::{ - DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, DECIMAL128_MAX_PRECISION, DataType, - Decimal128Type, DecimalType, Field, IntervalUnit, TimeUnit, + DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DataType, IntervalUnit, TimeUnit, }; use datafusion_common::types::{LogicalType, LogicalTypeRef, NativeType}; use datafusion_common::utils::ListCoercion; @@ -416,43 +414,39 @@ impl TypeSignatureClass { /// /// This is used for `information_schema` and can be used to generate /// documentation or error messages. - fn get_example_types(&self) -> Vec { + fn get_example_types(&self) -> Vec { match self { - // TODO: might be too much info to return every single type here - // maybe https://github.com/apache/datafusion/issues/14761 will help here? TypeSignatureClass::Any => vec![], - TypeSignatureClass::Native(l) => get_data_types(l.native()), + TypeSignatureClass::Native(l) => vec![l.native().clone()], TypeSignatureClass::Timestamp => { vec![ - DataType::Timestamp(TimeUnit::Nanosecond, None), - DataType::Timestamp( + NativeType::Timestamp(TimeUnit::Nanosecond, None), + NativeType::Timestamp( TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into()), ), ] } TypeSignatureClass::Time => { - vec![DataType::Time64(TimeUnit::Nanosecond)] + vec![NativeType::Time(TimeUnit::Nanosecond)] } TypeSignatureClass::Interval => { - vec![DataType::Interval(IntervalUnit::DayTime)] + vec![NativeType::Interval(IntervalUnit::DayTime)] } TypeSignatureClass::Duration => { - vec![DataType::Duration(TimeUnit::Nanosecond)] + vec![NativeType::Duration(TimeUnit::Nanosecond)] } TypeSignatureClass::Integer => { - vec![DataType::Int64] + vec![NativeType::Int64] } TypeSignatureClass::Binary => { - vec![DataType::Binary] + vec![NativeType::Binary] + } + TypeSignatureClass::Decimal => vec![NATIVE_TYPE_DECIMAL], + TypeSignatureClass::Float => vec![NativeType::Float64], + TypeSignatureClass::Numeric => { + vec![NativeType::Float64, NativeType::Int64, NATIVE_TYPE_DECIMAL] } - TypeSignatureClass::Decimal => vec![Decimal128Type::DEFAULT_TYPE], - TypeSignatureClass::Float => vec![DataType::Float64], - TypeSignatureClass::Numeric => vec![ - DataType::Float64, - DataType::Int64, - Decimal128Type::DEFAULT_TYPE, - ], } } @@ -595,18 +589,25 @@ impl Display for ArrayFunctionArgument { } } -static NUMERICS: &[DataType] = &[ - DataType::Int8, - DataType::Int16, - DataType::Int32, - DataType::Int64, - DataType::UInt8, - DataType::UInt16, - DataType::UInt32, - DataType::UInt64, - DataType::Float16, - DataType::Float32, - DataType::Float64, +/// Constant that is used as a Decimal type for `get_example_types` +/// Use Decimal256 precision as a reasonable default +const NATIVE_TYPE_DECIMAL: NativeType = + NativeType::Decimal(DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE); + +/// Native types for `get_example_types` +static EXAMPLE_NUMERIC_TYPES: &[NativeType] = &[ + NativeType::Int8, + NativeType::Int16, + NativeType::Int32, + NativeType::Int64, + NativeType::UInt8, + NativeType::UInt16, + NativeType::UInt32, + NativeType::UInt64, + NativeType::Float16, + NativeType::Float32, + NativeType::Float64, + NATIVE_TYPE_DECIMAL, ]; impl TypeSignature { @@ -881,19 +882,24 @@ impl TypeSignature { } #[deprecated(since = "46.0.0", note = "See get_example_types instead")] - pub fn get_possible_types(&self) -> Vec> { + pub fn get_possible_types(&self) -> Vec> { self.get_example_types() } /// Return example acceptable types for this `TypeSignature`' /// - /// Returns a `Vec` for each argument to the function + /// Returns a `Vec` for each argument to the function /// /// This is used for `information_schema` and can be used to generate /// documentation or error messages. - pub fn get_example_types(&self) -> Vec> { + pub fn get_example_types(&self) -> Vec> { match self { - TypeSignature::Exact(types) => vec![types.clone()], + TypeSignature::Exact(types) => vec![ + types + .iter() + .map(|data_type| NativeType::from(data_type.clone())) + .collect(), + ], TypeSignature::OneOf(types) => types .iter() .flat_map(|type_sig| type_sig.get_example_types()) @@ -901,16 +907,16 @@ impl TypeSignature { TypeSignature::Uniform(arg_count, types) => types .iter() .cloned() - .map(|data_type| vec![data_type; *arg_count]) + .map(|data_type| vec![data_type.into(); *arg_count]) .collect(), TypeSignature::Coercible(coercions) => coercions .iter() .map(|c| { - let mut all_types: IndexSet = + let mut all_types: IndexSet = c.desired_type().get_example_types().into_iter().collect(); if let Some(implicit_coercion) = c.implicit_coercion() { - let allowed_casts: Vec = implicit_coercion + let allowed_casts: Vec = implicit_coercion .allowed_source_types .iter() .flat_map(|t| t.get_example_types()) @@ -925,17 +931,16 @@ impl TypeSignature { TypeSignature::Variadic(types) => types .iter() .cloned() - .map(|data_type| vec![data_type]) + .map(|data_type| vec![data_type.into()]) .collect(), - TypeSignature::Numeric(arg_count) => NUMERICS + TypeSignature::Numeric(arg_count) => EXAMPLE_NUMERIC_TYPES .iter() .cloned() .map(|numeric_type| vec![numeric_type; *arg_count]) .collect(), - TypeSignature::String(arg_count) => get_data_types(&NativeType::String) - .into_iter() - .map(|dt| vec![dt; *arg_count]) - .collect::>(), + TypeSignature::String(arg_count) => { + vec![vec![NativeType::String; *arg_count]] + } // TODO: Implement for other types TypeSignature::Any(_) | TypeSignature::Comparable(_) @@ -947,83 +952,6 @@ impl TypeSignature { } } -fn get_data_types(native_type: &NativeType) -> Vec { - match native_type { - NativeType::Null => vec![DataType::Null], - NativeType::Boolean => vec![DataType::Boolean], - NativeType::Int8 => vec![DataType::Int8], - NativeType::Int16 => vec![DataType::Int16], - NativeType::Int32 => vec![DataType::Int32], - NativeType::Int64 => vec![DataType::Int64], - NativeType::UInt8 => vec![DataType::UInt8], - NativeType::UInt16 => vec![DataType::UInt16], - NativeType::UInt32 => vec![DataType::UInt32], - NativeType::UInt64 => vec![DataType::UInt64], - NativeType::Float16 => vec![DataType::Float16], - NativeType::Float32 => vec![DataType::Float32], - NativeType::Float64 => vec![DataType::Float64], - NativeType::Date => vec![DataType::Date32, DataType::Date64], - NativeType::Binary => vec![ - DataType::Binary, - DataType::LargeBinary, - DataType::BinaryView, - ], - NativeType::String => { - vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] - } - NativeType::Decimal(precision, scale) => { - // We assume incoming NativeType is valid already, in terms of precision & scale - let mut types = vec![DataType::Decimal256(*precision, *scale)]; - if *precision <= DECIMAL32_MAX_PRECISION { - types.push(DataType::Decimal32(*precision, *scale)); - } - if *precision <= DECIMAL64_MAX_PRECISION { - types.push(DataType::Decimal64(*precision, *scale)); - } - if *precision <= DECIMAL128_MAX_PRECISION { - types.push(DataType::Decimal128(*precision, *scale)); - } - types - } - NativeType::Timestamp(time_unit, timezone) => { - vec![DataType::Timestamp(*time_unit, timezone.to_owned())] - } - NativeType::Time(TimeUnit::Second) => vec![DataType::Time32(TimeUnit::Second)], - NativeType::Time(TimeUnit::Millisecond) => { - vec![DataType::Time32(TimeUnit::Millisecond)] - } - NativeType::Time(TimeUnit::Microsecond) => { - vec![DataType::Time64(TimeUnit::Microsecond)] - } - NativeType::Time(TimeUnit::Nanosecond) => { - vec![DataType::Time64(TimeUnit::Nanosecond)] - } - NativeType::Duration(time_unit) => vec![DataType::Duration(*time_unit)], - NativeType::Interval(interval_unit) => vec![DataType::Interval(*interval_unit)], - NativeType::FixedSizeBinary(size) => vec![DataType::FixedSizeBinary(*size)], - NativeType::FixedSizeList(logical_field, size) => { - get_data_types(logical_field.logical_type.native()) - .iter() - .map(|child_dt| { - let field = Field::new( - logical_field.name.clone(), - child_dt.clone(), - logical_field.nullable, - ); - DataType::FixedSizeList(Arc::new(field), *size) - }) - .collect() - } - // TODO: implement for nested types - NativeType::List(_) - | NativeType::Struct(_) - | NativeType::Union(_) - | NativeType::Map(_) => { - vec![] - } - } -} - /// Represents type coercion rules for function arguments, specifying both the desired type /// and optional implicit coercion rules for source types. /// @@ -1572,7 +1500,10 @@ mod tests { fn test_get_possible_types() { let type_signature = TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]); let possible_types = type_signature.get_example_types(); - assert_eq!(possible_types, vec![vec![DataType::Int32, DataType::Int64]]); + assert_eq!( + possible_types, + vec![vec![NativeType::Int32, NativeType::Int64]] + ); let type_signature = TypeSignature::OneOf(vec![ TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]), @@ -1582,8 +1513,8 @@ mod tests { assert_eq!( possible_types, vec![ - vec![DataType::Int32, DataType::Int64], - vec![DataType::Float32, DataType::Float64] + vec![NativeType::Int32, NativeType::Int64], + vec![NativeType::Float32, NativeType::Float64] ] ); @@ -1596,9 +1527,9 @@ mod tests { assert_eq!( possible_types, vec![ - vec![DataType::Int32, DataType::Int64], - vec![DataType::Float32, DataType::Float64], - vec![DataType::Utf8] + vec![NativeType::Int32, NativeType::Int64], + vec![NativeType::Float32, NativeType::Float64], + vec![NativeType::String] ] ); @@ -1608,8 +1539,8 @@ mod tests { assert_eq!( possible_types, vec![ - vec![DataType::Float32, DataType::Float32], - vec![DataType::Int64, DataType::Int64] + vec![NativeType::Float32, NativeType::Float32], + vec![NativeType::Int64, NativeType::Int64] ] ); @@ -1620,11 +1551,7 @@ mod tests { let possible_types = type_signature.get_example_types(); assert_eq!( possible_types, - vec![ - vec![DataType::Utf8, DataType::Int64], - vec![DataType::LargeUtf8, DataType::Int64], - vec![DataType::Utf8View, DataType::Int64] - ] + vec![vec![NativeType::String, NativeType::Int64]] ); let type_signature = @@ -1632,7 +1559,7 @@ mod tests { let possible_types = type_signature.get_example_types(); assert_eq!( possible_types, - vec![vec![DataType::Int32], vec![DataType::Int64]] + vec![vec![NativeType::Int32], vec![NativeType::Int64]] ); let type_signature = TypeSignature::Numeric(2); @@ -1640,17 +1567,18 @@ mod tests { assert_eq!( possible_types, vec![ - vec![DataType::Int8, DataType::Int8], - vec![DataType::Int16, DataType::Int16], - vec![DataType::Int32, DataType::Int32], - vec![DataType::Int64, DataType::Int64], - vec![DataType::UInt8, DataType::UInt8], - vec![DataType::UInt16, DataType::UInt16], - vec![DataType::UInt32, DataType::UInt32], - vec![DataType::UInt64, DataType::UInt64], - vec![DataType::Float16, DataType::Float16], - vec![DataType::Float32, DataType::Float32], - vec![DataType::Float64, DataType::Float64] + vec![NativeType::Int8, NativeType::Int8], + vec![NativeType::Int16, NativeType::Int16], + vec![NativeType::Int32, NativeType::Int32], + vec![NativeType::Int64, NativeType::Int64], + vec![NativeType::UInt8, NativeType::UInt8], + vec![NativeType::UInt16, NativeType::UInt16], + vec![NativeType::UInt32, NativeType::UInt32], + vec![NativeType::UInt64, NativeType::UInt64], + vec![NativeType::Float16, NativeType::Float16], + vec![NativeType::Float32, NativeType::Float32], + vec![NativeType::Float64, NativeType::Float64], + vec![NATIVE_TYPE_DECIMAL, NATIVE_TYPE_DECIMAL], ] ); @@ -1658,11 +1586,7 @@ mod tests { let possible_types = type_signature.get_example_types(); assert_eq!( possible_types, - vec![ - vec![DataType::Utf8, DataType::Utf8], - vec![DataType::LargeUtf8, DataType::LargeUtf8], - vec![DataType::Utf8View, DataType::Utf8View] - ] + vec![vec![NativeType::String, NativeType::String],] ); } From 179f79417730a22698f56f04bb0d923bdb1a1049 Mon Sep 17 00:00:00 2001 From: theirix Date: Mon, 20 Apr 2026 20:08:40 +0300 Subject: [PATCH 2/7] Apply suggestions from code review Co-authored-by: Martin Grigorov --- datafusion/catalog/src/information_schema.rs | 2 +- datafusion/expr-common/src/signature.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index 53b9609c42bad..0e9da856a26db 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -525,7 +525,7 @@ fn get_data_type_for_schema(native_type: &NativeType) -> Option { } } -pub fn resolve_informational_field(idx: usize, t: &NativeType) -> Result { +fn resolve_informational_field(idx: usize, t: &NativeType) -> Result { if let Some(data_type) = get_data_type_for_schema(t) { Ok(Arc::new(Field::new(format!("arg_{idx}"), data_type, true))) } else { diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index b89e37170ac64..233c8272f12c2 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -592,7 +592,7 @@ impl Display for ArrayFunctionArgument { /// Constant that is used as a Decimal type for `get_example_types` /// Use Decimal256 precision as a reasonable default const NATIVE_TYPE_DECIMAL: NativeType = - NativeType::Decimal(DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE); + NativeType::Decimal(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE); /// Native types for `get_example_types` static EXAMPLE_NUMERIC_TYPES: &[NativeType] = &[ From a22c2f6d9c030960f4b4e5b7a420d69dc50fea89 Mon Sep 17 00:00:00 2001 From: theirix Date: Mon, 20 Apr 2026 18:11:31 +0100 Subject: [PATCH 3/7] Use of Decimals --- datafusion/catalog/src/information_schema.rs | 2 +- datafusion/expr-common/src/signature.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index 0e9da856a26db..0679e330d43f4 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -433,7 +433,7 @@ fn get_data_type_for_schema(native_type: &NativeType) -> Option { NativeType::Binary => Some(DataType::Binary), // A tie NativeType::String => Some(DataType::Utf8), // A tie NativeType::Decimal(precision, scale) => { - Some(DataType::Decimal256(*precision, *scale)) // A tie, use the widest type + Some(DataType::Decimal128(*precision, *scale)) // A tie } NativeType::Timestamp(time_unit, timezone) => { Some(DataType::Timestamp(*time_unit, timezone.to_owned())) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 233c8272f12c2..f7c5de5490db0 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -21,7 +21,7 @@ use std::fmt::Display; use std::hash::Hash; use arrow::datatypes::{ - DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DataType, IntervalUnit, TimeUnit, + DECIMAL_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION, DataType, IntervalUnit, TimeUnit, }; use datafusion_common::types::{LogicalType, LogicalTypeRef, NativeType}; use datafusion_common::utils::ListCoercion; @@ -590,9 +590,9 @@ impl Display for ArrayFunctionArgument { } /// Constant that is used as a Decimal type for `get_example_types` -/// Use Decimal256 precision as a reasonable default +/// Use Decimal128 precision as a reasonable default const NATIVE_TYPE_DECIMAL: NativeType = - NativeType::Decimal(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE); + NativeType::Decimal(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE); /// Native types for `get_example_types` static EXAMPLE_NUMERIC_TYPES: &[NativeType] = &[ From c54da404918d680a09a241fe255cab95767de844 Mon Sep 17 00:00:00 2001 From: theirix Date: Mon, 20 Apr 2026 18:21:44 +0100 Subject: [PATCH 4/7] Refactor get_data_type_for_schema to return result --- datafusion/catalog/src/information_schema.rs | 113 +++++++++---------- 1 file changed, 52 insertions(+), 61 deletions(-) diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index 0679e330d43f4..796e218ba7454 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -32,7 +32,7 @@ use async_trait::async_trait; use datafusion_common::config::{ConfigEntry, ConfigOptions}; use datafusion_common::error::Result; use datafusion_common::types::NativeType; -use datafusion_common::{DataFusionError, not_impl_err}; +use datafusion_common::{DataFusionError, internal_datafusion_err}; use datafusion_execution::TaskContext; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::function::WindowUDFFieldArgs; @@ -414,81 +414,76 @@ impl InformationSchemaConfig { /// Resolve a native type `NativeType` to `DataType` for use in the information schema /// Since it is one-to-many, use the most representative type on tie -fn get_data_type_for_schema(native_type: &NativeType) -> Option { +fn get_data_type_for_schema(native_type: &NativeType) -> Result { match native_type { - NativeType::Null => Some(DataType::Null), - NativeType::Boolean => Some(DataType::Boolean), - NativeType::Int8 => Some(DataType::Int8), - NativeType::Int16 => Some(DataType::Int16), - NativeType::Int32 => Some(DataType::Int32), - NativeType::Int64 => Some(DataType::Int64), - NativeType::UInt8 => Some(DataType::UInt8), - NativeType::UInt16 => Some(DataType::UInt16), - NativeType::UInt32 => Some(DataType::UInt32), - NativeType::UInt64 => Some(DataType::UInt64), - NativeType::Float16 => Some(DataType::Float16), - NativeType::Float32 => Some(DataType::Float32), - NativeType::Float64 => Some(DataType::Float64), - NativeType::Date => Some(DataType::Date32), // A tie - NativeType::Binary => Some(DataType::Binary), // A tie - NativeType::String => Some(DataType::Utf8), // A tie + NativeType::Null => Ok(DataType::Null), + NativeType::Boolean => Ok(DataType::Boolean), + NativeType::Int8 => Ok(DataType::Int8), + NativeType::Int16 => Ok(DataType::Int16), + NativeType::Int32 => Ok(DataType::Int32), + NativeType::Int64 => Ok(DataType::Int64), + NativeType::UInt8 => Ok(DataType::UInt8), + NativeType::UInt16 => Ok(DataType::UInt16), + NativeType::UInt32 => Ok(DataType::UInt32), + NativeType::UInt64 => Ok(DataType::UInt64), + NativeType::Float16 => Ok(DataType::Float16), + NativeType::Float32 => Ok(DataType::Float32), + NativeType::Float64 => Ok(DataType::Float64), + NativeType::Date => Ok(DataType::Date32), // A tie + NativeType::Binary => Ok(DataType::Binary), // A tie + NativeType::String => Ok(DataType::Utf8), // A tie NativeType::Decimal(precision, scale) => { - Some(DataType::Decimal128(*precision, *scale)) // A tie + Ok(DataType::Decimal128(*precision, *scale)) // A tie } NativeType::Timestamp(time_unit, timezone) => { - Some(DataType::Timestamp(*time_unit, timezone.to_owned())) + Ok(DataType::Timestamp(*time_unit, timezone.to_owned())) } - NativeType::Time(TimeUnit::Second) => Some(DataType::Time32(TimeUnit::Second)), + NativeType::Time(TimeUnit::Second) => Ok(DataType::Time32(TimeUnit::Second)), NativeType::Time(TimeUnit::Millisecond) => { - Some(DataType::Time32(TimeUnit::Millisecond)) + Ok(DataType::Time32(TimeUnit::Millisecond)) } NativeType::Time(TimeUnit::Microsecond) => { - Some(DataType::Time64(TimeUnit::Microsecond)) + Ok(DataType::Time64(TimeUnit::Microsecond)) } NativeType::Time(TimeUnit::Nanosecond) => { - Some(DataType::Time64(TimeUnit::Nanosecond)) + Ok(DataType::Time64(TimeUnit::Nanosecond)) } - NativeType::Duration(time_unit) => Some(DataType::Duration(*time_unit)), - NativeType::Interval(interval_unit) => Some(DataType::Interval(*interval_unit)), - NativeType::FixedSizeBinary(size) => Some(DataType::FixedSizeBinary(*size)), - NativeType::FixedSizeList(logical_field, size) => get_data_type_for_schema( - logical_field.logical_type.native(), - ) - .map(|child_dt| { - DataType::FixedSizeList( + NativeType::Duration(time_unit) => Ok(DataType::Duration(*time_unit)), + NativeType::Interval(interval_unit) => Ok(DataType::Interval(*interval_unit)), + NativeType::FixedSizeBinary(size) => Ok(DataType::FixedSizeBinary(*size)), + NativeType::FixedSizeList(logical_field, size) => { + let child_dt = get_data_type_for_schema(logical_field.logical_type.native())?; + Ok(DataType::FixedSizeList( Arc::new(Field::new( logical_field.name.clone(), child_dt, logical_field.nullable, )), *size, - ) - }), - NativeType::List(logical_field) => get_data_type_for_schema( - logical_field.logical_type.native(), - ) - .map(|child_dt| { - // A tie, use List - DataType::List(Arc::new(Field::new( + )) + } + NativeType::List(logical_field) => { + let child_dt = get_data_type_for_schema(logical_field.logical_type.native())?; + Ok(DataType::List(Arc::new(Field::new( logical_field.name.clone(), child_dt, logical_field.nullable, - ))) - }), + )))) + } NativeType::Struct(logical_fields) => { let fields = logical_fields .iter() .map(|logical_field| { let dt = get_data_type_for_schema(logical_field.logical_type.native())?; - Some(Arc::new(Field::new( + Ok(Arc::new(Field::new( logical_field.name.clone(), dt, logical_field.nullable, ))) }) - .collect::>()?; - Some(DataType::Struct(fields)) + .collect::>()?; + Ok(DataType::Struct(fields)) } NativeType::Union(logical_fields) => { let ids = logical_fields.iter().map(|(i, _)| *i).collect::>(); @@ -497,40 +492,36 @@ fn get_data_type_for_schema(native_type: &NativeType) -> Option { .map(|(_, logical_field)| { let dt = get_data_type_for_schema(logical_field.logical_type.native())?; - Some(Arc::new(Field::new( + Ok(Arc::new(Field::new( logical_field.name.clone(), dt, logical_field.nullable, ))) }) - .collect::>>()?; - Some(DataType::Union( - UnionFields::try_new(ids, fields).ok()?, + .collect::>>()?; + Ok(DataType::Union( + UnionFields::try_new(ids, fields) + .map_err(|e| internal_datafusion_err!("UnionFields error: {e}"))?, UnionMode::Dense, )) } - NativeType::Map(logical_field) => get_data_type_for_schema( - logical_field.logical_type.native(), - ) - .map(|child_dt| { - DataType::Map( + NativeType::Map(logical_field) => { + let child_dt = get_data_type_for_schema(logical_field.logical_type.native())?; + Ok(DataType::Map( Arc::new(Field::new( logical_field.name.clone(), child_dt, logical_field.nullable, )), true, - ) - }), + )) + } } } fn resolve_informational_field(idx: usize, t: &NativeType) -> Result { - if let Some(data_type) = get_data_type_for_schema(t) { - Ok(Arc::new(Field::new(format!("arg_{idx}"), data_type, true))) - } else { - not_impl_err!("No support in information schema for type: {}", t) - } + let data_type = get_data_type_for_schema(t)?; + Ok(Arc::new(Field::new(format!("arg_{idx}"), data_type, true))) } /// get the arguments and return types of a UDF From 50b6858f655a245e60a077a4f1b8852b3e1e5ca7 Mon Sep 17 00:00:00 2001 From: theirix Date: Tue, 21 Apr 2026 21:02:09 +0100 Subject: [PATCH 5/7] Remove old deprecated get_possible_types --- datafusion/expr-common/src/signature.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index f7c5de5490db0..ac5462e8809a2 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -881,10 +881,6 @@ impl TypeSignature { } } - #[deprecated(since = "46.0.0", note = "See get_example_types instead")] - pub fn get_possible_types(&self) -> Vec> { - self.get_example_types() - } /// Return example acceptable types for this `TypeSignature`' /// From 52769334a2b9c5dd48bafd1949279ba695ed2951 Mon Sep 17 00:00:00 2001 From: theirix Date: Tue, 21 Apr 2026 21:02:49 +0100 Subject: [PATCH 6/7] Avoid breaking get_example_types API Instead, add the new `get_representative_types` API with NativeType. Deprecated old `get_example_types` and related helpers, left as-is to avoid breaking API change. --- datafusion/catalog/src/information_schema.rs | 6 +- datafusion/expr-common/src/signature.rs | 344 +++++++++++++++++-- 2 files changed, 325 insertions(+), 25 deletions(-) diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index 796e218ba7454..51d1e73857ee3 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -530,7 +530,7 @@ fn get_udf_args_and_return_types( udf: &Arc, ) -> Result, Option)>> { let signature = udf.signature(); - let arg_types = signature.type_signature.get_example_types(); + let arg_types = signature.type_signature.get_representative_types(); if arg_types.is_empty() { Ok(vec![(vec![], None)].into_iter().collect::>()) } else { @@ -568,7 +568,7 @@ fn get_udaf_args_and_return_types( udaf: &Arc, ) -> Result, Option)>> { let signature = udaf.signature(); - let arg_types = signature.type_signature.get_example_types(); + let arg_types = signature.type_signature.get_representative_types(); if arg_types.is_empty() { Ok(vec![(vec![], None)].into_iter().collect::>()) } else { @@ -602,7 +602,7 @@ fn get_udwf_args_and_return_types( udwf: &Arc, ) -> Result, Option)>> { let signature = udwf.signature(); - let arg_types = signature.type_signature.get_example_types(); + let arg_types = signature.type_signature.get_representative_types(); if arg_types.is_empty() { Ok(vec![(vec![], None)].into_iter().collect::>()) } else { diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index ac5462e8809a2..ffac009aa4952 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -19,9 +19,12 @@ use std::fmt::Display; use std::hash::Hash; +use std::sync::Arc; use arrow::datatypes::{ - DECIMAL_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION, DataType, IntervalUnit, TimeUnit, + DECIMAL_DEFAULT_SCALE, DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, + DECIMAL128_MAX_PRECISION, DataType, Decimal128Type, DecimalType, Field, IntervalUnit, + TimeUnit, }; use datafusion_common::types::{LogicalType, LogicalTypeRef, NativeType}; use datafusion_common::utils::ListCoercion; @@ -414,7 +417,52 @@ impl TypeSignatureClass { /// /// This is used for `information_schema` and can be used to generate /// documentation or error messages. - fn get_example_types(&self) -> Vec { + /// Remove with `get_example_types` + #[deprecated(since = "53.0.0", note = "See get_representative_types instead")] + fn get_example_types(&self) -> Vec { + match self { + TypeSignatureClass::Any => vec![], + #[expect(deprecated)] + TypeSignatureClass::Native(l) => get_data_types(l.native()), + TypeSignatureClass::Timestamp => { + vec![ + DataType::Timestamp(TimeUnit::Nanosecond, None), + DataType::Timestamp( + TimeUnit::Nanosecond, + Some(TIMEZONE_WILDCARD.into()), + ), + ] + } + TypeSignatureClass::Time => { + vec![DataType::Time64(TimeUnit::Nanosecond)] + } + TypeSignatureClass::Interval => { + vec![DataType::Interval(IntervalUnit::DayTime)] + } + TypeSignatureClass::Duration => { + vec![DataType::Duration(TimeUnit::Nanosecond)] + } + TypeSignatureClass::Integer => { + vec![DataType::Int64] + } + TypeSignatureClass::Binary => { + vec![DataType::Binary] + } + TypeSignatureClass::Decimal => vec![Decimal128Type::DEFAULT_TYPE], + TypeSignatureClass::Float => vec![DataType::Float64], + TypeSignatureClass::Numeric => vec![ + DataType::Float64, + DataType::Int64, + Decimal128Type::DEFAULT_TYPE, + ], + } + } + + /// Get example acceptable types for this `TypeSignatureClass` + /// + /// This is used for `information_schema` and can be used to generate + /// documentation or error messages. + fn get_representative_types(&self) -> Vec { match self { TypeSignatureClass::Any => vec![], TypeSignatureClass::Native(l) => vec![l.native().clone()], @@ -610,6 +658,21 @@ static EXAMPLE_NUMERIC_TYPES: &[NativeType] = &[ NATIVE_TYPE_DECIMAL, ]; +#[deprecated(since = "53.0.0", note = "See get_representative_types instead")] +static NUMERICS: &[DataType] = &[ + DataType::Int8, + DataType::Int16, + DataType::Int32, + DataType::Int64, + DataType::UInt8, + DataType::UInt16, + DataType::UInt32, + DataType::UInt64, + DataType::Float16, + DataType::Float32, + DataType::Float64, +]; + impl TypeSignature { pub fn to_string_repr(&self) -> Vec { match self { @@ -881,6 +944,68 @@ impl TypeSignature { } } + /// Return example acceptable types for this `TypeSignature`' + /// + /// Returns a `Vec` for each argument to the function + /// + /// This is used for `information_schema` and can be used to generate + /// documentation or error messages. + #[deprecated(since = "53.0.0", note = "See get_representative_types instead")] + pub fn get_example_types(&self) -> Vec> { + #[expect(deprecated)] + match self { + TypeSignature::Exact(types) => vec![types.clone()], + TypeSignature::OneOf(types) => types + .iter() + .flat_map(|type_sig| type_sig.get_example_types()) + .collect(), + TypeSignature::Uniform(arg_count, types) => types + .iter() + .cloned() + .map(|data_type| vec![data_type; *arg_count]) + .collect(), + TypeSignature::Coercible(coercions) => coercions + .iter() + .map(|c| { + let mut all_types: IndexSet = + c.desired_type().get_example_types().into_iter().collect(); + + if let Some(implicit_coercion) = c.implicit_coercion() { + let allowed_casts: Vec = implicit_coercion + .allowed_source_types + .iter() + .flat_map(|t| t.get_example_types()) + .collect(); + all_types.extend(allowed_casts); + } + + all_types.into_iter().collect::>() + }) + .multi_cartesian_product() + .collect(), + TypeSignature::Variadic(types) => types + .iter() + .cloned() + .map(|data_type| vec![data_type]) + .collect(), + TypeSignature::Numeric(arg_count) => NUMERICS + .iter() + .cloned() + .map(|numeric_type| vec![numeric_type; *arg_count]) + .collect(), + TypeSignature::String(arg_count) => get_data_types(&NativeType::String) + .into_iter() + .map(|dt| vec![dt; *arg_count]) + .collect::>(), + // TODO: Implement for other types + TypeSignature::Any(_) + | TypeSignature::Comparable(_) + | TypeSignature::Nullary + | TypeSignature::VariadicAny + | TypeSignature::ArraySignature(_) + | TypeSignature::UserDefined => vec![], + } + } /// Return example acceptable types for this `TypeSignature`' /// @@ -888,7 +1013,7 @@ impl TypeSignature { /// /// This is used for `information_schema` and can be used to generate /// documentation or error messages. - pub fn get_example_types(&self) -> Vec> { + pub fn get_representative_types(&self) -> Vec> { match self { TypeSignature::Exact(types) => vec![ types @@ -898,7 +1023,7 @@ impl TypeSignature { ], TypeSignature::OneOf(types) => types .iter() - .flat_map(|type_sig| type_sig.get_example_types()) + .flat_map(|type_sig| type_sig.get_representative_types()) .collect(), TypeSignature::Uniform(arg_count, types) => types .iter() @@ -908,14 +1033,17 @@ impl TypeSignature { TypeSignature::Coercible(coercions) => coercions .iter() .map(|c| { - let mut all_types: IndexSet = - c.desired_type().get_example_types().into_iter().collect(); + let mut all_types: IndexSet = c + .desired_type() + .get_representative_types() + .into_iter() + .collect(); if let Some(implicit_coercion) = c.implicit_coercion() { let allowed_casts: Vec = implicit_coercion .allowed_source_types .iter() - .flat_map(|t| t.get_example_types()) + .flat_map(|t| t.get_representative_types()) .collect(); all_types.extend(allowed_casts); } @@ -948,6 +1076,84 @@ impl TypeSignature { } } +#[deprecated(since = "53.0.0", note = "See get_representative_types instead")] +fn get_data_types(native_type: &NativeType) -> Vec { + match native_type { + NativeType::Null => vec![DataType::Null], + NativeType::Boolean => vec![DataType::Boolean], + NativeType::Int8 => vec![DataType::Int8], + NativeType::Int16 => vec![DataType::Int16], + NativeType::Int32 => vec![DataType::Int32], + NativeType::Int64 => vec![DataType::Int64], + NativeType::UInt8 => vec![DataType::UInt8], + NativeType::UInt16 => vec![DataType::UInt16], + NativeType::UInt32 => vec![DataType::UInt32], + NativeType::UInt64 => vec![DataType::UInt64], + NativeType::Float16 => vec![DataType::Float16], + NativeType::Float32 => vec![DataType::Float32], + NativeType::Float64 => vec![DataType::Float64], + NativeType::Date => vec![DataType::Date32, DataType::Date64], + NativeType::Binary => vec![ + DataType::Binary, + DataType::LargeBinary, + DataType::BinaryView, + ], + NativeType::String => { + vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] + } + NativeType::Decimal(precision, scale) => { + // We assume incoming NativeType is valid already, in terms of precision & scale + let mut types = vec![DataType::Decimal256(*precision, *scale)]; + if *precision <= DECIMAL32_MAX_PRECISION { + types.push(DataType::Decimal32(*precision, *scale)); + } + if *precision <= DECIMAL64_MAX_PRECISION { + types.push(DataType::Decimal64(*precision, *scale)); + } + if *precision <= DECIMAL128_MAX_PRECISION { + types.push(DataType::Decimal128(*precision, *scale)); + } + types + } + NativeType::Timestamp(time_unit, timezone) => { + vec![DataType::Timestamp(*time_unit, timezone.to_owned())] + } + NativeType::Time(TimeUnit::Second) => vec![DataType::Time32(TimeUnit::Second)], + NativeType::Time(TimeUnit::Millisecond) => { + vec![DataType::Time32(TimeUnit::Millisecond)] + } + NativeType::Time(TimeUnit::Microsecond) => { + vec![DataType::Time64(TimeUnit::Microsecond)] + } + NativeType::Time(TimeUnit::Nanosecond) => { + vec![DataType::Time64(TimeUnit::Nanosecond)] + } + NativeType::Duration(time_unit) => vec![DataType::Duration(*time_unit)], + NativeType::Interval(interval_unit) => vec![DataType::Interval(*interval_unit)], + NativeType::FixedSizeBinary(size) => vec![DataType::FixedSizeBinary(*size)], + NativeType::FixedSizeList(logical_field, size) => { + get_data_types(logical_field.logical_type.native()) + .iter() + .map(|child_dt| { + let field = Field::new( + logical_field.name.clone(), + child_dt.clone(), + logical_field.nullable, + ); + DataType::FixedSizeList(Arc::new(field), *size) + }) + .collect() + } + // TODO: implement for nested types + NativeType::List(_) + | NativeType::Struct(_) + | NativeType::Union(_) + | NativeType::Map(_) => { + vec![] + } + } +} + /// Represents type coercion rules for function arguments, specifying both the desired type /// and optional implicit coercion rules for source types. /// @@ -1492,14 +1698,13 @@ mod tests { ); } + // Remove with get_example_types #[test] - fn test_get_possible_types() { + #[expect(deprecated)] + fn test_get_example_types() { let type_signature = TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]); let possible_types = type_signature.get_example_types(); - assert_eq!( - possible_types, - vec![vec![NativeType::Int32, NativeType::Int64]] - ); + assert_eq!(possible_types, vec![vec![DataType::Int32, DataType::Int64]]); let type_signature = TypeSignature::OneOf(vec![ TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]), @@ -1509,8 +1714,8 @@ mod tests { assert_eq!( possible_types, vec![ - vec![NativeType::Int32, NativeType::Int64], - vec![NativeType::Float32, NativeType::Float64] + vec![DataType::Int32, DataType::Int64], + vec![DataType::Float32, DataType::Float64] ] ); @@ -1523,9 +1728,9 @@ mod tests { assert_eq!( possible_types, vec![ - vec![NativeType::Int32, NativeType::Int64], - vec![NativeType::Float32, NativeType::Float64], - vec![NativeType::String] + vec![DataType::Int32, DataType::Int64], + vec![DataType::Float32, DataType::Float64], + vec![DataType::Utf8] ] ); @@ -1535,8 +1740,8 @@ mod tests { assert_eq!( possible_types, vec![ - vec![NativeType::Float32, NativeType::Float32], - vec![NativeType::Int64, NativeType::Int64] + vec![DataType::Float32, DataType::Float32], + vec![DataType::Int64, DataType::Int64] ] ); @@ -1547,7 +1752,11 @@ mod tests { let possible_types = type_signature.get_example_types(); assert_eq!( possible_types, - vec![vec![NativeType::String, NativeType::Int64]] + vec![ + vec![DataType::Utf8, DataType::Int64], + vec![DataType::LargeUtf8, DataType::Int64], + vec![DataType::Utf8View, DataType::Int64] + ] ); let type_signature = @@ -1555,13 +1764,104 @@ mod tests { let possible_types = type_signature.get_example_types(); assert_eq!( possible_types, - vec![vec![NativeType::Int32], vec![NativeType::Int64]] + vec![vec![DataType::Int32], vec![DataType::Int64]] ); let type_signature = TypeSignature::Numeric(2); let possible_types = type_signature.get_example_types(); assert_eq!( possible_types, + vec![ + vec![DataType::Int8, DataType::Int8], + vec![DataType::Int16, DataType::Int16], + vec![DataType::Int32, DataType::Int32], + vec![DataType::Int64, DataType::Int64], + vec![DataType::UInt8, DataType::UInt8], + vec![DataType::UInt16, DataType::UInt16], + vec![DataType::UInt32, DataType::UInt32], + vec![DataType::UInt64, DataType::UInt64], + vec![DataType::Float16, DataType::Float16], + vec![DataType::Float32, DataType::Float32], + vec![DataType::Float64, DataType::Float64] + ] + ); + + let type_signature = TypeSignature::String(2); + let possible_types = type_signature.get_example_types(); + assert_eq!( + possible_types, + vec![ + vec![DataType::Utf8, DataType::Utf8], + vec![DataType::LargeUtf8, DataType::LargeUtf8], + vec![DataType::Utf8View, DataType::Utf8View] + ] + ); + } + + #[test] + fn test_get_representative_types() { + let type_signature = TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]); + let types = type_signature.get_representative_types(); + assert_eq!(types, vec![vec![NativeType::Int32, NativeType::Int64]]); + + let type_signature = TypeSignature::OneOf(vec![ + TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]), + TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]), + ]); + let types = type_signature.get_representative_types(); + assert_eq!( + types, + vec![ + vec![NativeType::Int32, NativeType::Int64], + vec![NativeType::Float32, NativeType::Float64] + ] + ); + + let type_signature = TypeSignature::OneOf(vec![ + TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]), + TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]), + TypeSignature::Exact(vec![DataType::Utf8]), + ]); + let types = type_signature.get_representative_types(); + assert_eq!( + types, + vec![ + vec![NativeType::Int32, NativeType::Int64], + vec![NativeType::Float32, NativeType::Float64], + vec![NativeType::String] + ] + ); + + let type_signature = + TypeSignature::Uniform(2, vec![DataType::Float32, DataType::Int64]); + let types = type_signature.get_representative_types(); + assert_eq!( + types, + vec![ + vec![NativeType::Float32, NativeType::Float32], + vec![NativeType::Int64, NativeType::Int64] + ] + ); + + let type_signature = TypeSignature::Coercible(vec![ + Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_exact(TypeSignatureClass::Native(logical_int64())), + ]); + let types = type_signature.get_representative_types(); + assert_eq!(types, vec![vec![NativeType::String, NativeType::Int64]]); + + let type_signature = + TypeSignature::Variadic(vec![DataType::Int32, DataType::Int64]); + let types = type_signature.get_representative_types(); + assert_eq!( + types, + vec![vec![NativeType::Int32], vec![NativeType::Int64]] + ); + + let type_signature = TypeSignature::Numeric(2); + let types = type_signature.get_representative_types(); + assert_eq!( + types, vec![ vec![NativeType::Int8, NativeType::Int8], vec![NativeType::Int16, NativeType::Int16], @@ -1579,7 +1879,7 @@ mod tests { ); let type_signature = TypeSignature::String(2); - let possible_types = type_signature.get_example_types(); + let possible_types = type_signature.get_representative_types(); assert_eq!( possible_types, vec![vec![NativeType::String, NativeType::String],] From 840fbb99d7b149bb7774618eaf0ed15ea4fb5ae2 Mon Sep 17 00:00:00 2001 From: theirix Date: Wed, 22 Apr 2026 18:01:20 +0100 Subject: [PATCH 7/7] Fix docs --- datafusion/expr-common/src/signature.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index ffac009aa4952..97010ca286ed8 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -417,7 +417,7 @@ impl TypeSignatureClass { /// /// This is used for `information_schema` and can be used to generate /// documentation or error messages. - /// Remove with `get_example_types` + // Remove with `get_example_types` #[deprecated(since = "53.0.0", note = "See get_representative_types instead")] fn get_example_types(&self) -> Vec { match self {