diff --git a/rust/cubesql/cubesql/src/compile/engine/context_postgresql.rs b/rust/cubesql/cubesql/src/compile/engine/context_postgresql.rs index b51db77398491..f4bb670fc2a48 100644 --- a/rust/cubesql/cubesql/src/compile/engine/context_postgresql.rs +++ b/rust/cubesql/cubesql/src/compile/engine/context_postgresql.rs @@ -19,9 +19,9 @@ use super::information_schema::postgres::{ InfoSchemaTestingBlockingProvider, InfoSchemaTestingDatasetProvider, PgCatalogAmProvider, PgCatalogAttrdefProvider, PgCatalogAttributeProvider, PgCatalogAuthMembersProvider, PgCatalogAvailableExtensionVersionsProvider, PgCatalogCastProvider, PgCatalogClassProvider, - PgCatalogConstraintProvider, PgCatalogDatabaseProvider, PgCatalogDependProvider, - PgCatalogDescriptionProvider, PgCatalogEnumProvider, PgCatalogEventTriggerProvider, - PgCatalogExtensionProvider, PgCatalogForeignDataWrapperProvider, + PgCatalogCollationProvider, PgCatalogConstraintProvider, PgCatalogDatabaseProvider, + PgCatalogDependProvider, PgCatalogDescriptionProvider, PgCatalogEnumProvider, + PgCatalogEventTriggerProvider, PgCatalogExtensionProvider, PgCatalogForeignDataWrapperProvider, PgCatalogForeignServerProvider, PgCatalogForeignTableProvider, PgCatalogIndexProvider, PgCatalogInheritsProvider, PgCatalogLanguageProvider, PgCatalogLocksProvider, PgCatalogMatviewsProvider, PgCatalogNamespaceProvider, PgCatalogOperatorProvider, @@ -99,6 +99,8 @@ impl DatabaseProtocol { "pg_catalog.pg_index".to_string() } else if let Some(_) = any.downcast_ref::() { "pg_catalog.pg_class".to_string() + } else if let Some(_) = any.downcast_ref::() { + "pg_catalog.pg_collation".to_string() } else if let Some(_) = any.downcast_ref::() { "pg_catalog.pg_proc".to_string() } else if let Some(_) = any.downcast_ref::() { @@ -377,6 +379,7 @@ impl DatabaseProtocol { "pg_class" => { return Some(Arc::new(PgCatalogClassProvider::new(&context.meta.tables))) } + "pg_collation" => return Some(Arc::new(PgCatalogCollationProvider::new())), "pg_proc" => return Some(Arc::new(PgCatalogProcProvider::new())), "pg_settings" => { return Some(Arc::new(PgCatalogSettingsProvider::new( diff --git a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs index 7a589b083a2a9..505f9607f1697 100644 --- a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs +++ b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs @@ -20,6 +20,7 @@ mod pg_auth_members; mod pg_available_extension_versions; mod pg_cast; mod pg_class; +mod pg_collation; mod pg_constraint; mod pg_database; mod pg_depend; @@ -71,6 +72,7 @@ pub use pg_auth_members::*; pub use pg_available_extension_versions::*; pub use pg_cast::*; pub use pg_class::*; +pub use pg_collation::*; pub use pg_constraint::*; pub use pg_database::*; pub use pg_depend::*; diff --git a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs new file mode 100644 index 0000000000000..2a73e6427b954 --- /dev/null +++ b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs @@ -0,0 +1,275 @@ +use std::sync::Arc; + +use async_trait::async_trait; + +use datafusion::{ + arrow::{ + array::{Array, ArrayRef, BooleanBuilder, Int32Builder, StringBuilder, UInt32Builder}, + datatypes::{DataType, Field, Schema}, + record_batch::RecordBatch, + }, + datasource::{datasource::TableProviderFilterPushDown, TableProvider}, + error::Result, + logical_plan::Expr, + physical_plan::{memory::MemoryExec, ExecutionPlan}, +}; + +use crate::compile::engine::information_schema::postgres::PG_NAMESPACE_CATALOG_OID; + +struct PgCollation { + oid: u32, + collname: &'static str, + collnamespace: u32, + collowner: u32, + collprovider: String, + collisdeterministic: bool, + collencoding: i32, + collcollate: Option, + collctype: Option, + // Column `colliculocale` is renamed to `colllocale` since PostgreSQL 17. + colllocale: Option, + collicurules: Option, + collversion: Option, +} + +struct PgCatalogCollationBuilder { + oid: UInt32Builder, + collname: StringBuilder, + collnamespace: UInt32Builder, + collowner: UInt32Builder, + collprovider: StringBuilder, + collisdeterministic: BooleanBuilder, + collencoding: Int32Builder, + collcollate: StringBuilder, + collctype: StringBuilder, + // Column `colliculocale` is renamed to `colllocale` since PostgreSQL 17. + // Support both columns for backward-compatibility. + // Reference: https://pgpedia.info/p/pg_collation.html + colllocale: StringBuilder, + colliculocale: StringBuilder, + collicurules: StringBuilder, + collversion: StringBuilder, +} + +impl PgCatalogCollationBuilder { + fn new(capacity: usize) -> Self { + Self { + oid: UInt32Builder::new(capacity), + collname: StringBuilder::new(capacity), + collnamespace: UInt32Builder::new(capacity), + collowner: UInt32Builder::new(capacity), + collprovider: StringBuilder::new(capacity), + collisdeterministic: BooleanBuilder::new(capacity), + collencoding: Int32Builder::new(capacity), + collcollate: StringBuilder::new(capacity), + collctype: StringBuilder::new(capacity), + colllocale: StringBuilder::new(capacity), + colliculocale: StringBuilder::new(capacity), + collicurules: StringBuilder::new(capacity), + collversion: StringBuilder::new(capacity), + } + } + fn add_collation(&mut self, coll: &PgCollation) { + self.oid.append_value(coll.oid).unwrap(); + self.collname.append_value(coll.collname).unwrap(); + self.collnamespace.append_value(coll.collnamespace).unwrap(); + self.collowner.append_value(coll.collowner).unwrap(); + self.collprovider + .append_value(coll.collprovider.clone()) + .unwrap(); + self.collisdeterministic + .append_value(coll.collisdeterministic) + .unwrap(); + self.collencoding.append_value(coll.collencoding).unwrap(); + self.collcollate + .append_option(coll.collcollate.clone()) + .unwrap(); + self.collctype + .append_option(coll.collctype.clone()) + .unwrap(); + self.colllocale + .append_option(coll.colllocale.clone()) + .unwrap(); + // Column `colliculocale` is renamed to `colllocale` since PostgreSQL 17. + self.colliculocale + .append_option(coll.colllocale.clone()) + .unwrap(); + self.collicurules + .append_option(coll.collicurules.clone()) + .unwrap(); + self.collversion + .append_option(coll.collversion.clone()) + .unwrap(); + } + + fn finish(mut self) -> Vec> { + let columns: Vec> = vec![ + Arc::new(self.oid.finish()), + Arc::new(self.collname.finish()), + Arc::new(self.collnamespace.finish()), + Arc::new(self.collowner.finish()), + Arc::new(self.collprovider.finish()), + Arc::new(self.collisdeterministic.finish()), + Arc::new(self.collencoding.finish()), + Arc::new(self.collcollate.finish()), + Arc::new(self.collctype.finish()), + Arc::new(self.colllocale.finish()), + Arc::new(self.colliculocale.finish()), + Arc::new(self.collicurules.finish()), + Arc::new(self.collversion.finish()), + ]; + columns + } +} + +pub struct PgCatalogCollationProvider { + data: Arc>, +} + +impl PgCatalogCollationProvider { + pub fn new() -> Self { + // See https://github.com/postgres/postgres/blob/REL_17_6/src/include/catalog/pg_collation.h + let mut builder = PgCatalogCollationBuilder::new(6); + + // Initial contents of the pg_collation system catalog. + // See https://github.com/postgres/postgres/blob/REL_17_6/src/include/catalog/pg_collation.dat + + // database's default collation + builder.add_collation(&PgCollation { + oid: 100, + collname: "default", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "d".to_string(), + collisdeterministic: true, + collencoding: -1, + collcollate: None, + collctype: None, + colllocale: None, + collicurules: None, + collversion: None, + }); + // standard C collation + builder.add_collation(&PgCollation { + oid: 950, + collname: "C", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "c".to_string(), + collisdeterministic: true, + collencoding: -1, + collcollate: Some("C".to_string()), + collctype: Some("C".to_string()), + colllocale: None, + collicurules: None, + collversion: None, + }); + // standard POSIX collation + builder.add_collation(&PgCollation { + oid: 951, + collname: "POSIX", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "c".to_string(), + collisdeterministic: true, + collencoding: -1, + collcollate: Some("POSIX".to_string()), + collctype: Some("POSIX".to_string()), + colllocale: None, + collicurules: None, + collversion: None, + }); + // sorts by Unicode code point, C character semantics + builder.add_collation(&PgCollation { + oid: 962, + collname: "ucs_basic", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "b".to_string(), + collisdeterministic: true, + collencoding: 6, + collcollate: None, + collctype: None, + colllocale: Some("C".to_string()), + collicurules: None, + collversion: Some("1".to_string()), + }); + // sorts using the Unicode Collation Algorithm with default settings + builder.add_collation(&PgCollation { + oid: 963, + collname: "unicode", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "i".to_string(), + collisdeterministic: true, + collencoding: -1, + collcollate: None, + collctype: None, + colllocale: Some("und".to_string()), + collicurules: None, + collversion: Some("153.128".to_string()), + }); + // sorts by Unicode code point; Unicode and POSIX character semantics + builder.add_collation(&PgCollation { + oid: 811, + collname: "pg_c_utf8", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "b".to_string(), + collisdeterministic: true, + collencoding: 6, + collcollate: None, + collctype: None, + colllocale: Some("C.UTF-8".to_string()), + collicurules: None, + collversion: Some("1".to_string()), + }); + Self { + data: Arc::new(builder.finish()), + } + } +} + +#[async_trait] +impl TableProvider for PgCatalogCollationProvider { + fn as_any(&self) -> &dyn std::any::Any { + self + } + fn schema(&self) -> datafusion::arrow::datatypes::SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("oid", DataType::UInt32, false), + Field::new("collname", DataType::Utf8, false), + Field::new("collnamespace", DataType::UInt32, false), + Field::new("collowner", DataType::UInt32, false), + Field::new("collprovider", DataType::Utf8, false), + Field::new("collisdeterministic", DataType::Boolean, false), + Field::new("collencoding", DataType::Int32, false), + Field::new("collcollate", DataType::Utf8, true), + Field::new("collctype", DataType::Utf8, true), + Field::new("colllocale", DataType::Utf8, true), + Field::new("colliculocale", DataType::Utf8, true), + Field::new("collicurules", DataType::Utf8, true), + Field::new("collversion", DataType::Utf8, true), + ])) + } + async fn scan( + &self, + projection: &Option>, + _filters: &[Expr], + // limit can be used to reduce the amount scanned + // from the datasource as a performance optimization. + // If set, it contains the amount of rows needed by the `LogicalPlan`, + // The datasource should return *at least* this number of rows if available. + _limit: Option, + ) -> Result> { + let batch = RecordBatch::try_new(self.schema(), self.data.to_vec())?; + Ok(Arc::new(MemoryExec::try_new( + &[vec![batch]], + self.schema(), + projection.clone(), + )?)) + } + fn supports_filter_pushdown(&self, _filter: &Expr) -> Result { + Ok(TableProviderFilterPushDown::Unsupported) + } +} diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 26d707cca51f2..98b18e4ce9c2d 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -17698,4 +17698,17 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), } ) } + + #[tokio::test] + async fn test_pg_collation() -> Result<(), CubeError> { + insta::assert_snapshot!( + "pg_collation_PG17", + execute_query( + "SELECT * FROM pg_catalog.pg_collation ORDER BY oid".to_string(), + DatabaseProtocol::PostgreSQL + ) + .await? + ); + Ok(()) + } } diff --git a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation_PG17.snap b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation_PG17.snap new file mode 100644 index 0000000000000..3a732611ce912 --- /dev/null +++ b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation_PG17.snap @@ -0,0 +1,14 @@ +--- +source: cubesql/src/compile/mod.rs +expression: "execute_query(\"SELECT * FROM pg_catalog.pg_collation ORDER BY oid\".to_string(),\nDatabaseProtocol::PostgreSQL).await?" +--- ++-----+-----------+---------------+-----------+--------------+---------------------+--------------+-------------+-----------+------------+---------------+--------------+-------------+ +| oid | collname | collnamespace | collowner | collprovider | collisdeterministic | collencoding | collcollate | collctype | colllocale | colliculocale | collicurules | collversion | ++-----+-----------+---------------+-----------+--------------+---------------------+--------------+-------------+-----------+------------+---------------+--------------+-------------+ +| 100 | default | 11 | 10 | d | true | -1 | NULL | NULL | NULL | NULL | NULL | NULL | +| 811 | pg_c_utf8 | 11 | 10 | b | true | 6 | NULL | NULL | C.UTF-8 | C.UTF-8 | NULL | 1 | +| 950 | C | 11 | 10 | c | true | -1 | C | C | NULL | NULL | NULL | NULL | +| 951 | POSIX | 11 | 10 | c | true | -1 | POSIX | POSIX | NULL | NULL | NULL | NULL | +| 962 | ucs_basic | 11 | 10 | b | true | 6 | NULL | NULL | C | C | NULL | 1 | +| 963 | unicode | 11 | 10 | i | true | -1 | NULL | NULL | und | und | NULL | 153.128 | ++-----+-----------+---------------+-----------+--------------+---------------------+--------------+-------------+-----------+------------+---------------+--------------+-------------+