From 06260dd302ec576506e9f82564dd4ad40cf2284a Mon Sep 17 00:00:00 2001 From: giangblackk Date: Sat, 13 Sep 2025 16:29:19 +0700 Subject: [PATCH 1/9] add pg_collation to pg_catalog --- .../engine/information_schema/postgres/mod.rs | 2 + .../postgres/pg_collation.rs | 249 ++++++++++++++++++ 2 files changed, 251 insertions(+) create mode 100644 rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs diff --git a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs index 7a589b083a2a9..9ddaa1585262b 100644 --- a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs +++ b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs @@ -58,6 +58,7 @@ mod pg_type; mod pg_user; mod pg_user_mapping; mod pg_views; +mod pg_collation; mod role_column_grants; mod role_table_grants; mod testing_blocking; @@ -109,6 +110,7 @@ pub use pg_type::*; pub use pg_user::*; pub use pg_user_mapping::*; pub use pg_views::*; +pub use pg_collation::*; pub use role_column_grants::*; pub use role_table_grants::*; pub use sql_implementation_info::*; diff --git a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs new file mode 100644 index 0000000000000..b04247d433ea4 --- /dev/null +++ b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs @@ -0,0 +1,249 @@ +use std::sync::Arc; + +use async_trait::async_trait; + +use datafusion::{ + arrow::{ + array::{Array, ArrayRef, BooleanBuilder, Int32Builder, StringBuilder, UInt32Builder}, + datatypes::{DataType, Field, Schema}, + record_batch::RecordBatch, + }, + datasource::{datasource::TableProviderFilterPushDown, TableProvider}, + error::Result, + logical_plan::Expr, + physical_plan::{memory::MemoryExec, ExecutionPlan}, +}; + +use crate::compile::engine::information_schema::postgres::PG_NAMESPACE_CATALOG_OID; + +struct PgCollation { + oid: u32, + collname: &'static str, + collnamespace: u32, + collowner: u32, + collprovider: String, + collisdeterministic: bool, + collencoding: i32, + collcollate: Option, + collctype: Option, + // Column `colliculocale` renamed to `colllocale` since PostgreSQL 17. + // Support both columns for backward-compatibility. + // Reference: https://pgpedia.info/p/pg_collation.html + colliculocale: Option, + colllocale: Option, + collicurules: Option, + collversion: Option, +} + +struct PgCatalogCollationBuilder { + oid: UInt32Builder, + collname: StringBuilder, + collnamespace: UInt32Builder, + collowner: UInt32Builder, + collprovider: StringBuilder, + collisdeterministic: BooleanBuilder, + collencoding: Int32Builder, + colliculocale: StringBuilder, + collcollate: StringBuilder, + collctype: StringBuilder, + colllocale: StringBuilder, + collicurules: StringBuilder, + collversion: StringBuilder, +} + +impl PgCatalogCollationBuilder { + fn new() -> Self { + let capacity = 5; + Self { + oid: UInt32Builder::new(capacity), + collname: StringBuilder::new(capacity), + collnamespace: UInt32Builder::new(capacity), + collowner: UInt32Builder::new(capacity), + collprovider: StringBuilder::new(capacity), + collisdeterministic: BooleanBuilder::new(capacity), + collencoding: Int32Builder::new(capacity), + colliculocale: StringBuilder::new(capacity), + collcollate: StringBuilder::new(capacity), + collctype: StringBuilder::new(capacity), + colllocale: StringBuilder::new(capacity), + collicurules: StringBuilder::new(capacity), + collversion: StringBuilder::new(capacity), + } + } + fn add_collation(&mut self, coll: &PgCollation) { + self.oid.append_value(coll.oid).unwrap(); + self.collname.append_value(coll.collname).unwrap(); + self.collnamespace.append_value(coll.collnamespace).unwrap(); + self.collowner.append_value(coll.collowner).unwrap(); + self.collprovider + .append_value(coll.collprovider.clone()) + .unwrap(); + self.collisdeterministic + .append_value(coll.collisdeterministic) + .unwrap(); + self.collencoding.append_value(coll.collencoding).unwrap(); + self.colliculocale + .append_option(coll.colliculocale.clone()) + .unwrap(); + self.collcollate + .append_option(coll.collcollate.clone()) + .unwrap(); + self.collctype + .append_option(coll.collctype.clone()) + .unwrap(); + self.collicurules + .append_option(coll.collicurules.clone()) + .unwrap(); + self.colllocale + .append_option(coll.colllocale.clone()) + .unwrap(); + self.collversion + .append_option(coll.collversion.clone()) + .unwrap(); + } + + fn finish(mut self) -> Vec> { + let columns: Vec> = vec![ + Arc::new(self.oid.finish()), + Arc::new(self.collname.finish()), + Arc::new(self.collnamespace.finish()), + Arc::new(self.collowner.finish()), + Arc::new(self.collprovider.finish()), + Arc::new(self.collisdeterministic.finish()), + Arc::new(self.collencoding.finish()), + Arc::new(self.colliculocale.finish()), + Arc::new(self.collcollate.finish()), + Arc::new(self.collctype.finish()), + Arc::new(self.collicurules.finish()), + Arc::new(self.colllocale.finish()), + Arc::new(self.collversion.finish()), + ]; + columns + } +} + +pub struct PgCatalogCollationProvider { + data: Arc>, +} + +impl PgCatalogCollationProvider { + pub fn new() -> Self { + // See https://github.com/postgres/postgres/blob/REL_16_4/src/include/catalog/pg_collation.h + let mut builder = PgCatalogCollationBuilder::new(); + + // Initial contents of the pg_collation system catalog. + // See https://github.com/postgres/postgres/blob/REL_16_4/src/include/catalog/pg_collation.dat + builder.add_collation(&PgCollation { + oid: 100, + collname: "default", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "d".to_string(), + collisdeterministic: true, + collencoding: -1, + collcollate: None, + colliculocale: None, + collctype: None, + colllocale: None, + collicurules: None, + collversion: None, + }); + builder.add_collation(&PgCollation { + oid: 950, + collname: "C", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "c".to_string(), + collisdeterministic: true, + collencoding: -1, + collcollate: Some("C".to_string()), + colliculocale: Some("C".to_string()), + collctype: Some("C".to_string()), + colllocale: None, + collicurules: None, + collversion: None, + }); + builder.add_collation(&PgCollation { + oid: 951, + collname: "POSIX", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "c".to_string(), + collisdeterministic: true, + collencoding: -1, + collcollate: Some("POSIX".to_string()), + colliculocale: Some("POSIX".to_string()), + collctype: Some("POSIX".to_string()), + colllocale: None, + collicurules: None, + collversion: None, + }); + builder.add_collation(&PgCollation { + oid: 962, + collname: "usc_basic", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "c".to_string(), + collisdeterministic: true, + collencoding: 6, + collcollate: Some("C".to_string()), + colliculocale: Some("C".to_string()), + collctype: Some("C".to_string()), + colllocale: None, + collicurules: None, + collversion: None, + }); + builder.add_collation(&PgCollation { + oid: 963, + collname: "unicode", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "i".to_string(), + collisdeterministic: true, + collencoding: -1, + collcollate: None, + colliculocale: None, + collctype: None, + colllocale: Some("und".to_string()), + collicurules: None, + collversion: Some("153.121".to_string()), + }); + Self { + data: Arc::new(builder.finish()), + } + } +} + +#[async_trait] +impl TableProvider for PgCatalogCollationProvider { + fn as_any(&self) -> &dyn std::any::Any { + self + } + fn schema(&self) -> datafusion::arrow::datatypes::SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("oid", DataType::UInt32, false), + Field::new("collname", DataType::Utf8, false), + Field::new("collnamespace", DataType::UInt32, false), + ])) + } + async fn scan( + &self, + projection: &Option>, + _filters: &[Expr], + // limit can be used to reduce the amount scanned + // from the datasource as a performance optimization. + // If set, it contains the amount of rows needed by the `LogicalPlan`, + // The datasource should return *at least* this number of rows if available. + _limit: Option, + ) -> Result> { + let batch = RecordBatch::try_new(self.schema(), self.data.to_vec())?; + Ok(Arc::new(MemoryExec::try_new( + &[vec![batch]], + self.schema(), + projection.clone(), + )?)) + } + fn supports_filter_pushdown(&self, _filter: &Expr) -> Result { + Ok(TableProviderFilterPushDown::Unsupported) + } +} From a7ec0750527f06cd88b48456e14e1ff769c7f6ec Mon Sep 17 00:00:00 2001 From: giangblackk Date: Mon, 15 Sep 2025 15:55:03 +0700 Subject: [PATCH 2/9] fix clippy fmt --- .../src/compile/engine/information_schema/postgres/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs index 9ddaa1585262b..505f9607f1697 100644 --- a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs +++ b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/mod.rs @@ -20,6 +20,7 @@ mod pg_auth_members; mod pg_available_extension_versions; mod pg_cast; mod pg_class; +mod pg_collation; mod pg_constraint; mod pg_database; mod pg_depend; @@ -58,7 +59,6 @@ mod pg_type; mod pg_user; mod pg_user_mapping; mod pg_views; -mod pg_collation; mod role_column_grants; mod role_table_grants; mod testing_blocking; @@ -72,6 +72,7 @@ pub use pg_auth_members::*; pub use pg_available_extension_versions::*; pub use pg_cast::*; pub use pg_class::*; +pub use pg_collation::*; pub use pg_constraint::*; pub use pg_database::*; pub use pg_depend::*; @@ -110,7 +111,6 @@ pub use pg_type::*; pub use pg_user::*; pub use pg_user_mapping::*; pub use pg_views::*; -pub use pg_collation::*; pub use role_column_grants::*; pub use role_table_grants::*; pub use sql_implementation_info::*; From b2702cc816904305d60e73e06677adef4d37c7b8 Mon Sep 17 00:00:00 2001 From: giangblackk Date: Thu, 18 Sep 2025 11:59:31 +0700 Subject: [PATCH 3/9] add missing column in table `pg_collation` schema and reorganize column orders --- .../postgres/pg_collation.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs index b04247d433ea4..3595498673dd4 100644 --- a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs +++ b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs @@ -91,12 +91,12 @@ impl PgCatalogCollationBuilder { self.collctype .append_option(coll.collctype.clone()) .unwrap(); - self.collicurules - .append_option(coll.collicurules.clone()) - .unwrap(); self.colllocale .append_option(coll.colllocale.clone()) .unwrap(); + self.collicurules + .append_option(coll.collicurules.clone()) + .unwrap(); self.collversion .append_option(coll.collversion.clone()) .unwrap(); @@ -114,8 +114,8 @@ impl PgCatalogCollationBuilder { Arc::new(self.colliculocale.finish()), Arc::new(self.collcollate.finish()), Arc::new(self.collctype.finish()), - Arc::new(self.collicurules.finish()), Arc::new(self.colllocale.finish()), + Arc::new(self.collicurules.finish()), Arc::new(self.collversion.finish()), ]; columns @@ -224,6 +224,16 @@ impl TableProvider for PgCatalogCollationProvider { Field::new("oid", DataType::UInt32, false), Field::new("collname", DataType::Utf8, false), Field::new("collnamespace", DataType::UInt32, false), + Field::new("collowner", DataType::UInt32, false), + Field::new("collprovider", DataType::Utf8, false), + Field::new("collisdeterministic", DataType::Boolean, false), + Field::new("collencoding", DataType::Int32, false), + Field::new("colliculocale", DataType::Utf8, false), + Field::new("collcollate", DataType::Utf8, false), + Field::new("collctype", DataType::Utf8, false), + Field::new("colllocale", DataType::Utf8, false), + Field::new("collicurules", DataType::Utf8, false), + Field::new("collversion", DataType::Utf8, false), ])) } async fn scan( From cb4853fe0c3a42ee5ff16574fe603835cf5307ed Mon Sep 17 00:00:00 2001 From: giangblackk Date: Thu, 18 Sep 2025 12:02:18 +0700 Subject: [PATCH 4/9] add unit test for `pg_catalog.pg_collation` table --- .../src/compile/engine/context_postgresql.rs | 9 ++++++--- rust/cubesql/cubesql/src/compile/mod.rs | 13 +++++++++++++ .../cubesql__compile__tests__pg_collation.snap | 13 +++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation.snap diff --git a/rust/cubesql/cubesql/src/compile/engine/context_postgresql.rs b/rust/cubesql/cubesql/src/compile/engine/context_postgresql.rs index b51db77398491..f4bb670fc2a48 100644 --- a/rust/cubesql/cubesql/src/compile/engine/context_postgresql.rs +++ b/rust/cubesql/cubesql/src/compile/engine/context_postgresql.rs @@ -19,9 +19,9 @@ use super::information_schema::postgres::{ InfoSchemaTestingBlockingProvider, InfoSchemaTestingDatasetProvider, PgCatalogAmProvider, PgCatalogAttrdefProvider, PgCatalogAttributeProvider, PgCatalogAuthMembersProvider, PgCatalogAvailableExtensionVersionsProvider, PgCatalogCastProvider, PgCatalogClassProvider, - PgCatalogConstraintProvider, PgCatalogDatabaseProvider, PgCatalogDependProvider, - PgCatalogDescriptionProvider, PgCatalogEnumProvider, PgCatalogEventTriggerProvider, - PgCatalogExtensionProvider, PgCatalogForeignDataWrapperProvider, + PgCatalogCollationProvider, PgCatalogConstraintProvider, PgCatalogDatabaseProvider, + PgCatalogDependProvider, PgCatalogDescriptionProvider, PgCatalogEnumProvider, + PgCatalogEventTriggerProvider, PgCatalogExtensionProvider, PgCatalogForeignDataWrapperProvider, PgCatalogForeignServerProvider, PgCatalogForeignTableProvider, PgCatalogIndexProvider, PgCatalogInheritsProvider, PgCatalogLanguageProvider, PgCatalogLocksProvider, PgCatalogMatviewsProvider, PgCatalogNamespaceProvider, PgCatalogOperatorProvider, @@ -99,6 +99,8 @@ impl DatabaseProtocol { "pg_catalog.pg_index".to_string() } else if let Some(_) = any.downcast_ref::() { "pg_catalog.pg_class".to_string() + } else if let Some(_) = any.downcast_ref::() { + "pg_catalog.pg_collation".to_string() } else if let Some(_) = any.downcast_ref::() { "pg_catalog.pg_proc".to_string() } else if let Some(_) = any.downcast_ref::() { @@ -377,6 +379,7 @@ impl DatabaseProtocol { "pg_class" => { return Some(Arc::new(PgCatalogClassProvider::new(&context.meta.tables))) } + "pg_collation" => return Some(Arc::new(PgCatalogCollationProvider::new())), "pg_proc" => return Some(Arc::new(PgCatalogProcProvider::new())), "pg_settings" => { return Some(Arc::new(PgCatalogSettingsProvider::new( diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 26d707cca51f2..942ffb8c4bfef 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -17698,4 +17698,17 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), } ) } + + #[tokio::test] + async fn test_pg_collation() -> Result<(), CubeError> { + insta::assert_snapshot!( + "pg_collation", + execute_query( + "SELECT * FROM pg_catalog.pg_collation".to_string(), + DatabaseProtocol::PostgreSQL + ) + .await? + ); + Ok(()) + } } diff --git a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation.snap b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation.snap new file mode 100644 index 0000000000000..1f8f5df1bfef8 --- /dev/null +++ b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation.snap @@ -0,0 +1,13 @@ +--- +source: cubesql/src/compile/mod.rs +expression: "execute_query(\"SELECT * FROM pg_catalog.pg_collation\".to_string(),\nDatabaseProtocol::PostgreSQL).await?" +--- ++-----+-----------+---------------+-----------+--------------+---------------------+--------------+---------------+-------------+-----------+------------+--------------+-------------+ +| oid | collname | collnamespace | collowner | collprovider | collisdeterministic | collencoding | colliculocale | collcollate | collctype | colllocale | collicurules | collversion | ++-----+-----------+---------------+-----------+--------------+---------------------+--------------+---------------+-------------+-----------+------------+--------------+-------------+ +| 100 | default | 11 | 10 | d | true | -1 | NULL | NULL | NULL | NULL | NULL | NULL | +| 950 | C | 11 | 10 | c | true | -1 | C | C | C | NULL | NULL | NULL | +| 951 | POSIX | 11 | 10 | c | true | -1 | POSIX | POSIX | POSIX | NULL | NULL | NULL | +| 962 | usc_basic | 11 | 10 | c | true | 6 | C | C | C | NULL | NULL | NULL | +| 963 | unicode | 11 | 10 | i | true | -1 | NULL | NULL | NULL | und | NULL | 153.121 | ++-----+-----------+---------------+-----------+--------------+---------------------+--------------+---------------+-------------+-----------+------------+--------------+-------------+ From afca8af0bea88ff3fa630ea26765163c129b28a4 Mon Sep 17 00:00:00 2001 From: Giangblackk Date: Thu, 18 Sep 2025 22:17:58 +0700 Subject: [PATCH 5/9] Update rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs Co-authored-by: Alex Qyoun-ae <4062971+MazterQyou@users.noreply.github.com> --- .../compile/engine/information_schema/postgres/pg_collation.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs index 3595498673dd4..0b9f27fcc85f4 100644 --- a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs +++ b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs @@ -27,9 +27,6 @@ struct PgCollation { collcollate: Option, collctype: Option, // Column `colliculocale` renamed to `colllocale` since PostgreSQL 17. - // Support both columns for backward-compatibility. - // Reference: https://pgpedia.info/p/pg_collation.html - colliculocale: Option, colllocale: Option, collicurules: Option, collversion: Option, From 09f9ca25ea2246d0856a7d5865a8f5b80ca93f82 Mon Sep 17 00:00:00 2001 From: Giangblackk Date: Thu, 18 Sep 2025 22:20:13 +0700 Subject: [PATCH 6/9] Update rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs Co-authored-by: Alex Qyoun-ae <4062971+MazterQyou@users.noreply.github.com> --- .../compile/engine/information_schema/postgres/pg_collation.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs index 0b9f27fcc85f4..04cd53e008fb4 100644 --- a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs +++ b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs @@ -40,6 +40,9 @@ struct PgCatalogCollationBuilder { collprovider: StringBuilder, collisdeterministic: BooleanBuilder, collencoding: Int32Builder, + // Column `colliculocale` renamed to `colllocale` since PostgreSQL 17. + // Support both columns for backward-compatibility. + // Reference: https://pgpedia.info/p/pg_collation.html colliculocale: StringBuilder, collcollate: StringBuilder, collctype: StringBuilder, From aeb1de2a5a0357c46b62f122e250de11c8ae7d7d Mon Sep 17 00:00:00 2001 From: Giangblackk Date: Thu, 18 Sep 2025 22:20:30 +0700 Subject: [PATCH 7/9] Update rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs Co-authored-by: Alex Qyoun-ae <4062971+MazterQyou@users.noreply.github.com> --- .../compile/engine/information_schema/postgres/pg_collation.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs index 04cd53e008fb4..bc28b3db24ec8 100644 --- a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs +++ b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs @@ -52,8 +52,7 @@ struct PgCatalogCollationBuilder { } impl PgCatalogCollationBuilder { - fn new() -> Self { - let capacity = 5; + fn new(capacity: usize) -> Self { Self { oid: UInt32Builder::new(capacity), collname: StringBuilder::new(capacity), From 0a32027b2dfd3e0869b7248a555e86b32da9f6fa Mon Sep 17 00:00:00 2001 From: giangblackk Date: Thu, 18 Sep 2025 23:35:52 +0700 Subject: [PATCH 8/9] update values for pg_collation records - remove redundant field `colliculocale` - reorganize field order to match postgres doc - update reference data to postgres version 17.6 - update nullable properties to match postgres information schema --- .../postgres/pg_collation.rs | 79 +++++++++++-------- 1 file changed, 48 insertions(+), 31 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs index bc28b3db24ec8..2a73e6427b954 100644 --- a/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs +++ b/rust/cubesql/cubesql/src/compile/engine/information_schema/postgres/pg_collation.rs @@ -26,7 +26,7 @@ struct PgCollation { collencoding: i32, collcollate: Option, collctype: Option, - // Column `colliculocale` renamed to `colllocale` since PostgreSQL 17. + // Column `colliculocale` is renamed to `colllocale` since PostgreSQL 17. colllocale: Option, collicurules: Option, collversion: Option, @@ -40,13 +40,13 @@ struct PgCatalogCollationBuilder { collprovider: StringBuilder, collisdeterministic: BooleanBuilder, collencoding: Int32Builder, - // Column `colliculocale` renamed to `colllocale` since PostgreSQL 17. - // Support both columns for backward-compatibility. - // Reference: https://pgpedia.info/p/pg_collation.html - colliculocale: StringBuilder, collcollate: StringBuilder, collctype: StringBuilder, + // Column `colliculocale` is renamed to `colllocale` since PostgreSQL 17. + // Support both columns for backward-compatibility. + // Reference: https://pgpedia.info/p/pg_collation.html colllocale: StringBuilder, + colliculocale: StringBuilder, collicurules: StringBuilder, collversion: StringBuilder, } @@ -61,10 +61,10 @@ impl PgCatalogCollationBuilder { collprovider: StringBuilder::new(capacity), collisdeterministic: BooleanBuilder::new(capacity), collencoding: Int32Builder::new(capacity), - colliculocale: StringBuilder::new(capacity), collcollate: StringBuilder::new(capacity), collctype: StringBuilder::new(capacity), colllocale: StringBuilder::new(capacity), + colliculocale: StringBuilder::new(capacity), collicurules: StringBuilder::new(capacity), collversion: StringBuilder::new(capacity), } @@ -81,9 +81,6 @@ impl PgCatalogCollationBuilder { .append_value(coll.collisdeterministic) .unwrap(); self.collencoding.append_value(coll.collencoding).unwrap(); - self.colliculocale - .append_option(coll.colliculocale.clone()) - .unwrap(); self.collcollate .append_option(coll.collcollate.clone()) .unwrap(); @@ -93,6 +90,10 @@ impl PgCatalogCollationBuilder { self.colllocale .append_option(coll.colllocale.clone()) .unwrap(); + // Column `colliculocale` is renamed to `colllocale` since PostgreSQL 17. + self.colliculocale + .append_option(coll.colllocale.clone()) + .unwrap(); self.collicurules .append_option(coll.collicurules.clone()) .unwrap(); @@ -110,10 +111,10 @@ impl PgCatalogCollationBuilder { Arc::new(self.collprovider.finish()), Arc::new(self.collisdeterministic.finish()), Arc::new(self.collencoding.finish()), - Arc::new(self.colliculocale.finish()), Arc::new(self.collcollate.finish()), Arc::new(self.collctype.finish()), Arc::new(self.colllocale.finish()), + Arc::new(self.colliculocale.finish()), Arc::new(self.collicurules.finish()), Arc::new(self.collversion.finish()), ]; @@ -127,11 +128,13 @@ pub struct PgCatalogCollationProvider { impl PgCatalogCollationProvider { pub fn new() -> Self { - // See https://github.com/postgres/postgres/blob/REL_16_4/src/include/catalog/pg_collation.h - let mut builder = PgCatalogCollationBuilder::new(); + // See https://github.com/postgres/postgres/blob/REL_17_6/src/include/catalog/pg_collation.h + let mut builder = PgCatalogCollationBuilder::new(6); // Initial contents of the pg_collation system catalog. - // See https://github.com/postgres/postgres/blob/REL_16_4/src/include/catalog/pg_collation.dat + // See https://github.com/postgres/postgres/blob/REL_17_6/src/include/catalog/pg_collation.dat + + // database's default collation builder.add_collation(&PgCollation { oid: 100, collname: "default", @@ -141,12 +144,12 @@ impl PgCatalogCollationProvider { collisdeterministic: true, collencoding: -1, collcollate: None, - colliculocale: None, collctype: None, colllocale: None, collicurules: None, collversion: None, }); + // standard C collation builder.add_collation(&PgCollation { oid: 950, collname: "C", @@ -156,12 +159,12 @@ impl PgCatalogCollationProvider { collisdeterministic: true, collencoding: -1, collcollate: Some("C".to_string()), - colliculocale: Some("C".to_string()), collctype: Some("C".to_string()), colllocale: None, collicurules: None, collversion: None, }); + // standard POSIX collation builder.add_collation(&PgCollation { oid: 951, collname: "POSIX", @@ -171,27 +174,27 @@ impl PgCatalogCollationProvider { collisdeterministic: true, collencoding: -1, collcollate: Some("POSIX".to_string()), - colliculocale: Some("POSIX".to_string()), collctype: Some("POSIX".to_string()), colllocale: None, collicurules: None, collversion: None, }); + // sorts by Unicode code point, C character semantics builder.add_collation(&PgCollation { oid: 962, - collname: "usc_basic", + collname: "ucs_basic", collnamespace: PG_NAMESPACE_CATALOG_OID, collowner: 10, - collprovider: "c".to_string(), + collprovider: "b".to_string(), collisdeterministic: true, collencoding: 6, - collcollate: Some("C".to_string()), - colliculocale: Some("C".to_string()), - collctype: Some("C".to_string()), - colllocale: None, + collcollate: None, + collctype: None, + colllocale: Some("C".to_string()), collicurules: None, - collversion: None, + collversion: Some("1".to_string()), }); + // sorts using the Unicode Collation Algorithm with default settings builder.add_collation(&PgCollation { oid: 963, collname: "unicode", @@ -201,11 +204,25 @@ impl PgCatalogCollationProvider { collisdeterministic: true, collencoding: -1, collcollate: None, - colliculocale: None, collctype: None, colllocale: Some("und".to_string()), collicurules: None, - collversion: Some("153.121".to_string()), + collversion: Some("153.128".to_string()), + }); + // sorts by Unicode code point; Unicode and POSIX character semantics + builder.add_collation(&PgCollation { + oid: 811, + collname: "pg_c_utf8", + collnamespace: PG_NAMESPACE_CATALOG_OID, + collowner: 10, + collprovider: "b".to_string(), + collisdeterministic: true, + collencoding: 6, + collcollate: None, + collctype: None, + colllocale: Some("C.UTF-8".to_string()), + collicurules: None, + collversion: Some("1".to_string()), }); Self { data: Arc::new(builder.finish()), @@ -227,12 +244,12 @@ impl TableProvider for PgCatalogCollationProvider { Field::new("collprovider", DataType::Utf8, false), Field::new("collisdeterministic", DataType::Boolean, false), Field::new("collencoding", DataType::Int32, false), - Field::new("colliculocale", DataType::Utf8, false), - Field::new("collcollate", DataType::Utf8, false), - Field::new("collctype", DataType::Utf8, false), - Field::new("colllocale", DataType::Utf8, false), - Field::new("collicurules", DataType::Utf8, false), - Field::new("collversion", DataType::Utf8, false), + Field::new("collcollate", DataType::Utf8, true), + Field::new("collctype", DataType::Utf8, true), + Field::new("colllocale", DataType::Utf8, true), + Field::new("colliculocale", DataType::Utf8, true), + Field::new("collicurules", DataType::Utf8, true), + Field::new("collversion", DataType::Utf8, true), ])) } async fn scan( From f3c9283e3a6d28027bb3b73ed461dc8ddb6ab1fe Mon Sep 17 00:00:00 2001 From: giangblackk Date: Thu, 18 Sep 2025 23:37:44 +0700 Subject: [PATCH 9/9] update unit test for pg_collation to postgres version 17 --- rust/cubesql/cubesql/src/compile/mod.rs | 4 ++-- .../cubesql__compile__tests__pg_collation.snap | 13 ------------- ...cubesql__compile__tests__pg_collation_PG17.snap | 14 ++++++++++++++ 3 files changed, 16 insertions(+), 15 deletions(-) delete mode 100644 rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation.snap create mode 100644 rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation_PG17.snap diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 942ffb8c4bfef..98b18e4ce9c2d 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -17702,9 +17702,9 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), #[tokio::test] async fn test_pg_collation() -> Result<(), CubeError> { insta::assert_snapshot!( - "pg_collation", + "pg_collation_PG17", execute_query( - "SELECT * FROM pg_catalog.pg_collation".to_string(), + "SELECT * FROM pg_catalog.pg_collation ORDER BY oid".to_string(), DatabaseProtocol::PostgreSQL ) .await? diff --git a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation.snap b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation.snap deleted file mode 100644 index 1f8f5df1bfef8..0000000000000 --- a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation.snap +++ /dev/null @@ -1,13 +0,0 @@ ---- -source: cubesql/src/compile/mod.rs -expression: "execute_query(\"SELECT * FROM pg_catalog.pg_collation\".to_string(),\nDatabaseProtocol::PostgreSQL).await?" ---- -+-----+-----------+---------------+-----------+--------------+---------------------+--------------+---------------+-------------+-----------+------------+--------------+-------------+ -| oid | collname | collnamespace | collowner | collprovider | collisdeterministic | collencoding | colliculocale | collcollate | collctype | colllocale | collicurules | collversion | -+-----+-----------+---------------+-----------+--------------+---------------------+--------------+---------------+-------------+-----------+------------+--------------+-------------+ -| 100 | default | 11 | 10 | d | true | -1 | NULL | NULL | NULL | NULL | NULL | NULL | -| 950 | C | 11 | 10 | c | true | -1 | C | C | C | NULL | NULL | NULL | -| 951 | POSIX | 11 | 10 | c | true | -1 | POSIX | POSIX | POSIX | NULL | NULL | NULL | -| 962 | usc_basic | 11 | 10 | c | true | 6 | C | C | C | NULL | NULL | NULL | -| 963 | unicode | 11 | 10 | i | true | -1 | NULL | NULL | NULL | und | NULL | 153.121 | -+-----+-----------+---------------+-----------+--------------+---------------------+--------------+---------------+-------------+-----------+------------+--------------+-------------+ diff --git a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation_PG17.snap b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation_PG17.snap new file mode 100644 index 0000000000000..3a732611ce912 --- /dev/null +++ b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__pg_collation_PG17.snap @@ -0,0 +1,14 @@ +--- +source: cubesql/src/compile/mod.rs +expression: "execute_query(\"SELECT * FROM pg_catalog.pg_collation ORDER BY oid\".to_string(),\nDatabaseProtocol::PostgreSQL).await?" +--- ++-----+-----------+---------------+-----------+--------------+---------------------+--------------+-------------+-----------+------------+---------------+--------------+-------------+ +| oid | collname | collnamespace | collowner | collprovider | collisdeterministic | collencoding | collcollate | collctype | colllocale | colliculocale | collicurules | collversion | ++-----+-----------+---------------+-----------+--------------+---------------------+--------------+-------------+-----------+------------+---------------+--------------+-------------+ +| 100 | default | 11 | 10 | d | true | -1 | NULL | NULL | NULL | NULL | NULL | NULL | +| 811 | pg_c_utf8 | 11 | 10 | b | true | 6 | NULL | NULL | C.UTF-8 | C.UTF-8 | NULL | 1 | +| 950 | C | 11 | 10 | c | true | -1 | C | C | NULL | NULL | NULL | NULL | +| 951 | POSIX | 11 | 10 | c | true | -1 | POSIX | POSIX | NULL | NULL | NULL | NULL | +| 962 | ucs_basic | 11 | 10 | b | true | 6 | NULL | NULL | C | C | NULL | 1 | +| 963 | unicode | 11 | 10 | i | true | -1 | NULL | NULL | und | und | NULL | 153.128 | ++-----+-----------+---------------+-----------+--------------+---------------------+--------------+-------------+-----------+------------+---------------+--------------+-------------+