From fe425a418d3902eeb8985aece773e51c57dc8c51 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 30 Mar 2026 16:15:18 +0800 Subject: [PATCH 01/13] proto --- crates/integration_tests/Cargo.toml | 1 + crates/integration_tests/tests/read_tables.rs | 191 +++++++++++++++++- crates/paimon/Cargo.toml | 8 +- crates/paimon/src/api/api_response.rs | 11 + crates/paimon/src/api/auth/base.rs | 2 +- crates/paimon/src/api/mod.rs | 2 +- crates/paimon/src/api/rest_api.rs | 22 +- crates/paimon/src/catalog/filesystem.rs | 16 +- crates/paimon/src/catalog/mod.rs | 10 + crates/paimon/src/io/file_io.rs | 17 +- crates/paimon/src/io/storage_oss.rs | 13 +- crates/paimon/tests/mock_server.rs | 38 ++++ 12 files changed, 311 insertions(+), 20 deletions(-) diff --git a/crates/integration_tests/Cargo.toml b/crates/integration_tests/Cargo.toml index a4753bf0..f1b7ea59 100644 --- a/crates/integration_tests/Cargo.toml +++ b/crates/integration_tests/Cargo.toml @@ -28,3 +28,4 @@ paimon = { path = "../paimon" } arrow-array = { workspace = true } tokio = { version = "1", features = ["macros", "rt-multi-thread"] } futures = "0.3" +serde_json = "1" diff --git a/crates/integration_tests/tests/read_tables.rs b/crates/integration_tests/tests/read_tables.rs index 568400d3..20ab4926 100644 --- a/crates/integration_tests/tests/read_tables.rs +++ b/crates/integration_tests/tests/read_tables.rs @@ -19,9 +19,16 @@ use arrow_array::{Int32Array, RecordBatch, StringArray}; use futures::TryStreamExt; -use paimon::catalog::Identifier; +use paimon::api::ConfigResponse; +use paimon::catalog::{Identifier, RestCatalog}; +use paimon::common::Options; +use paimon::spec::{DataType, IntType, Schema, VarCharType}; use paimon::{Catalog, Error, FileSystemCatalog, Plan}; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; + +#[path = "../../paimon/tests/mock_server.rs"] +mod mock_server; +use mock_server::start_mock_server; fn get_test_warehouse() -> String { 
std::env::var("PAIMON_TEST_WAREHOUSE").unwrap_or_else(|_| "/tmp/paimon-warehouse".to_string()) @@ -438,3 +445,183 @@ async fn test_read_projection_duplicate_column() { "Expected ConfigInvalid for duplicate projection, got: {err:?}" ); } + +// ======================= REST Catalog read tests =============================== + +/// Build a simple test schema matching the Spark-provisioned tables (id INT, name VARCHAR). +fn simple_log_schema() -> Schema { + Schema::builder() + .column("id", DataType::Int(IntType::new())) + .column( + "name", + DataType::VarChar(VarCharType::string_type()), + ) + .build() + .expect("Failed to build schema") +} + +/// Build a DV-enabled primary key schema (id INT NOT NULL as PK, name VARCHAR). +fn simple_dv_pk_schema() -> Schema { + Schema::builder() + .column("id", DataType::Int(IntType::with_nullable(false))) + .column( + "name", + DataType::VarChar(VarCharType::string_type()), + ) + .primary_key(["id"]) + .option("deletion-vectors.enabled", "true") + .build() + .expect("Failed to build schema") +} + +/// Start a mock REST server backed by Spark-provisioned data on disk, +/// register the given tables, and return a connected `RestCatalog`. 
+async fn setup_rest_catalog_with_tables( + table_configs: &[(&str, &str, Schema)], +) -> (mock_server::RESTServer, RestCatalog) { + let data_path = get_test_warehouse(); + // Use a simple warehouse name (no slashes) to avoid URL-encoding issues + let warehouse_name = "test_warehouse"; + let prefix = "mock-test"; + let mut defaults = HashMap::new(); + defaults.insert("prefix".to_string(), prefix.to_string()); + let config = ConfigResponse::new(defaults); + + let server = start_mock_server( + warehouse_name.to_string(), + data_path.clone(), + config, + vec!["default".to_string()], + ) + .await; + + // Register each table with its schema and the real on-disk path + for (database, table_name, schema) in table_configs { + let table_path = format!("{}/{}.db/{}", data_path, database, table_name); + server.add_table_with_schema(database, table_name, schema.clone(), &table_path); + } + + let url = server.url().expect("Failed to get server URL"); + let mut options = Options::new(); + options.set("uri", &url); + options.set("warehouse", warehouse_name); + options.set("token.provider", "bear"); + options.set("token", "test_token"); + + let catalog = RestCatalog::new(options, true) + .await + .expect("Failed to create RestCatalog"); + + (server, catalog) +} + +/// Test reading an append-only (log) table via REST catalog backed by mock server. +/// +/// The mock server returns table metadata pointing to Spark-provisioned data on disk. 
+#[tokio::test] +async fn test_rest_catalog_read_append_table() { + let table_name = "simple_log_table"; + let (_server, catalog) = setup_rest_catalog_with_tables(&[( + "default", + table_name, + simple_log_schema(), + )]) + .await; + + let identifier = Identifier::new("default", table_name); + let table = catalog + .get_table(&identifier) + .await + .expect("Failed to get table from REST catalog"); + + let read_builder = table.new_read_builder(); + let scan = read_builder.new_scan(); + let plan = scan.plan().await.expect("Failed to plan scan"); + + assert!( + !plan.splits().is_empty(), + "REST append table should have at least one split" + ); + + let read = read_builder.new_read().expect("Failed to create read"); + let stream = read + .to_arrow(plan.splits()) + .expect("Failed to create arrow stream"); + let batches: Vec<_> = stream + .try_collect() + .await + .expect("Failed to collect batches"); + + assert!( + !batches.is_empty(), + "REST append table should produce at least one batch" + ); + + let actual = extract_id_name(&batches); + let expected = vec![ + (1, "alice".to_string()), + (2, "bob".to_string()), + (3, "carol".to_string()), + ]; + assert_eq!( + actual, expected, + "REST catalog append table rows should match expected values" + ); +} + +/// Test reading a primary-key table with deletion vectors via REST catalog backed by mock server. +/// +/// The mock server returns table metadata pointing to Spark-provisioned data on disk. 
+#[tokio::test] +async fn test_rest_catalog_read_pk_table() { + let table_name = "simple_dv_pk_table"; + let (_server, catalog) = setup_rest_catalog_with_tables(&[( + "default", + table_name, + simple_dv_pk_schema(), + )]) + .await; + + let identifier = Identifier::new("default", table_name); + let table = catalog + .get_table(&identifier) + .await + .expect("Failed to get table from REST catalog"); + + let read_builder = table.new_read_builder(); + let scan = read_builder.new_scan(); + let plan = scan.plan().await.expect("Failed to plan scan"); + + assert!( + !plan.splits().is_empty(), + "REST PK table should have at least one split" + ); + + let read = read_builder.new_read().expect("Failed to create read"); + let stream = read + .to_arrow(plan.splits()) + .expect("Failed to create arrow stream"); + let batches: Vec<_> = stream + .try_collect() + .await + .expect("Failed to collect batches"); + + assert!( + !batches.is_empty(), + "REST PK table should produce at least one batch" + ); + + let actual = extract_id_name(&batches); + let expected = vec![ + (1, "alice-v2".to_string()), + (2, "bob-v2".to_string()), + (3, "carol-v2".to_string()), + (4, "dave-v2".to_string()), + (5, "eve-v2".to_string()), + (6, "frank-v1".to_string()), + ]; + assert_eq!( + actual, expected, + "REST catalog DV-enabled PK table should only expose the latest row per key" + ); +} diff --git a/crates/paimon/Cargo.toml b/crates/paimon/Cargo.toml index 8ea6f9e8..2597a82a 100644 --- a/crates/paimon/Cargo.toml +++ b/crates/paimon/Cargo.toml @@ -27,7 +27,7 @@ license.workspace = true version.workspace = true [features] -default = ["storage-memory", "storage-fs"] +default = ["storage-memory", "storage-fs", "storage-oss"] storage-all = ["storage-memory", "storage-fs", "storage-oss", "storage-s3"] storage-memory = ["opendal/services-memory"] @@ -49,7 +49,7 @@ serde_with = "3.9.0" serde_repr = "0.1" snafu = "0.8.3" typed-builder = "^0.19" -opendal = { version = "0.49", features = ["services-fs"] } 
+opendal = { version = "0.55", features = ["services-fs"] } pretty_assertions = "1" apache-avro = { version = "0.17", features = ["snappy", "zstandard"] } indexmap = "2.5.0" @@ -75,3 +75,7 @@ axum = { version = "0.7", features = ["macros", "tokio", "http1", "http2"] } rand = "0.8.5" serde_avro_fast = { version = "1.1.2", features = ["snappy"] } tempfile = "3" + +[[example]] +name = "rest_catalog_example" +path = "examples/rest_catalog_example.rs" diff --git a/crates/paimon/src/api/api_response.rs b/crates/paimon/src/api/api_response.rs index e282080e..b6e9f696 100644 --- a/crates/paimon/src/api/api_response.rs +++ b/crates/paimon/src/api/api_response.rs @@ -277,6 +277,17 @@ impl PagedList { } } } + +/// Response for getting table token. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct GetTableTokenResponse { + /// Token key-value pairs (e.g. access_key_id, access_key_secret, etc.) + pub token: HashMap<String, String>, + /// Token expiration time in milliseconds since epoch. + pub expires_at_millis: Option<u64>, +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/paimon/src/api/auth/base.rs b/crates/paimon/src/api/auth/base.rs index 7b0ae337..982f63a0 100644 --- a/crates/paimon/src/api/auth/base.rs +++ b/crates/paimon/src/api/auth/base.rs @@ -75,7 +75,7 @@ impl RESTAuthParameter { /// Implement this trait to provide custom authentication mechanisms /// for REST API requests. #[async_trait] -pub trait AuthProvider: Send { +pub trait AuthProvider: Send + Sync { /// Merge authentication headers into the base headers.
/// /// # Arguments diff --git a/crates/paimon/src/api/mod.rs b/crates/paimon/src/api/mod.rs index 958323e8..6307cf87 100644 --- a/crates/paimon/src/api/mod.rs +++ b/crates/paimon/src/api/mod.rs @@ -37,7 +37,7 @@ pub use api_request::{ // Re-export response types pub use api_response::{ AuditRESTResponse, ConfigResponse, ErrorResponse, GetDatabaseResponse, GetTableResponse, - ListDatabasesResponse, ListTablesResponse, PagedList, + GetTableTokenResponse, ListDatabasesResponse, ListTablesResponse, PagedList, }; // Re-export error types diff --git a/crates/paimon/src/api/rest_api.rs b/crates/paimon/src/api/rest_api.rs index be09e9a4..c8dc3c13 100644 --- a/crates/paimon/src/api/rest_api.rs +++ b/crates/paimon/src/api/rest_api.rs @@ -77,7 +77,6 @@ fn validate_non_empty_multi(values: &[(&str, &str)]) -> Result<()> { pub struct RESTApi { client: HttpClient, resource_paths: ResourcePaths, - #[allow(dead_code)] options: Options, } @@ -168,6 +167,11 @@ impl RESTApi { }) } + /// Get the options (potentially merged with server config). + pub fn options(&self) -> &Options { + &self.options + } + // ==================== Database Operations ==================== /// List all databases. @@ -375,4 +379,20 @@ impl RESTApi { let _resp: serde_json::Value = self.client.delete(&path, None::<&[(&str, &str)]>).await?; Ok(()) } + + // ==================== Token Operations ==================== + + /// Load table token for data access. + /// + /// Corresponds to Python `RESTApi.load_table_token`. 
+    pub async fn load_table_token( + &mut self, + identifier: &Identifier, + ) -> Result<GetTableTokenResponse> { + let database = identifier.database(); + let table = identifier.object(); + validate_non_empty_multi(&[(database, "database name"), (table, "table name")])?; + let path = self.resource_paths.table_token(database, table); + self.client.get(&path, None::<&[(&str, &str)]>).await + } } diff --git a/crates/paimon/src/catalog/filesystem.rs b/crates/paimon/src/catalog/filesystem.rs index 2685c660..3dc9a393 100644 --- a/crates/paimon/src/catalog/filesystem.rs +++ b/crates/paimon/src/catalog/filesystem.rs @@ -21,7 +21,7 @@ use std::collections::HashMap; -use crate::catalog::{Catalog, Identifier, DB_LOCATION_PROP, DB_SUFFIX}; +use crate::catalog::{Catalog, DB_LOCATION_PROP, DB_SUFFIX, Database, Identifier}; use crate::error::{Error, Result}; use crate::io::FileIO; use crate::spec::{Schema, TableSchema}; @@ -237,6 +237,20 @@ impl Catalog for FileSystemCatalog { Ok(()) } + async fn get_database(&self, name: &str) -> Result<Database> { + if !self.database_exists(name).await? { + return Err(Error::DatabaseNotExist { + database: name.to_string(), + }); + } + + Ok(Database::new( + name.to_string(), + HashMap::new(), + None, + )) + } + async fn drop_database( &self, name: &str, diff --git a/crates/paimon/src/catalog/mod.rs b/crates/paimon/src/catalog/mod.rs index 4b43ffa6..1b4bf574 100644 --- a/crates/paimon/src/catalog/mod.rs +++ b/crates/paimon/src/catalog/mod.rs @@ -20,12 +20,16 @@ //! Design aligns with [Paimon Java Catalog](https://github.com/apache/paimon/blob/master/paimon-core/src/main/java/org/apache/paimon/catalog/Catalog.java) //! and follows API patterns from Apache Iceberg Rust. +mod database; mod filesystem; +mod rest; use std::collections::HashMap; use std::fmt; +pub use database::*; pub use filesystem::*; +pub use rest::*; use serde::{Deserialize, Serialize}; /// Splitter for system table names (e.g. `table$snapshots`).
@@ -146,6 +150,12 @@ pub trait Catalog: Send + Sync { properties: HashMap<String, String>, ) -> Result<()>; + /// Get a database by name. + /// + /// # Errors + /// * [`crate::Error::DatabaseNotExist`] - database does not exist. + async fn get_database(&self, name: &str) -> Result<Database>; + /// Drop a database. /// /// * `ignore_if_not_exists` - if true, do nothing when the database does not exist. diff --git a/crates/paimon/src/io/file_io.rs b/crates/paimon/src/io/file_io.rs index 3b0a4d65..16b53784 100644 --- a/crates/paimon/src/io/file_io.rs +++ b/crates/paimon/src/io/file_io.rs @@ -21,9 +21,9 @@ use std::ops::Range; use std::sync::Arc; use bytes::Bytes; -use chrono::{DateTime, Utc}; use opendal::raw::normalize_root; -use opendal::{Metakey, Operator}; +use opendal::raw::Timestamp; +use opendal::Operator; use snafu::ResultExt; use url::Url; @@ -122,10 +122,8 @@ impl FileIO { // use normalize_root to make sure it end with `/`. let list_path = normalize_root(relative_path); - // Request ContentLength and LastModified so accessing meta.content_length() / last_modified() let entries = op .list_with(&list_path) - .metakey(Metakey::ContentLength | Metakey::LastModified) .await .context(IoUnexpectedSnafu { message: format!("Failed to list files in '{path}'"), @@ -152,7 +150,7 @@ impl FileIO { pub async fn exists(&self, path: &str) -> Result<bool> { let (op, relative_path) = self.storage.create(path)?; - op.is_exist(relative_path).await.context(IoUnexpectedSnafu { + op.exists(relative_path).await.context(IoUnexpectedSnafu { message: format!("Failed to check existence of '{path}'"), }) } @@ -285,7 +283,7 @@ impl FileWrite for opendal::Writer { } async fn close(&mut self) -> crate::Result<()> { - Ok(opendal::Writer::close(self).await?)
+ opendal::Writer::close(self).await?; + Ok(()) } } @@ -294,7 +293,7 @@ pub struct FileStatus { pub size: u64, pub is_dir: bool, pub path: String, - pub last_modified: Option>, + pub last_modified: Option, } #[derive(Debug)] @@ -312,7 +311,7 @@ impl InputFile { pub async fn exists(&self) -> crate::Result { Ok(self .op - .is_exist(&self.path[self.relative_path_pos..]) + .exists(&self.path[self.relative_path_pos..]) .await?) } @@ -355,7 +354,7 @@ impl OutputFile { pub async fn exists(&self) -> crate::Result { Ok(self .op - .is_exist(&self.path[self.relative_path_pos..]) + .exists(&self.path[self.relative_path_pos..]) .await?) } diff --git a/crates/paimon/src/io/storage_oss.rs b/crates/paimon/src/io/storage_oss.rs index 884b0a73..332a6347 100644 --- a/crates/paimon/src/io/storage_oss.rs +++ b/crates/paimon/src/io/storage_oss.rs @@ -39,11 +39,17 @@ const OSS_ACCESS_KEY_ID: &str = "fs.oss.accessKeyId"; /// Compatible with paimon-java's `fs.oss.accessKeySecret`. const OSS_ACCESS_KEY_SECRET: &str = "fs.oss.accessKeySecret"; +/// Configuration key for OSS STS security token (optional). +/// +/// Compatible with paimon-java's `fs.oss.securityToken`. +/// Required when using STS temporary credentials (e.g. from REST data tokens). +const OSS_SECURITY_TOKEN: &str = "fs.oss.securityToken"; + /// Parse paimon catalog options into an [`OssConfig`]. /// -/// Extracts OSS-related configuration keys (endpoint, access key, secret key) -/// from the provided properties map and maps them to the corresponding -/// [`OssConfig`] fields. +/// Extracts OSS-related configuration keys (endpoint, access key, secret key, +/// and optional security token) from the provided properties map and maps them +/// to the corresponding [`OssConfig`] fields. /// /// Returns an error if any required configuration key is missing. 
pub(crate) fn oss_config_parse(mut props: HashMap) -> Result { @@ -73,6 +79,7 @@ pub(crate) fn oss_config_parse(mut props: HashMap) -> Result Date: Mon, 30 Mar 2026 16:15:33 +0800 Subject: [PATCH 02/13] add --- .../paimon/examples/rest_catalog_example.rs | 188 ++++++++ .../rest_catalog_read_append_example.rs | 235 +++++++++ crates/paimon/src/catalog/database.rs | 48 ++ crates/paimon/src/catalog/rest/mod.rs | 29 ++ .../paimon/src/catalog/rest/rest_catalog.rs | 390 +++++++++++++++ crates/paimon/src/catalog/rest/rest_token.rs | 62 +++ .../src/catalog/rest/rest_token_file_io.rs | 203 ++++++++ crates/paimon/tests/rest_catalog_test.rs | 455 ++++++++++++++++++ 8 files changed, 1610 insertions(+) create mode 100644 crates/paimon/examples/rest_catalog_example.rs create mode 100644 crates/paimon/examples/rest_catalog_read_append_example.rs create mode 100644 crates/paimon/src/catalog/database.rs create mode 100644 crates/paimon/src/catalog/rest/mod.rs create mode 100644 crates/paimon/src/catalog/rest/rest_catalog.rs create mode 100644 crates/paimon/src/catalog/rest/rest_token.rs create mode 100644 crates/paimon/src/catalog/rest/rest_token_file_io.rs create mode 100644 crates/paimon/tests/rest_catalog_test.rs diff --git a/crates/paimon/examples/rest_catalog_example.rs b/crates/paimon/examples/rest_catalog_example.rs new file mode 100644 index 00000000..4696e66a --- /dev/null +++ b/crates/paimon/examples/rest_catalog_example.rs @@ -0,0 +1,188 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Example: REST Catalog Operations +//! +//! This example demonstrates how to use `RestCatalog` for database and table operations +//! via the Paimon REST catalog API. +//! +//! # Usage +//! ```bash +//! # With DLF authentication: +//! DLF_ACCESS_KEY_ID=xxx DLF_ACCESS_KEY_SECRET=yyy \ +//! cargo run -p paimon --example rest_catalog_example +//! +//! # With Bearer token authentication: +//! PAIMON_REST_TOKEN=zzz \ +//! cargo run -p paimon --example rest_catalog_example +//! ``` + +use std::collections::HashMap; + +use paimon::catalog::{Catalog, Identifier, RestCatalog}; +use paimon::common::{CatalogOptions, Options}; +use paimon::spec::{DataType, IntType, Schema, VarCharType}; + +/// Create a simple test schema with `id` (INT) and `name` (VARCHAR) columns. 
+fn create_test_schema() -> Schema { + Schema::builder() + .column("id", DataType::Int(IntType::new())) + .column("name", DataType::VarChar(VarCharType::new(255).unwrap())) + .build() + .expect("Failed to build schema") +} + +#[tokio::main] +async fn main() { + // ==================== Configuration ==================== + let mut options = Options::new(); + + // Basic configuration — replace with your actual server URL and warehouse + options.set(CatalogOptions::METASTORE, "rest"); + options.set(CatalogOptions::WAREHOUSE, "pypaimon_catalog"); + options.set( + CatalogOptions::URI, + "http://sample.net/", + ); + + // --- Authentication (choose one) --- + + // Option A: DLF authentication (Alibaba Cloud) + options.set(CatalogOptions::TOKEN_PROVIDER, "dlf"); + options.set("dlf.region", "cn-hangzhou"); + options.set( + "dlf.access-key-id", + std::env::var("DLF_ACCESS_KEY_ID").expect("DLF_ACCESS_KEY_ID env var not set"), + ); + options.set( + "dlf.access-key-secret", + std::env::var("DLF_ACCESS_KEY_SECRET").expect("DLF_ACCESS_KEY_SECRET env var not set"), + ); + + // Option B: Bearer token authentication (uncomment to use) + // options.set(CatalogOptions::TOKEN_PROVIDER, "bearer"); + // options.set("token", std::env::var("PAIMON_REST_TOKEN") + // .expect("PAIMON_REST_TOKEN env var not set")); + + // ==================== Create RestCatalog ==================== + println!("Creating RestCatalog instance..."); + let catalog = match RestCatalog::new(options, true).await { + Ok(catalog) => catalog, + Err(err) => { + eprintln!("Failed to create RestCatalog: {}", err); + return; + } + }; + + // ==================== Database Operations ==================== + println!("\n=== Database Operations ===\n"); + + // List databases + println!("Listing databases..."); + match catalog.list_databases().await { + Ok(databases) => { + println!("Databases found: {:?}", databases); + println!("Total count: {}", databases.len()); + } + Err(err) => { + eprintln!("Failed to list databases: {}", 
err); + } + } + + // Create database + println!("\nCreating database 'example_db'..."); + match catalog + .create_database("example_db", false, HashMap::new()) + .await + { + Ok(()) => println!("Database created successfully"), + Err(err) => eprintln!("Failed to create database: {}", err), + } + + // Get database info + println!("\nGetting database info for 'example_db'..."); + match catalog.get_database("example_db").await { + Ok(database) => println!("Database: {:?}", database), + Err(err) => eprintln!("Failed to get database: {}", err), + } + + // ==================== Table Operations ==================== + println!("\n=== Table Operations ===\n"); + + // Create table + let table_identifier = Identifier::new("example_db", "users"); + println!("Creating table '{}'...", table_identifier); + let schema = create_test_schema(); + match catalog + .create_table(&table_identifier, schema, false) + .await + { + Ok(()) => println!("Table created successfully"), + Err(err) => eprintln!("Failed to create table: {}", err), + } + + // List tables + println!("\nListing tables in 'example_db'..."); + match catalog.list_tables("example_db").await { + Ok(tables) => { + println!("Tables found: {:?}", tables); + } + Err(err) => { + eprintln!("Failed to list tables: {}", err); + } + } + + // Get table info + println!("\nGetting table info for '{}'...", table_identifier); + match catalog.get_table(&table_identifier).await { + Ok(table) => println!("Table: {:?}", table), + Err(err) => eprintln!("Failed to get table: {}", err), + } + + // Rename table + let renamed_identifier = Identifier::new("example_db", "users_renamed"); + println!( + "\nRenaming table '{}' to '{}'...", + table_identifier, renamed_identifier + ); + match catalog + .rename_table(&table_identifier, &renamed_identifier, false) + .await + { + Ok(()) => println!("Table renamed successfully"), + Err(err) => eprintln!("Failed to rename table: {}", err), + } + + // Drop table + println!("\nDropping table '{}'...", 
renamed_identifier); + match catalog.drop_table(&renamed_identifier, false).await { + Ok(()) => println!("Table dropped successfully"), + Err(err) => eprintln!("Failed to drop table: {}", err), + } + + // ==================== Cleanup ==================== + println!("\n=== Cleanup ===\n"); + + // Drop database (cascade = true to force drop even if not empty) + println!("Dropping database 'example_db'..."); + match catalog.drop_database("example_db", false, true).await { + Ok(()) => println!("Database dropped successfully"), + Err(err) => eprintln!("Failed to drop database: {}", err), + } + + println!("\nExample completed!"); +} diff --git a/crates/paimon/examples/rest_catalog_read_append_example.rs b/crates/paimon/examples/rest_catalog_read_append_example.rs new file mode 100644 index 00000000..e3734817 --- /dev/null +++ b/crates/paimon/examples/rest_catalog_read_append_example.rs @@ -0,0 +1,235 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Example: REST Catalog — Read Append Table Data +//! +//! This example demonstrates how to use `RestCatalog` to read data from an +//! append-only table with the following schema: +//! +//! | Column | Type | +//! |----------|---------| +//! | user_id | INT | +//! 
| item_id | BIGINT | +//! | behavior | VARCHAR | +//! | dt | VARCHAR | +//! +//! Partition key: `dt` +//! +//! The table `default.test_t` must already exist and contain data on the +//! REST catalog server. +//! +//! # Usage +//! ```bash +//! # With DLF authentication: +//! DLF_ACCESS_KEY_ID=xxx DLF_ACCESS_KEY_SECRET=yyy \ +//! cargo run -p paimon --example rest_catalog_read_append_example +//! +//! # With Bearer token authentication: +//! PAIMON_REST_TOKEN=zzz \ +//! cargo run -p paimon --example rest_catalog_read_append_example +//! ``` + +use futures::TryStreamExt; + +use paimon::catalog::{Catalog, Identifier, RestCatalog}; +use paimon::common::{CatalogOptions, Options}; + +#[tokio::main] +async fn main() { + // ==================== Configuration ==================== + let mut options = Options::new(); + + // Basic configuration — replace with your actual server URL and warehouse + options.set(CatalogOptions::METASTORE, "rest"); + options.set(CatalogOptions::WAREHOUSE, "pypaimon_catalog"); + options.set( + CatalogOptions::URI, + "http://sample.net/", + ); + + // --- Authentication (choose one) --- + + // Option A: DLF authentication (Alibaba Cloud) + options.set(CatalogOptions::TOKEN_PROVIDER, "dlf"); + options.set("dlf.region", "cn-hangzhou"); + options.set( + "dlf.access-key-id", + std::env::var("DLF_ACCESS_KEY_ID").expect("DLF_ACCESS_KEY_ID env var not set"), + ); + options.set( + "dlf.access-key-secret", + std::env::var("DLF_ACCESS_KEY_SECRET").expect("DLF_ACCESS_KEY_SECRET env var not set"), + ); + + // Option B: Bearer token authentication (uncomment to use) + // options.set(CatalogOptions::TOKEN_PROVIDER, "bearer"); + // options.set("token", std::env::var("PAIMON_REST_TOKEN") + // .expect("PAIMON_REST_TOKEN env var not set")); + + // ==================== Create RestCatalog ==================== + println!("Creating RestCatalog instance..."); + let catalog = match RestCatalog::new(options, true).await { + Ok(catalog) => catalog, + Err(err) => { + 
eprintln!("Failed to create RestCatalog: {}", err); + return; + } + }; + + // ==================== Get Table ==================== + let table_identifier = Identifier::new("default", "test_t"); + println!("Getting table '{}'...", table_identifier); + + let table = match catalog.get_table(&table_identifier).await { + Ok(table) => { + println!("Table retrieved successfully"); + println!(" Location: {}", table.location()); + println!(" Schema fields: {:?}", table.schema().fields()); + table + } + Err(err) => { + eprintln!("Failed to get table: {}", err); + return; + } + }; + + // ==================== Scan Table ==================== + println!("\n=== Scanning Table ===\n"); + + let read_builder = table.new_read_builder(); + let scan = read_builder.new_scan(); + + let plan = match scan.plan().await { + Ok(plan) => { + println!("Scan plan created successfully"); + println!(" Number of splits: {}", plan.splits().len()); + plan + } + Err(err) => { + eprintln!("Failed to plan scan: {}", err); + return; + } + }; + + if plan.splits().is_empty() { + println!("No data splits found — the table may be empty."); + return; + } + + // ==================== Read Table Data ==================== + println!("\n=== Reading Table Data ===\n"); + + let read = match read_builder.new_read() { + Ok(read) => read, + Err(err) => { + eprintln!("Failed to create table read: {}", err); + return; + } + }; + + let stream = match read.to_arrow(plan.splits()) { + Ok(stream) => stream, + Err(err) => { + eprintln!("Failed to create arrow stream: {}", err); + return; + } + }; + + let batches: Vec<_> = match stream.try_collect().await { + Ok(batches) => batches, + Err(err) => { + eprintln!("Failed to collect record batches: {}", err); + return; + } + }; + + println!("Collected {} record batch(es)", batches.len()); + + let mut total_rows = 0; + for (batch_index, batch) in batches.iter().enumerate() { + let num_rows = batch.num_rows(); + total_rows += num_rows; + println!( + "\n--- Batch {} ({} rows, {} 
columns) ---", + batch_index, + num_rows, + batch.num_columns() + ); + println!("Schema: {}", batch.schema()); + + // Print up to 20 rows per batch for readability + let display_rows = num_rows.min(20); + for row in 0..display_rows { + let mut row_values = Vec::new(); + for col in 0..batch.num_columns() { + let column = batch.column(col); + row_values.push(array_value_to_string(column, row)); + } + println!(" Row {}: [{}]", row, row_values.join(", ")); + } + if num_rows > display_rows { + println!(" ... ({} more rows omitted)", num_rows - display_rows); + } + } + + println!("\n=== Summary ==="); + println!("Total rows read: {}", total_rows); + println!("Total batches: {}", batches.len()); + println!("\nExample completed!"); +} + +/// Format a single cell value from an Arrow array at the given row index. +fn array_value_to_string(array: &dyn arrow_array::Array, row: usize) -> String { + use arrow_array::*; + + if array.is_null(row) { + return "null".to_string(); + } + + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return format!("{:?}", arr.value(row)); + } + + format!("", array.data_type()) +} diff --git 
a/crates/paimon/src/catalog/database.rs b/crates/paimon/src/catalog/database.rs new file mode 100644 index 00000000..594c0ac5 --- /dev/null +++ b/crates/paimon/src/catalog/database.rs @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Database structure for Apache Paimon catalogs. + +use std::collections::HashMap; + +/// Structure representing a database in a Paimon catalog. +/// +/// Corresponds to Python `Database` in `pypaimon/catalog/rest/rest_catalog.py`. +#[derive(Debug, Clone)] +pub struct Database { + /// Database name. + pub name: String, + /// Database options/properties. + pub options: HashMap<String, String>, + /// Optional comment describing the database. + pub comment: Option<String>, +} + +impl Database { + /// Create a new Database. + pub fn new( + name: String, + options: HashMap<String, String>, + comment: Option<String>, + ) -> Self { + Self { + name, + options, + comment, + } + } +} diff --git a/crates/paimon/src/catalog/rest/mod.rs b/crates/paimon/src/catalog/rest/mod.rs new file mode 100644 index 00000000..921e6290 --- /dev/null +++ b/crates/paimon/src/catalog/rest/mod.rs @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements.
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! REST catalog implementation for Apache Paimon. +//! +//! This module provides a REST-based catalog that communicates with +//! a Paimon REST catalog server for metadata operations. + +mod rest_catalog; +mod rest_token; +mod rest_token_file_io; + +pub use rest_catalog::*; +pub use rest_token::*; +pub use rest_token_file_io::*; diff --git a/crates/paimon/src/catalog/rest/rest_catalog.rs b/crates/paimon/src/catalog/rest/rest_catalog.rs new file mode 100644 index 00000000..da31949d --- /dev/null +++ b/crates/paimon/src/catalog/rest/rest_catalog.rs @@ -0,0 +1,390 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! REST catalog implementation for Apache Paimon. +//! +//! This module provides a REST-based catalog that communicates with +//! a Paimon REST catalog server for database and table CRUD operations. +//! +//! Reference: Python `RESTCatalog` in `pypaimon/catalog/rest/rest_catalog.py`. + +use std::collections::HashMap; + +use async_trait::async_trait; +use tokio::sync::Mutex; + +use crate::api::rest_api::RESTApi; +use crate::api::rest_error::RestError; +use crate::api::PagedList; +use crate::catalog::{Catalog, Database, Identifier, DB_LOCATION_PROP}; +use crate::common::{CatalogOptions, Options}; +use crate::error::Error; +use crate::io::FileIO; +use crate::spec::{Schema, SchemaChange, TableSchema}; +use crate::table::Table; +use crate::Result; + +use super::rest_token_file_io::RESTTokenFileIO; + +/// REST catalog implementation. +/// +/// This catalog communicates with a Paimon REST catalog server +/// for all metadata operations (database and table CRUD). +/// +/// Corresponds to Python `RESTCatalog` in `pypaimon/catalog/rest/rest_catalog.py`. +pub struct RestCatalog { + /// The REST API client, wrapped in a Mutex because `RESTApi` methods + /// require `&mut self` while `Catalog` trait methods take `&self`. + api: Mutex, + /// Catalog configuration options. + options: Options, + /// Warehouse path. + warehouse: String, + /// Whether data token is enabled for FileIO construction. + data_token_enabled: bool, +} + +impl RestCatalog { + /// Create a new REST catalog. + /// + /// # Arguments + /// * `options` - Configuration options containing URI, warehouse, etc. + /// * `config_required` - Whether to fetch config from server and merge with options. + /// + /// # Errors + /// Returns an error if required options are missing or if initialization fails. 
+ pub async fn new(options: Options, config_required: bool) -> Result { + let warehouse = options + .get(CatalogOptions::WAREHOUSE) + .cloned() + .unwrap_or_default(); + + let api = RESTApi::new(options.clone(), config_required).await?; + + let data_token_enabled = api + .options() + .get(CatalogOptions::DATA_TOKEN_ENABLED) + .map(|v| v.eq_ignore_ascii_case("true")) + .unwrap_or(false); + + let api_options = api.options().clone(); + + Ok(Self { + api: Mutex::new(api), + options: api_options, + warehouse, + data_token_enabled, + }) + } + + /// Get the warehouse path. + pub fn warehouse(&self) -> &str { + &self.warehouse + } + + /// Get the catalog options. + pub fn options(&self) -> &Options { + &self.options + } + + /// Whether data token is enabled. + pub fn data_token_enabled(&self) -> bool { + self.data_token_enabled + } + + /// List databases with pagination. + /// + /// Corresponds to Python `RESTCatalog.list_databases_paged`. + pub async fn list_databases_paged( + &self, + max_results: Option, + page_token: Option<&str>, + database_name_pattern: Option<&str>, + ) -> Result> { + let mut api = self.api.lock().await; + api.list_databases_paged(max_results, page_token, database_name_pattern) + .await + } + +} + +// ============================================================================ +// Error mapping helpers +// ============================================================================ + +/// Map a REST API error to a catalog-level database error. +/// +/// Converts `RestError::NoSuchResource` -> `Error::DatabaseNotExist`, +/// `RestError::AlreadyExists` -> `Error::DatabaseAlreadyExist`, +/// and passes through other errors via `Error::RestApi`. +fn map_rest_error_for_database(err: Error, database_name: &str) -> Error { + match &err { + Error::RestApi { source } => match source { + RestError::NoSuchResource { .. } => Error::DatabaseNotExist { + database: database_name.to_string(), + }, + RestError::AlreadyExists { .. 
} => Error::DatabaseAlreadyExist { + database: database_name.to_string(), + }, + _ => err, + }, + _ => err, + } +} + +/// Map a REST API error to a catalog-level table error. +/// +/// Converts `RestError::NoSuchResource` -> `Error::TableNotExist`, +/// `RestError::AlreadyExists` -> `Error::TableAlreadyExist`, +/// and passes through other errors via `Error::RestApi`. +fn map_rest_error_for_table(err: Error, identifier: &Identifier) -> Error { + match &err { + Error::RestApi { source } => match source { + RestError::NoSuchResource { .. } => Error::TableNotExist { + full_name: identifier.full_name(), + }, + RestError::AlreadyExists { .. } => Error::TableAlreadyExist { + full_name: identifier.full_name(), + }, + _ => err, + }, + _ => err, + } +} + +// ============================================================================ +// Catalog trait implementation +// ============================================================================ + +#[async_trait] +impl Catalog for RestCatalog { + // ======================= database methods =============================== + + async fn list_databases(&self) -> Result> { + let mut api = self.api.lock().await; + api.list_databases().await + } + + async fn create_database( + &self, + name: &str, + ignore_if_exists: bool, + properties: HashMap, + ) -> Result<()> { + let mut api = self.api.lock().await; + let options = if properties.is_empty() { + None + } else { + Some(properties) + }; + match api.create_database(name, options).await { + Ok(()) => Ok(()), + Err(err) => { + let mapped = map_rest_error_for_database(err, name); + match &mapped { + Error::DatabaseAlreadyExist { .. 
} if ignore_if_exists => Ok(()), + _ => Err(mapped), + } + } + } + } + + async fn get_database(&self, name: &str) -> Result { + let mut api = self.api.lock().await; + let response = api + .get_database(name) + .await + .map_err(|e| map_rest_error_for_database(e, name))?; + + let mut options = response.options; + if let Some(location) = response.location { + options.insert(DB_LOCATION_PROP.to_string(), location); + } + + Ok(Database::new( + name.to_string(), + options, + None, + )) + } + + async fn drop_database( + &self, + name: &str, + ignore_if_not_exists: bool, + cascade: bool, + ) -> Result<()> { + let mut api = self.api.lock().await; + + // If not cascade, check if database is empty first + if !cascade { + match api.list_tables(name).await { + Ok(tables) => { + if !tables.is_empty() { + return Err(Error::DatabaseNotEmpty { + database: name.to_string(), + }); + } + } + Err(err) => { + let mapped = map_rest_error_for_database(err, name); + match &mapped { + Error::DatabaseNotExist { .. } if ignore_if_not_exists => return Ok(()), + _ => return Err(mapped), + } + } + } + } + + match api.drop_database(name).await { + Ok(()) => Ok(()), + Err(err) => { + let mapped = map_rest_error_for_database(err, name); + match &mapped { + Error::DatabaseNotExist { .. 
} if ignore_if_not_exists => Ok(()), + _ => Err(mapped), + } + } + } + } + + // ======================= table methods =============================== + + async fn get_table(&self, identifier: &Identifier) -> Result { + let mut api = self.api.lock().await; + let response = api + .get_table(identifier) + .await + .map_err(|e| map_rest_error_for_table(e, identifier))?; + + // Extract schema from response + let schema = response.schema.ok_or_else(|| Error::DataInvalid { + message: format!( + "Table {} response missing schema", + identifier.full_name() + ), + source: None, + })?; + + let schema_id = response.schema_id.unwrap_or(0); + let table_schema = TableSchema::new(schema_id, &schema); + + // Extract table path from response + let table_path = response.path.ok_or_else(|| Error::DataInvalid { + message: format!("Table {} response missing path", identifier.full_name()), + source: None, + })?; + + // Check if the table is external + let is_external = response.is_external.unwrap_or(false); + + // Drop the API lock before async FileIO operations + drop(api); + + // Build FileIO based on data_token_enabled and is_external + let file_io = if self.data_token_enabled && !is_external { + // Use RESTTokenFileIO to get token-based FileIO + let token_file_io = RESTTokenFileIO::new( + identifier.clone(), + table_path.clone(), + self.options.clone(), + ); + token_file_io.build_file_io().await? + } else { + // Use standard FileIO from path + FileIO::from_path(&table_path)?.build()? 
+ }; + + Ok(Table::new(file_io, identifier.clone(), table_path, table_schema)) + } + + async fn list_tables(&self, database_name: &str) -> Result> { + let mut api = self.api.lock().await; + api.list_tables(database_name) + .await + .map_err(|e| map_rest_error_for_database(e, database_name)) + } + + async fn create_table( + &self, + identifier: &Identifier, + creation: Schema, + ignore_if_exists: bool, + ) -> Result<()> { + let mut api = self.api.lock().await; + match api.create_table(identifier, creation).await { + Ok(()) => Ok(()), + Err(err) => { + let mapped = map_rest_error_for_table(err, identifier); + match &mapped { + Error::TableAlreadyExist { .. } if ignore_if_exists => Ok(()), + _ => Err(mapped), + } + } + } + } + + async fn drop_table(&self, identifier: &Identifier, ignore_if_not_exists: bool) -> Result<()> { + let mut api = self.api.lock().await; + match api.drop_table(identifier).await { + Ok(()) => Ok(()), + Err(err) => { + let mapped = map_rest_error_for_table(err, identifier); + match &mapped { + Error::TableNotExist { .. } if ignore_if_not_exists => Ok(()), + _ => Err(mapped), + } + } + } + } + + async fn rename_table( + &self, + from: &Identifier, + to: &Identifier, + ignore_if_not_exists: bool, + ) -> Result<()> { + let mut api = self.api.lock().await; + match api.rename_table(from, to).await { + Ok(()) => Ok(()), + Err(err) => { + // Check if the error is about the source table not existing + let mapped = map_rest_error_for_table(err, from); + match &mapped { + Error::TableNotExist { .. } if ignore_if_not_exists => Ok(()), + // Also check if target already exists + Error::TableAlreadyExist { .. 
} => Err(Error::TableAlreadyExist { + full_name: to.full_name(), + }), + _ => Err(mapped), + } + } + } + } + + async fn alter_table( + &self, + _identifier: &Identifier, + _changes: Vec, + _ignore_if_not_exists: bool, + ) -> Result<()> { + // TODO: Implement alter_table when RESTApi supports it + Err(Error::Unsupported { + message: "Alter table is not yet implemented for REST catalog".to_string(), + }) + } +} diff --git a/crates/paimon/src/catalog/rest/rest_token.rs b/crates/paimon/src/catalog/rest/rest_token.rs new file mode 100644 index 00000000..8272cc64 --- /dev/null +++ b/crates/paimon/src/catalog/rest/rest_token.rs @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! REST token for data access in Apache Paimon. + +use std::collections::HashMap; + +/// Token for REST data access, containing credentials and expiration. +/// +/// Corresponds to Python `RESTToken` in `pypaimon/catalog/rest/rest_token.py`. +#[derive(Debug, Clone)] +pub struct RESTToken { + /// Token key-value pairs (e.g. access_key_id, access_key_secret, etc.) + pub token: HashMap, + /// Token expiration time in milliseconds since epoch. + pub expire_at_millis: i64, +} + +impl RESTToken { + /// Create a new RESTToken. 
+ pub fn new(token: HashMap, expire_at_millis: i64) -> Self { + Self { + token, + expire_at_millis, + } + } +} + +impl PartialEq for RESTToken { + fn eq(&self, other: &Self) -> bool { + self.expire_at_millis == other.expire_at_millis && self.token == other.token + } +} + +impl Eq for RESTToken {} + +impl std::hash::Hash for RESTToken { + fn hash(&self, state: &mut H) { + self.expire_at_millis.hash(state); + // Sort keys for deterministic hashing + let mut pairs: Vec<_> = self.token.iter().collect(); + pairs.sort_by_key(|(k, _)| (*k).clone()); + for (k, v) in pairs { + k.hash(state); + v.hash(state); + } + } +} diff --git a/crates/paimon/src/catalog/rest/rest_token_file_io.rs b/crates/paimon/src/catalog/rest/rest_token_file_io.rs new file mode 100644 index 00000000..18f1296f --- /dev/null +++ b/crates/paimon/src/catalog/rest/rest_token_file_io.rs @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! REST token-based FileIO for Apache Paimon. +//! +//! This module provides a FileIO wrapper that supports getting data access +//! tokens from a REST Server. It handles token caching, expiration detection, +//! and automatic refresh. +//! +//! Corresponds to Python `RESTTokenFileIO` in +//! 
`pypaimon/catalog/rest/rest_token_file_io.py`. + +use std::collections::HashMap; + +use tokio::sync::RwLock; + +use crate::api::rest_api::RESTApi; +use crate::catalog::Identifier; +use crate::common::Options; +use crate::io::FileIO; +use crate::Result; + +use super::rest_token::RESTToken; + +/// Safe time margin (in milliseconds) before token expiration to trigger refresh. +/// Matches `RESTApi.TOKEN_EXPIRATION_SAFE_TIME_MILLIS` in Python. +const TOKEN_EXPIRATION_SAFE_TIME_MILLIS: i64 = 3_600_000; + +/// A FileIO wrapper that supports getting data access tokens from a REST Server. +/// +/// This struct handles: +/// - Token caching with expiration detection +/// - Automatic token refresh via `RESTApi::load_table_token` +/// - Merging token credentials into catalog options to build the underlying `FileIO` +/// +/// Corresponds to Python `RESTTokenFileIO` in +/// `pypaimon/catalog/rest/rest_token_file_io.py`. +pub struct RESTTokenFileIO { + /// Table identifier for token requests. + identifier: Identifier, + /// Table path (e.g. "oss://bucket/warehouse/db.db/table"). + path: String, + /// Catalog options used to create RESTApi and build FileIO. + catalog_options: Options, + /// Cached token with RwLock for concurrent access. + token: RwLock>, +} + +impl RESTTokenFileIO { + /// Create a new RESTTokenFileIO. + /// + /// # Arguments + /// * `identifier` - Table identifier for token requests. + /// * `path` - Table path for FileIO construction. + /// * `catalog_options` - Catalog options for RESTApi and FileIO. + pub fn new(identifier: Identifier, path: String, catalog_options: Options) -> Self { + Self { + identifier, + path, + catalog_options, + token: RwLock::new(None), + } + } + + /// Build a `FileIO` instance with the current token merged into options. + /// + /// This method: + /// 1. Refreshes the token if expired or not yet obtained. + /// 2. Merges token credentials into catalog options. + /// 3. Builds a `FileIO` from the merged options. 
+ /// + /// This method builds a FileIO with the current token, + /// which can be passed to `Table::new`. If the token expires, a new + /// `get_table` call is needed. + pub async fn build_file_io(&self) -> Result { + // Ensure token is fresh + self.try_to_refresh_token().await?; + + let token_guard = self.token.read().await; + match token_guard.as_ref() { + Some(token) => { + // Merge token credentials with catalog options + // token.token["fs.oss.endpoint"] = oss-cn-hangzhou.aliyuncs.com + let mut token_with_endpoint = token.token.clone(); + token_with_endpoint.insert("fs.oss.endpoint".to_string(), "oss-cn-hangzhou.aliyuncs.com".to_string()); + let merged_props = self.merge_token_with_options(&token_with_endpoint); + // let merged_props = self.merge_token_with_options(&token.token); + // Build FileIO with merged properties + let mut builder = FileIO::from_path(&self.path)?; + builder = builder.with_props(merged_props); + builder.build() + } + None => { + // No token available, build FileIO from path only + FileIO::from_path(&self.path)?.build() + } + } + } + + /// Try to refresh the token if it is expired or not yet obtained. + /// + /// Corresponds to Python `RESTTokenFileIO.try_to_refresh_token`. + async fn try_to_refresh_token(&self) -> Result<()> { + // Fast path: check if token is still valid under read lock + { + let token_guard = self.token.read().await; + if let Some(ref token) = *token_guard { + if !Self::is_token_expired(token) { + return Ok(()); + } + } + } + + // Slow path: acquire write lock and refresh + let mut token_guard = self.token.write().await; + + // Double-check after acquiring write lock (another task may have refreshed) + if let Some(ref token) = *token_guard { + if !Self::is_token_expired(token) { + return Ok(()); + } + } + + // Refresh the token + let new_token = self.refresh_token().await?; + *token_guard = Some(new_token); + Ok(()) + } + + /// Refresh the token by calling `RESTApi::load_table_token`. 
+ /// + /// Creates a temporary `RESTApi` instance (with `config_required=false`) + /// and loads the table token. + /// + /// Corresponds to Python `RESTTokenFileIO.refresh_token`. + async fn refresh_token(&self) -> Result { + let mut api = RESTApi::new(self.catalog_options.clone(), false).await?; + let response = api.load_table_token(&self.identifier).await?; + + let expires_at_millis = response.expires_at_millis.unwrap_or(0); + + // Merge token with catalog options (e.g. DLF OSS endpoint override) + let merged_token = self.merge_token_with_catalog_options(response.token); + Ok(RESTToken::new(merged_token, expires_at_millis)) + } + + /// Check if a token is expired (within the safe time margin). + fn is_token_expired(token: &RESTToken) -> bool { + let current_time = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as i64; + (token.expire_at_millis - current_time) < TOKEN_EXPIRATION_SAFE_TIME_MILLIS + } + + /// Merge token credentials with catalog options for DLF OSS endpoint override. + /// + /// Corresponds to Python `RESTTokenFileIO._merge_token_with_catalog_options`. + fn merge_token_with_catalog_options( + &self, + token: HashMap, + ) -> HashMap { + let mut merged = token; + // If catalog options contain a DLF OSS endpoint, override the standard OSS endpoint + if let Some(dlf_oss_endpoint) = self.catalog_options.get("dlf.oss-endpoint") { + if !dlf_oss_endpoint.trim().is_empty() { + merged.insert("fs.oss.endpoint".to_string(), dlf_oss_endpoint.clone()); + } + } + merged + } + + /// Merge token credentials into catalog options map for FileIO construction. 
+ fn merge_token_with_options( + &self, + token: &HashMap, + ) -> HashMap { + let mut merged = self.catalog_options.to_map().clone(); + // Token values override catalog options + merged.extend(token.iter().map(|(k, v)| (k.clone(), v.clone()))); + // DLF OSS endpoint override + if let Some(dlf_oss_endpoint) = self.catalog_options.get("dlf.oss-endpoint") { + if !dlf_oss_endpoint.trim().is_empty() { + merged.insert("fs.oss.endpoint".to_string(), dlf_oss_endpoint.clone()); + } + } + merged + } +} diff --git a/crates/paimon/tests/rest_catalog_test.rs b/crates/paimon/tests/rest_catalog_test.rs new file mode 100644 index 00000000..0f05d6b4 --- /dev/null +++ b/crates/paimon/tests/rest_catalog_test.rs @@ -0,0 +1,455 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Integration tests for RestCatalog. +//! +//! These tests use a mock server to verify the RestCatalog behavior +//! through the Catalog trait interface. 
+ +use std::collections::HashMap; + +use paimon::api::ConfigResponse; +use paimon::catalog::{Catalog, Identifier, RestCatalog}; +use paimon::common::Options; +use paimon::spec::{BigIntType, DataType, Schema, VarCharType}; + +mod mock_server; +use mock_server::{start_mock_server, RESTServer}; + +/// Helper struct to hold test resources. +struct TestContext { + server: RESTServer, + catalog: RestCatalog, +} + +/// Helper function to set up a test environment with RestCatalog. +async fn setup_catalog(initial_dbs: Vec<&str>) -> TestContext { + let prefix = "mock-test"; + let mut defaults = HashMap::new(); + defaults.insert("prefix".to_string(), prefix.to_string()); + let config = ConfigResponse::new(defaults); + + let initial: Vec = initial_dbs.iter().map(|s| s.to_string()).collect(); + let server = start_mock_server( + "test_warehouse".to_string(), + "/tmp/test_warehouse".to_string(), + config, + initial, + ) + .await; + + let url = server.url().expect("Failed to get server URL"); + let mut options = Options::new(); + options.set("uri", &url); + options.set("warehouse", "test_warehouse"); + options.set("token.provider", "bear"); + options.set("token", "test_token"); + + let catalog = RestCatalog::new(options, true) + .await + .expect("Failed to create RestCatalog"); + + TestContext { server, catalog } +} + +/// Helper to build a simple test schema. 
+fn test_schema() -> Schema { + Schema::builder() + .column("id", DataType::BigInt(BigIntType::new())) + .column("name", DataType::VarChar(VarCharType::new(255).unwrap())) + .build() + .expect("Failed to build schema") +} + +// ==================== Database Tests ==================== + +#[tokio::test] +async fn test_catalog_list_databases() { + let ctx = setup_catalog(vec!["default", "test_db1", "prod_db"]).await; + + let dbs = ctx.catalog.list_databases().await.unwrap(); + + assert!(dbs.contains(&"default".to_string())); + assert!(dbs.contains(&"test_db1".to_string())); + assert!(dbs.contains(&"prod_db".to_string())); +} + +#[tokio::test] +async fn test_catalog_create_database() { + let ctx = setup_catalog(vec!["default"]).await; + + // Create new database + let result = ctx + .catalog + .create_database("new_db", false, HashMap::new()) + .await; + assert!(result.is_ok(), "failed to create database: {:?}", result); + + // Verify creation + let dbs = ctx.catalog.list_databases().await.unwrap(); + assert!(dbs.contains(&"new_db".to_string())); +} + +#[tokio::test] +async fn test_catalog_create_database_already_exists() { + let ctx = setup_catalog(vec!["default"]).await; + + // Duplicate creation with ignore_if_exists=false should fail + let result = ctx + .catalog + .create_database("default", false, HashMap::new()) + .await; + assert!( + result.is_err(), + "creating duplicate database should fail when ignore_if_exists=false" + ); +} + +#[tokio::test] +async fn test_catalog_create_database_ignore_if_exists() { + let ctx = setup_catalog(vec!["default"]).await; + + // Duplicate creation with ignore_if_exists=true should succeed + let result = ctx + .catalog + .create_database("default", true, HashMap::new()) + .await; + assert!( + result.is_ok(), + "creating duplicate database should succeed when ignore_if_exists=true" + ); +} + +#[tokio::test] +async fn test_catalog_drop_database() { + let ctx = setup_catalog(vec!["default", "to_drop"]).await; + + // Verify database 
exists + let dbs = ctx.catalog.list_databases().await.unwrap(); + assert!(dbs.contains(&"to_drop".to_string())); + + // Drop database (cascade=true to skip empty check) + let result = ctx.catalog.drop_database("to_drop", false, true).await; + assert!(result.is_ok(), "failed to drop database: {:?}", result); + + // Verify database is gone + let dbs = ctx.catalog.list_databases().await.unwrap(); + assert!(!dbs.contains(&"to_drop".to_string())); +} + +#[tokio::test] +async fn test_catalog_drop_database_not_exists() { + let ctx = setup_catalog(vec!["default"]).await; + + // Dropping non-existent database with ignore_if_not_exists=false should fail + let result = ctx + .catalog + .drop_database("non_existent", false, true) + .await; + assert!( + result.is_err(), + "dropping non-existent database should fail when ignore_if_not_exists=false" + ); +} + +#[tokio::test] +async fn test_catalog_drop_database_ignore_if_not_exists() { + let ctx = setup_catalog(vec!["default"]).await; + + // Dropping non-existent database with ignore_if_not_exists=true should succeed + let result = ctx + .catalog + .drop_database("non_existent", true, true) + .await; + assert!( + result.is_ok(), + "dropping non-existent database should succeed when ignore_if_not_exists=true" + ); +} + +#[tokio::test] +async fn test_catalog_drop_database_not_empty_no_cascade() { + let ctx = setup_catalog(vec!["default"]).await; + + // Add a table to the database + ctx.server.add_table("default", "some_table"); + + // Drop database with cascade=false should fail because it's not empty + let result = ctx.catalog.drop_database("default", false, false).await; + assert!( + result.is_err(), + "dropping non-empty database should fail when cascade=false" + ); +} + +#[tokio::test] +async fn test_catalog_drop_database_not_empty_cascade() { + let ctx = setup_catalog(vec!["default"]).await; + + // Add a table to the database + ctx.server.add_table("default", "some_table"); + + // Drop database with cascade=true should succeed 
+ let result = ctx.catalog.drop_database("default", false, true).await; + assert!( + result.is_ok(), + "dropping non-empty database should succeed when cascade=true" + ); + + // Verify database is gone + let dbs = ctx.catalog.list_databases().await.unwrap(); + assert!(!dbs.contains(&"default".to_string())); +} + +// ==================== Table Tests ==================== + +#[tokio::test] +async fn test_catalog_list_tables() { + let ctx = setup_catalog(vec!["default"]).await; + + // Add tables + ctx.server.add_table("default", "table1"); + ctx.server.add_table("default", "table2"); + + // List tables + let tables = ctx.catalog.list_tables("default").await.unwrap(); + assert!(tables.contains(&"table1".to_string())); + assert!(tables.contains(&"table2".to_string())); +} + +#[tokio::test] +async fn test_catalog_list_tables_empty() { + let ctx = setup_catalog(vec!["default"]).await; + + let tables = ctx.catalog.list_tables("default").await.unwrap(); + assert!( + tables.is_empty(), + "expected empty tables list, got: {:?}", + tables + ); +} + +#[tokio::test] +async fn test_catalog_get_table() { + let ctx = setup_catalog(vec!["default"]).await; + + // Add a table with schema and path so get_table can build a Table object + let schema = test_schema(); + ctx.server + .add_table_with_schema("default", "my_table", schema, "/tmp/test_warehouse/default.db/my_table"); + + let identifier = Identifier::new("default", "my_table"); + let table = ctx.catalog.get_table(&identifier).await; + assert!(table.is_ok(), "failed to get table: {:?}", table); +} + +#[tokio::test] +async fn test_catalog_get_table_not_found() { + let ctx = setup_catalog(vec!["default"]).await; + + let identifier = Identifier::new("default", "non_existent"); + let result = ctx.catalog.get_table(&identifier).await; + assert!(result.is_err(), "getting non-existent table should fail"); +} + +#[tokio::test] +async fn test_catalog_create_table() { + let ctx = setup_catalog(vec!["default"]).await; + + let schema = 
test_schema(); + let identifier = Identifier::new("default", "new_table"); + + let result = ctx.catalog.create_table(&identifier, schema, false).await; + assert!(result.is_ok(), "failed to create table: {:?}", result); + + // Verify table exists + let tables = ctx.catalog.list_tables("default").await.unwrap(); + assert!(tables.contains(&"new_table".to_string())); +} + +#[tokio::test] +async fn test_catalog_create_table_already_exists() { + let ctx = setup_catalog(vec!["default"]).await; + + // Add a table first + ctx.server.add_table("default", "existing_table"); + + let schema = test_schema(); + let identifier = Identifier::new("default", "existing_table"); + + // Create with ignore_if_exists=false should fail + let result = ctx.catalog.create_table(&identifier, schema, false).await; + assert!( + result.is_err(), + "creating duplicate table should fail when ignore_if_exists=false" + ); +} + +#[tokio::test] +async fn test_catalog_create_table_ignore_if_exists() { + let ctx = setup_catalog(vec!["default"]).await; + + // Add a table first + ctx.server.add_table("default", "existing_table"); + + let schema = test_schema(); + let identifier = Identifier::new("default", "existing_table"); + + // Create with ignore_if_exists=true should succeed + let result = ctx.catalog.create_table(&identifier, schema, true).await; + assert!( + result.is_ok(), + "creating duplicate table should succeed when ignore_if_exists=true" + ); +} + +#[tokio::test] +async fn test_catalog_drop_table() { + let ctx = setup_catalog(vec!["default"]).await; + + // Add a table + ctx.server.add_table("default", "table_to_drop"); + + let identifier = Identifier::new("default", "table_to_drop"); + + // Drop table + let result = ctx.catalog.drop_table(&identifier, false).await; + assert!(result.is_ok(), "failed to drop table: {:?}", result); + + // Verify table is gone + let tables = ctx.catalog.list_tables("default").await.unwrap(); + assert!(!tables.contains(&"table_to_drop".to_string())); +} + 
+#[tokio::test] +async fn test_catalog_drop_table_not_found() { + let ctx = setup_catalog(vec!["default"]).await; + + let identifier = Identifier::new("default", "non_existent"); + + // Drop with ignore_if_not_exists=false should fail + let result = ctx.catalog.drop_table(&identifier, false).await; + assert!( + result.is_err(), + "dropping non-existent table should fail when ignore_if_not_exists=false" + ); +} + +#[tokio::test] +async fn test_catalog_drop_table_ignore_if_not_exists() { + let ctx = setup_catalog(vec!["default"]).await; + + let identifier = Identifier::new("default", "non_existent"); + + // Drop with ignore_if_not_exists=true should succeed + let result = ctx.catalog.drop_table(&identifier, true).await; + assert!( + result.is_ok(), + "dropping non-existent table should succeed when ignore_if_not_exists=true" + ); +} + +// ==================== Rename Table Tests ==================== + +#[tokio::test] +async fn test_catalog_rename_table() { + let ctx = setup_catalog(vec!["default"]).await; + + // Add a table + ctx.server.add_table("default", "old_table"); + + let from = Identifier::new("default", "old_table"); + let to = Identifier::new("default", "new_table"); + + // Rename table + let result = ctx.catalog.rename_table(&from, &to, false).await; + assert!(result.is_ok(), "failed to rename table: {:?}", result); + + // Verify old table is gone and new table exists + let tables = ctx.catalog.list_tables("default").await.unwrap(); + assert!(!tables.contains(&"old_table".to_string())); + assert!(tables.contains(&"new_table".to_string())); +} + +#[tokio::test] +async fn test_catalog_rename_table_not_found() { + let ctx = setup_catalog(vec!["default"]).await; + + let from = Identifier::new("default", "non_existent"); + let to = Identifier::new("default", "new_name"); + + // Rename with ignore_if_not_exists=false should fail + let result = ctx.catalog.rename_table(&from, &to, false).await; + assert!( + result.is_err(), + "renaming non-existent table should 
fail when ignore_if_not_exists=false" + ); +} + +#[tokio::test] +async fn test_catalog_rename_table_ignore_if_not_exists() { + let ctx = setup_catalog(vec!["default"]).await; + + let from = Identifier::new("default", "non_existent"); + let to = Identifier::new("default", "new_name"); + + // Rename with ignore_if_not_exists=true should succeed + let result = ctx.catalog.rename_table(&from, &to, true).await; + assert!( + result.is_ok(), + "renaming non-existent table should succeed when ignore_if_not_exists=true" + ); +} + +// ==================== Alter Table Tests ==================== + +#[tokio::test] +async fn test_catalog_alter_table_unsupported() { + let ctx = setup_catalog(vec!["default"]).await; + + let identifier = Identifier::new("default", "some_table"); + + // alter_table should return Unsupported error + let result = ctx.catalog.alter_table(&identifier, vec![], false).await; + assert!( + result.is_err(), + "alter_table should return Unsupported error" + ); +} + +// ==================== Multiple Databases Tests ==================== + +#[tokio::test] +async fn test_catalog_multiple_databases_with_tables() { + let ctx = setup_catalog(vec!["db1", "db2"]).await; + + // Add tables to different databases + ctx.server.add_table("db1", "table1_db1"); + ctx.server.add_table("db1", "table2_db1"); + ctx.server.add_table("db2", "table1_db2"); + + // Verify db1 tables + let tables_db1 = ctx.catalog.list_tables("db1").await.unwrap(); + assert_eq!(tables_db1.len(), 2); + assert!(tables_db1.contains(&"table1_db1".to_string())); + assert!(tables_db1.contains(&"table2_db1".to_string())); + + // Verify db2 tables + let tables_db2 = ctx.catalog.list_tables("db2").await.unwrap(); + assert_eq!(tables_db2.len(), 1); + assert!(tables_db2.contains(&"table1_db2".to_string())); +} From a68df611fd027f991022da2e05403f18dcaf7b86 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 30 Mar 2026 17:02:44 +0800 Subject: [PATCH 03/13] feat: implement RESTCatalog with database and table CRUD # 
Conflicts: # crates/paimon/tests/mock_server.rs --- crates/integration_tests/tests/read_tables.rs | 36 +-- .../paimon/examples/rest_catalog_example.rs | 22 +- .../rest_catalog_read_append_example.rs | 17 +- crates/paimon/src/api/api_response.rs | 2 +- crates/paimon/src/api/rest_api.rs | 10 +- crates/paimon/src/api/rest_util.rs | 28 +++ crates/paimon/src/catalog/database.rs | 8 +- crates/paimon/src/catalog/filesystem.rs | 8 +- .../paimon/src/catalog/rest/rest_catalog.rs | 210 +++++++++--------- crates/paimon/src/catalog/rest/rest_token.rs | 13 -- .../src/catalog/rest/rest_token_file_io.rs | 52 +++-- crates/paimon/src/io/file_io.rs | 19 +- crates/paimon/src/io/storage_oss.rs | 2 +- crates/paimon/tests/mock_server.rs | 10 +- crates/paimon/tests/rest_catalog_test.rs | 32 ++- 15 files changed, 220 insertions(+), 249 deletions(-) diff --git a/crates/integration_tests/tests/read_tables.rs b/crates/integration_tests/tests/read_tables.rs index 20ab4926..c92f262f 100644 --- a/crates/integration_tests/tests/read_tables.rs +++ b/crates/integration_tests/tests/read_tables.rs @@ -20,7 +20,7 @@ use arrow_array::{Int32Array, RecordBatch, StringArray}; use futures::TryStreamExt; use paimon::api::ConfigResponse; -use paimon::catalog::{Identifier, RestCatalog}; +use paimon::catalog::{Identifier, RESTCatalog}; use paimon::common::Options; use paimon::spec::{DataType, IntType, Schema, VarCharType}; use paimon::{Catalog, Error, FileSystemCatalog, Plan}; @@ -452,10 +452,7 @@ async fn test_read_projection_duplicate_column() { fn simple_log_schema() -> Schema { Schema::builder() .column("id", DataType::Int(IntType::new())) - .column( - "name", - DataType::VarChar(VarCharType::string_type()), - ) + .column("name", DataType::VarChar(VarCharType::string_type())) .build() .expect("Failed to build schema") } @@ -464,10 +461,7 @@ fn simple_log_schema() -> Schema { fn simple_dv_pk_schema() -> Schema { Schema::builder() .column("id", DataType::Int(IntType::with_nullable(false))) - .column( - 
"name", - DataType::VarChar(VarCharType::string_type()), - ) + .column("name", DataType::VarChar(VarCharType::string_type())) .primary_key(["id"]) .option("deletion-vectors.enabled", "true") .build() @@ -475,10 +469,10 @@ fn simple_dv_pk_schema() -> Schema { } /// Start a mock REST server backed by Spark-provisioned data on disk, -/// register the given tables, and return a connected `RestCatalog`. +/// register the given tables, and return a connected `RESTCatalog`. async fn setup_rest_catalog_with_tables( table_configs: &[(&str, &str, Schema)], -) -> (mock_server::RESTServer, RestCatalog) { +) -> (mock_server::RESTServer, RESTCatalog) { let data_path = get_test_warehouse(); // Use a simple warehouse name (no slashes) to avoid URL-encoding issues let warehouse_name = "test_warehouse"; @@ -508,9 +502,9 @@ async fn setup_rest_catalog_with_tables( options.set("token.provider", "bear"); options.set("token", "test_token"); - let catalog = RestCatalog::new(options, true) + let catalog = RESTCatalog::new(options, true) .await - .expect("Failed to create RestCatalog"); + .expect("Failed to create RESTCatalog"); (server, catalog) } @@ -521,12 +515,8 @@ async fn setup_rest_catalog_with_tables( #[tokio::test] async fn test_rest_catalog_read_append_table() { let table_name = "simple_log_table"; - let (_server, catalog) = setup_rest_catalog_with_tables(&[( - "default", - table_name, - simple_log_schema(), - )]) - .await; + let (_server, catalog) = + setup_rest_catalog_with_tables(&[("default", table_name, simple_log_schema())]).await; let identifier = Identifier::new("default", table_name); let table = catalog @@ -575,12 +565,8 @@ async fn test_rest_catalog_read_append_table() { #[tokio::test] async fn test_rest_catalog_read_pk_table() { let table_name = "simple_dv_pk_table"; - let (_server, catalog) = setup_rest_catalog_with_tables(&[( - "default", - table_name, - simple_dv_pk_schema(), - )]) - .await; + let (_server, catalog) = + setup_rest_catalog_with_tables(&[("default", 
table_name, simple_dv_pk_schema())]).await; let identifier = Identifier::new("default", table_name); let table = catalog diff --git a/crates/paimon/examples/rest_catalog_example.rs b/crates/paimon/examples/rest_catalog_example.rs index 4696e66a..da3b2dca 100644 --- a/crates/paimon/examples/rest_catalog_example.rs +++ b/crates/paimon/examples/rest_catalog_example.rs @@ -17,7 +17,7 @@ //! Example: REST Catalog Operations //! -//! This example demonstrates how to use `RestCatalog` for database and table operations +//! This example demonstrates how to use `RESTCatalog` for database and table operations //! via the Paimon REST catalog API. //! //! # Usage @@ -33,7 +33,7 @@ use std::collections::HashMap; -use paimon::catalog::{Catalog, Identifier, RestCatalog}; +use paimon::catalog::{Catalog, Identifier, RESTCatalog}; use paimon::common::{CatalogOptions, Options}; use paimon::spec::{DataType, IntType, Schema, VarCharType}; @@ -54,10 +54,7 @@ async fn main() { // Basic configuration — replace with your actual server URL and warehouse options.set(CatalogOptions::METASTORE, "rest"); options.set(CatalogOptions::WAREHOUSE, "pypaimon_catalog"); - options.set( - CatalogOptions::URI, - "http://sample.net/", - ); + options.set(CatalogOptions::URI, "http://sample.net/"); // --- Authentication (choose one) --- @@ -78,12 +75,12 @@ async fn main() { // options.set("token", std::env::var("PAIMON_REST_TOKEN") // .expect("PAIMON_REST_TOKEN env var not set")); - // ==================== Create RestCatalog ==================== - println!("Creating RestCatalog instance..."); - let catalog = match RestCatalog::new(options, true).await { + // ==================== Create RESTCatalog ==================== + println!("Creating RESTCatalog instance..."); + let catalog = match RESTCatalog::new(options, true).await { Ok(catalog) => catalog, Err(err) => { - eprintln!("Failed to create RestCatalog: {}", err); + eprintln!("Failed to create RESTCatalog: {}", err); return; } }; @@ -127,10 +124,7 @@ 
async fn main() { let table_identifier = Identifier::new("example_db", "users"); println!("Creating table '{}'...", table_identifier); let schema = create_test_schema(); - match catalog - .create_table(&table_identifier, schema, false) - .await - { + match catalog.create_table(&table_identifier, schema, false).await { Ok(()) => println!("Table created successfully"), Err(err) => eprintln!("Failed to create table: {}", err), } diff --git a/crates/paimon/examples/rest_catalog_read_append_example.rs b/crates/paimon/examples/rest_catalog_read_append_example.rs index e3734817..bca61fb6 100644 --- a/crates/paimon/examples/rest_catalog_read_append_example.rs +++ b/crates/paimon/examples/rest_catalog_read_append_example.rs @@ -17,7 +17,7 @@ //! Example: REST Catalog — Read Append Table Data //! -//! This example demonstrates how to use `RestCatalog` to read data from an +//! This example demonstrates how to use `RESTCatalog` to read data from an //! append-only table with the following schema: //! //! 
| Column | Type | @@ -45,7 +45,7 @@ use futures::TryStreamExt; -use paimon::catalog::{Catalog, Identifier, RestCatalog}; +use paimon::catalog::{Catalog, Identifier, RESTCatalog}; use paimon::common::{CatalogOptions, Options}; #[tokio::main] @@ -56,10 +56,7 @@ async fn main() { // Basic configuration — replace with your actual server URL and warehouse options.set(CatalogOptions::METASTORE, "rest"); options.set(CatalogOptions::WAREHOUSE, "pypaimon_catalog"); - options.set( - CatalogOptions::URI, - "http://sample.net/", - ); + options.set(CatalogOptions::URI, "http://sample.net/"); // --- Authentication (choose one) --- @@ -80,12 +77,12 @@ async fn main() { // options.set("token", std::env::var("PAIMON_REST_TOKEN") // .expect("PAIMON_REST_TOKEN env var not set")); - // ==================== Create RestCatalog ==================== - println!("Creating RestCatalog instance..."); - let catalog = match RestCatalog::new(options, true).await { + // ==================== Create RESTCatalog ==================== + println!("Creating RESTCatalog instance..."); + let catalog = match RESTCatalog::new(options, true).await { Ok(catalog) => catalog, Err(err) => { - eprintln!("Failed to create RestCatalog: {}", err); + eprintln!("Failed to create RESTCatalog: {}", err); return; } }; diff --git a/crates/paimon/src/api/api_response.rs b/crates/paimon/src/api/api_response.rs index b6e9f696..c3169a27 100644 --- a/crates/paimon/src/api/api_response.rs +++ b/crates/paimon/src/api/api_response.rs @@ -279,7 +279,7 @@ impl PagedList { } /// Response for getting table token. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct GetTableTokenResponse { /// Token key-value pairs (e.g. access_key_id, access_key_secret, etc.) 
diff --git a/crates/paimon/src/api/rest_api.rs b/crates/paimon/src/api/rest_api.rs index c8dc3c13..41c97dc5 100644 --- a/crates/paimon/src/api/rest_api.rs +++ b/crates/paimon/src/api/rest_api.rs @@ -100,7 +100,7 @@ impl RESTApi { /// /// # Errors /// Returns an error if required options are missing or if config fetch fails. - pub async fn new(mut options: Options, config_required: bool) -> Result { + pub async fn new(options: Options, config_required: bool) -> Result { let uri = options .get(CatalogOptions::URI) .ok_or_else(|| crate::Error::ConfigInvalid { @@ -143,17 +143,17 @@ impl RESTApi { .await?; // Merge config response with options (client config takes priority) - options = config_response.merge_options(&options); + let merged = config_response.merge_options(&options); // Update base headers from merged options and recreate auth function - base_headers.extend(RESTUtil::extract_prefix_map(&options, Self::HEADER_PREFIX)); + base_headers.extend(RESTUtil::extract_prefix_map(&merged, Self::HEADER_PREFIX)); // Recreate auth function with updated headers if needed - let auth_provider = AuthProviderFactory::create_auth_provider(&options)?; + let auth_provider = AuthProviderFactory::create_auth_provider(&merged)?; let rest_auth_function = RESTAuthFunction::new(base_headers, auth_provider); client.set_auth_function(rest_auth_function); - options + merged } else { options }; diff --git a/crates/paimon/src/api/rest_util.rs b/crates/paimon/src/api/rest_util.rs index ce8627c2..9c992e43 100644 --- a/crates/paimon/src/api/rest_util.rs +++ b/crates/paimon/src/api/rest_util.rs @@ -41,6 +41,34 @@ impl RESTUtil { pub fn extract_prefix_map(options: &Options, prefix: &str) -> HashMap { options.extract_prefix_map(prefix) } + + /// Merge two property maps, with `override_properties` taking precedence. + /// + /// For keys present in both maps, the value from `override_properties` wins. 
+ /// `None` values are skipped (only relevant at the map level; individual + /// entries are always `String`). + /// + /// Corresponds to Python `RESTUtil.merge`. + pub fn merge( + base_properties: Option<&HashMap>, + override_properties: Option<&HashMap>, + ) -> HashMap { + let mut result = HashMap::new(); + + if let Some(base) = base_properties { + for (key, value) in base { + result.insert(key.clone(), value.clone()); + } + } + + if let Some(overrides) = override_properties { + for (key, value) in overrides { + result.insert(key.clone(), value.clone()); + } + } + + result + } } #[cfg(test)] diff --git a/crates/paimon/src/catalog/database.rs b/crates/paimon/src/catalog/database.rs index 594c0ac5..6979d073 100644 --- a/crates/paimon/src/catalog/database.rs +++ b/crates/paimon/src/catalog/database.rs @@ -22,7 +22,7 @@ use std::collections::HashMap; /// Structure representing a database in a Paimon catalog. /// /// Corresponds to Python `Database` in `pypaimon/catalog/rest/rest_catalog.py`. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct Database { /// Database name. pub name: String, @@ -34,11 +34,7 @@ pub struct Database { impl Database { /// Create a new Database. 
- pub fn new( - name: String, - options: HashMap, - comment: Option, - ) -> Self { + pub fn new(name: String, options: HashMap, comment: Option) -> Self { Self { name, options, diff --git a/crates/paimon/src/catalog/filesystem.rs b/crates/paimon/src/catalog/filesystem.rs index 3dc9a393..61d6e8d3 100644 --- a/crates/paimon/src/catalog/filesystem.rs +++ b/crates/paimon/src/catalog/filesystem.rs @@ -21,7 +21,7 @@ use std::collections::HashMap; -use crate::catalog::{Catalog, DB_LOCATION_PROP, DB_SUFFIX, Database, Identifier}; +use crate::catalog::{Catalog, Database, Identifier, DB_LOCATION_PROP, DB_SUFFIX}; use crate::error::{Error, Result}; use crate::io::FileIO; use crate::spec::{Schema, TableSchema}; @@ -244,11 +244,7 @@ impl Catalog for FileSystemCatalog { }); } - Ok(Database::new( - name.to_string(), - HashMap::new(), - None, - )) + Ok(Database::new(name.to_string(), HashMap::new(), None)) } async fn drop_database( diff --git a/crates/paimon/src/catalog/rest/rest_catalog.rs b/crates/paimon/src/catalog/rest/rest_catalog.rs index da31949d..ab64d59b 100644 --- a/crates/paimon/src/catalog/rest/rest_catalog.rs +++ b/crates/paimon/src/catalog/rest/rest_catalog.rs @@ -46,7 +46,7 @@ use super::rest_token_file_io::RESTTokenFileIO; /// for all metadata operations (database and table CRUD). /// /// Corresponds to Python `RESTCatalog` in `pypaimon/catalog/rest/rest_catalog.py`. -pub struct RestCatalog { +pub struct RESTCatalog { /// The REST API client, wrapped in a Mutex because `RESTApi` methods /// require `&mut self` while `Catalog` trait methods take `&self`. api: Mutex, @@ -58,7 +58,7 @@ pub struct RestCatalog { data_token_enabled: bool, } -impl RestCatalog { +impl RESTCatalog { /// Create a new REST catalog. 
/// /// # Arguments @@ -119,7 +119,6 @@ impl RestCatalog { api.list_databases_paged(max_results, page_token, database_name_pattern) .await } - } // ============================================================================ @@ -132,17 +131,18 @@ impl RestCatalog { /// `RestError::AlreadyExists` -> `Error::DatabaseAlreadyExist`, /// and passes through other errors via `Error::RestApi`. fn map_rest_error_for_database(err: Error, database_name: &str) -> Error { - match &err { - Error::RestApi { source } => match source { - RestError::NoSuchResource { .. } => Error::DatabaseNotExist { - database: database_name.to_string(), - }, - RestError::AlreadyExists { .. } => Error::DatabaseAlreadyExist { - database: database_name.to_string(), - }, - _ => err, + match err { + Error::RestApi { + source: RestError::NoSuchResource { .. }, + } => Error::DatabaseNotExist { + database: database_name.to_string(), + }, + Error::RestApi { + source: RestError::AlreadyExists { .. }, + } => Error::DatabaseAlreadyExist { + database: database_name.to_string(), }, - _ => err, + other => other, } } @@ -152,26 +152,45 @@ fn map_rest_error_for_database(err: Error, database_name: &str) -> Error { /// `RestError::AlreadyExists` -> `Error::TableAlreadyExist`, /// and passes through other errors via `Error::RestApi`. fn map_rest_error_for_table(err: Error, identifier: &Identifier) -> Error { - match &err { - Error::RestApi { source } => match source { - RestError::NoSuchResource { .. } => Error::TableNotExist { - full_name: identifier.full_name(), - }, - RestError::AlreadyExists { .. } => Error::TableAlreadyExist { - full_name: identifier.full_name(), - }, - _ => err, + match err { + Error::RestApi { + source: RestError::NoSuchResource { .. }, + } => Error::TableNotExist { + full_name: identifier.full_name(), + }, + Error::RestApi { + source: RestError::AlreadyExists { .. 
}, + } => Error::TableAlreadyExist { + full_name: identifier.full_name(), }, - _ => err, + other => other, } } +/// Execute a fallible operation and ignore a specific error variant. +/// +/// If the operation succeeds, returns `Ok(())`. +/// If it fails with an error that `should_ignore` returns `true` for, returns `Ok(())`. +/// Otherwise, returns the error. +fn ignore_error_if(result: Result<()>, should_ignore: F) -> Result<()> +where + F: Fn(&Error) -> bool, +{ + result.or_else(|err| { + if should_ignore(&err) { + Ok(()) + } else { + Err(err) + } + }) +} + // ============================================================================ // Catalog trait implementation // ============================================================================ #[async_trait] -impl Catalog for RestCatalog { +impl Catalog for RESTCatalog { // ======================= database methods =============================== async fn list_databases(&self) -> Result> { @@ -191,16 +210,13 @@ impl Catalog for RestCatalog { } else { Some(properties) }; - match api.create_database(name, options).await { - Ok(()) => Ok(()), - Err(err) => { - let mapped = map_rest_error_for_database(err, name); - match &mapped { - Error::DatabaseAlreadyExist { .. } if ignore_if_exists => Ok(()), - _ => Err(mapped), - } - } - } + let result = api + .create_database(name, options) + .await + .map_err(|e| map_rest_error_for_database(e, name)); + ignore_error_if(result, |e| { + ignore_if_exists && matches!(e, Error::DatabaseAlreadyExist { .. 
}) + }) } async fn get_database(&self, name: &str) -> Result { @@ -215,11 +231,7 @@ impl Catalog for RestCatalog { options.insert(DB_LOCATION_PROP.to_string(), location); } - Ok(Database::new( - name.to_string(), - options, - None, - )) + Ok(Database::new(name.to_string(), options, None)) } async fn drop_database( @@ -232,34 +244,30 @@ impl Catalog for RestCatalog { // If not cascade, check if database is empty first if !cascade { - match api.list_tables(name).await { - Ok(tables) => { - if !tables.is_empty() { - return Err(Error::DatabaseNotEmpty { - database: name.to_string(), - }); - } - } + let tables = match api.list_tables(name).await { + Ok(tables) => tables, Err(err) => { let mapped = map_rest_error_for_database(err, name); - match &mapped { - Error::DatabaseNotExist { .. } if ignore_if_not_exists => return Ok(()), - _ => return Err(mapped), + if ignore_if_not_exists && matches!(mapped, Error::DatabaseNotExist { .. }) { + return Ok(()); } + return Err(mapped); } + }; + if !tables.is_empty() { + return Err(Error::DatabaseNotEmpty { + database: name.to_string(), + }); } } - match api.drop_database(name).await { - Ok(()) => Ok(()), - Err(err) => { - let mapped = map_rest_error_for_database(err, name); - match &mapped { - Error::DatabaseNotExist { .. } if ignore_if_not_exists => Ok(()), - _ => Err(mapped), - } - } - } + let result = api + .drop_database(name) + .await + .map_err(|e| map_rest_error_for_database(e, name)); + ignore_error_if(result, |e| { + ignore_if_not_exists && matches!(e, Error::DatabaseNotExist { .. 
}) + }) } // ======================= table methods =============================== @@ -273,10 +281,7 @@ impl Catalog for RestCatalog { // Extract schema from response let schema = response.schema.ok_or_else(|| Error::DataInvalid { - message: format!( - "Table {} response missing schema", - identifier.full_name() - ), + message: format!("Table {} response missing schema", identifier.full_name()), source: None, })?; @@ -298,18 +303,20 @@ impl Catalog for RestCatalog { // Build FileIO based on data_token_enabled and is_external let file_io = if self.data_token_enabled && !is_external { // Use RESTTokenFileIO to get token-based FileIO - let token_file_io = RESTTokenFileIO::new( - identifier.clone(), - table_path.clone(), - self.options.clone(), - ); + let token_file_io = + RESTTokenFileIO::new(identifier.clone(), table_path.clone(), self.options.clone()); token_file_io.build_file_io().await? } else { // Use standard FileIO from path FileIO::from_path(&table_path)?.build()? }; - Ok(Table::new(file_io, identifier.clone(), table_path, table_schema)) + Ok(Table::new( + file_io, + identifier.clone(), + table_path, + table_schema, + )) } async fn list_tables(&self, database_name: &str) -> Result> { @@ -326,30 +333,24 @@ impl Catalog for RestCatalog { ignore_if_exists: bool, ) -> Result<()> { let mut api = self.api.lock().await; - match api.create_table(identifier, creation).await { - Ok(()) => Ok(()), - Err(err) => { - let mapped = map_rest_error_for_table(err, identifier); - match &mapped { - Error::TableAlreadyExist { .. } if ignore_if_exists => Ok(()), - _ => Err(mapped), - } - } - } + let result = api + .create_table(identifier, creation) + .await + .map_err(|e| map_rest_error_for_table(e, identifier)); + ignore_error_if(result, |e| { + ignore_if_exists && matches!(e, Error::TableAlreadyExist { .. 
}) + }) } async fn drop_table(&self, identifier: &Identifier, ignore_if_not_exists: bool) -> Result<()> { let mut api = self.api.lock().await; - match api.drop_table(identifier).await { - Ok(()) => Ok(()), - Err(err) => { - let mapped = map_rest_error_for_table(err, identifier); - match &mapped { - Error::TableNotExist { .. } if ignore_if_not_exists => Ok(()), - _ => Err(mapped), - } - } - } + let result = api + .drop_table(identifier) + .await + .map_err(|e| map_rest_error_for_table(e, identifier)); + ignore_error_if(result, |e| { + ignore_if_not_exists && matches!(e, Error::TableNotExist { .. }) + }) } async fn rename_table( @@ -359,21 +360,20 @@ impl Catalog for RestCatalog { ignore_if_not_exists: bool, ) -> Result<()> { let mut api = self.api.lock().await; - match api.rename_table(from, to).await { - Ok(()) => Ok(()), - Err(err) => { - // Check if the error is about the source table not existing - let mapped = map_rest_error_for_table(err, from); - match &mapped { - Error::TableNotExist { .. } if ignore_if_not_exists => Ok(()), - // Also check if target already exists - Error::TableAlreadyExist { .. } => Err(Error::TableAlreadyExist { - full_name: to.full_name(), - }), - _ => Err(mapped), - } - } - } + let result = api + .rename_table(from, to) + .await + .map_err(|e| map_rest_error_for_table(e, from)) + // Remap TableAlreadyExist to use destination identifier + .map_err(|e| match e { + Error::TableAlreadyExist { .. } => Error::TableAlreadyExist { + full_name: to.full_name(), + }, + other => other, + }); + ignore_error_if(result, |e| { + ignore_if_not_exists && matches!(e, Error::TableNotExist { .. 
}) + }) } async fn alter_table( diff --git a/crates/paimon/src/catalog/rest/rest_token.rs b/crates/paimon/src/catalog/rest/rest_token.rs index 8272cc64..f72da5da 100644 --- a/crates/paimon/src/catalog/rest/rest_token.rs +++ b/crates/paimon/src/catalog/rest/rest_token.rs @@ -47,16 +47,3 @@ impl PartialEq for RESTToken { } impl Eq for RESTToken {} - -impl std::hash::Hash for RESTToken { - fn hash(&self, state: &mut H) { - self.expire_at_millis.hash(state); - // Sort keys for deterministic hashing - let mut pairs: Vec<_> = self.token.iter().collect(); - pairs.sort_by_key(|(k, _)| (*k).clone()); - for (k, v) in pairs { - k.hash(state); - v.hash(state); - } - } -} diff --git a/crates/paimon/src/catalog/rest/rest_token_file_io.rs b/crates/paimon/src/catalog/rest/rest_token_file_io.rs index 18f1296f..3b284657 100644 --- a/crates/paimon/src/catalog/rest/rest_token_file_io.rs +++ b/crates/paimon/src/catalog/rest/rest_token_file_io.rs @@ -26,9 +26,10 @@ use std::collections::HashMap; -use tokio::sync::RwLock; +use tokio::sync::{Mutex, RwLock}; use crate::api::rest_api::RESTApi; +use crate::api::rest_util::RESTUtil; use crate::catalog::Identifier; use crate::common::Options; use crate::io::FileIO; @@ -54,8 +55,11 @@ pub struct RESTTokenFileIO { identifier: Identifier, /// Table path (e.g. "oss://bucket/warehouse/db.db/table"). path: String, - /// Catalog options used to create RESTApi and build FileIO. + /// Catalog options used to build FileIO and create RESTApi. catalog_options: Options, + /// Lazily-initialized REST API client for token refresh. + /// Created on first token refresh and reused for subsequent refreshes. + api: Mutex>, /// Cached token with RwLock for concurrent access. 
token: RwLock>, } @@ -72,6 +76,7 @@ impl RESTTokenFileIO { identifier, path, catalog_options, + api: Mutex::new(None), token: RwLock::new(None), } } @@ -93,12 +98,15 @@ impl RESTTokenFileIO { let token_guard = self.token.read().await; match token_guard.as_ref() { Some(token) => { - // Merge token credentials with catalog options - // token.token["fs.oss.endpoint"] = oss-cn-hangzhou.aliyuncs.com + // Merge catalog options (base) with token credentials (override) + // token.token["fs.oss.endpoint"] = oss-cn-hangzhou.aliyuncs.com let mut token_with_endpoint = token.token.clone(); - token_with_endpoint.insert("fs.oss.endpoint".to_string(), "oss-cn-hangzhou.aliyuncs.com".to_string()); - let merged_props = self.merge_token_with_options(&token_with_endpoint); - // let merged_props = self.merge_token_with_options(&token.token); + token_with_endpoint.insert( + "fs.oss.endpoint".to_string(), + "oss-cn-hangzhou.aliyuncs.com".to_string(), + ); + let base = self.catalog_options.to_map().clone(); + let merged_props = RESTUtil::merge(Some(&base), Some(&token_with_endpoint)); // Build FileIO with merged properties let mut builder = FileIO::from_path(&self.path)?; builder = builder.with_props(merged_props); @@ -143,12 +151,19 @@ impl RESTTokenFileIO { /// Refresh the token by calling `RESTApi::load_table_token`. /// - /// Creates a temporary `RESTApi` instance (with `config_required=false`) - /// and loads the table token. + /// Lazily creates a `RESTApi` instance on first call and reuses it + /// for subsequent refreshes. /// /// Corresponds to Python `RESTTokenFileIO.refresh_token`. 
async fn refresh_token(&self) -> Result { - let mut api = RESTApi::new(self.catalog_options.clone(), false).await?; + let mut api_guard = self.api.lock().await; + let api = match api_guard.as_mut() { + Some(existing) => existing, + None => { + let new_api = RESTApi::new(self.catalog_options.clone(), false).await?; + api_guard.insert(new_api) + } + }; let response = api.load_table_token(&self.identifier).await?; let expires_at_millis = response.expires_at_millis.unwrap_or(0); @@ -183,21 +198,4 @@ impl RESTTokenFileIO { } merged } - - /// Merge token credentials into catalog options map for FileIO construction. - fn merge_token_with_options( - &self, - token: &HashMap, - ) -> HashMap { - let mut merged = self.catalog_options.to_map().clone(); - // Token values override catalog options - merged.extend(token.iter().map(|(k, v)| (k.clone(), v.clone()))); - // DLF OSS endpoint override - if let Some(dlf_oss_endpoint) = self.catalog_options.get("dlf.oss-endpoint") { - if !dlf_oss_endpoint.trim().is_empty() { - merged.insert("fs.oss.endpoint".to_string(), dlf_oss_endpoint.clone()); - } - } - merged - } } diff --git a/crates/paimon/src/io/file_io.rs b/crates/paimon/src/io/file_io.rs index 16b53784..242cb96f 100644 --- a/crates/paimon/src/io/file_io.rs +++ b/crates/paimon/src/io/file_io.rs @@ -122,12 +122,9 @@ impl FileIO { // use normalize_root to make sure it end with `/`. let list_path = normalize_root(relative_path); - let entries = op - .list_with(&list_path) - .await - .context(IoUnexpectedSnafu { - message: format!("Failed to list files in '{path}'"), - })?; + let entries = op.list_with(&list_path).await.context(IoUnexpectedSnafu { + message: format!("Failed to list files in '{path}'"), + })?; let mut statuses = Vec::new(); @@ -309,10 +306,7 @@ impl InputFile { } pub async fn exists(&self) -> crate::Result { - Ok(self - .op - .exists(&self.path[self.relative_path_pos..]) - .await?) + Ok(self.op.exists(&self.path[self.relative_path_pos..]).await?) 
} pub async fn metadata(&self) -> crate::Result { @@ -352,10 +346,7 @@ impl OutputFile { } pub async fn exists(&self) -> crate::Result { - Ok(self - .op - .exists(&self.path[self.relative_path_pos..]) - .await?) + Ok(self.op.exists(&self.path[self.relative_path_pos..]).await?) } pub fn to_input_file(self) -> InputFile { diff --git a/crates/paimon/src/io/storage_oss.rs b/crates/paimon/src/io/storage_oss.rs index 332a6347..8c7c36f0 100644 --- a/crates/paimon/src/io/storage_oss.rs +++ b/crates/paimon/src/io/storage_oss.rs @@ -79,7 +79,7 @@ pub(crate) fn oss_config_parse(mut props: HashMap) -> Result) -> TestContext { let prefix = "mock-test"; let mut defaults = HashMap::new(); @@ -59,9 +59,9 @@ async fn setup_catalog(initial_dbs: Vec<&str>) -> TestContext { options.set("token.provider", "bear"); options.set("token", "test_token"); - let catalog = RestCatalog::new(options, true) + let catalog = RESTCatalog::new(options, true) .await - .expect("Failed to create RestCatalog"); + .expect("Failed to create RESTCatalog"); TestContext { server, catalog } } @@ -156,10 +156,7 @@ async fn test_catalog_drop_database_not_exists() { let ctx = setup_catalog(vec!["default"]).await; // Dropping non-existent database with ignore_if_not_exists=false should fail - let result = ctx - .catalog - .drop_database("non_existent", false, true) - .await; + let result = ctx.catalog.drop_database("non_existent", false, true).await; assert!( result.is_err(), "dropping non-existent database should fail when ignore_if_not_exists=false" @@ -171,10 +168,7 @@ async fn test_catalog_drop_database_ignore_if_not_exists() { let ctx = setup_catalog(vec!["default"]).await; // Dropping non-existent database with ignore_if_not_exists=true should succeed - let result = ctx - .catalog - .drop_database("non_existent", true, true) - .await; + let result = ctx.catalog.drop_database("non_existent", true, true).await; assert!( result.is_ok(), "dropping non-existent database should succeed when 
ignore_if_not_exists=true" @@ -249,8 +243,12 @@ async fn test_catalog_get_table() { // Add a table with schema and path so get_table can build a Table object let schema = test_schema(); - ctx.server - .add_table_with_schema("default", "my_table", schema, "/tmp/test_warehouse/default.db/my_table"); + ctx.server.add_table_with_schema( + "default", + "my_table", + schema, + "/tmp/test_warehouse/default.db/my_table", + ); let identifier = Identifier::new("default", "my_table"); let table = ctx.catalog.get_table(&identifier).await; From 235d1ddd2cc28b720feb1abbdc9cf3d757df07d2 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 30 Mar 2026 17:38:45 +0800 Subject: [PATCH 04/13] fix --- crates/paimon/src/catalog/filesystem.rs | 7 +++++++ crates/paimon/src/io/file_io.rs | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/crates/paimon/src/catalog/filesystem.rs b/crates/paimon/src/catalog/filesystem.rs index 61d6e8d3..6a9af206 100644 --- a/crates/paimon/src/catalog/filesystem.rs +++ b/crates/paimon/src/catalog/filesystem.rs @@ -121,9 +121,16 @@ impl FileSystemCatalog { /// List directories in the given path. 
async fn list_directories(&self, path: &str) -> Result> { let statuses = self.file_io.list_status(path).await?; + // Normalize the listed path for comparison: strip trailing slash + let normalized_path = path.trim_end_matches('/'); let mut dirs = Vec::new(); for status in statuses { if status.is_dir { + // Skip the directory itself (opendal list_with includes the root entry) + let entry_path = status.path.trim_end_matches('/'); + if entry_path == normalized_path { + continue; + } if let Some(p) = get_basename(status.path.as_str()) // opendal get_basename will contain "/" for directory, // we need to strip suffix to get the real base name diff --git a/crates/paimon/src/io/file_io.rs b/crates/paimon/src/io/file_io.rs index 242cb96f..43b10892 100644 --- a/crates/paimon/src/io/file_io.rs +++ b/crates/paimon/src/io/file_io.rs @@ -129,6 +129,15 @@ impl FileIO { let mut statuses = Vec::new(); for entry in entries { + // opendal list_with includes the root directory itself as the first entry. + // The root entry's path equals list_path (with or without leading slash). + // Skip it so callers only see the direct children. 
+ let entry_path = entry.path(); + let entry_path_normalized = entry_path.trim_start_matches('/'); + let list_path_normalized = list_path.trim_start_matches('/'); + if entry_path_normalized == list_path_normalized { + continue; + } let meta = entry.metadata(); statuses.push(FileStatus { size: meta.content_length(), From c317b8846f9bee9b350fcf730e540736881bac11 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 30 Mar 2026 17:52:14 +0800 Subject: [PATCH 05/13] clippy --- crates/integration_tests/Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/integration_tests/Cargo.toml b/crates/integration_tests/Cargo.toml index f1b7ea59..640978a0 100644 --- a/crates/integration_tests/Cargo.toml +++ b/crates/integration_tests/Cargo.toml @@ -29,3 +29,6 @@ arrow-array = { workspace = true } tokio = { version = "1", features = ["macros", "rt-multi-thread"] } futures = "0.3" serde_json = "1" + +[dev-dependencies] +axum = { version = "0.7", features = ["macros", "tokio", "http1", "http2"] } From 80da0e0f850e5f53b40464c4485b16f61d9e6947 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 30 Mar 2026 18:12:10 +0800 Subject: [PATCH 06/13] fix --- crates/paimon/tests/rest_catalog_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/paimon/tests/rest_catalog_test.rs b/crates/paimon/tests/rest_catalog_test.rs index 28c2699d..7ac8536b 100644 --- a/crates/paimon/tests/rest_catalog_test.rs +++ b/crates/paimon/tests/rest_catalog_test.rs @@ -247,7 +247,7 @@ async fn test_catalog_get_table() { "default", "my_table", schema, - "/tmp/test_warehouse/default.db/my_table", + "file:///tmp/test_warehouse/default.db/my_table", ); let identifier = Identifier::new("default", "my_table"); From a8161d0eaa75af48cc2e9e74b41cb839541af8b8 Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 31 Mar 2026 13:41:31 +0800 Subject: [PATCH 07/13] fix --- crates/integration_tests/tests/read_tables.rs | 6 +- crates/paimon/Cargo.toml | 4 - .../rest_catalog_read_append_example.rs | 19 +- 
crates/paimon/src/api/auth/base.rs | 7 +- crates/paimon/src/api/auth/bearer_provider.rs | 6 +- crates/paimon/src/api/auth/dlf_provider.rs | 16 +- crates/paimon/src/api/auth/factory.rs | 4 +- crates/paimon/src/api/rest_api.rs | 34 ++- crates/paimon/src/api/rest_client.rs | 10 +- crates/paimon/src/api/rest_util.rs | 2 - crates/paimon/src/catalog/database.rs | 2 - .../paimon/src/catalog/rest/rest_catalog.rs | 199 ++++++++---------- crates/paimon/src/catalog/rest/rest_token.rs | 8 - .../src/catalog/rest/rest_token_file_io.rs | 33 +-- crates/paimon/src/common/options.rs | 3 + crates/paimon/src/io/file_io.rs | 3 +- crates/paimon/src/io/mod.rs | 2 +- crates/paimon/src/io/storage_oss.rs | 8 +- crates/paimon/tests/rest_api_test.rs | 30 +-- 19 files changed, 163 insertions(+), 233 deletions(-) diff --git a/crates/integration_tests/tests/read_tables.rs b/crates/integration_tests/tests/read_tables.rs index c92f262f..dc59aa6d 100644 --- a/crates/integration_tests/tests/read_tables.rs +++ b/crates/integration_tests/tests/read_tables.rs @@ -473,7 +473,7 @@ fn simple_dv_pk_schema() -> Schema { async fn setup_rest_catalog_with_tables( table_configs: &[(&str, &str, Schema)], ) -> (mock_server::RESTServer, RESTCatalog) { - let data_path = get_test_warehouse(); + let catalog_path = get_test_warehouse(); // Use a simple warehouse name (no slashes) to avoid URL-encoding issues let warehouse_name = "test_warehouse"; let prefix = "mock-test"; @@ -483,7 +483,7 @@ async fn setup_rest_catalog_with_tables( let server = start_mock_server( warehouse_name.to_string(), - data_path.clone(), + catalog_path.clone(), config, vec!["default".to_string()], ) @@ -491,7 +491,7 @@ async fn setup_rest_catalog_with_tables( // Register each table with its schema and the real on-disk path for (database, table_name, schema) in table_configs { - let table_path = format!("{}/{}.db/{}", data_path, database, table_name); + let table_path = format!("{}/{}.db/{}", catalog_path, database, table_name); 
server.add_table_with_schema(database, table_name, schema.clone(), &table_path); } diff --git a/crates/paimon/Cargo.toml b/crates/paimon/Cargo.toml index 2597a82a..c78d35ba 100644 --- a/crates/paimon/Cargo.toml +++ b/crates/paimon/Cargo.toml @@ -75,7 +75,3 @@ axum = { version = "0.7", features = ["macros", "tokio", "http1", "http2"] } rand = "0.8.5" serde_avro_fast = { version = "1.1.2", features = ["snappy"] } tempfile = "3" - -[[example]] -name = "rest_catalog_example" -path = "examples/rest_catalog_example.rs" diff --git a/crates/paimon/examples/rest_catalog_read_append_example.rs b/crates/paimon/examples/rest_catalog_read_append_example.rs index bca61fb6..f4ac83a2 100644 --- a/crates/paimon/examples/rest_catalog_read_append_example.rs +++ b/crates/paimon/examples/rest_catalog_read_append_example.rs @@ -190,6 +190,7 @@ async fn main() { } /// Format a single cell value from an Arrow array at the given row index. +/// Supports INT (Int32), BIGINT (Int64), and VARCHAR (String/LargeString). 
fn array_value_to_string(array: &dyn arrow_array::Array, row: usize) -> String { use arrow_array::*; @@ -209,24 +210,6 @@ fn array_value_to_string(array: &dyn arrow_array::Array, row: usize) -> String { if let Some(arr) = array.as_any().downcast_ref::() { return arr.value(row).to_string(); } - if let Some(arr) = array.as_any().downcast_ref::() { - return arr.value(row).to_string(); - } - if let Some(arr) = array.as_any().downcast_ref::() { - return arr.value(row).to_string(); - } - if let Some(arr) = array.as_any().downcast_ref::() { - return arr.value(row).to_string(); - } - if let Some(arr) = array.as_any().downcast_ref::() { - return arr.value(row).to_string(); - } - if let Some(arr) = array.as_any().downcast_ref::() { - return arr.value(row).to_string(); - } - if let Some(arr) = array.as_any().downcast_ref::() { - return format!("{:?}", arr.value(row)); - } format!("", array.data_type()) } diff --git a/crates/paimon/src/api/auth/base.rs b/crates/paimon/src/api/auth/base.rs index 982f63a0..444afe1b 100644 --- a/crates/paimon/src/api/auth/base.rs +++ b/crates/paimon/src/api/auth/base.rs @@ -84,7 +84,7 @@ pub trait AuthProvider: Send + Sync { /// /// # Returns async fn merge_auth_header( - &mut self, + &self, base_header: HashMap, parameter: &RESTAuthParameter, ) -> Result>; @@ -120,10 +120,7 @@ impl RESTAuthFunction { /// /// # Returns /// A HashMap containing the authenticated headers. 
- pub async fn apply( - &mut self, - parameter: &RESTAuthParameter, - ) -> Result> { + pub async fn apply(&self, parameter: &RESTAuthParameter) -> Result> { self.auth_provider .merge_auth_header(self.init_header.clone(), parameter) .await diff --git a/crates/paimon/src/api/auth/bearer_provider.rs b/crates/paimon/src/api/auth/bearer_provider.rs index 2b9f0e14..ba6d4dd4 100644 --- a/crates/paimon/src/api/auth/bearer_provider.rs +++ b/crates/paimon/src/api/auth/bearer_provider.rs @@ -46,7 +46,7 @@ impl BearerTokenAuthProvider { #[async_trait] impl AuthProvider for BearerTokenAuthProvider { async fn merge_auth_header( - &mut self, + &self, mut base_header: HashMap, _parameter: &RESTAuthParameter, ) -> crate::Result> { @@ -64,7 +64,7 @@ mod tests { #[tokio::test] async fn test_bearer_token_auth() { - let mut provider = BearerTokenAuthProvider::new("test-token"); + let provider = BearerTokenAuthProvider::new("test-token"); let base_header = HashMap::new(); let parameter = RESTAuthParameter::for_get("/test", HashMap::new()); @@ -81,7 +81,7 @@ mod tests { #[tokio::test] async fn test_bearer_token_with_base_headers() { - let mut provider = BearerTokenAuthProvider::new("my-token"); + let provider = BearerTokenAuthProvider::new("my-token"); let mut base_header = HashMap::new(); base_header.insert("Content-Type".to_string(), "application/json".to_string()); let parameter = RESTAuthParameter::for_get("/test", HashMap::new()); diff --git a/crates/paimon/src/api/auth/dlf_provider.rs b/crates/paimon/src/api/auth/dlf_provider.rs index 4f264d88..a18a0143 100644 --- a/crates/paimon/src/api/auth/dlf_provider.rs +++ b/crates/paimon/src/api/auth/dlf_provider.rs @@ -244,7 +244,7 @@ const TOKEN_EXPIRATION_SAFE_TIME_MILLIS: i64 = 3_600_000; /// (ROA v2 HMAC-SHA1). 
pub struct DLFAuthProvider { uri: String, - token: Option, + token: tokio::sync::Mutex>, token_loader: Option>, signer: Box, } @@ -279,7 +279,7 @@ impl DLFAuthProvider { Ok(Self { uri, - token, + token: tokio::sync::Mutex::new(token), token_loader, signer, }) @@ -290,9 +290,11 @@ impl DLFAuthProvider { /// If token_loader is configured, this method will: /// - Load a new token if current token is None /// - Refresh the token if it's about to expire (within TOKEN_EXPIRATION_SAFE_TIME_MILLIS) - async fn get_or_refresh_token(&mut self) -> Result { + async fn get_or_refresh_token(&self) -> Result { + let mut token_guard = self.token.lock().await; + if let Some(loader) = &self.token_loader { - let need_reload = match &self.token { + let need_reload = match &*token_guard { None => true, Some(token) => match token.expiration_at_millis { Some(expiration_at_millis) => { @@ -305,11 +307,11 @@ impl DLFAuthProvider { if need_reload { let new_token = loader.load_token().await?; - self.token = Some(new_token); + *token_guard = Some(new_token); } } - self.token.clone().ok_or_else(|| Error::DataInvalid { + token_guard.clone().ok_or_else(|| Error::DataInvalid { message: "Either token or token_loader must be provided".to_string(), source: None, }) @@ -330,7 +332,7 @@ impl DLFAuthProvider { #[async_trait] impl AuthProvider for DLFAuthProvider { async fn merge_auth_header( - &mut self, + &self, mut base_header: HashMap, rest_auth_parameter: &RESTAuthParameter, ) -> crate::Result> { diff --git a/crates/paimon/src/api/auth/factory.rs b/crates/paimon/src/api/auth/factory.rs index 68c2881a..fc2a1b5b 100644 --- a/crates/paimon/src/api/auth/factory.rs +++ b/crates/paimon/src/api/auth/factory.rs @@ -162,7 +162,7 @@ mod tests { options.set(CatalogOptions::TOKEN_PROVIDER, "bear"); options.set(CatalogOptions::TOKEN, "test-token"); - let mut provider = AuthProviderFactory::create_auth_provider(&options).unwrap(); + let provider = AuthProviderFactory::create_auth_provider(&options).unwrap(); let 
base_header = HashMap::new(); let param = RESTAuthParameter::new("GET", "/test", None, HashMap::new()); @@ -202,7 +202,7 @@ mod tests { options.set(CatalogOptions::DLF_ACCESS_KEY_ID, "test_key_id"); options.set(CatalogOptions::DLF_ACCESS_KEY_SECRET, "test_key_secret"); - let mut provider = AuthProviderFactory::create_auth_provider(&options).unwrap(); + let provider = AuthProviderFactory::create_auth_provider(&options).unwrap(); let base_header = HashMap::new(); let param = RESTAuthParameter::new("GET", "/test", None, HashMap::new()); diff --git a/crates/paimon/src/api/rest_api.rs b/crates/paimon/src/api/rest_api.rs index 41c97dc5..00a32b8d 100644 --- a/crates/paimon/src/api/rest_api.rs +++ b/crates/paimon/src/api/rest_api.rs @@ -175,7 +175,7 @@ impl RESTApi { // ==================== Database Operations ==================== /// List all databases. - pub async fn list_databases(&mut self) -> Result> { + pub async fn list_databases(&self) -> Result> { let mut results = Vec::new(); let mut page_token: Option = None; @@ -196,7 +196,7 @@ impl RESTApi { /// List databases with pagination. pub async fn list_databases_paged( - &mut self, + &self, max_results: Option, page_token: Option<&str>, database_name_pattern: Option<&str>, @@ -227,9 +227,9 @@ impl RESTApi { /// Create a new database. pub async fn create_database( - &mut self, + &self, name: &str, - options: Option>, + options: Option>, ) -> Result<()> { validate_non_empty(name, "database name")?; let path = self.resource_paths.databases(); @@ -239,7 +239,7 @@ impl RESTApi { } /// Get database information. - pub async fn get_database(&mut self, name: &str) -> Result { + pub async fn get_database(&self, name: &str) -> Result { validate_non_empty(name, "database name")?; let path = self.resource_paths.database(name); self.client.get(&path, None::<&[(&str, &str)]>).await @@ -247,10 +247,10 @@ impl RESTApi { /// Alter database configuration. 
pub async fn alter_database( - &mut self, + &self, name: &str, removals: Vec, - updates: std::collections::HashMap, + updates: HashMap, ) -> Result<()> { validate_non_empty(name, "database name")?; let path = self.resource_paths.database(name); @@ -260,7 +260,7 @@ impl RESTApi { } /// Drop a database. - pub async fn drop_database(&mut self, name: &str) -> Result<()> { + pub async fn drop_database(&self, name: &str) -> Result<()> { validate_non_empty(name, "database name")?; let path = self.resource_paths.database(name); let _resp: serde_json::Value = self.client.delete(&path, None::<&[(&str, &str)]>).await?; @@ -270,7 +270,7 @@ impl RESTApi { // ==================== Table Operations ==================== /// List all tables in a database. - pub async fn list_tables(&mut self, database: &str) -> Result> { + pub async fn list_tables(&self, database: &str) -> Result> { validate_non_empty(database, "database name")?; let mut results = Vec::new(); @@ -293,7 +293,7 @@ impl RESTApi { /// List tables with pagination. pub async fn list_tables_paged( - &mut self, + &self, database: &str, max_results: Option, page_token: Option<&str>, @@ -333,7 +333,7 @@ impl RESTApi { } /// Create a new table. - pub async fn create_table(&mut self, identifier: &Identifier, schema: Schema) -> Result<()> { + pub async fn create_table(&self, identifier: &Identifier, schema: Schema) -> Result<()> { let database = identifier.database(); let table = identifier.object(); validate_non_empty_multi(&[(database, "database name"), (table, "table name")])?; @@ -344,7 +344,7 @@ impl RESTApi { } /// Get table information. - pub async fn get_table(&mut self, identifier: &Identifier) -> Result { + pub async fn get_table(&self, identifier: &Identifier) -> Result { let database = identifier.database(); let table = identifier.object(); validate_non_empty_multi(&[(database, "database name"), (table, "table name")])?; @@ -353,11 +353,7 @@ impl RESTApi { } /// Rename a table. 
- pub async fn rename_table( - &mut self, - source: &Identifier, - destination: &Identifier, - ) -> Result<()> { + pub async fn rename_table(&self, source: &Identifier, destination: &Identifier) -> Result<()> { validate_non_empty_multi(&[ (source.database(), "source database name"), (source.object(), "source table name"), @@ -371,7 +367,7 @@ impl RESTApi { } /// Drop a table. - pub async fn drop_table(&mut self, identifier: &Identifier) -> Result<()> { + pub async fn drop_table(&self, identifier: &Identifier) -> Result<()> { let database = identifier.database(); let table = identifier.object(); validate_non_empty_multi(&[(database, "database name"), (table, "table name")])?; @@ -386,7 +382,7 @@ impl RESTApi { /// /// Corresponds to Python `RESTApi.load_table_token`. pub async fn load_table_token( - &mut self, + &self, identifier: &Identifier, ) -> Result { let database = identifier.database(); diff --git a/crates/paimon/src/api/rest_client.rs b/crates/paimon/src/api/rest_client.rs index 76db48f5..1b6a636f 100644 --- a/crates/paimon/src/api/rest_client.rs +++ b/crates/paimon/src/api/rest_client.rs @@ -94,7 +94,7 @@ impl HttpClient { /// # Returns /// The parsed JSON response. pub async fn get( - &mut self, + &self, path: &str, params: Option<&[(impl AsRef, impl AsRef)]>, ) -> Result { @@ -136,7 +136,7 @@ impl HttpClient { /// # Returns /// The parsed JSON response. pub async fn post( - &mut self, + &self, path: &str, body: &B, ) -> Result { @@ -163,7 +163,7 @@ impl HttpClient { /// # Returns /// The parsed JSON response. pub async fn delete( - &mut self, + &self, path: &str, params: Option<&[(impl AsRef, impl AsRef)]>, ) -> Result { @@ -203,13 +203,13 @@ impl HttpClient { /// Build auth headers for a request. 
async fn build_auth_headers( - &mut self, + &self, method: &str, path: &str, data: Option<&str>, params: HashMap, ) -> Result> { - if let Some(ref mut auth_fn) = self.auth_function { + if let Some(ref auth_fn) = self.auth_function { let parameter = RESTAuthParameter::new(method, path, data.map(|s| s.to_string()), params); auth_fn.apply(¶meter).await diff --git a/crates/paimon/src/api/rest_util.rs b/crates/paimon/src/api/rest_util.rs index 9c992e43..5c259fea 100644 --- a/crates/paimon/src/api/rest_util.rs +++ b/crates/paimon/src/api/rest_util.rs @@ -47,8 +47,6 @@ impl RESTUtil { /// For keys present in both maps, the value from `override_properties` wins. /// `None` values are skipped (only relevant at the map level; individual /// entries are always `String`). - /// - /// Corresponds to Python `RESTUtil.merge`. pub fn merge( base_properties: Option<&HashMap>, override_properties: Option<&HashMap>, diff --git a/crates/paimon/src/catalog/database.rs b/crates/paimon/src/catalog/database.rs index 6979d073..b291ec7c 100644 --- a/crates/paimon/src/catalog/database.rs +++ b/crates/paimon/src/catalog/database.rs @@ -20,8 +20,6 @@ use std::collections::HashMap; /// Structure representing a database in a Paimon catalog. -/// -/// Corresponds to Python `Database` in `pypaimon/catalog/rest/rest_catalog.py`. #[derive(Debug, Clone, PartialEq)] pub struct Database { /// Database name. diff --git a/crates/paimon/src/catalog/rest/rest_catalog.rs b/crates/paimon/src/catalog/rest/rest_catalog.rs index ab64d59b..a9b7aabf 100644 --- a/crates/paimon/src/catalog/rest/rest_catalog.rs +++ b/crates/paimon/src/catalog/rest/rest_catalog.rs @@ -19,13 +19,10 @@ //! //! This module provides a REST-based catalog that communicates with //! a Paimon REST catalog server for database and table CRUD operations. -//! -//! Reference: Python `RESTCatalog` in `pypaimon/catalog/rest/rest_catalog.py`. 
use std::collections::HashMap; use async_trait::async_trait; -use tokio::sync::Mutex; use crate::api::rest_api::RESTApi; use crate::api::rest_error::RestError; @@ -47,9 +44,8 @@ use super::rest_token_file_io::RESTTokenFileIO; /// /// Corresponds to Python `RESTCatalog` in `pypaimon/catalog/rest/rest_catalog.py`. pub struct RESTCatalog { - /// The REST API client, wrapped in a Mutex because `RESTApi` methods - /// require `&mut self` while `Catalog` trait methods take `&self`. - api: Mutex, + /// The REST API client. + api: RESTApi, /// Catalog configuration options. options: Options, /// Warehouse path. @@ -84,7 +80,7 @@ impl RESTCatalog { let api_options = api.options().clone(); Ok(Self { - api: Mutex::new(api), + api, options: api_options, warehouse, data_token_enabled, @@ -107,84 +103,18 @@ impl RESTCatalog { } /// List databases with pagination. - /// - /// Corresponds to Python `RESTCatalog.list_databases_paged`. pub async fn list_databases_paged( &self, max_results: Option, page_token: Option<&str>, database_name_pattern: Option<&str>, ) -> Result> { - let mut api = self.api.lock().await; - api.list_databases_paged(max_results, page_token, database_name_pattern) + self.api + .list_databases_paged(max_results, page_token, database_name_pattern) .await } } -// ============================================================================ -// Error mapping helpers -// ============================================================================ - -/// Map a REST API error to a catalog-level database error. -/// -/// Converts `RestError::NoSuchResource` -> `Error::DatabaseNotExist`, -/// `RestError::AlreadyExists` -> `Error::DatabaseAlreadyExist`, -/// and passes through other errors via `Error::RestApi`. -fn map_rest_error_for_database(err: Error, database_name: &str) -> Error { - match err { - Error::RestApi { - source: RestError::NoSuchResource { .. 
}, - } => Error::DatabaseNotExist { - database: database_name.to_string(), - }, - Error::RestApi { - source: RestError::AlreadyExists { .. }, - } => Error::DatabaseAlreadyExist { - database: database_name.to_string(), - }, - other => other, - } -} - -/// Map a REST API error to a catalog-level table error. -/// -/// Converts `RestError::NoSuchResource` -> `Error::TableNotExist`, -/// `RestError::AlreadyExists` -> `Error::TableAlreadyExist`, -/// and passes through other errors via `Error::RestApi`. -fn map_rest_error_for_table(err: Error, identifier: &Identifier) -> Error { - match err { - Error::RestApi { - source: RestError::NoSuchResource { .. }, - } => Error::TableNotExist { - full_name: identifier.full_name(), - }, - Error::RestApi { - source: RestError::AlreadyExists { .. }, - } => Error::TableAlreadyExist { - full_name: identifier.full_name(), - }, - other => other, - } -} - -/// Execute a fallible operation and ignore a specific error variant. -/// -/// If the operation succeeds, returns `Ok(())`. -/// If it fails with an error that `should_ignore` returns `true` for, returns `Ok(())`. -/// Otherwise, returns the error. 
-fn ignore_error_if(result: Result<()>, should_ignore: F) -> Result<()> -where - F: Fn(&Error) -> bool, -{ - result.or_else(|err| { - if should_ignore(&err) { - Ok(()) - } else { - Err(err) - } - }) -} - // ============================================================================ // Catalog trait implementation // ============================================================================ @@ -194,8 +124,7 @@ impl Catalog for RESTCatalog { // ======================= database methods =============================== async fn list_databases(&self) -> Result> { - let mut api = self.api.lock().await; - api.list_databases().await + self.api.list_databases().await } async fn create_database( @@ -204,14 +133,9 @@ impl Catalog for RESTCatalog { ignore_if_exists: bool, properties: HashMap, ) -> Result<()> { - let mut api = self.api.lock().await; - let options = if properties.is_empty() { - None - } else { - Some(properties) - }; - let result = api - .create_database(name, options) + let result = self + .api + .create_database(name, Some(properties)) .await .map_err(|e| map_rest_error_for_database(e, name)); ignore_error_if(result, |e| { @@ -220,8 +144,8 @@ impl Catalog for RESTCatalog { } async fn get_database(&self, name: &str) -> Result { - let mut api = self.api.lock().await; - let response = api + let response = self + .api .get_database(name) .await .map_err(|e| map_rest_error_for_database(e, name))?; @@ -240,18 +164,14 @@ impl Catalog for RESTCatalog { ignore_if_not_exists: bool, cascade: bool, ) -> Result<()> { - let mut api = self.api.lock().await; - // If not cascade, check if database is empty first if !cascade { - let tables = match api.list_tables(name).await { + let tables = match self.api.list_tables(name).await { Ok(tables) => tables, Err(err) => { - let mapped = map_rest_error_for_database(err, name); - if ignore_if_not_exists && matches!(mapped, Error::DatabaseNotExist { .. 
}) { - return Ok(()); - } - return Err(mapped); + return ignore_error_if(Err(map_rest_error_for_database(err, name)), |e| { + ignore_if_not_exists && matches!(e, Error::DatabaseNotExist { .. }) + }); } }; if !tables.is_empty() { @@ -261,7 +181,8 @@ impl Catalog for RESTCatalog { } } - let result = api + let result = self + .api .drop_database(name) .await .map_err(|e| map_rest_error_for_database(e, name)); @@ -273,8 +194,8 @@ impl Catalog for RESTCatalog { // ======================= table methods =============================== async fn get_table(&self, identifier: &Identifier) -> Result
{ - let mut api = self.api.lock().await; - let response = api + let response = self + .api .get_table(identifier) .await .map_err(|e| map_rest_error_for_table(e, identifier))?; @@ -297,9 +218,6 @@ impl Catalog for RESTCatalog { // Check if the table is external let is_external = response.is_external.unwrap_or(false); - // Drop the API lock before async FileIO operations - drop(api); - // Build FileIO based on data_token_enabled and is_external let file_io = if self.data_token_enabled && !is_external { // Use RESTTokenFileIO to get token-based FileIO @@ -320,8 +238,8 @@ impl Catalog for RESTCatalog { } async fn list_tables(&self, database_name: &str) -> Result> { - let mut api = self.api.lock().await; - api.list_tables(database_name) + self.api + .list_tables(database_name) .await .map_err(|e| map_rest_error_for_database(e, database_name)) } @@ -332,8 +250,8 @@ impl Catalog for RESTCatalog { creation: Schema, ignore_if_exists: bool, ) -> Result<()> { - let mut api = self.api.lock().await; - let result = api + let result = self + .api .create_table(identifier, creation) .await .map_err(|e| map_rest_error_for_table(e, identifier)); @@ -343,8 +261,8 @@ impl Catalog for RESTCatalog { } async fn drop_table(&self, identifier: &Identifier, ignore_if_not_exists: bool) -> Result<()> { - let mut api = self.api.lock().await; - let result = api + let result = self + .api .drop_table(identifier) .await .map_err(|e| map_rest_error_for_table(e, identifier)); @@ -359,8 +277,8 @@ impl Catalog for RESTCatalog { to: &Identifier, ignore_if_not_exists: bool, ) -> Result<()> { - let mut api = self.api.lock().await; - let result = api + let result = self + .api .rename_table(from, to) .await .map_err(|e| map_rest_error_for_table(e, from)) @@ -388,3 +306,66 @@ impl Catalog for RESTCatalog { }) } } +// ============================================================================ +// Error mapping helpers +// ============================================================================ + +/// 
Map a REST API error to a catalog-level database error. +/// +/// Converts `RestError::NoSuchResource` -> `Error::DatabaseNotExist`, +/// `RestError::AlreadyExists` -> `Error::DatabaseAlreadyExist`, +/// and passes through other errors via `Error::RestApi`. +fn map_rest_error_for_database(err: Error, database_name: &str) -> Error { + match err { + Error::RestApi { + source: RestError::NoSuchResource { .. }, + } => Error::DatabaseNotExist { + database: database_name.to_string(), + }, + Error::RestApi { + source: RestError::AlreadyExists { .. }, + } => Error::DatabaseAlreadyExist { + database: database_name.to_string(), + }, + other => other, + } +} + +/// Map a REST API error to a catalog-level table error. +/// +/// Converts `RestError::NoSuchResource` -> `Error::TableNotExist`, +/// `RestError::AlreadyExists` -> `Error::TableAlreadyExist`, +/// and passes through other errors via `Error::RestApi`. +fn map_rest_error_for_table(err: Error, identifier: &Identifier) -> Error { + match err { + Error::RestApi { + source: RestError::NoSuchResource { .. }, + } => Error::TableNotExist { + full_name: identifier.full_name(), + }, + Error::RestApi { + source: RestError::AlreadyExists { .. }, + } => Error::TableAlreadyExist { + full_name: identifier.full_name(), + }, + other => other, + } +} + +/// Execute a fallible operation and ignore a specific error variant. +/// +/// If the operation succeeds, returns `Ok(())`. +/// If it fails with an error that `should_ignore` returns `true` for, returns `Ok(())`. +/// Otherwise, returns the error. 
+fn ignore_error_if(result: Result<()>, should_ignore: F) -> Result<()> +where + F: Fn(&Error) -> bool, +{ + result.or_else(|err| { + if should_ignore(&err) { + Ok(()) + } else { + Err(err) + } + }) +} diff --git a/crates/paimon/src/catalog/rest/rest_token.rs b/crates/paimon/src/catalog/rest/rest_token.rs index f72da5da..dc61d23f 100644 --- a/crates/paimon/src/catalog/rest/rest_token.rs +++ b/crates/paimon/src/catalog/rest/rest_token.rs @@ -39,11 +39,3 @@ impl RESTToken { } } } - -impl PartialEq for RESTToken { - fn eq(&self, other: &Self) -> bool { - self.expire_at_millis == other.expire_at_millis && self.token == other.token - } -} - -impl Eq for RESTToken {} diff --git a/crates/paimon/src/catalog/rest/rest_token_file_io.rs b/crates/paimon/src/catalog/rest/rest_token_file_io.rs index 3b284657..95a2f59c 100644 --- a/crates/paimon/src/catalog/rest/rest_token_file_io.rs +++ b/crates/paimon/src/catalog/rest/rest_token_file_io.rs @@ -31,14 +31,14 @@ use tokio::sync::{Mutex, RwLock}; use crate::api::rest_api::RESTApi; use crate::api::rest_util::RESTUtil; use crate::catalog::Identifier; -use crate::common::Options; +use crate::common::{CatalogOptions, Options}; +use crate::io::storage_oss::OSS_ENDPOINT; use crate::io::FileIO; use crate::Result; use super::rest_token::RESTToken; /// Safe time margin (in milliseconds) before token expiration to trigger refresh. -/// Matches `RESTApi.TOKEN_EXPIRATION_SAFE_TIME_MILLIS` in Python. const TOKEN_EXPIRATION_SAFE_TIME_MILLIS: i64 = 3_600_000; /// A FileIO wrapper that supports getting data access tokens from a REST Server. @@ -47,9 +47,6 @@ const TOKEN_EXPIRATION_SAFE_TIME_MILLIS: i64 = 3_600_000; /// - Token caching with expiration detection /// - Automatic token refresh via `RESTApi::load_table_token` /// - Merging token credentials into catalog options to build the underlying `FileIO` -/// -/// Corresponds to Python `RESTTokenFileIO` in -/// `pypaimon/catalog/rest/rest_token_file_io.py`. 
pub struct RESTTokenFileIO { /// Table identifier for token requests. identifier: Identifier, @@ -99,14 +96,8 @@ impl RESTTokenFileIO { match token_guard.as_ref() { Some(token) => { // Merge catalog options (base) with token credentials (override) - // token.token["fs.oss.endpoint"] = oss-cn-hangzhou.aliyuncs.com - let mut token_with_endpoint = token.token.clone(); - token_with_endpoint.insert( - "fs.oss.endpoint".to_string(), - "oss-cn-hangzhou.aliyuncs.com".to_string(), - ); - let base = self.catalog_options.to_map().clone(); - let merged_props = RESTUtil::merge(Some(&base), Some(&token_with_endpoint)); + let merged_props = + RESTUtil::merge(Some(self.catalog_options.to_map()), Some(&token.token)); // Build FileIO with merged properties let mut builder = FileIO::from_path(&self.path)?; builder = builder.with_props(merged_props); @@ -120,13 +111,11 @@ impl RESTTokenFileIO { } /// Try to refresh the token if it is expired or not yet obtained. - /// - /// Corresponds to Python `RESTTokenFileIO.try_to_refresh_token`. async fn try_to_refresh_token(&self) -> Result<()> { // Fast path: check if token is still valid under read lock { let token_guard = self.token.read().await; - if let Some(ref token) = *token_guard { + if let Some(token) = token_guard.as_ref() { if !Self::is_token_expired(token) { return Ok(()); } @@ -137,7 +126,7 @@ impl RESTTokenFileIO { let mut token_guard = self.token.write().await; // Double-check after acquiring write lock (another task may have refreshed) - if let Some(ref token) = *token_guard { + if let Some(token) = token_guard.as_ref() { if !Self::is_token_expired(token) { return Ok(()); } @@ -153,11 +142,9 @@ impl RESTTokenFileIO { /// /// Lazily creates a `RESTApi` instance on first call and reuses it /// for subsequent refreshes. - /// - /// Corresponds to Python `RESTTokenFileIO.refresh_token`. 
async fn refresh_token(&self) -> Result { let mut api_guard = self.api.lock().await; - let api = match api_guard.as_mut() { + let api = match api_guard.as_ref() { Some(existing) => existing, None => { let new_api = RESTApi::new(self.catalog_options.clone(), false).await?; @@ -183,17 +170,15 @@ impl RESTTokenFileIO { } /// Merge token credentials with catalog options for DLF OSS endpoint override. - /// - /// Corresponds to Python `RESTTokenFileIO._merge_token_with_catalog_options`. fn merge_token_with_catalog_options( &self, token: HashMap, ) -> HashMap { let mut merged = token; // If catalog options contain a DLF OSS endpoint, override the standard OSS endpoint - if let Some(dlf_oss_endpoint) = self.catalog_options.get("dlf.oss-endpoint") { + if let Some(dlf_oss_endpoint) = self.catalog_options.get(CatalogOptions::DLF_OSS_ENDPOINT) { if !dlf_oss_endpoint.trim().is_empty() { - merged.insert("fs.oss.endpoint".to_string(), dlf_oss_endpoint.clone()); + merged.insert(OSS_ENDPOINT.to_string(), dlf_oss_endpoint.clone()); } } merged diff --git a/crates/paimon/src/common/options.rs b/crates/paimon/src/common/options.rs index a469a07c..792c6017 100644 --- a/crates/paimon/src/common/options.rs +++ b/crates/paimon/src/common/options.rs @@ -69,6 +69,9 @@ impl CatalogOptions { /// DLF ECS role name. pub const DLF_TOKEN_ECS_ROLE_NAME: &'static str = "dlf.token-ecs-role-name"; + + /// DLF OSS endpoint override. + pub const DLF_OSS_ENDPOINT: &'static str = "dlf.oss-endpoint"; } /// Configuration options container. diff --git a/crates/paimon/src/io/file_io.rs b/crates/paimon/src/io/file_io.rs index 43b10892..84c134ad 100644 --- a/crates/paimon/src/io/file_io.rs +++ b/crates/paimon/src/io/file_io.rs @@ -127,14 +127,13 @@ impl FileIO { })?; let mut statuses = Vec::new(); - + let list_path_normalized = list_path.trim_start_matches('/'); for entry in entries { // opendal list_with includes the root directory itself as the first entry. 
// The root entry's path equals list_path (with or without leading slash). // Skip it so callers only see the direct children. let entry_path = entry.path(); let entry_path_normalized = entry_path.trim_start_matches('/'); - let list_path_normalized = list_path.trim_start_matches('/'); if entry_path_normalized == list_path_normalized { continue; } diff --git a/crates/paimon/src/io/mod.rs b/crates/paimon/src/io/mod.rs index 226aa72a..92a909a7 100644 --- a/crates/paimon/src/io/mod.rs +++ b/crates/paimon/src/io/mod.rs @@ -32,7 +32,7 @@ mod storage_memory; use storage_memory::*; #[cfg(feature = "storage-oss")] -mod storage_oss; +pub(crate) mod storage_oss; #[cfg(feature = "storage-oss")] use storage_oss::*; diff --git a/crates/paimon/src/io/storage_oss.rs b/crates/paimon/src/io/storage_oss.rs index 8c7c36f0..7884894d 100644 --- a/crates/paimon/src/io/storage_oss.rs +++ b/crates/paimon/src/io/storage_oss.rs @@ -27,23 +27,23 @@ use crate::Result; /// Configuration key for OSS endpoint. /// /// Compatible with paimon-java's `fs.oss.endpoint`. -const OSS_ENDPOINT: &str = "fs.oss.endpoint"; +pub(crate) const OSS_ENDPOINT: &str = "fs.oss.endpoint"; /// Configuration key for OSS access key ID. /// /// Compatible with paimon-java's `fs.oss.accessKeyId`. -const OSS_ACCESS_KEY_ID: &str = "fs.oss.accessKeyId"; +pub(crate) const OSS_ACCESS_KEY_ID: &str = "fs.oss.accessKeyId"; /// Configuration key for OSS access key secret. /// /// Compatible with paimon-java's `fs.oss.accessKeySecret`. -const OSS_ACCESS_KEY_SECRET: &str = "fs.oss.accessKeySecret"; +pub(crate) const OSS_ACCESS_KEY_SECRET: &str = "fs.oss.accessKeySecret"; /// Configuration key for OSS STS security token (optional). /// /// Compatible with paimon-java's `fs.oss.securityToken`. /// Required when using STS temporary credentials (e.g. from REST data tokens). 
-const OSS_SECURITY_TOKEN: &str = "fs.oss.securityToken"; +pub(crate) const OSS_SECURITY_TOKEN: &str = "fs.oss.securityToken"; /// Parse paimon catalog options into an [`OssConfig`]. /// diff --git a/crates/paimon/tests/rest_api_test.rs b/crates/paimon/tests/rest_api_test.rs index 753f5d1f..74784b34 100644 --- a/crates/paimon/tests/rest_api_test.rs +++ b/crates/paimon/tests/rest_api_test.rs @@ -73,7 +73,7 @@ async fn setup_test_server(initial_dbs: Vec<&str>) -> TestContext { // ==================== Database Tests ==================== #[tokio::test] async fn test_list_databases() { - let mut ctx = setup_test_server(vec!["default", "test_db1", "prod_db"]).await; + let ctx = setup_test_server(vec!["default", "test_db1", "prod_db"]).await; let dbs = ctx.api.list_databases().await.unwrap(); @@ -84,7 +84,7 @@ async fn test_list_databases() { #[tokio::test] async fn test_create_database() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = setup_test_server(vec!["default"]).await; // Create new database let result = ctx.api.create_database("new_db", None).await; @@ -101,7 +101,7 @@ async fn test_create_database() { #[tokio::test] async fn test_get_database() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = setup_test_server(vec!["default"]).await; let db_resp = ctx.api.get_database("default").await.unwrap(); assert_eq!(db_resp.name, Some("default".to_string())); @@ -160,7 +160,7 @@ async fn test_error_responses_status_mapping() { #[tokio::test] async fn test_alter_database() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = setup_test_server(vec!["default"]).await; // Alter database with updates let mut updates = HashMap::new(); @@ -189,7 +189,7 @@ async fn test_alter_database() { #[tokio::test] async fn test_alter_database_not_found() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = setup_test_server(vec!["default"]).await; let result = ctx .api @@ -203,7 +203,7 @@ async fn 
test_alter_database_not_found() { #[tokio::test] async fn test_drop_database() { - let mut ctx = setup_test_server(vec!["default", "to_drop"]).await; + let ctx = setup_test_server(vec!["default", "to_drop"]).await; // Verify database exists let dbs = ctx.api.list_databases().await.unwrap(); @@ -227,7 +227,7 @@ async fn test_drop_database() { #[tokio::test] async fn test_drop_database_no_permission() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = setup_test_server(vec!["default"]).await; ctx.server.add_no_permission_database("secret"); let result = ctx.api.drop_database("secret").await; @@ -240,7 +240,7 @@ async fn test_drop_database_no_permission() { #[tokio::test] async fn test_list_tables_and_get_table() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = setup_test_server(vec!["default"]).await; // Add tables ctx.server.add_table("default", "table1"); @@ -262,7 +262,7 @@ async fn test_list_tables_and_get_table() { #[tokio::test] async fn test_get_table_not_found() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = setup_test_server(vec!["default"]).await; let result = ctx .api @@ -273,7 +273,7 @@ async fn test_get_table_not_found() { #[tokio::test] async fn test_list_tables_empty_database() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = setup_test_server(vec!["default"]).await; let tables = ctx.api.list_tables("default").await.unwrap(); assert!( @@ -284,7 +284,7 @@ async fn test_list_tables_empty_database() { #[tokio::test] async fn test_multiple_databases_with_tables() { - let mut ctx = setup_test_server(vec!["db1", "db2"]).await; + let ctx = setup_test_server(vec!["db1", "db2"]).await; // Add tables to different databases ctx.server.add_table("db1", "table1_db1"); @@ -305,7 +305,7 @@ async fn test_multiple_databases_with_tables() { #[tokio::test] async fn test_create_table() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = 
setup_test_server(vec!["default"]).await; // Create a simple schema using builder use paimon::spec::{DataType, Schema}; @@ -339,7 +339,7 @@ async fn test_create_table() { #[tokio::test] async fn test_drop_table() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = setup_test_server(vec!["default"]).await; // Add a table ctx.server.add_table("default", "table_to_drop"); @@ -369,7 +369,7 @@ async fn test_drop_table() { #[tokio::test] async fn test_drop_table_no_permission() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = setup_test_server(vec!["default"]).await; ctx.server .add_no_permission_table("default", "secret_table"); @@ -384,7 +384,7 @@ async fn test_drop_table_no_permission() { #[tokio::test] async fn test_rename_table() { - let mut ctx = setup_test_server(vec!["default"]).await; + let ctx = setup_test_server(vec!["default"]).await; // Add a table ctx.server.add_table("default", "old_table"); From 23820f5fae422a460fadc8117b9910ecd2fa6a96 Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 31 Mar 2026 15:10:17 +0800 Subject: [PATCH 08/13] fix --- crates/paimon/examples/rest_catalog_example.rs | 9 ++------- .../paimon/examples/rest_catalog_read_append_example.rs | 9 ++------- crates/paimon/src/catalog/rest/rest_token.rs | 2 -- crates/paimon/src/catalog/rest/rest_token_file_io.rs | 3 --- 4 files changed, 4 insertions(+), 19 deletions(-) diff --git a/crates/paimon/examples/rest_catalog_example.rs b/crates/paimon/examples/rest_catalog_example.rs index da3b2dca..54ea1bbb 100644 --- a/crates/paimon/examples/rest_catalog_example.rs +++ b/crates/paimon/examples/rest_catalog_example.rs @@ -53,12 +53,12 @@ async fn main() { // Basic configuration — replace with your actual server URL and warehouse options.set(CatalogOptions::METASTORE, "rest"); - options.set(CatalogOptions::WAREHOUSE, "pypaimon_catalog"); + options.set(CatalogOptions::WAREHOUSE, "paimon_catalog"); options.set(CatalogOptions::URI, "http://sample.net/"); // --- 
Authentication (choose one) --- - // Option A: DLF authentication (Alibaba Cloud) + // DLF authentication (Alibaba Cloud) options.set(CatalogOptions::TOKEN_PROVIDER, "dlf"); options.set("dlf.region", "cn-hangzhou"); options.set( @@ -70,11 +70,6 @@ async fn main() { std::env::var("DLF_ACCESS_KEY_SECRET").expect("DLF_ACCESS_KEY_SECRET env var not set"), ); - // Option B: Bearer token authentication (uncomment to use) - // options.set(CatalogOptions::TOKEN_PROVIDER, "bearer"); - // options.set("token", std::env::var("PAIMON_REST_TOKEN") - // .expect("PAIMON_REST_TOKEN env var not set")); - // ==================== Create RESTCatalog ==================== println!("Creating RESTCatalog instance..."); let catalog = match RESTCatalog::new(options, true).await { diff --git a/crates/paimon/examples/rest_catalog_read_append_example.rs b/crates/paimon/examples/rest_catalog_read_append_example.rs index f4ac83a2..5dbda442 100644 --- a/crates/paimon/examples/rest_catalog_read_append_example.rs +++ b/crates/paimon/examples/rest_catalog_read_append_example.rs @@ -55,12 +55,12 @@ async fn main() { // Basic configuration — replace with your actual server URL and warehouse options.set(CatalogOptions::METASTORE, "rest"); - options.set(CatalogOptions::WAREHOUSE, "pypaimon_catalog"); + options.set(CatalogOptions::WAREHOUSE, "paimon_catalog"); options.set(CatalogOptions::URI, "http://sample.net/"); // --- Authentication (choose one) --- - // Option A: DLF authentication (Alibaba Cloud) + // DLF authentication (Alibaba Cloud) options.set(CatalogOptions::TOKEN_PROVIDER, "dlf"); options.set("dlf.region", "cn-hangzhou"); options.set( @@ -72,11 +72,6 @@ async fn main() { std::env::var("DLF_ACCESS_KEY_SECRET").expect("DLF_ACCESS_KEY_SECRET env var not set"), ); - // Option B: Bearer token authentication (uncomment to use) - // options.set(CatalogOptions::TOKEN_PROVIDER, "bearer"); - // options.set("token", std::env::var("PAIMON_REST_TOKEN") - // .expect("PAIMON_REST_TOKEN env var not set")); - 
// ==================== Create RESTCatalog ==================== println!("Creating RESTCatalog instance..."); let catalog = match RESTCatalog::new(options, true).await { diff --git a/crates/paimon/src/catalog/rest/rest_token.rs b/crates/paimon/src/catalog/rest/rest_token.rs index dc61d23f..83d9dc98 100644 --- a/crates/paimon/src/catalog/rest/rest_token.rs +++ b/crates/paimon/src/catalog/rest/rest_token.rs @@ -20,8 +20,6 @@ use std::collections::HashMap; /// Token for REST data access, containing credentials and expiration. -/// -/// Corresponds to Python `RESTToken` in `pypaimon/catalog/rest/rest_token.py`. #[derive(Debug, Clone)] pub struct RESTToken { /// Token key-value pairs (e.g. access_key_id, access_key_secret, etc.) diff --git a/crates/paimon/src/catalog/rest/rest_token_file_io.rs b/crates/paimon/src/catalog/rest/rest_token_file_io.rs index 95a2f59c..bf9f7030 100644 --- a/crates/paimon/src/catalog/rest/rest_token_file_io.rs +++ b/crates/paimon/src/catalog/rest/rest_token_file_io.rs @@ -20,9 +20,6 @@ //! This module provides a FileIO wrapper that supports getting data access //! tokens from a REST Server. It handles token caching, expiration detection, //! and automatic refresh. -//! -//! Corresponds to Python `RESTTokenFileIO` in -//! `pypaimon/catalog/rest/rest_token_file_io.py`. 
use std::collections::HashMap; From 61cad79f99d4671558200ba64d522378367bd008 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 1 Apr 2026 11:52:45 +0800 Subject: [PATCH 09/13] fix --- crates/integration_tests/Cargo.toml | 4 +- crates/integration_tests/tests/read_tables.rs | 151 ++++--------- .../paimon/examples/rest_catalog_example.rs | 149 ++++++++++++- .../rest_catalog_read_append_example.rs | 210 ------------------ .../paimon/src/catalog/rest/rest_catalog.rs | 12 +- .../src/catalog/rest/rest_token_file_io.rs | 39 ++-- 6 files changed, 211 insertions(+), 354 deletions(-) delete mode 100644 crates/paimon/examples/rest_catalog_read_append_example.rs diff --git a/crates/integration_tests/Cargo.toml b/crates/integration_tests/Cargo.toml index 640978a0..092ad949 100644 --- a/crates/integration_tests/Cargo.toml +++ b/crates/integration_tests/Cargo.toml @@ -28,7 +28,7 @@ paimon = { path = "../paimon" } arrow-array = { workspace = true } tokio = { version = "1", features = ["macros", "rt-multi-thread"] } futures = "0.3" -serde_json = "1" [dev-dependencies] -axum = { version = "0.7", features = ["macros", "tokio", "http1", "http2"] } +serde_json = "1" +axum = { version = "0.7", features = ["macros", "tokio", "http1", "http2"] } \ No newline at end of file diff --git a/crates/integration_tests/tests/read_tables.rs b/crates/integration_tests/tests/read_tables.rs index dc59aa6d..1b0032a8 100644 --- a/crates/integration_tests/tests/read_tables.rs +++ b/crates/integration_tests/tests/read_tables.rs @@ -34,15 +34,12 @@ fn get_test_warehouse() -> String { std::env::var("PAIMON_TEST_WAREHOUSE").unwrap_or_else(|_| "/tmp/paimon-warehouse".to_string()) } -async fn scan_and_read(table_name: &str) -> (Plan, Vec) { - scan_and_read_with_projection(table_name, None).await -} - -async fn scan_and_read_with_projection( +async fn scan_and_read( + catalog: &C, table_name: &str, projection: Option<&[&str]>, ) -> (Plan, Vec) { - let table = get_test_table(table_name).await; + let table = 
get_table_from_catalog(catalog, table_name).await; let mut read_builder = table.new_read_builder(); if let Some(cols) = projection { @@ -67,6 +64,30 @@ async fn scan_and_read_with_projection( (plan, batches) } +async fn get_table_from_catalog( + catalog: &C, + table_name: &str, +) -> paimon::Table { + let identifier = Identifier::new("default", table_name); + catalog + .get_table(&identifier) + .await + .expect("Failed to get table") +} + +fn create_file_system_catalog() -> FileSystemCatalog { + let warehouse = get_test_warehouse(); + FileSystemCatalog::new(warehouse).expect("Failed to create FileSystemCatalog") +} + +async fn scan_and_read_with_fs_catalog( + table_name: &str, + projection: Option<&[&str]>, +) -> (Plan, Vec) { + let catalog = create_file_system_catalog(); + scan_and_read(&catalog, table_name, projection).await +} + fn extract_id_name(batches: &[RecordBatch]) -> Vec<(i32, String)> { let mut rows = Vec::new(); for batch in batches { @@ -88,7 +109,7 @@ fn extract_id_name(batches: &[RecordBatch]) -> Vec<(i32, String)> { #[tokio::test] async fn test_read_log_table() { - let (plan, batches) = scan_and_read("simple_log_table").await; + let (plan, batches) = scan_and_read_with_fs_catalog("simple_log_table", None).await; // Non-partitioned table: partition should be a valid arity=0 BinaryRow // deserialized from manifest bytes, not a stub without backing data. 
@@ -112,7 +133,7 @@ async fn test_read_log_table() { #[tokio::test] async fn test_read_dv_primary_key_table() { - let (_, batches) = scan_and_read("simple_dv_pk_table").await; + let (_, batches) = scan_and_read_with_fs_catalog("simple_dv_pk_table", None).await; let actual = extract_id_name(&batches); let expected = vec![ (1, "alice-v2".to_string()), @@ -130,7 +151,7 @@ async fn test_read_dv_primary_key_table() { #[tokio::test] async fn test_read_partitioned_log_table() { - let (plan, batches) = scan_and_read("partitioned_log_table").await; + let (plan, batches) = scan_and_read_with_fs_catalog("partitioned_log_table", None).await; let mut seen_partitions: HashSet = HashSet::new(); for split in plan.splits() { @@ -183,7 +204,7 @@ async fn test_read_partitioned_log_table() { #[tokio::test] async fn test_read_multi_partitioned_log_table() { - let (plan, batches) = scan_and_read("multi_partitioned_log_table").await; + let (plan, batches) = scan_and_read_with_fs_catalog("multi_partitioned_log_table", None).await; let mut seen_partitions: HashSet<(String, i32)> = HashSet::new(); for split in plan.splits() { @@ -251,7 +272,7 @@ async fn test_read_multi_partitioned_log_table() { #[tokio::test] async fn test_read_partitioned_dv_pk_table() { - let (plan, batches) = scan_and_read("partitioned_dv_pk_table").await; + let (plan, batches) = scan_and_read_with_fs_catalog("partitioned_dv_pk_table", None).await; // Verify partition metadata on each split. 
let mut seen_partitions: HashSet = HashSet::new(); @@ -305,20 +326,10 @@ async fn test_read_partitioned_dv_pk_table() { ); } -async fn get_test_table(table_name: &str) -> paimon::Table { - let warehouse = get_test_warehouse(); - let catalog = FileSystemCatalog::new(warehouse).expect("Failed to create catalog"); - let identifier = Identifier::new("default", table_name); - catalog - .get_table(&identifier) - .await - .expect("Failed to get table") -} - #[tokio::test] async fn test_read_with_column_projection() { let (_, batches) = - scan_and_read_with_projection("partitioned_log_table", Some(&["name", "id"])).await; + scan_and_read_with_fs_catalog("partitioned_log_table", Some(&["name", "id"])).await; // Verify that output schema preserves caller-specified column order. for batch in &batches { @@ -347,7 +358,8 @@ async fn test_read_with_column_projection() { #[tokio::test] async fn test_read_projection_empty() { - let table = get_test_table("simple_log_table").await; + let catalog = create_file_system_catalog(); + let table = get_table_from_catalog(&catalog, "simple_log_table").await; let mut read_builder = table.new_read_builder(); read_builder.with_projection(&[]); @@ -385,10 +397,10 @@ async fn test_read_projection_empty() { ); } } - #[tokio::test] async fn test_read_projection_unknown_column() { - let table = get_test_table("simple_log_table").await; + let catalog = create_file_system_catalog(); + let table = get_table_from_catalog(&catalog, "simple_log_table").await; let mut read_builder = table.new_read_builder(); read_builder.with_projection(&["id", "nonexistent_column"]); @@ -410,7 +422,8 @@ async fn test_read_projection_unknown_column() { #[tokio::test] async fn test_read_projection_all_invalid() { - let table = get_test_table("simple_log_table").await; + let catalog = create_file_system_catalog(); + let table = get_table_from_catalog(&catalog, "simple_log_table").await; let mut read_builder = table.new_read_builder(); 
read_builder.with_projection(&["nonexistent_a", "nonexistent_b"]); @@ -432,7 +445,8 @@ async fn test_read_projection_all_invalid() { #[tokio::test] async fn test_read_projection_duplicate_column() { - let table = get_test_table("simple_log_table").await; + let catalog = create_file_system_catalog(); + let table = get_table_from_catalog(&catalog, "simple_log_table").await; let mut read_builder = table.new_read_builder(); read_builder.with_projection(&["id", "id"]); @@ -457,17 +471,6 @@ fn simple_log_schema() -> Schema { .expect("Failed to build schema") } -/// Build a DV-enabled primary key schema (id INT NOT NULL as PK, name VARCHAR). -fn simple_dv_pk_schema() -> Schema { - Schema::builder() - .column("id", DataType::Int(IntType::with_nullable(false))) - .column("name", DataType::VarChar(VarCharType::string_type())) - .primary_key(["id"]) - .option("deletion-vectors.enabled", "true") - .build() - .expect("Failed to build schema") -} - /// Start a mock REST server backed by Spark-provisioned data on disk, /// register the given tables, and return a connected `RESTCatalog`. 
async fn setup_rest_catalog_with_tables( @@ -518,30 +521,13 @@ async fn test_rest_catalog_read_append_table() { let (_server, catalog) = setup_rest_catalog_with_tables(&[("default", table_name, simple_log_schema())]).await; - let identifier = Identifier::new("default", table_name); - let table = catalog - .get_table(&identifier) - .await - .expect("Failed to get table from REST catalog"); - - let read_builder = table.new_read_builder(); - let scan = read_builder.new_scan(); - let plan = scan.plan().await.expect("Failed to plan scan"); + let (plan, batches) = scan_and_read(&catalog, table_name, None).await; assert!( !plan.splits().is_empty(), "REST append table should have at least one split" ); - let read = read_builder.new_read().expect("Failed to create read"); - let stream = read - .to_arrow(plan.splits()) - .expect("Failed to create arrow stream"); - let batches: Vec<_> = stream - .try_collect() - .await - .expect("Failed to collect batches"); - assert!( !batches.is_empty(), "REST append table should produce at least one batch" @@ -558,56 +544,3 @@ async fn test_rest_catalog_read_append_table() { "REST catalog append table rows should match expected values" ); } - -/// Test reading a primary-key table with deletion vectors via REST catalog backed by mock server. -/// -/// The mock server returns table metadata pointing to Spark-provisioned data on disk. 
-#[tokio::test] -async fn test_rest_catalog_read_pk_table() { - let table_name = "simple_dv_pk_table"; - let (_server, catalog) = - setup_rest_catalog_with_tables(&[("default", table_name, simple_dv_pk_schema())]).await; - - let identifier = Identifier::new("default", table_name); - let table = catalog - .get_table(&identifier) - .await - .expect("Failed to get table from REST catalog"); - - let read_builder = table.new_read_builder(); - let scan = read_builder.new_scan(); - let plan = scan.plan().await.expect("Failed to plan scan"); - - assert!( - !plan.splits().is_empty(), - "REST PK table should have at least one split" - ); - - let read = read_builder.new_read().expect("Failed to create read"); - let stream = read - .to_arrow(plan.splits()) - .expect("Failed to create arrow stream"); - let batches: Vec<_> = stream - .try_collect() - .await - .expect("Failed to collect batches"); - - assert!( - !batches.is_empty(), - "REST PK table should produce at least one batch" - ); - - let actual = extract_id_name(&batches); - let expected = vec![ - (1, "alice-v2".to_string()), - (2, "bob-v2".to_string()), - (3, "carol-v2".to_string()), - (4, "dave-v2".to_string()), - (5, "eve-v2".to_string()), - (6, "frank-v1".to_string()), - ]; - assert_eq!( - actual, expected, - "REST catalog DV-enabled PK table should only expose the latest row per key" - ); -} diff --git a/crates/paimon/examples/rest_catalog_example.rs b/crates/paimon/examples/rest_catalog_example.rs index 54ea1bbb..1e0d6fa5 100644 --- a/crates/paimon/examples/rest_catalog_example.rs +++ b/crates/paimon/examples/rest_catalog_example.rs @@ -15,10 +15,12 @@ // specific language governing permissions and limitations // under the License. -//! Example: REST Catalog Operations +//! Example: REST Catalog Operations (Complete) //! -//! This example demonstrates how to use `RESTCatalog` for database and table operations -//! via the Paimon REST catalog API. +//! This example demonstrates how to use `RESTCatalog` for: +//! 1. 
Database operations (create, list, get, drop) +//! 2. Table operations (create, list, get, rename, drop) +//! 3. Data reading from append-only tables //! //! # Usage //! ```bash @@ -33,6 +35,8 @@ use std::collections::HashMap; +use futures::TryStreamExt; + use paimon::catalog::{Catalog, Identifier, RESTCatalog}; use paimon::common::{CatalogOptions, Options}; use paimon::spec::{DataType, IntType, Schema, VarCharType}; @@ -46,6 +50,31 @@ fn create_test_schema() -> Schema { .expect("Failed to build schema") } +/// Format a single cell value from an Arrow array at the given row index. +/// Supports INT (Int32), BIGINT (Int64), and VARCHAR (String/LargeString). +fn array_value_to_string(array: &dyn arrow_array::Array, row: usize) -> String { + use arrow_array::*; + + if array.is_null(row) { + return "null".to_string(); + } + + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + if let Some(arr) = array.as_any().downcast_ref::() { + return arr.value(row).to_string(); + } + + format!("", array.data_type()) +} + #[tokio::main] async fn main() { // ==================== Configuration ==================== @@ -80,8 +109,8 @@ async fn main() { } }; - // ==================== Database Operations ==================== - println!("\n=== Database Operations ===\n"); + // ==================== Part 1: Database Operations ==================== + println!("\n=== Part 1: Database Operations ===\n"); // List databases println!("Listing databases..."); @@ -112,8 +141,8 @@ async fn main() { Err(err) => eprintln!("Failed to get database: {}", err), } - // ==================== Table Operations ==================== - println!("\n=== Table Operations ===\n"); + // ==================== Part 2: Table Operations ==================== + println!("\n=== Part 2: Table 
Operations ===\n"); // Create table let table_identifier = Identifier::new("example_db", "users"); @@ -138,7 +167,10 @@ async fn main() { // Get table info println!("\nGetting table info for '{}'...", table_identifier); match catalog.get_table(&table_identifier).await { - Ok(table) => println!("Table: {:?}", table), + Ok(table) => { + println!("Table location: {}", table.location()); + println!("Table schema fields: {:?}", table.schema().fields()); + } Err(err) => eprintln!("Failed to get table: {}", err), } @@ -156,16 +188,109 @@ async fn main() { Err(err) => eprintln!("Failed to rename table: {}", err), } + // ==================== Part 3: Read Data from Existing Table ==================== + println!("\n=== Part 3: Read Data from Existing Table ===\n"); + + // Try to read from an existing table (example_db.users_renamed) + // This table must already exist on the REST catalog server + let read_table_identifier = Identifier::new("example_db", "users_renamed"); + println!( + "Attempting to read from table '{}'...", + read_table_identifier + ); + + match catalog.get_table(&read_table_identifier).await { + Ok(table) => { + println!("Table retrieved successfully"); + println!(" Location: {}", table.location()); + println!(" Schema fields: {:?}", table.schema().fields()); + + // Scan table + println!("\nScanning table..."); + let read_builder = table.new_read_builder(); + let scan = read_builder.new_scan(); + + match scan.plan().await { + Ok(plan) => { + println!(" Number of splits: {}", plan.splits().len()); + + if plan.splits().is_empty() { + println!("No data splits found — the table may be empty."); + } else { + // Read table data + println!("\nReading table data..."); + match read_builder.new_read() { + Ok(read) => match read.to_arrow(plan.splits()) { + Ok(stream) => { + let batches: Vec<_> = + stream.try_collect().await.unwrap_or_default(); + println!("Collected {} record batch(es)", batches.len()); + + let mut total_rows = 0; + for (batch_index, batch) in 
batches.iter().enumerate() { + let num_rows = batch.num_rows(); + total_rows += num_rows; + println!( + "\n--- Batch {} ({} rows, {} columns) ---", + batch_index, + num_rows, + batch.num_columns() + ); + println!("Schema: {}", batch.schema()); + + // Print up to 10 rows per batch + let display_rows = num_rows.min(10); + for row in 0..display_rows { + let mut row_values = Vec::new(); + for col in 0..batch.num_columns() { + let column = batch.column(col); + row_values.push(array_value_to_string(column, row)); + } + println!(" Row {}: [{}]", row, row_values.join(", ")); + } + if num_rows > display_rows { + println!( + " ... ({} more rows omitted)", + num_rows - display_rows + ); + } + } + + println!("\n=== Read Summary ==="); + println!("Total rows read: {}", total_rows); + println!("Total batches: {}", batches.len()); + } + Err(err) => { + eprintln!("Failed to create arrow stream: {}", err); + } + }, + Err(err) => { + eprintln!("Failed to create table read: {}", err); + } + } + } + } + Err(err) => { + eprintln!("Failed to plan scan: {}", err); + } + } + } + Err(err) => { + eprintln!( + "Failed to get table '{}' (this is expected if the table doesn't exist): {}", + read_table_identifier, err + ); + } + } + + // ==================== Cleanup ==================== + println!("\n=== Cleanup ===\n"); // Drop table println!("\nDropping table '{}'...", renamed_identifier); match catalog.drop_table(&renamed_identifier, false).await { Ok(()) => println!("Table dropped successfully"), Err(err) => eprintln!("Failed to drop table: {}", err), } - - // ==================== Cleanup ==================== - println!("\n=== Cleanup ===\n"); - // Drop database (cascade = true to force drop even if not empty) println!("Dropping database 'example_db'..."); match catalog.drop_database("example_db", false, true).await { diff --git a/crates/paimon/examples/rest_catalog_read_append_example.rs b/crates/paimon/examples/rest_catalog_read_append_example.rs deleted file mode 100644 index 
5dbda442..00000000 --- a/crates/paimon/examples/rest_catalog_read_append_example.rs +++ /dev/null @@ -1,210 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Example: REST Catalog — Read Append Table Data -//! -//! This example demonstrates how to use `RESTCatalog` to read data from an -//! append-only table with the following schema: -//! -//! | Column | Type | -//! |----------|---------| -//! | user_id | INT | -//! | item_id | BIGINT | -//! | behavior | VARCHAR | -//! | dt | VARCHAR | -//! -//! Partition key: `dt` -//! -//! The table `default.test_t` must already exist and contain data on the -//! REST catalog server. -//! -//! # Usage -//! ```bash -//! # With DLF authentication: -//! DLF_ACCESS_KEY_ID=xxx DLF_ACCESS_KEY_SECRET=yyy \ -//! cargo run -p paimon --example rest_catalog_read_append_example -//! -//! # With Bearer token authentication: -//! PAIMON_REST_TOKEN=zzz \ -//! cargo run -p paimon --example rest_catalog_read_append_example -//! 
``` - -use futures::TryStreamExt; - -use paimon::catalog::{Catalog, Identifier, RESTCatalog}; -use paimon::common::{CatalogOptions, Options}; - -#[tokio::main] -async fn main() { - // ==================== Configuration ==================== - let mut options = Options::new(); - - // Basic configuration — replace with your actual server URL and warehouse - options.set(CatalogOptions::METASTORE, "rest"); - options.set(CatalogOptions::WAREHOUSE, "paimon_catalog"); - options.set(CatalogOptions::URI, "http://sample.net/"); - - // --- Authentication (choose one) --- - - // DLF authentication (Alibaba Cloud) - options.set(CatalogOptions::TOKEN_PROVIDER, "dlf"); - options.set("dlf.region", "cn-hangzhou"); - options.set( - "dlf.access-key-id", - std::env::var("DLF_ACCESS_KEY_ID").expect("DLF_ACCESS_KEY_ID env var not set"), - ); - options.set( - "dlf.access-key-secret", - std::env::var("DLF_ACCESS_KEY_SECRET").expect("DLF_ACCESS_KEY_SECRET env var not set"), - ); - - // ==================== Create RESTCatalog ==================== - println!("Creating RESTCatalog instance..."); - let catalog = match RESTCatalog::new(options, true).await { - Ok(catalog) => catalog, - Err(err) => { - eprintln!("Failed to create RESTCatalog: {}", err); - return; - } - }; - - // ==================== Get Table ==================== - let table_identifier = Identifier::new("default", "test_t"); - println!("Getting table '{}'...", table_identifier); - - let table = match catalog.get_table(&table_identifier).await { - Ok(table) => { - println!("Table retrieved successfully"); - println!(" Location: {}", table.location()); - println!(" Schema fields: {:?}", table.schema().fields()); - table - } - Err(err) => { - eprintln!("Failed to get table: {}", err); - return; - } - }; - - // ==================== Scan Table ==================== - println!("\n=== Scanning Table ===\n"); - - let read_builder = table.new_read_builder(); - let scan = read_builder.new_scan(); - - let plan = match scan.plan().await { - 
Ok(plan) => { - println!("Scan plan created successfully"); - println!(" Number of splits: {}", plan.splits().len()); - plan - } - Err(err) => { - eprintln!("Failed to plan scan: {}", err); - return; - } - }; - - if plan.splits().is_empty() { - println!("No data splits found — the table may be empty."); - return; - } - - // ==================== Read Table Data ==================== - println!("\n=== Reading Table Data ===\n"); - - let read = match read_builder.new_read() { - Ok(read) => read, - Err(err) => { - eprintln!("Failed to create table read: {}", err); - return; - } - }; - - let stream = match read.to_arrow(plan.splits()) { - Ok(stream) => stream, - Err(err) => { - eprintln!("Failed to create arrow stream: {}", err); - return; - } - }; - - let batches: Vec<_> = match stream.try_collect().await { - Ok(batches) => batches, - Err(err) => { - eprintln!("Failed to collect record batches: {}", err); - return; - } - }; - - println!("Collected {} record batch(es)", batches.len()); - - let mut total_rows = 0; - for (batch_index, batch) in batches.iter().enumerate() { - let num_rows = batch.num_rows(); - total_rows += num_rows; - println!( - "\n--- Batch {} ({} rows, {} columns) ---", - batch_index, - num_rows, - batch.num_columns() - ); - println!("Schema: {}", batch.schema()); - - // Print up to 20 rows per batch for readability - let display_rows = num_rows.min(20); - for row in 0..display_rows { - let mut row_values = Vec::new(); - for col in 0..batch.num_columns() { - let column = batch.column(col); - row_values.push(array_value_to_string(column, row)); - } - println!(" Row {}: [{}]", row, row_values.join(", ")); - } - if num_rows > display_rows { - println!(" ... 
({} more rows omitted)", num_rows - display_rows); - } - } - - println!("\n=== Summary ==="); - println!("Total rows read: {}", total_rows); - println!("Total batches: {}", batches.len()); - println!("\nExample completed!"); -} - -/// Format a single cell value from an Arrow array at the given row index. -/// Supports INT (Int32), BIGINT (Int64), and VARCHAR (String/LargeString). -fn array_value_to_string(array: &dyn arrow_array::Array, row: usize) -> String { - use arrow_array::*; - - if array.is_null(row) { - return "null".to_string(); - } - - if let Some(arr) = array.as_any().downcast_ref::() { - return arr.value(row).to_string(); - } - if let Some(arr) = array.as_any().downcast_ref::() { - return arr.value(row).to_string(); - } - if let Some(arr) = array.as_any().downcast_ref::() { - return arr.value(row).to_string(); - } - if let Some(arr) = array.as_any().downcast_ref::() { - return arr.value(row).to_string(); - } - - format!("", array.data_type()) -} diff --git a/crates/paimon/src/catalog/rest/rest_catalog.rs b/crates/paimon/src/catalog/rest/rest_catalog.rs index a9b7aabf..8fb62f40 100644 --- a/crates/paimon/src/catalog/rest/rest_catalog.rs +++ b/crates/paimon/src/catalog/rest/rest_catalog.rs @@ -67,7 +67,9 @@ impl RESTCatalog { let warehouse = options .get(CatalogOptions::WAREHOUSE) .cloned() - .unwrap_or_default(); + .ok_or_else(|| RestError::BadRequest { + message: format!("Missing required option: {}", CatalogOptions::WAREHOUSE), + })?; let api = RESTApi::new(options.clone(), config_required).await?; @@ -206,7 +208,13 @@ impl Catalog for RESTCatalog { source: None, })?; - let schema_id = response.schema_id.unwrap_or(0); + let schema_id = response.schema_id.ok_or_else(|| Error::DataInvalid { + message: format!( + "Table {} response missing schema_id", + identifier.full_name() + ), + source: None, + })?; let table_schema = TableSchema::new(schema_id, &schema); // Extract table path from response diff --git 
a/crates/paimon/src/catalog/rest/rest_token_file_io.rs b/crates/paimon/src/catalog/rest/rest_token_file_io.rs index bf9f7030..92a2062e 100644 --- a/crates/paimon/src/catalog/rest/rest_token_file_io.rs +++ b/crates/paimon/src/catalog/rest/rest_token_file_io.rs @@ -23,7 +23,7 @@ use std::collections::HashMap; -use tokio::sync::{Mutex, RwLock}; +use tokio::sync::{OnceCell, RwLock}; use crate::api::rest_api::RESTApi; use crate::api::rest_util::RESTUtil; @@ -53,7 +53,7 @@ pub struct RESTTokenFileIO { catalog_options: Options, /// Lazily-initialized REST API client for token refresh. /// Created on first token refresh and reused for subsequent refreshes. - api: Mutex>, + api: OnceCell, /// Cached token with RwLock for concurrent access. token: RwLock>, } @@ -70,7 +70,7 @@ impl RESTTokenFileIO { identifier, path, catalog_options, - api: Mutex::new(None), + api: OnceCell::new(), token: RwLock::new(None), } } @@ -119,18 +119,22 @@ impl RESTTokenFileIO { } } - // Slow path: acquire write lock and refresh - let mut token_guard = self.token.write().await; - - // Double-check after acquiring write lock (another task may have refreshed) - if let Some(token) = token_guard.as_ref() { - if !Self::is_token_expired(token) { - return Ok(()); + // Slow path: acquire write lock and check again + { + let token_guard = self.token.write().await; + if let Some(token) = token_guard.as_ref() { + if !Self::is_token_expired(token) { + return Ok(()); + } } } + // Write lock released before .await to avoid potential deadlock - // Refresh the token + // Refresh the token WITHOUT holding the lock let new_token = self.refresh_token().await?; + + // Acquire write lock again to update + let mut token_guard = self.token.write().await; *token_guard = Some(new_token); Ok(()) } @@ -140,14 +144,11 @@ impl RESTTokenFileIO { /// Lazily creates a `RESTApi` instance on first call and reuses it /// for subsequent refreshes. 
async fn refresh_token(&self) -> Result { - let mut api_guard = self.api.lock().await; - let api = match api_guard.as_ref() { - Some(existing) => existing, - None => { - let new_api = RESTApi::new(self.catalog_options.clone(), false).await?; - api_guard.insert(new_api) - } - }; + let api = self + .api + .get_or_try_init(|| async { RESTApi::new(self.catalog_options.clone(), false).await }) + .await?; + let response = api.load_table_token(&self.identifier).await?; let expires_at_millis = response.expires_at_millis.unwrap_or(0); From d9468c00c6b0ff15d58bd89c3e71409a69d2fd2a Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 1 Apr 2026 12:11:54 +0800 Subject: [PATCH 10/13] todo --- crates/paimon/src/catalog/rest/rest_catalog.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/paimon/src/catalog/rest/rest_catalog.rs b/crates/paimon/src/catalog/rest/rest_catalog.rs index 8fb62f40..240936e2 100644 --- a/crates/paimon/src/catalog/rest/rest_catalog.rs +++ b/crates/paimon/src/catalog/rest/rest_catalog.rs @@ -227,6 +227,7 @@ impl Catalog for RESTCatalog { let is_external = response.is_external.unwrap_or(false); // Build FileIO based on data_token_enabled and is_external + // TODO Support token cache and direct oss access let file_io = if self.data_token_enabled && !is_external { // Use RESTTokenFileIO to get token-based FileIO let token_file_io = From daaf46b67b708fd25097824f32bbe0ed4add184d Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 1 Apr 2026 15:29:08 +0800 Subject: [PATCH 11/13] fix --- crates/paimon/src/catalog/rest/rest_catalog.rs | 5 ++++- crates/paimon/src/catalog/rest/rest_token_file_io.rs | 10 +++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/paimon/src/catalog/rest/rest_catalog.rs b/crates/paimon/src/catalog/rest/rest_catalog.rs index 240936e2..a19b0ce6 100644 --- a/crates/paimon/src/catalog/rest/rest_catalog.rs +++ b/crates/paimon/src/catalog/rest/rest_catalog.rs @@ -224,7 +224,10 @@ impl Catalog for RESTCatalog { })?; // Check 
if the table is external - let is_external = response.is_external.unwrap_or(false); + let is_external = response.is_external.ok_or_else(|| Error::DataInvalid { + message: format!("Table {} response missing is_external", identifier.full_name()), + source: None, + })?;(); // Build FileIO based on data_token_enabled and is_external // TODO Support token cache and direct oss access diff --git a/crates/paimon/src/catalog/rest/rest_token_file_io.rs b/crates/paimon/src/catalog/rest/rest_token_file_io.rs index 92a2062e..21061283 100644 --- a/crates/paimon/src/catalog/rest/rest_token_file_io.rs +++ b/crates/paimon/src/catalog/rest/rest_token_file_io.rs @@ -151,7 +151,15 @@ impl RESTTokenFileIO { let response = api.load_table_token(&self.identifier).await?; - let expires_at_millis = response.expires_at_millis.unwrap_or(0); + let expires_at_millis = response.expires_at_millis.ok_or_else(|| { + crate::Error::DataInvalid { + message: format!( + "Token response for table '{}' missing expires_at_millis", + self.identifier.full_name() + ), + source: None, + } + })?; // Merge token with catalog options (e.g. 
DLF OSS endpoint override) let merged_token = self.merge_token_with_catalog_options(response.token); From 7e814bbab66b10b3f9c1e0cdad1a155739b0d9d5 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 1 Apr 2026 15:31:58 +0800 Subject: [PATCH 12/13] fmt --- .../paimon/src/catalog/rest/rest_catalog.rs | 7 +++++-- .../src/catalog/rest/rest_token_file_io.rs | 19 ++++++++++--------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/crates/paimon/src/catalog/rest/rest_catalog.rs b/crates/paimon/src/catalog/rest/rest_catalog.rs index a19b0ce6..e5d023ff 100644 --- a/crates/paimon/src/catalog/rest/rest_catalog.rs +++ b/crates/paimon/src/catalog/rest/rest_catalog.rs @@ -225,9 +225,12 @@ impl Catalog for RESTCatalog { // Check if the table is external let is_external = response.is_external.ok_or_else(|| Error::DataInvalid { - message: format!("Table {} response missing is_external", identifier.full_name()), + message: format!( + "Table {} response missing is_external", + identifier.full_name() + ), source: None, - })?;(); + })?; // Build FileIO based on data_token_enabled and is_external // TODO Support token cache and direct oss access diff --git a/crates/paimon/src/catalog/rest/rest_token_file_io.rs b/crates/paimon/src/catalog/rest/rest_token_file_io.rs index 21061283..6233eb10 100644 --- a/crates/paimon/src/catalog/rest/rest_token_file_io.rs +++ b/crates/paimon/src/catalog/rest/rest_token_file_io.rs @@ -151,15 +151,16 @@ impl RESTTokenFileIO { let response = api.load_table_token(&self.identifier).await?; - let expires_at_millis = response.expires_at_millis.ok_or_else(|| { - crate::Error::DataInvalid { - message: format!( - "Token response for table '{}' missing expires_at_millis", - self.identifier.full_name() - ), - source: None, - } - })?; + let expires_at_millis = + response + .expires_at_millis + .ok_or_else(|| crate::Error::DataInvalid { + message: format!( + "Token response for table '{}' missing expires_at_millis", + self.identifier.full_name() + ), + source: 
None, + })?; // Merge token with catalog options (e.g. DLF OSS endpoint override) let merged_token = self.merge_token_with_catalog_options(response.token); From 388a01caa780ee425bf6a777306d809e69de6c2c Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 1 Apr 2026 15:50:29 +0800 Subject: [PATCH 13/13] fix --- crates/paimon/src/catalog/filesystem.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/crates/paimon/src/catalog/filesystem.rs b/crates/paimon/src/catalog/filesystem.rs index 6a9af206..61d6e8d3 100644 --- a/crates/paimon/src/catalog/filesystem.rs +++ b/crates/paimon/src/catalog/filesystem.rs @@ -121,16 +121,9 @@ impl FileSystemCatalog { /// List directories in the given path. async fn list_directories(&self, path: &str) -> Result> { let statuses = self.file_io.list_status(path).await?; - // Normalize the listed path for comparison: strip trailing slash - let normalized_path = path.trim_end_matches('/'); let mut dirs = Vec::new(); for status in statuses { if status.is_dir { - // Skip the directory itself (opendal list_with includes the root entry) - let entry_path = status.path.trim_end_matches('/'); - if entry_path == normalized_path { - continue; - } if let Some(p) = get_basename(status.path.as_str()) // opendal get_basename will contain "/" for directory, // we need to strip suffix to get the real base name