From 75eea5fc18420f183683db3e67fae6056443252d Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Tue, 19 May 2026 21:56:17 +0800 Subject: [PATCH] [docs] Add missing Spark procedures Add documentation entries for compact_database, create_global_index, and drop_global_index, which are registered in SparkProcedures.java but were not documented. --- docs/content/spark/procedures.md | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/docs/content/spark/procedures.md b/docs/content/spark/procedures.md index ffc7005f42c1..5bd145f8a430 100644 --- a/docs/content/spark/procedures.md +++ b/docs/content/spark/procedures.md @@ -59,6 +59,28 @@ This section introduce all available spark procedures about paimon. CALL sys.compact(table => 'T', compact_strategy => 'minor')

+ + compact_database + + To compact all tables across one or more databases. Arguments: +
  • including_databases: regular expression to match databases to compact. Leave empty to match all databases (i.e. '.*').
  • +
  • including_tables: regular expression to match table identifiers (in 'db.table' form) to compact. Leave empty to match all tables (i.e. '.*').
  • +
  • excluding_tables: regular expression to match table identifiers to exclude from compaction.
  • +
  • options: additional dynamic options of the table. They take priority over the original `tableProp` but are overridden by `procedureArg`.
  • + + + -- compact all databases
    + CALL sys.compact_database()

    + -- compact some databases (accept regular expression)
    + CALL sys.compact_database(including_databases => 'db1|db2')

    + -- compact some tables (accept regular expression)
    + CALL sys.compact_database(including_databases => 'db1', including_tables => 'db1.table1|db1.table2')

    + -- exclude some tables (accept regular expression)
    + CALL sys.compact_database(including_databases => 'db1', including_tables => '.*', excluding_tables => '.*ignore_table')

    + -- set table options
    + CALL sys.compact_database(including_databases => 'db1', options => 'target-file-size=128m') + + expire_snapshots @@ -476,6 +498,36 @@ This section introduce all available spark procedures about paimon. CALL sys.rewrite_file_index(table => "t", where => "day = '2025-08-17'")
    + + create_global_index + + To create global index files for a given column. The table must have row-tracking.enabled=true. Arguments: +
  • table: the target table identifier. Cannot be empty.
  • +
  • index_column: the name of the column to index. Cannot be empty.
  • +
  • index_type: type of the index to build, e.g. 'btree' or 'bitmap'. Cannot be empty.
  • +
  • partitions: partition filter to limit the partitions on which to build the index. A comma (",") means "AND" and a semicolon (";") means "OR". Leave empty to build the index on all partitions.
  • +
  • options: additional dynamic options of the table. They take priority over the original `tableProp` but are overridden by `procedureArg`.
  • + + + CALL sys.create_global_index(table => 'default.T', index_column => 'name', index_type => 'bitmap')

    + CALL sys.create_global_index(table => 'default.T', index_column => 'name', index_type => 'btree')

    + CALL sys.create_global_index(table => 'default.T', index_column => 'name', index_type => 'btree', partitions => 'pt=p1;pt=p2') + + + + drop_global_index + + To drop global index files for a given column. Arguments: +
  • table: the target table identifier. Cannot be empty.
  • +
  • index_column: the name of the indexed column. Cannot be empty.
  • +
  • index_type: type of the index to drop, e.g. 'btree' or 'bitmap'. Cannot be empty.
  • +
  • partitions: partition filter to limit the partitions from which to drop the index. A comma (",") means "AND" and a semicolon (";") means "OR". Leave empty to drop the index from all partitions.
  • + + + CALL sys.drop_global_index(table => 'default.T', index_column => 'name', index_type => 'bitmap')

    + CALL sys.drop_global_index(table => 'default.T', index_column => 'name', index_type => 'bitmap', partitions => 'pt=p1') + + copy