From 8bbd6cf5b70335e6fc8313149daccd4d978548f8 Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Tue, 14 Mar 2023 18:02:36 +0800 Subject: [PATCH 1/2] docs: add how to write a system table Signed-off-by: Chojan Shang --- .../10-how-to-write-a-system-table.md | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 docs/doc/90-contributing/10-how-to-write-a-system-table.md diff --git a/docs/doc/90-contributing/10-how-to-write-a-system-table.md b/docs/doc/90-contributing/10-how-to-write-a-system-table.md new file mode 100644 index 0000000000000..32d0e40227fa3 --- /dev/null +++ b/docs/doc/90-contributing/10-how-to-write-a-system-table.md @@ -0,0 +1,176 @@ +--- +title: How to Write a System Table +--- + +System tables are special tables that provide information about Databend's internal state, such as databases, tables, functions, settings, etc. In this document, we will show you how to write a new system table for Databend using the credits table as an example. + +The credits table returns information about the upstream dependencies used by Databend, including their names, versions and licenses. + +## Prerequisites + +To write a new system table for Databend, you need to have some basic knowledge of Rust programming language and Databend's code structure. + +## Location + +The existing system tables for Databend are located in the `query/storage` directory. You should place your new system table file in this directory as well, unless there are some special build reasons that prevent you from doing so. In that case, you can temporarily place it in the `service/databases/system` directory (not recommended). + +## Definition + +The definition of a system table mainly focuses on two aspects: one is the table information, which includes the table `name` and `schema`, etc.; the other is the data generation/retrieval logic for the table content. These two aspects correspond to two traits: `SyncSystemTable` and `AsyncSystemTable`. 
You need to implement one of these traits depending on whether your data retrieval involves asynchronous function calls or not. + +## Implementation + +In this section, we will walk through the implementation of the credits table step by step. The code file is located at `credits_table.rs`. + +Firstly, you need to define a struct for your system table that contains only the fields for storing the table information. For example: + +```rust +pub struct CreditsTable { + table_info: TableInfo, +} +``` + +Next, you need to implement a create method for your system table struct that takes a `table_id` as an argument and returns an `Arc`. The `table_id` is generated by `sys_db_meta.next_table_id()` when creating a new system table. + +```rust +pub fn create(table_id: u64) -> Arc +``` + +Inside this method, you need to define a schema for your system table using `TableSchemaRefExt` and `TableField`. The schema describes the structure of your system table with field names and types depending on the data you want to store in it. + +For example: + +```rust +let schema = TableSchemaRefExt::create(vec![ + TableField::new("name", TableDataType::String), + TableField::new("version", TableDataType::String), + TableField::new("license", TableDataType::String), +]); +``` + +For string-type data, you can use `TableDataType::String`; other basic types are similar. But if you need to allow null values in your field, such as an optional 64-bit unsigned integer field, you can use `TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64)))` instead. `TableDataType::Nullable` indicates that null values are allowed; `TableDataType::Number(NumberDataType::UInt64)` represents that the type is 64-bit unsigned integer. + +After defining the schema, you need to define some metadata for your system table, such as description (`desc`), `name`, `meta`, etc. You can follow other existing examples and fill in these fields accordingly. 
+ +For example: + +```rust +let table_info = TableInfo { + desc: "'system'.'credits'".to_string(), + name: "credits".to_string(), + ident: TableIdent::new(table_id, 0), + meta: TableMeta { + schema, + engine: "SystemCredits".to_string(), + ..Default::default() + }, + ..Default::default() +}; + +SyncOneBlockSystemTable::create(CreditsTable { table_info }) +``` + +Finally, you need to create an instance of your system table struct with these fields and wrap it with either `SyncOneBlockSystemTable` or `AsyncOneBlockSystemTable` depending on whether your data retrieval logic is synchronous or asynchronous. + +Next, you need to implement either `SyncSystemTable` or `AsyncSystemTable` trait for your system table struct. `SyncSystemTable` requires you to define `NAME` constant and implement four methods: `get_table_info()`, `get_full_data()`, `get_partitions()` and `truncate()`. However, the last two methods have default implementations, so you don't need to implement them yourself in most cases. (`AsyncSystemTable` is similar, but it doesn't have `truncate()` method.) + +`NAME` constant follows the format of `system.`. + +```rust +const NAME: &'static str = "system.credits"; +``` + +`get_table_info()` method returns the table information stored in the struct. + +```rust +fn get_table_info(&self) -> &TableInfo { + &self.table_info +} +``` + +`get_full_data()` method is the most important part, because it contains the logic for generating or retrieving the data for your system table. The credits table has three fields that are similar, so we will only show the license field as an example. + +The license field information is obtained from an environment variable named `DATABEND_CREDITS_LICENSES` (see `common-building`). Each data item is separated by a comma. + +String-type columns are eventually converted from `Vec>`, where each string needs to be converted to `Vec`. So we use `.as_bytes().to_vec()` to do this conversion when iterating over the data. 
+ +```rust +let licenses: Vec> = env!("DATABEND_CREDITS_LICENSES") + .split_terminator(',') + .map(|x| x.trim().as_bytes().to_vec()) + .collect(); +``` + +After getting all the data, you can return them in a `DataBlock` format. For non-null types, use `from_data`; for nullable types, use `from_opt_data`. + +For example: + +```rust +Ok(DataBlock::new_from_columns(vec![ + StringType::from_data(names), + StringType::from_data(versions), + StringType::from_data(licenses), +])) +``` + +Lastly, if you want to integrate your system table into Databend, you also need to edit `system_database.rs` and register it to `SystemDatabase`. + +```rust +impl SystemDatabase { + pub fn create(sys_db_meta: &mut InMemoryMetas, config: &Config) -> Self { + ... + CreditsTable::create(sys_db_meta.next_table_id()), + ... + } +} +``` + +## Testing + +The tests for system tables are currently located at `tests/it/storages/system.rs`. + +For tables whose content does not change frequently, you can use Golden File testing. Its logic is to write the corresponding table into a specified file and compare it with an expected file. If they match, then the test passes; otherwise, it fails. + +For example: + +```rust +#[tokio::test(flavor = "multi_thread")] +async fn test_columns_table() -> Result<()> { + let (_guard, ctx) = crate::tests::create_query_context().await?; + + let mut mint = Mint::new("tests/it/storages/testdata"); + let file = &mut mint.new_goldenfile("columns_table.txt").unwrap(); + let table = ColumnsTable::create(1); + + run_table_tests(file, ctx, table).await?; + Ok(()) +} +``` + +For tables whose content may change dynamically or depend on external factors, there is a lack of sufficient testing methods. You can choose to test the parts that have relatively fixed patterns, such as the number of rows and columns; or you can verify whether the output contains specific content. 
+ +For example: + +```rust +#[tokio::test(flavor = "multi_thread")] +async fn test_metrics_table() -> Result<()> { + ... + let result = stream.try_collect::<Vec<_>>().await?; + let block = &result[0]; + assert_eq!(block.num_columns(), 4); + assert!(block.num_rows() >= 1); + + let output = pretty_format_blocks(result.as_slice())?; + assert!(output.contains("test_test_metrics_table_count")); + #[cfg(feature = "enable_histogram")] + assert!(output.contains("test_test_metrics_table_histogram")); + + Ok(()) +} +``` + +## Summary + +In this document, we have shown you how to write a new system table for Databend using the credits table as an example. We hope this document helps you understand the basic steps and principles of creating a system table for Databend. If you have any questions or feedback, please feel free to contact us on GitHub or Slack. Thank you for your interest and contribution to Databend! + + From 959bc865e7cce4b834c9065dde60759c9fc32b98 Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Wed, 15 Mar 2023 11:28:53 +0800 Subject: [PATCH 2/2] docs: update as eric's comments Signed-off-by: Chojan Shang --- .../10-how-to-write-a-system-table.md | 196 ++++++++---------- 1 file changed, 84 insertions(+), 112 deletions(-) diff --git a/docs/doc/90-contributing/10-how-to-write-a-system-table.md b/docs/doc/90-contributing/10-how-to-write-a-system-table.md index 32d0e40227fa3..309fa6a83b62c 100644 --- a/docs/doc/90-contributing/10-how-to-write-a-system-table.md +++ b/docs/doc/90-contributing/10-how-to-write-a-system-table.md @@ -1,155 +1,135 @@ --- -title: How to Write a System Table +title: How to Create a System Table --- -System tables are special tables that provide information about Databend's internal state, such as databases, tables, functions, settings, etc. In this document, we will show you how to write a new system table for Databend using the credits table as an example.
+System tables are tables that provide information about Databend's internal state, such as databases, tables, functions, and settings. If you're familiar with the Databend code structure and have basic knowledge of Rust, you can also create your own system tables as needed. -The credits table returns information about the upstream dependencies used by Databend, including their names, versions and licenses. +Creating a system table mainly involves defining the table information (table name and schema) and how to generate and retrieve data for the table. This can be done through implementing the trait `SyncSystemTable` or `AsyncSystemTable`. -## Prerequisites +This guide will show you how to create a new system table for Databend, using the table [system.credits](https://databend.rs/doc/sql-reference/system-tables/system-credits) as an example. The table provides information about Databend's upstream dependencies and the code is located at `src/query/storage/system/src/credits_table.rs`. -To write a new system table for Databend, you need to have some basic knowledge of Rust programming language and Databend's code structure. +:::note +Databend suggests that you store the code for new system tables in the directory `src/query/storage/system/src/`. However, there may be situations where you cannot do so, such as issues related to the build process. In such cases, you can place it temporarily in a directory called `src/query/service/src/databases/system` (although this is not recommended). +::: -## Location +## Creating a System Table -The existing system tables for Databend are located in the `query/storage` directory. You should place your new system table file in this directory as well, unless there are some special build reasons that prevent you from doing so. In that case, you can temporarily place it in the `service/databases/system` directory (not recommended). +The following walks through the implementation of the table `system.credits` step by step.
-## Definition +1. Define a struct for your system table that contains only the fields for storing the table information. -The definition of a system table mainly focuses on two aspects: one is the table information, which includes the table `name` and `schema`, etc.; the other is the data generation/retrieval logic for the table content. These two aspects correspond to two traits: `SyncSystemTable` and `AsyncSystemTable`. You need to implement one of these traits depending on whether your data retrieval involves asynchronous function calls or not. - -## Implementation - -In this section, we will walk through the implementation of the credits table step by step. The code file is located at `credits_table.rs`. - -Firstly, you need to define a struct for your system table that contains only the fields for storing the table information. For example: - -```rust -pub struct CreditsTable { - table_info: TableInfo, -} -``` - -Next, you need to implement a create method for your system table struct that takes a `table_id` as an argument and returns an `Arc`. The `table_id` is generated by `sys_db_meta.next_table_id()` when creating a new system table. - -```rust -pub fn create(table_id: u64) -> Arc -``` + ```rust + pub struct CreditsTable { + table_info: TableInfo, + } + ``` -Inside this method, you need to define a schema for your system table using `TableSchemaRefExt` and `TableField`. The schema describes the structure of your system table with field names and types depending on the data you want to store in it. +2. Implement a `create` method for your system table struct that takes `table_id` as an argument and returns `Arc<dyn Table>`. The `table_id` is generated by `sys_db_meta.next_table_id()` when creating a new system table.
-For example: + ```rust + pub fn create(table_id: u64) -> Arc<dyn Table> + ``` -```rust -let schema = TableSchemaRefExt::create(vec![ - TableField::new("name", TableDataType::String), - TableField::new("version", TableDataType::String), - TableField::new("license", TableDataType::String), -]); -``` +3. Define a schema for your system table using `TableSchemaRefExt` and `TableField`. The schema describes the structure of your system table with field names and types depending on the data you want to store in it. -For string-type data, you can use `TableDataType::String`; other basic types are similar. But if you need to allow null values in your field, such as an optional 64-bit unsigned integer field, you can use `TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64)))` instead. `TableDataType::Nullable` indicates that null values are allowed; `TableDataType::Number(NumberDataType::UInt64)` represents that the type is 64-bit unsigned integer. + For string-type data, you can use `TableDataType::String`; other basic types are similar. But if you need to allow null values in your field, such as an optional 64-bit unsigned integer field, you can use `TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64)))` instead. `TableDataType::Nullable` indicates that null values are allowed; `TableDataType::Number(NumberDataType::UInt64)` indicates that the type is a 64-bit unsigned integer. -After defining the schema, you need to define some metadata for your system table, such as description (`desc`), `name`, `meta`, etc. You can follow other existing examples and fill in these fields accordingly. + ```rust + let schema = TableSchemaRefExt::create(vec![ + TableField::new("name", TableDataType::String), + TableField::new("version", TableDataType::String), + TableField::new("license", TableDataType::String), + ]); + ``` -For example: +4. Define metadata for your system table, such as description (`desc`), `name`, `meta`, etc.
You can follow other existing examples and fill in these fields accordingly. -```rust -let table_info = TableInfo { - desc: "'system'.'credits'".to_string(), - name: "credits".to_string(), - ident: TableIdent::new(table_id, 0), - meta: TableMeta { - schema, - engine: "SystemCredits".to_string(), - ..Default::default() - }, - ..Default::default() -}; - -SyncOneBlockSystemTable::create(CreditsTable { table_info }) -``` + ```rust + let table_info = TableInfo { + desc: "'system'.'credits'".to_string(), + name: "credits".to_string(), + ident: TableIdent::new(table_id, 0), + meta: TableMeta { + schema, + engine: "SystemCredits".to_string(), + ..Default::default() + }, + ..Default::default() + }; -Finally, you need to create an instance of your system table struct with these fields and wrap it with either `SyncOneBlockSystemTable` or `AsyncOneBlockSystemTable` depending on whether your data retrieval logic is synchronous or asynchronous. + SyncOneBlockSystemTable::create(CreditsTable { table_info }) + ``` -Next, you need to implement either `SyncSystemTable` or `AsyncSystemTable` trait for your system table struct. `SyncSystemTable` requires you to define `NAME` constant and implement four methods: `get_table_info()`, `get_full_data()`, `get_partitions()` and `truncate()`. However, the last two methods have default implementations, so you don't need to implement them yourself in most cases. (`AsyncSystemTable` is similar, but it doesn't have `truncate()` method.) +5. Create an instance of your system table struct with these fields and wrap it with either `SyncOneBlockSystemTable` or `AsyncOneBlockSystemTable`, depending on whether your data retrieval is synchronous or asynchronous. -`NAME` constant follows the format of `system.`. +6. Implement either `SyncSystemTable` or `AsyncSystemTable` trait for your system table struct. 
`SyncSystemTable` requires you to define a `NAME` constant and implement four methods: `get_table_info()`, `get_full_data()`, `get_partitions()`, and `truncate()`. However, the last two methods have default implementations, so you don't need to implement them yourself in most cases. (`AsyncSystemTable` is similar, but it doesn't have a `truncate()` method.) -```rust -const NAME: &'static str = "system.credits"; -``` + The `NAME` constant follows the format of `system.<name>`. -`get_table_info()` method returns the table information stored in the struct. + ```rust + const NAME: &'static str = "system.credits"; + ``` -```rust -fn get_table_info(&self) -> &TableInfo { - &self.table_info -} -``` + The `get_table_info()` method returns the table information stored in the struct. -`get_full_data()` method is the most important part, because it contains the logic for generating or retrieving the data for your system table. The credits table has three fields that are similar, so we will only show the license field as an example. + ```rust + fn get_table_info(&self) -> &TableInfo { + &self.table_info + } + ``` -The license field information is obtained from an environment variable named `DATABEND_CREDITS_LICENSES` (see `common-building`). Each data item is separated by a comma. + The `get_full_data()` method is the most important part, because it contains the logic for generating or retrieving the data for your system table. The credits table has three fields that are similar, so we will only show the license field as an example. + The license field information is obtained from an environment variable named `DATABEND_CREDITS_LICENSES` (see `common-building`). Each data item is separated by a comma.
-```rust -let licenses: Vec> = env!("DATABEND_CREDITS_LICENSES") - .split_terminator(',') - .map(|x| x.trim().as_bytes().to_vec()) - .collect(); -``` + String-type columns are eventually converted from `Vec<Vec<u8>>`, where each string needs to be converted to `Vec<u8>`. So we use `.as_bytes().to_vec()` to do this conversion when iterating over the data. -After getting all the data, you can return them in a `DataBlock` format. For non-null types, use `from_data`; for nullable types, use `from_opt_data`. + ```rust + let licenses: Vec<Vec<u8>> = env!("DATABEND_CREDITS_LICENSES") + .split_terminator(',') + .map(|x| x.trim().as_bytes().to_vec()) + .collect(); + ``` -For example: +7. Return the retrieved data in a `DataBlock` format. Use `from_data` for non-null types and `from_opt_data` for nullable types. For example: -```rust -Ok(DataBlock::new_from_columns(vec![ - StringType::from_data(names), - StringType::from_data(versions), - StringType::from_data(licenses), -])) -``` + ```rust + Ok(DataBlock::new_from_columns(vec![ + StringType::from_data(names), + StringType::from_data(versions), + StringType::from_data(licenses), + ])) + ``` -Lastly, if you want to integrate your system table into Databend, you also need to edit `system_database.rs` and register it to `SystemDatabase`. +8. Edit `system_database.rs` to register the new table to `SystemDatabase`. -```rust -impl SystemDatabase { - pub fn create(sys_db_meta: &mut InMemoryMetas, config: &Config) -> Self { - ... - CreditsTable::create(sys_db_meta.next_table_id()), - ... + ```rust + impl SystemDatabase { + pub fn create(sys_db_meta: &mut InMemoryMetas, config: &Config) -> Self { + ... + CreditsTable::create(sys_db_meta.next_table_id()), + ... + } } -} -``` - -## Testing + ``` -The tests for system tables are currently located at `tests/it/storages/system.rs`. +## Testing a New System Table -For tables whose content does not change frequently, you can use Golden File testing.
Its logic is to write the corresponding table into a specified file and compare it with an expected file. If they match, then the test passes; otherwise, it fails. - -For example: +The system table tests are located at `tests/it/storages/system.rs`. For tables with infrequent content changes, Golden File testing can be used, which involves writing the table to a specified file and comparing it to an expected file. For example: ```rust #[tokio::test(flavor = "multi_thread")] async fn test_columns_table() -> Result<()> { let (_guard, ctx) = crate::tests::create_query_context().await?; - let mut mint = Mint::new("tests/it/storages/testdata"); let file = &mut mint.new_goldenfile("columns_table.txt").unwrap(); let table = ColumnsTable::create(1); - run_table_tests(file, ctx, table).await?; Ok(()) } ``` -For tables whose content may change dynamically or depend on external factors, there is a lack of sufficient testing methods. You can choose to test the parts that have relatively fixed patterns, such as the number of rows and columns; or you can verify whether the output contains specific content. - -For example: +For tables with dynamically changing content or external dependencies, testing methods are limited. You can test relatively fixed patterns such as the number of rows and columns, or verify if the output contains specific content. For example: ```rust #[tokio::test(flavor = "multi_thread")] @@ -159,18 +139,10 @@ async fn test_metrics_table() -> Result<()> { let block = &result[0]; assert_eq!(block.num_columns(), 4); assert!(block.num_rows() >= 1); - let output = pretty_format_blocks(result.as_slice())?; assert!(output.contains("test_test_metrics_table_count")); #[cfg(feature = "enable_histogram")] assert!(output.contains("test_test_metrics_table_histogram")); - Ok(()) } ``` - -## Summary - -In this document, we have shown you how to write a new system table for Databend using the credits table as an example. 
We hope this document helps you understand the basic steps and principles of creating a system table for Databend. If you have any questions or feedback, please feel free to contact us on GitHub or Slack. Thank you for your interest and contribution to Databend! - -