From 54bc8e66054edaeb89af70534d056c5a96cdce45 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Thu, 23 Oct 2025 23:39:04 +0800 Subject: [PATCH] docs: refresh unified workloads guides --- .../01-dee/10-enterprise-features.md | 4 +- .../51-ai-functions/01-external-functions.md | 4 +- .../01-dee/10-enterprise-features.md | 2 +- .../51-ai-functions/01-external-functions.md | 4 +- .../guides/54-query/00-basics/_category_.json | 4 - .../54-query/00-basics/aggregating-data.md | 90 ------ .../54-query/00-basics/filtering-selection.md | 103 ------- .../00-basics/groupby/_category_.json | 3 - .../00-basics/groupby/group-by-cube.md | 77 ----- .../groupby/group-by-grouping-sets.md | 135 --------- .../00-basics/groupby/group-by-rollup.md | 74 ----- .../54-query/00-basics/groupby/index.md | 157 ---------- docs/en/guides/54-query/00-basics/index.md | 63 ---- docs/en/guides/54-query/00-sql-analytics.md | 277 ++++++++++++++++++ .../01-combining-data/_category_.json | 4 - .../guides/54-query/01-combining-data/cte.md | 94 ------ .../54-query/01-combining-data/index.md | 99 ------- .../54-query/01-combining-data/joins.md | 200 ------------- .../54-query/01-combining-data/subqueries.md | 158 ---------- docs/en/guides/54-query/01-json-search.md | 140 +++++++++ .../54-query/02-advanced/_category_.json | 4 - .../54-query/02-advanced/external-function.md | 224 -------------- docs/en/guides/54-query/02-advanced/index.md | 59 ---- .../guides/54-query/02-advanced/sequences.md | 134 --------- .../54-query/02-advanced/stored-procedure.md | 64 ---- docs/en/guides/54-query/02-advanced/udf.md | 123 -------- docs/en/guides/54-query/02-vector-db.md | 100 +++++++ docs/en/guides/54-query/03-geo-analytics.md | 139 +++++++++ .../54-query/03-optimization/_category_.json | 4 - .../guides/54-query/03-optimization/index.md | 201 ------------- .../54-query/03-optimization/query-hash.md | 112 ------- .../54-query/03-optimization/query-profile.md | 64 ---- docs/en/guides/54-query/04-lakehouse-etl.md | 186 ++++++++++++ docs/en/guides/54-query/_category_.json | 4 +- docs/en/guides/54-query/index.md | 129 +------- .../55-performance/03-fulltext-index.md | 4 + site-redirects.ts | 24 +- 37 files changed, 877 insertions(+), 2390 deletions(-) delete mode 100644 docs/en/guides/54-query/00-basics/_category_.json delete mode 100644 docs/en/guides/54-query/00-basics/aggregating-data.md delete mode 100644 docs/en/guides/54-query/00-basics/filtering-selection.md delete mode 100644 docs/en/guides/54-query/00-basics/groupby/_category_.json delete mode 100644 docs/en/guides/54-query/00-basics/groupby/group-by-cube.md delete mode 100644 docs/en/guides/54-query/00-basics/groupby/group-by-grouping-sets.md delete mode 100644 docs/en/guides/54-query/00-basics/groupby/group-by-rollup.md delete mode 100644 docs/en/guides/54-query/00-basics/groupby/index.md delete mode 100644 docs/en/guides/54-query/00-basics/index.md create mode 100644 docs/en/guides/54-query/00-sql-analytics.md delete mode 100644 docs/en/guides/54-query/01-combining-data/_category_.json delete mode 100644 docs/en/guides/54-query/01-combining-data/cte.md delete mode 100644 docs/en/guides/54-query/01-combining-data/index.md delete mode 100644 docs/en/guides/54-query/01-combining-data/joins.md delete mode 100644 docs/en/guides/54-query/01-combining-data/subqueries.md create mode 100644 docs/en/guides/54-query/01-json-search.md delete mode 100644 docs/en/guides/54-query/02-advanced/_category_.json delete mode 100644 docs/en/guides/54-query/02-advanced/external-function.md delete mode 100644 
docs/en/guides/54-query/02-advanced/index.md delete mode 100644 docs/en/guides/54-query/02-advanced/sequences.md delete mode 100644 docs/en/guides/54-query/02-advanced/stored-procedure.md delete mode 100644 docs/en/guides/54-query/02-advanced/udf.md create mode 100644 docs/en/guides/54-query/02-vector-db.md create mode 100644 docs/en/guides/54-query/03-geo-analytics.md delete mode 100644 docs/en/guides/54-query/03-optimization/_category_.json delete mode 100644 docs/en/guides/54-query/03-optimization/index.md delete mode 100644 docs/en/guides/54-query/03-optimization/query-hash.md delete mode 100644 docs/en/guides/54-query/03-optimization/query-profile.md create mode 100644 docs/en/guides/54-query/04-lakehouse-etl.md diff --git a/docs/cn/guides/00-products/01-dee/10-enterprise-features.md b/docs/cn/guides/00-products/01-dee/10-enterprise-features.md index b826c19b44..92ae832312 100644 --- a/docs/cn/guides/00-products/01-dee/10-enterprise-features.md +++ b/docs/cn/guides/00-products/01-dee/10-enterprise-features.md @@ -20,7 +20,7 @@ import DatabendTable from '@site/src/components/DatabendTable'; | [Ngram 索引 (Ngram Index)](/guides/performance/ngram-index) | 查询性能 | 通过通配符搜索加速 LIKE 模式匹配查询。 | | [虚拟列 (Virtual Column)](/sql/sql-commands/ddl/virtual-column) | 查询性能 | 对 VARIANT 数据零配置性能优化,自动加速 JSON 查询。 | | [动态列 (Dynamic Column)](/sql/sql-commands/ddl/table/ddl-create-table#computed-columns) | 查询性能 | 通过存储或虚拟计算模式,从标量表达式自动生成列。 | -| [Python UDF](/guides/query/advanced/udf#python-requires-databend-enterprise) | 高级分析 | 使用内置处理器在 SQL 查询中执行 Python 代码。 | +| [Python UDF](/sql/sql-commands/ddl/udf/ddl-create-function-embedded#python) | 高级分析 | 使用内置处理器在 SQL 查询中执行 Python 代码。 | | [ATTACH TABLE](/sql/sql-commands/ddl/table/attach-table) | 数据共享 | 创建指向现有表数据的只读链接,实现跨环境零拷贝访问。 | | [流 (Stream)](/sql/sql-commands/ddl/stream) | 变更数据捕获 | 跟踪并捕获表变更,用于增量数据处理。 | | [清理临时文件 (Vacuum Temp Files)](/sql/sql-commands/administration-cmds/vacuum-temp-files) | 存储管理 | 清理临时文件(连接、聚合、排序溢出文件),释放存储空间。 | @@ -166,4 +166,4 @@ tbody={[ ['部署与升级', '✕', '✓'], ['运维支持', '✕', '✓'], ]} -/> \ No newline at end of file +/> diff --git a/docs/cn/guides/51-ai-functions/01-external-functions.md b/docs/cn/guides/51-ai-functions/01-external-functions.md index 770fe08cb3..338283ac17 100644 --- a/docs/cn/guides/51-ai-functions/01-external-functions.md +++ b/docs/cn/guides/51-ai-functions/01-external-functions.md @@ -78,5 +78,5 @@ LIMIT 5; ## 了解更多 -- **[外部函数指南](/guides/query/advanced/external-function)** - 完整的设置和部署说明 -- **[Databend Cloud](https://databend.cn)** - 使用免费账户试用外部函数 \ No newline at end of file +- **[外部函数指南](/guides/ai-functions/external-functions)** - 完整的设置和部署说明 +- **[Databend Cloud](https://databend.cn)** - 使用免费账户试用外部函数 diff --git a/docs/en/guides/00-products/01-dee/10-enterprise-features.md b/docs/en/guides/00-products/01-dee/10-enterprise-features.md index b93e6bff5b..171c09d39f 100644 --- a/docs/en/guides/00-products/01-dee/10-enterprise-features.md +++ b/docs/en/guides/00-products/01-dee/10-enterprise-features.md @@ -20,7 +20,7 @@ This page provides an updated list of available enterprise features. To access t | [Ngram Index](/guides/performance/ngram-index) | Query Performance | Accelerate LIKE pattern matching queries with wildcard searches. | | [Virtual Column](/sql/sql-commands/ddl/virtual-column) | Query Performance | Automatically accelerate JSON queries with zero-configuration performance optimization for VARIANT data. 
| | [Dynamic Column](/sql/sql-commands/ddl/table/ddl-create-table#computed-columns) | Query Performance | Generate columns automatically from scalar expressions with stored or virtual calculation modes. | -| [Python UDF](/guides/query/advanced/udf#python-requires-databend-enterprise) | Advanced Analytics | Execute Python code within SQL queries using built-in handler. | +| [Python UDF](/sql/sql-commands/ddl/udf/ddl-create-function-embedded#python) | Advanced Analytics | Execute Python code within SQL queries using built-in handler. | | [ATTACH TABLE](/sql/sql-commands/ddl/table/attach-table) | Data Sharing | Create read-only links to existing table data with zero-copy access across environments. | | [Stream](/sql/sql-commands/ddl/stream) | Change Data Capture | Track and capture table changes for incremental data processing. | | [Vacuum Temp Files](/sql/sql-commands/administration-cmds/vacuum-temp-files) | Storage Management | Clean up temporary files (join, aggregate, sort spills) to free storage space. | diff --git a/docs/en/guides/51-ai-functions/01-external-functions.md b/docs/en/guides/51-ai-functions/01-external-functions.md index 0ec8bbba73..b33d6ab631 100644 --- a/docs/en/guides/51-ai-functions/01-external-functions.md +++ b/docs/en/guides/51-ai-functions/01-external-functions.md @@ -78,5 +78,5 @@ LIMIT 5; ## Learn More -- **[External Functions Guide](/guides/query/advanced/external-function)** - Complete setup and deployment instructions -- **[Databend Cloud](https://databend.com)** - Try external functions with a free account \ No newline at end of file +- **[External Functions Guide](/guides/ai-functions/external-functions)** - Complete setup and deployment instructions +- **[Databend Cloud](https://databend.com)** - Try external functions with a free account diff --git a/docs/en/guides/54-query/00-basics/_category_.json b/docs/en/guides/54-query/00-basics/_category_.json deleted file mode 100644 index 78ffea89df..0000000000 --- a/docs/en/guides/54-query/00-basics/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Basic Queries", - "position": 1 -} \ No newline at end of file diff --git a/docs/en/guides/54-query/00-basics/aggregating-data.md b/docs/en/guides/54-query/00-basics/aggregating-data.md deleted file mode 100644 index ad209f2ac4..0000000000 --- a/docs/en/guides/54-query/00-basics/aggregating-data.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: Aggregating Data ---- - -Learn to summarize and analyze data using GROUP BY, aggregate functions, and advanced grouping techniques. 
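-
-The snippets on this page assume a small `employees` table. A minimal, hypothetical setup whose columns match the examples below:
-
-```sql
--- Hypothetical sample table so the aggregation examples run as-is
-CREATE TABLE employees (
-    name VARCHAR,
-    department VARCHAR,
-    job_title VARCHAR,
-    salary DOUBLE,
-    hire_date DATE
-);
-
-INSERT INTO employees VALUES
-    ('Alice', 'Engineering', 'Engineer', 85000, '2021-03-10'),
-    ('Bob',   'Engineering', 'Engineer', 78000, '2022-07-01'),
-    ('Carol', 'Marketing',   'Manager',  72000, '2021-11-20');
-```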
- -## Basic Aggregation - -### Common Aggregate Functions -```sql --- Count rows -SELECT COUNT(*) FROM employees; - --- Statistical functions -SELECT - AVG(salary) as avg_salary, - MIN(salary) as min_salary, - MAX(salary) as max_salary, - SUM(salary) as total_salary -FROM employees; -``` - -## GROUP BY Fundamentals - -### Single Column Grouping -```sql --- Count employees by department -SELECT department, COUNT(*) as emp_count -FROM employees -GROUP BY department; - --- Average salary by department -SELECT department, AVG(salary) as avg_salary -FROM employees -GROUP BY department -ORDER BY avg_salary DESC; -``` - -### Multiple Column Grouping -```sql --- Group by department and hire year -SELECT - department, - EXTRACT(YEAR FROM hire_date) as hire_year, - COUNT(*) as count, - AVG(salary) as avg_salary -FROM employees -GROUP BY department, EXTRACT(YEAR FROM hire_date) -ORDER BY department, hire_year; -``` - -### GROUP BY with HAVING -```sql --- Find departments with more than 5 employees -SELECT department, COUNT(*) as emp_count -FROM employees -GROUP BY department -HAVING COUNT(*) > 5; - --- Departments with average salary > 70000 -SELECT department, AVG(salary) as avg_salary -FROM employees -GROUP BY department -HAVING AVG(salary) > 70000; -``` - -## Advanced Grouping - -### GROUP BY ALL -```sql --- Automatically group by all non-aggregate columns -SELECT department, job_title, COUNT(*) as count -FROM employees -GROUP BY ALL; -``` - -## Advanced Grouping Extensions - -Databend supports SQL:2003 standard grouping extensions: - -- **[ROLLUP](./groupby/group-by-rollup.md)** - Hierarchical subtotals -- **[CUBE](./groupby/group-by-cube.md)** - All possible combinations -- **[GROUPING SETS](./groupby/group-by-grouping-sets.md)** - Custom combinations - -## Best Practices - -1. **Use appropriate aggregates** - COUNT(*) vs COUNT(column) -2. **Filter before grouping** - Use WHERE before GROUP BY -3. **Use HAVING for aggregate conditions** - Filter groups after aggregation -4. **Consider indexes** - GROUP BY columns should be indexed \ No newline at end of file diff --git a/docs/en/guides/54-query/00-basics/filtering-selection.md b/docs/en/guides/54-query/00-basics/filtering-selection.md deleted file mode 100644 index 172023ed36..0000000000 --- a/docs/en/guides/54-query/00-basics/filtering-selection.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: Filtering & Selection ---- - -Learn the fundamentals of querying data in Databend with SELECT, WHERE, and basic operations. 
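-
-The examples below assume an `employees` table shaped like this; a hypothetical sketch, not a built-in dataset:
-
-```sql
--- Hypothetical table backing the SELECT/WHERE examples on this page
-CREATE TABLE employees (
-    name VARCHAR,
-    department VARCHAR,
-    salary DOUBLE,
-    manager_id INT NULL
-);
-
-INSERT INTO employees VALUES
-    ('Alice', 'Engineering', 75000, NULL), -- NULL manager feeds the IS NULL example
-    ('Bob',   'Engineering', 80000, 1),
-    ('Carol', 'Marketing',   65000, 1);
-```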
- -## Basic SELECT Queries - -### Selecting Columns -```sql --- Select specific columns -SELECT name, salary FROM employees; - --- Select all columns -SELECT * FROM employees; - --- Select with column aliases -SELECT name AS employee_name, salary AS monthly_pay -FROM employees; -``` - -### Filtering with WHERE -```sql --- Simple condition -SELECT * FROM employees WHERE department = 'Engineering'; - --- Multiple conditions -SELECT * FROM employees -WHERE salary > 70000 AND department = 'Engineering'; - --- Using OR -SELECT * FROM employees -WHERE department = 'Engineering' OR department = 'Marketing'; - --- Range conditions -SELECT * FROM employees -WHERE salary BETWEEN 60000 AND 80000; - --- Pattern matching -SELECT * FROM employees -WHERE name LIKE 'A%'; -- Names starting with 'A' -``` - -## Sorting Results - -### ORDER BY Clause -```sql --- Sort by single column -SELECT * FROM employees ORDER BY salary DESC; - --- Sort by multiple columns -SELECT * FROM employees -ORDER BY department ASC, salary DESC; - --- Sort by column position -SELECT name, salary FROM employees ORDER BY 2 DESC; -``` - -## Limiting Results - -### LIMIT and OFFSET -```sql --- Get top 5 highest paid employees -SELECT * FROM employees -ORDER BY salary DESC -LIMIT 5; - --- Pagination - skip first 10, get next 5 -SELECT * FROM employees -ORDER BY salary DESC -LIMIT 5 OFFSET 10; -``` - -## Common Operators - -### Comparison Operators -- `=` Equal to -- `!=` or `<>` Not equal to -- `>` Greater than -- `<` Less than -- `>=` Greater than or equal -- `<=` Less than or equal - -### Logical Operators -- `AND` Both conditions must be true -- `OR` Either condition can be true -- `NOT` Negates a condition - -### NULL Handling -```sql --- Check for NULL values -SELECT * FROM employees WHERE manager_id IS NULL; - --- Check for non-NULL values -SELECT * FROM employees WHERE manager_id IS NOT NULL; -``` - -## Best Practices - -1. **Be specific with columns** - Avoid `SELECT *` in production -2. **Use indexes** - WHERE conditions on indexed columns are faster -3. **Limit large results** - Always use LIMIT for exploratory queries -4. **Filter early** - Apply WHERE conditions before JOINs when possible \ No newline at end of file diff --git a/docs/en/guides/54-query/00-basics/groupby/_category_.json b/docs/en/guides/54-query/00-basics/groupby/_category_.json deleted file mode 100644 index 479fa2a886..0000000000 --- a/docs/en/guides/54-query/00-basics/groupby/_category_.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "label": "GROUP BY" -} \ No newline at end of file diff --git a/docs/en/guides/54-query/00-basics/groupby/group-by-cube.md b/docs/en/guides/54-query/00-basics/groupby/group-by-cube.md deleted file mode 100644 index 177960dd37..0000000000 --- a/docs/en/guides/54-query/00-basics/groupby/group-by-cube.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: GROUP BY CUBE ---- - -`GROUP BY CUBE` is an extension of the [GROUP BY](index.md) clause similar to [GROUP BY ROLLUP](group-by-rollup.md). In addition to producing all the rows of a `GROUP BY ROLLUP`, `GROUP BY CUBE` adds all the "cross-tabulations" rows. Sub-total rows are rows that further aggregate whose values are derived by computing the same aggregate functions that were used to produce the grouped rows. - -A `CUBE` grouping is equivalent to a series of grouping sets and is essentially a shorter specification. The N elements of a CUBE specification correspond to `2^N GROUPING SETS`. - -## Syntax - -```sql -SELECT ... -FROM ... -[ ... ] -GROUP BY CUBE ( groupCube [ , groupCube [ , ... 
] ] ) -[ ... ] -``` - -Where: -```sql -groupCube ::= { | | } -``` - -- ``: Column alias appearing in the query block’s SELECT list - -- ``: Position of an expression in the SELECT list - -- ``: Any expression on tables in the current scope - - -## Examples - -Let's assume we have a sales_data table with the following schema and sample data: - -```sql -CREATE TABLE sales_data ( - region VARCHAR(255), - product VARCHAR(255), - sales_amount INT -); - -INSERT INTO sales_data (region, product, sales_amount) VALUES - ('North', 'WidgetA', 200), - ('North', 'WidgetB', 300), - ('South', 'WidgetA', 400), - ('South', 'WidgetB', 100), - ('West', 'WidgetA', 300), - ('West', 'WidgetB', 200); -``` - -Now, let's use the `GROUP BY CUBE` clause to get the total sales amount for each region and product, along with all possible aggregations: - -```sql -SELECT region, product, SUM(sales_amount) AS total_sales -FROM sales_data -GROUP BY CUBE (region, product); -``` - -The result will be: -```sql -+--------+---------+-------------+ -| region | product | total_sales | -+--------+---------+-------------+ -| South | NULL | 500 | -| NULL | WidgetB | 600 | -| West | NULL | 500 | -| North | NULL | 500 | -| West | WidgetB | 200 | -| NULL | NULL | 1500 | -| North | WidgetB | 300 | -| South | WidgetA | 400 | -| North | WidgetA | 200 | -| NULL | WidgetA | 900 | -| West | WidgetA | 300 | -| South | WidgetB | 100 | -+--------+---------+-------------+ -``` diff --git a/docs/en/guides/54-query/00-basics/groupby/group-by-grouping-sets.md b/docs/en/guides/54-query/00-basics/groupby/group-by-grouping-sets.md deleted file mode 100644 index 6f82c38ef1..0000000000 --- a/docs/en/guides/54-query/00-basics/groupby/group-by-grouping-sets.md +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: GROUP BY GROUPING SETS ---- - -`GROUP BY GROUPING SETS` is a powerful extension of the [GROUP BY](index.md) clause that allows computing multiple group-by clauses in a single statement. The group set is a set of dimension columns. - -`GROUP BY GROUPING SETS` is equivalent to the UNION of two or more GROUP BY operations in the same result set: - -- `GROUP BY GROUPING SETS((a))` is equivalent to the single grouping set operation `GROUP BY a`. - -- `GROUP BY GROUPING SETS((a),(b))` is equivalent to `GROUP BY a UNION ALL GROUP BY b`. - -## Syntax - -```sql -SELECT ... -FROM ... -[ ... ] -GROUP BY GROUPING SETS ( groupSet [ , groupSet [ , ... ] ] ) -[ ... 
] -``` - -Where: -```sql -groupSet ::= { | | } -``` - -- ``: Column alias appearing in the query block’s SELECT list - -- ``: Position of an expression in the SELECT list - -- ``: Any expression on tables in the current scope - - -## Examples - -Sample Data Setup: -```sql --- Create a sample sales table -CREATE TABLE sales ( - id INT, - sale_date DATE, - product_id INT, - store_id INT, - quantity INT -); - --- Insert sample data into the sales table -INSERT INTO sales (id, sale_date, product_id, store_id, quantity) -VALUES (1, '2021-01-01', 101, 1, 5), - (2, '2021-01-01', 102, 1, 10), - (3, '2021-01-01', 101, 2, 15), - (4, '2021-01-02', 102, 1, 8), - (5, '2021-01-02', 101, 2, 12), - (6, '2021-01-02', 103, 2, 20); -``` - -### GROUP BY GROUPING SETS with column aliases - -```sql -SELECT product_id AS pid, - store_id AS sid, - SUM(quantity) AS total_quantity -FROM sales -GROUP BY GROUPING SETS((pid), (sid)); -``` - -This query is equivalent to: - -```sql -SELECT product_id AS pid, - NULL AS sid, - SUM(quantity) AS total_quantity -FROM sales -GROUP BY pid -UNION ALL -SELECT NULL AS pid, - store_id AS sid, - SUM(quantity) AS total_quantity -FROM sales -GROUP BY sid; -``` - -Output: -```sql -+------+------+----------------+ -| pid | sid | total_quantity | -+------+------+----------------+ -| 102 | NULL | 18 | -| NULL | 2 | 47 | -| 101 | NULL | 32 | -| 103 | NULL | 20 | -| NULL | 1 | 23 | -+------+------+----------------+ -``` - -### GROUP BY GROUPING SETS with positions - -```sql -SELECT product_id, - store_id, - SUM(quantity) AS total_quantity -FROM sales -GROUP BY GROUPING SETS((1), (2)); -``` - -This query is equivalent to: - -```sql -SELECT product_id, - NULL AS store_id, - SUM(quantity) AS total_quantity -FROM sales -GROUP BY product_id -UNION ALL -SELECT NULL AS product_id, - store_id, - SUM(quantity) AS total_quantity -FROM sales -GROUP BY store_id; -``` - -Output: -```sql -+------------+----------+----------------+ -| product_id | store_id | total_quantity | -+------------+----------+----------------+ -| 102 | NULL | 18 | -| NULL | 2 | 47 | -| 101 | NULL | 32 | -| 103 | NULL | 20 | -| NULL | 1 | 23 | -+------------+----------+----------------+ -``` - diff --git a/docs/en/guides/54-query/00-basics/groupby/group-by-rollup.md b/docs/en/guides/54-query/00-basics/groupby/group-by-rollup.md deleted file mode 100644 index 74656b2574..0000000000 --- a/docs/en/guides/54-query/00-basics/groupby/group-by-rollup.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: GROUP BY ROLLUP ---- - - -`GROUP BY ROLLUP` is an extension of the [GROUP BY](index.md) clause that produces sub-total rows (in addition to the grouped rows). Sub-total rows are rows that further aggregate whose values are derived by computing the same aggregate functions that were used to produce the grouped rows. - -## Syntax - -```sql -SELECT ... -FROM ... -[ ... ] -GROUP BY ROLLUP ( groupRollup [ , groupRollup [ , ... ] ] ) -[ ... 
] -``` - -Where: -```sql -groupRollup ::= { | | } -``` - -- ``: Column alias appearing in the query block’s SELECT list - -- ``: Position of an expression in the SELECT list - -- ``: Any expression on tables in the current scope - - -## Examples - -Let's create a sample table named sales_data and insert some data: -```sql -CREATE TABLE sales_data ( - region VARCHAR(255), - product VARCHAR(255), - sales_amount INT -); - -INSERT INTO sales_data (region, product, sales_amount) VALUES - ('North', 'WidgetA', 200), - ('North', 'WidgetB', 300), - ('South', 'WidgetA', 400), - ('South', 'WidgetB', 100), - ('West', 'WidgetA', 300), - ('West', 'WidgetB', 200); -``` - -Now, let's use the GROUP BY ROLLUP clause to get the total sales amount for each region and product, along with sub-totals for each region: -```sql -SELECT region, product, SUM(sales_amount) AS total_sales -FROM sales_data -GROUP BY ROLLUP (region, product); -``` - -The result will be: -```sql -+--------+---------+-------------+ -| region | product | total_sales | -+--------+---------+-------------+ -| South | NULL | 500 | -| West | NULL | 500 | -| North | NULL | 500 | -| West | WidgetB | 200 | -| NULL | NULL | 1500 | -| North | WidgetB | 300 | -| South | WidgetA | 400 | -| North | WidgetA | 200 | -| West | WidgetA | 300 | -| South | WidgetB | 100 | -+--------+---------+-------------+ -``` - -In this example, the GROUP BY ROLLUP clause calculates the total sales for each region-product combination, each region, and the grand total. \ No newline at end of file diff --git a/docs/en/guides/54-query/00-basics/groupby/index.md b/docs/en/guides/54-query/00-basics/groupby/index.md deleted file mode 100644 index fb3948fd87..0000000000 --- a/docs/en/guides/54-query/00-basics/groupby/index.md +++ /dev/null @@ -1,157 +0,0 @@ ---- -title: GROUP BY ---- -import FunctionDescription from '@site/src/components/FunctionDescription'; - - - -The GROUP BY clause enables you to group rows based on the same group-by-item expressions and then apply aggregate functions to each resulting group. The group-by-item expressions can include column names or aliases, numerical references to positions in the SELECT list, general expressions, or all non-aggregate items in the SELECT list. - -The GROUP BY clause in Databend comes with the following extensions for more comprehensive data grouping and versatile data analysis: - -- [GROUP BY CUBE](group-by-cube.md) -- [GROUP BY GROUPING SETS](group-by-grouping-sets.md) -- [GROUP BY ROLLUP](group-by-rollup.md) - -## Syntax - -```sql -SELECT ... - FROM ... - [ ... ] -GROUP BY [ ALL | groupItem [ , groupItem [ , ... ] ] ] - [ ... ] -``` - -Where: - -- **ALL**: When the keyword "ALL" is used, Databend groups the data based on all non-aggregate items in the SELECT list. -- **groupItem**: A group item can be one of the following: - - A column name or alias defined in the SELECT list. - - A numerical reference to the position of a column in the SELECT list. - - Any expression that involves columns from the tables used in the current query context. 
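-
-The three `groupItem` forms listed above can be mixed in a single clause. A short sketch, using the `employees` table defined in the Examples section below:
-
-```sql
--- Alias, SELECT-list position, and expression combined in one GROUP BY
-SELECT department_id AS dept,
-       job_id,
-       EXTRACT(YEAR FROM hire_date) AS hire_year,
-       COUNT(*) AS num_employees
-FROM employees
-GROUP BY dept, 2, EXTRACT(YEAR FROM hire_date); -- position 2 refers to job_id
-```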
- -## Examples - -The GROUP BY examples in this section are built upon the following data setup: - -```sql --- Create a sample table named "employees" -CREATE TABLE employees ( - id INT, - first_name VARCHAR(50), - last_name VARCHAR(50), - department_id INT, - job_id INT, - hire_date DATE -); - --- Insert sample data into the "employees" table -INSERT INTO employees (id, first_name, last_name, department_id, job_id, hire_date) -VALUES (1, 'John', 'Doe', 1, 101, '2021-01-15'), - (2, 'Jane', 'Smith', 1, 101, '2021-02-20'), - (3, 'Alice', 'Johnson', 1, 102, '2021-03-10'), - (4, 'Bob', 'Brown', 2, 201, '2021-03-15'), - (5, 'Charlie', 'Miller', 2, 202, '2021-04-10'), - (6, 'Eve', 'Davis', 2, 202, '2021-04-15'); -``` - -### Group By One Column - -This query groups employees by their `department_id` and counts the number of employees in each department: -```sql -SELECT department_id, COUNT(*) AS num_employees -FROM employees -GROUP BY department_id; -``` - -Output: -```sql -+---------------+---------------+ -| department_id | num_employees | -+---------------+---------------+ -| 1 | 3 | -| 2 | 3 | -+---------------+---------------+ -``` - -### Group By Multiple Columns - -This query groups employees by `department_id` and `job_id`, then counts the number of employees in each group: -```sql -SELECT department_id, job_id, COUNT(*) AS num_employees -FROM employees -GROUP BY department_id, job_id; -``` - -Output: -```sql -+---------------+--------+---------------+ -| department_id | job_id | num_employees | -+---------------+--------+---------------+ -| 1 | 101 | 2 | -| 1 | 102 | 1 | -| 2 | 201 | 1 | -| 2 | 202 | 2 | -+---------------+--------+---------------+ -``` - -### Group By ALL - -This query groups employees by using the GROUP BY ALL clause, which groups all non-aggregate columns in the SELECT list. Please note that, in this case, the result will be identical to grouping by `department_id` and `job_id` since these are the only non-aggregate items present in the SELECT list. - -```sql -SELECT department_id, job_id, COUNT(*) AS num_employees -FROM employees -GROUP BY ALL; -``` - -Output: -```sql -+---------------+--------+---------------+ -| department_id | job_id | num_employees | -+---------------+--------+---------------+ -| 1 | 101 | 2 | -| 1 | 102 | 1 | -| 2 | 201 | 1 | -| 2 | 202 | 2 | -+---------------+--------+---------------+ -``` - - -### Group By Position - -This query is equivalent to the "Group By One Column" example above. 
The position 1 refers to the first item in the SELECT list, which is `department_id`: -```sql -SELECT department_id, COUNT(*) AS num_employees -FROM employees -GROUP BY 1; -``` - -Output: -```sql -+---------------+---------------+ -| department_id | num_employees | -+---------------+---------------+ -| 1 | 3 | -| 2 | 3 | -+---------------+---------------+ -``` - -### Group By Expression - -This query groups employees by the year they were hired and counts the number of employees hired in each year: -```sql -SELECT EXTRACT(YEAR FROM hire_date) AS hire_year, COUNT(*) AS num_hires -FROM employees -GROUP BY EXTRACT(YEAR FROM hire_date); -``` - -Output: -```sql -+-----------+-----------+ -| hire_year | num_hires | -+-----------+-----------+ -| 2021 | 6 | -+-----------+-----------+ -``` \ No newline at end of file diff --git a/docs/en/guides/54-query/00-basics/index.md b/docs/en/guides/54-query/00-basics/index.md deleted file mode 100644 index 9a48389174..0000000000 --- a/docs/en/guides/54-query/00-basics/index.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Basic Queries ---- - -Master the fundamentals of querying data in Databend. Start here if you're new to SQL or need a refresher on core concepts. - -## What You'll Learn - -- Select and filter data effectively -- Sort and limit query results -- Group data and calculate aggregates -- Use advanced grouping techniques - -## Query Essentials - -### [Filtering & Selection](./filtering-selection.md) -Learn the basics: SELECT, WHERE, ORDER BY, and LIMIT -```sql -SELECT name, salary FROM employees -WHERE department = 'Engineering' -ORDER BY salary DESC; -``` - -### [Aggregating Data](./aggregating-data.md) -Summarize data with GROUP BY and aggregate functions -```sql -SELECT department, AVG(salary) as avg_salary -FROM employees -GROUP BY department; -``` - -### [Advanced Grouping](./groupby/index.md) -Multi-dimensional analysis with CUBE, ROLLUP, and GROUPING SETS -```sql --- Generate all possible grouping combinations -SELECT department, job_level, COUNT(*) -FROM employees -GROUP BY CUBE(department, job_level); -``` - -## Quick Reference - -### Most Common Patterns -```sql --- Top N query -SELECT * FROM table ORDER BY column DESC LIMIT 10; - --- Count by category -SELECT category, COUNT(*) FROM table GROUP BY category; - --- Filter and aggregate -SELECT region, AVG(sales) -FROM orders -WHERE order_date >= '2023-01-01' -GROUP BY region -HAVING AVG(sales) > 1000; -``` - -## Next Steps - -Once you're comfortable with basic queries: -- [Combining Data](../01-combining-data/index.md) - JOIN tables and use CTEs -- [Advanced Features](../02-advanced/index.md) - Custom functions and procedures \ No newline at end of file diff --git a/docs/en/guides/54-query/00-sql-analytics.md b/docs/en/guides/54-query/00-sql-analytics.md new file mode 100644 index 0000000000..33669d838f --- /dev/null +++ b/docs/en/guides/54-query/00-sql-analytics.md @@ -0,0 +1,277 @@ +--- +title: SQL Analytics +--- + +> **Scenario:** EverDrive Smart Vision analysts curate a shared set of drive sessions and key frames so every downstream workload can query the same IDs without copying data between systems. + +This tutorial builds a miniature **EverDrive Smart Vision** dataset and shows how Databend’s single optimizer works across the rest of the guides. Every ID you create here (`SES-20240801-SEA01`, `FRAME-0001` …) reappears in the JSON, vector, geo, and ETL walkthroughs for a consistent autonomous-driving story. + +## 1. 
Create Sample Tables +Two tables capture test sessions and the important frames extracted from dash-camera video. + +```sql +CREATE OR REPLACE TABLE drive_sessions ( + session_id VARCHAR, + vehicle_id VARCHAR, + route_name VARCHAR, + start_time TIMESTAMP, + end_time TIMESTAMP, + weather VARCHAR, + camera_setup VARCHAR +); + +CREATE OR REPLACE TABLE frame_events ( + frame_id VARCHAR, + session_id VARCHAR, + frame_index INT, + captured_at TIMESTAMP, + event_type VARCHAR, + risk_score DOUBLE +); + +INSERT INTO drive_sessions VALUES + ('SES-20240801-SEA01', 'VEH-01', 'Seattle → Bellevue → Seattle', '2024-08-01 09:00', '2024-08-01 10:10', 'Sunny', 'Dual 1080p'), + ('SES-20240802-SEA02', 'VEH-02', 'Downtown Night Loop', '2024-08-02 20:15', '2024-08-02 21:05', 'Light Rain','Night Vision'), + ('SES-20240803-SEA03', 'VEH-03', 'Harbor Industrial Route', '2024-08-03 14:05', '2024-08-03 15:30', 'Overcast', 'Thermal + RGB'); + +INSERT INTO frame_events VALUES + ('FRAME-0001', 'SES-20240801-SEA01', 120, '2024-08-01 09:32:15', 'SuddenBrake', 0.82), + ('FRAME-0002', 'SES-20240801-SEA01', 342, '2024-08-01 09:48:03', 'CrosswalkPedestrian', 0.67), + ('FRAME-0003', 'SES-20240802-SEA02', 88, '2024-08-02 20:29:41', 'NightLowVisibility', 0.59), + ('FRAME-0004', 'SES-20240802-SEA02', 214, '2024-08-02 20:48:12', 'EmergencyVehicle', 0.73), + ('FRAME-0005', 'SES-20240803-SEA03', 305, '2024-08-03 15:02:44', 'CyclistOvertake', 0.64); +``` + +> Need a refresher on table DDL? See [CREATE TABLE](/sql/sql-commands/ddl/table/ddl-create-table). + +--- + +## 2. Filter Recent Sessions +Keep analytics focused on the most recent drives. + +```sql +WITH recent_sessions AS ( + SELECT * + FROM drive_sessions + WHERE start_time >= DATEADD('day', -7, CURRENT_TIMESTAMP) +) +SELECT * +FROM recent_sessions +ORDER BY start_time DESC; +``` + +Filtering early keeps later joins and aggregations fast. Docs: [WHERE & CASE](/sql/sql-commands/query-syntax/query-select#where-clause). + +--- + +## 3. JOIN +### INNER JOIN ... USING +Combine session metadata with frame-level events. + +```sql +WITH recent_events AS ( + SELECT * + FROM frame_events + WHERE captured_at >= DATEADD('day', -7, CURRENT_TIMESTAMP) +) +SELECT e.frame_id, + e.captured_at, + e.event_type, + e.risk_score, + s.vehicle_id, + s.route_name, + s.weather +FROM recent_events e +JOIN drive_sessions s USING (session_id) +ORDER BY e.captured_at; +``` + +### NOT EXISTS (Anti Join) +Find events whose session metadata is missing. + +```sql +SELECT frame_id +FROM frame_events e +WHERE NOT EXISTS ( + SELECT 1 + FROM drive_sessions s + WHERE s.session_id = e.session_id +); +``` + +### LATERAL FLATTEN (JSON Unnest) +Combine events with detection objects stored inside JSON payloads. + +```sql +SELECT e.frame_id, + obj.value['type']::STRING AS object_type +FROM frame_events e +JOIN frame_payloads p USING (frame_id), + LATERAL FLATTEN(p.payload['objects']) AS obj; +``` + +More patterns: [JOIN reference](/sql/sql-commands/query-syntax/query-join). + +--- + +## 4. GROUP BY +### GROUP BY route_name, event_type +Standard `GROUP BY` to compare routes and event types. + +```sql +WITH recent_events AS ( + SELECT * + FROM frame_events + WHERE captured_at >= DATEADD('week', -4, CURRENT_TIMESTAMP) +) +SELECT route_name, + event_type, + COUNT(*) AS event_count, + AVG(risk_score) AS avg_risk +FROM recent_events +JOIN drive_sessions USING (session_id) +GROUP BY route_name, event_type +ORDER BY avg_risk DESC, event_count DESC; +``` + +### GROUP BY ROLLUP +Adds route subtotals plus a grand total. 
+ +```sql +SELECT route_name, + event_type, + COUNT(*) AS event_count, + AVG(risk_score) AS avg_risk +FROM frame_events +JOIN drive_sessions USING (session_id) +GROUP BY ROLLUP(route_name, event_type) +ORDER BY route_name NULLS LAST, event_type; +``` + +### GROUP BY CUBE +Generates all combinations of route and event type. + +```sql +SELECT route_name, + event_type, + COUNT(*) AS event_count, + AVG(risk_score) AS avg_risk +FROM frame_events +JOIN drive_sessions USING (session_id) +GROUP BY CUBE(route_name, event_type) +ORDER BY route_name NULLS LAST, event_type; +``` + +--- + +## 5. WINDOW FUNCTION +### SUM(...) OVER (running total) +Track cumulative risk across each drive with a running `SUM`. + +```sql +WITH session_event_scores AS ( + SELECT session_id, + captured_at, + risk_score + FROM frame_events +) +SELECT session_id, + captured_at, + risk_score, + SUM(risk_score) OVER ( + PARTITION BY session_id + ORDER BY captured_at + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS cumulative_risk +FROM session_event_scores +ORDER BY session_id, captured_at; +``` + +### AVG(...) OVER (moving average) +Show a moving average of risk over the last three events: + +```sql +WITH session_event_scores AS ( + SELECT session_id, + captured_at, + risk_score + FROM frame_events +) +SELECT session_id, + captured_at, + risk_score, + AVG(risk_score) OVER ( + PARTITION BY session_id + ORDER BY captured_at + ROWS BETWEEN 3 PRECEDING AND CURRENT ROW + ) AS moving_avg_risk +FROM session_event_scores +ORDER BY session_id, captured_at; +``` + +Window functions let you express rolling totals or averages inline. Full list: [Window functions](/sql/sql-functions/window-functions). + +--- + +## 6. Aggregating Index Acceleration +Cache heavy summaries with an [Aggregating Index](/guides/performance/aggregating-index) so dashboards stay snappy. + +```sql +CREATE OR REPLACE AGGREGATING INDEX idx_route_event_summary ON frame_events +AS +SELECT session_id, + event_type, + COUNT(*) AS event_count, + AVG(risk_score) AS avg_risk +FROM frame_events +GROUP BY session_id, event_type; +``` + +Now run the same summary query as before—the optimizer will pull results from the index automatically: + +```sql +SELECT s.route_name, + e.event_type, + COUNT(*) AS event_count, + AVG(e.risk_score) AS avg_risk +FROM frame_events e +JOIN drive_sessions s USING (session_id) +WHERE s.start_time >= DATEADD('week', -8, CURRENT_TIMESTAMP) +GROUP BY s.route_name, e.event_type +ORDER BY avg_risk DESC; +``` + +`EXPLAIN` the statement to see the `AggregatingIndex` node instead of a full scan. Databend keeps the index fresh as new frames arrive, delivering sub-second dashboards without extra ETL jobs. + +--- + +## 7. Stored Procedure Automation +You can also wrap the reporting logic in a stored procedure so it runs exactly the way you expect during scheduled jobs. + +```sql +CREATE OR REPLACE PROCEDURE generate_weekly_route_report(days_back INT) +RETURNS TABLE(route_name VARCHAR, event_count BIGINT, avg_risk DOUBLE) +LANGUAGE SQL +AS +$$ +BEGIN + RETURN TABLE ( + SELECT s.route_name, + COUNT(*) AS event_count, + AVG(e.risk_score) AS avg_risk + FROM frame_events e + JOIN drive_sessions s USING (session_id) + WHERE e.captured_at >= DATEADD('day', -days_back, CURRENT_TIMESTAMP) + GROUP BY s.route_name + ); +END; +$$; + +CALL PROCEDURE generate_weekly_route_report(28); +``` + +Use the returned result set directly in notebooks, ETL tasks, or automated alerts. Learn more: [Stored procedure scripting](/sql/stored-procedure-scripting). 
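+
+Before wiring the procedure into a schedule, it can be worth confirming that the aggregating index from section 6 still backs its inner query. A quick sketch (exact plan node names may vary by Databend version):
+
+```sql
+-- Expect an AggregatingIndex node instead of a full frame_events scan
+EXPLAIN
+SELECT s.route_name,
+       COUNT(*) AS event_count,
+       AVG(e.risk_score) AS avg_risk
+FROM frame_events e
+JOIN drive_sessions s USING (session_id)
+GROUP BY s.route_name;
+```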
+ +--- + +You now have a full loop: ingest session data, filter, join, aggregate, accelerate heavy queries, trend over time, and publish. Swap filters or joins to adapt the same recipe to other smart-driving KPIs like driver scoring, sensor degradation, or algorithm comparisons. diff --git a/docs/en/guides/54-query/01-combining-data/_category_.json b/docs/en/guides/54-query/01-combining-data/_category_.json deleted file mode 100644 index bcf9b2c01b..0000000000 --- a/docs/en/guides/54-query/01-combining-data/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Combining Data", - "position": 2 -} \ No newline at end of file diff --git a/docs/en/guides/54-query/01-combining-data/cte.md b/docs/en/guides/54-query/01-combining-data/cte.md deleted file mode 100644 index 7998639958..0000000000 --- a/docs/en/guides/54-query/01-combining-data/cte.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: Common Table Expressions (CTE) ---- - -CTEs break complex queries into simple, readable steps using `WITH`. - -## Quick Start - -```sql --- Instead of complex nested queries -WITH high_earners AS ( - SELECT * FROM employees WHERE salary > 70000 -) -SELECT department, COUNT(*) -FROM high_earners -GROUP BY department; -``` - -**Result**: Clean, readable code that's easy to debug. - -## When to Use CTE - -**✅ Use CTE when:** -- Query has multiple steps -- You need the same subquery twice -- Query is hard to read - -**❌ Skip CTE when:** -- Simple one-step query -- Performance is critical - -## Three Essential Patterns - -### 1. Filter → Analyze -```sql -WITH filtered_data AS ( - SELECT * FROM sales WHERE date >= '2023-01-01' -) -SELECT product, SUM(amount) -FROM filtered_data -GROUP BY product; -``` - -### 2. Multiple Steps -```sql -WITH step1 AS ( - SELECT department, AVG(salary) as avg_sal FROM employees GROUP BY department -), -step2 AS ( - SELECT * FROM step1 WHERE avg_sal > 70000 -) -SELECT * FROM step2; -``` - -### 3. Use Same Data Twice -```sql -WITH dept_stats AS ( - SELECT department, AVG(salary) as avg_sal FROM employees GROUP BY department -) -SELECT d1.department, d1.avg_sal -FROM dept_stats d1 -JOIN dept_stats d2 ON d1.avg_sal > d2.avg_sal; -``` - -## Advanced: Recursive CTE - -Recursive CTEs solve problems where you need to repeatedly apply the same logic. Think of it like climbing stairs - you start at step 1, then keep going up one step at a time. - -```sql --- Generate a sequence (useful for reports, testing, or filling gaps) -WITH RECURSIVE countdown AS ( - -- Base case: where we start - SELECT 10 as num, 'Starting countdown' as message - - UNION ALL - - -- Recursive case: what we do repeatedly - SELECT num - 1, CONCAT('Count: ', CAST(num - 1 AS VARCHAR)) - FROM countdown - WHERE num > 1 -- Stop condition: when to stop -) -SELECT num, message FROM countdown; -``` - -**Result**: Numbers from 10 down to 1 with messages. - -**Real-world example**: Generate missing months for a sales report -- Start: January 2024 -- Repeat: Add next month -- Stop: When we reach December 2024 - -**Key insight**: Recursion = Start somewhere + Repeat an action + Know when to stop. - -**That's it.** Start with simple CTEs, add complexity only when needed. 
\ No newline at end of file diff --git a/docs/en/guides/54-query/01-combining-data/index.md b/docs/en/guides/54-query/01-combining-data/index.md deleted file mode 100644 index e85749e608..0000000000 --- a/docs/en/guides/54-query/01-combining-data/index.md +++ /dev/null @@ -1,99 +0,0 @@ ---- -title: Combining Data ---- - -Learn to combine data from multiple sources using JOINs, CTEs, and advanced query structures. - -## Core Concepts - -### [JOINs](./joins.md) -Connect data from multiple tables -```sql --- Inner join (most common) -SELECT e.name, d.department_name -FROM employees e -JOIN departments d ON e.dept_id = d.id; -``` -**Covers**: Inner, Left, Right, Full Outer, Semi, Anti, and AsOf joins - -### [Common Table Expressions (CTEs)](./cte.md) -Structure complex queries with WITH clauses -```sql --- Break complex logic into steps -WITH high_performers AS ( - SELECT * FROM employees WHERE rating > 4.0 -) -SELECT department, COUNT(*) -FROM high_performers -GROUP BY department; -``` -**Covers**: Basic CTEs, Recursive CTEs, Materialized CTEs - -## Advanced Combinations - -### Subqueries -```sql --- Correlated subquery -SELECT name, salary, - (SELECT AVG(salary) FROM employees e2 - WHERE e2.department = e1.department) as dept_avg -FROM employees e1; - --- EXISTS clause -SELECT * FROM customers c -WHERE EXISTS ( - SELECT 1 FROM orders o - WHERE o.customer_id = c.id -); -``` - -### Set Operations -```sql --- Combine results from multiple queries -SELECT name FROM employees WHERE department = 'Sales' -UNION -SELECT name FROM contractors WHERE active = true; - --- Other set operations -INTERSECT -- Common rows only -EXCEPT -- Rows in first query but not second -``` - -## Practical Patterns - -### Data Enrichment -```sql --- Add lookup data to main table -WITH region_lookup AS ( - SELECT zip_code, region_name - FROM zip_regions -) -SELECT - c.customer_name, - c.zip_code, - r.region_name, - SUM(o.amount) as total_sales -FROM customers c -LEFT JOIN region_lookup r ON c.zip_code = r.zip_code -LEFT JOIN orders o ON c.id = o.customer_id -GROUP BY c.customer_name, c.zip_code, r.region_name; -``` - -### Hierarchical Data -```sql --- Recursive CTE for organizational structure -WITH RECURSIVE org_chart AS ( - -- Base case: top-level managers - SELECT id, name, manager_id, 1 as level - FROM employees WHERE manager_id IS NULL - - UNION ALL - - -- Recursive case: add direct reports - SELECT e.id, e.name, e.manager_id, o.level + 1 - FROM employees e - JOIN org_chart o ON e.manager_id = o.id -) -SELECT * FROM org_chart ORDER BY level, name; -``` - diff --git a/docs/en/guides/54-query/01-combining-data/joins.md b/docs/en/guides/54-query/01-combining-data/joins.md deleted file mode 100644 index 92771d9cc2..0000000000 --- a/docs/en/guides/54-query/01-combining-data/joins.md +++ /dev/null @@ -1,200 +0,0 @@ ---- -title: JOINs ---- - -A JOIN combines rows from two tables to create a new combined row that can be used in the query. - -## Introduction - -JOINs are useful when data is split across related tables. For example, one table holds employee information, and another holds department details. 
- -Let's look at our sample data: - -**Employees Table:** -```sql -SELECT * FROM employees ORDER BY id; -``` - -| id | name | department | salary | -|----|---------|-------------|----------| -| 1 | Alice | Engineering | 75000.00 | -| 2 | Bob | Engineering | 80000.00 | -| 3 | Carol | Marketing | 65000.00 | -| 4 | David | Sales | 70000.00 | -| 5 | Eve | Marketing | 68000.00 | - -**Departments Table:** -```sql -SELECT * FROM departments ORDER BY id; -``` - -| id | name | budget | -|----|-------------|------------| -| 1 | Engineering | 1000000.00 | -| 2 | Marketing | 500000.00 | -| 3 | Sales | 750000.00 | - -The two tables share a common relationship: `employees.department` matches `departments.name`. JOINs let us combine this related data. - -## Types of JOINs - -### INNER JOIN - -An INNER JOIN pairs each row in one table with matching rows in the other table. - -```sql --- Get employees with their department budgets -SELECT e.name, e.salary, d.name as department_name, d.budget -FROM employees e -INNER JOIN departments d ON e.department = d.name -ORDER BY e.name; -``` - -**Output:** -| name | salary | department_name | budget | -|-------|----------|----------------|------------| -| Alice | 75000.00 | Engineering | 1000000.00 | -| Bob | 80000.00 | Engineering | 1000000.00 | -| Carol | 65000.00 | Marketing | 500000.00 | -| David | 70000.00 | Sales | 750000.00 | -| Eve | 68000.00 | Marketing | 500000.00 | - -The output contains only rows where there's a match between `employees.department` and `departments.name`. - -### LEFT JOIN - -A LEFT JOIN returns all rows from the left table, with matching data from the right table. If there's no match, the right columns contain NULL. - -```sql --- Get all employees, show department budget if available -SELECT e.name, e.department, d.budget -FROM employees e -LEFT JOIN departments d ON e.department = d.name -ORDER BY e.name; -``` - -**Output:** -| name | department | budget | -|-------|-------------|------------| -| Alice | Engineering | 500000.00 | -| Bob | Engineering | 500000.00 | -| Carol | Marketing | 200000.00 | -| David | Sales | 300000.00 | -| Eve | Marketing | 200000.00 | - -All employees are shown, even if their department doesn't exist (budget would be NULL). - -### RIGHT JOIN - -A RIGHT JOIN returns all rows from the right table, with matching data from the left table. - -```sql --- Get all departments, show employees if any -SELECT d.name as department_name, e.name as employee_name -FROM employees e -RIGHT JOIN departments d ON e.department = d.name -ORDER BY d.name, e.name; -``` - -**Output:** -| department_name | employee_name | -|----------------|---------------| -| Engineering | Alice | -| Engineering | Bob | -| Marketing | Carol | -| Marketing | Eve | -| Sales | David | - -All departments are shown, even if they have no employees (employee_name would be NULL). - -### FULL JOIN - -A FULL JOIN returns all rows from both tables, matching where possible. - -```sql --- Get all employees and all departments -SELECT e.name as employee_name, d.name as department_name -FROM employees e -FULL JOIN departments d ON e.department = d.name -ORDER BY e.name, d.name; -``` - -**Output:** -| employee_name | department_name | -|--------------|----------------| -| Alice | Engineering | -| Bob | Engineering | -| Carol | Marketing | -| David | Sales | -| Eve | Marketing | - -Shows all employees and departments, with NULLs where there's no match. 
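-
-With the sample data above, every employee matches a department, so this FULL JOIN output contains no NULLs. Adding a hypothetical department with no employees shows the unmatched side:
-
-```sql
--- Hypothetical 'HR' row to demonstrate the NULL side of a FULL JOIN
-INSERT INTO departments VALUES (4, 'HR', 250000.00);
-
-SELECT e.name AS employee_name, d.name AS department_name
-FROM employees e
-FULL JOIN departments d ON e.department = d.name
-ORDER BY employee_name;
--- Returns the five matched rows plus one extra: NULL | HR
-```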
- -### CROSS JOIN - -A CROSS JOIN creates a Cartesian product - every row from the first table combined with every row from the second table. - -**Warning**: This can create very large result sets. Use with caution. - -```sql --- Every employee paired with every department (rarely useful) -SELECT e.name, d.name as department -FROM employees e -CROSS JOIN departments d -ORDER BY e.name, d.name; -``` - -**Output (partial - 15 rows total):** -| name | department | -|-------|-------------| -| Alice | Engineering | -| Alice | Marketing | -| Alice | Sales | -| Bob | Engineering | -| Bob | Marketing | -| Bob | Sales | -| Carol | Engineering | -| ... | ... | - -**Result**: 5 employees × 3 departments = 15 rows total. - -## JOIN Implementation - -### Using ON Clause (Recommended) - -```sql -SELECT e.name, d.budget -FROM employees e -JOIN departments d ON e.department = d.name; -``` - -### Using WHERE Clause (Legacy) - -```sql -SELECT e.name, d.budget -FROM employees e, departments d -WHERE e.department = d.name; -``` - -**Recommendation**: Use the ON clause syntax because it's clearer and handles outer joins correctly. - -## Multiple Table JOINs - -You can chain JOINs together to combine data from more than two tables: - -```sql --- Employees with department budgets and project info (if projects table existed) -SELECT e.name, d.name as department, d.budget -FROM employees e -JOIN departments d ON e.department = d.name -JOIN projects p ON d.id = p.department_id -WHERE p.status = 'Active'; -``` - -## When to Use Each JOIN Type - -- **INNER JOIN**: When you only want matching records from both tables -- **LEFT JOIN**: When you want all records from the left table, matched where possible -- **RIGHT JOIN**: When you want all records from the right table, matched where possible -- **FULL JOIN**: When you want all records from both tables -- **CROSS JOIN**: Rarely used; only when you specifically need a Cartesian product diff --git a/docs/en/guides/54-query/01-combining-data/subqueries.md b/docs/en/guides/54-query/01-combining-data/subqueries.md deleted file mode 100644 index f55e36ca35..0000000000 --- a/docs/en/guides/54-query/01-combining-data/subqueries.md +++ /dev/null @@ -1,158 +0,0 @@ ---- -title: Subqueries ---- - -A subquery is a query inside another query. Use subqueries to filter, compare, or compute values that depend on data from the main query. - -## Quick Start - -```sql --- Find employees earning above department average -SELECT name, salary, department -FROM employees -WHERE salary > ( - SELECT AVG(salary) - FROM employees AS e2 - WHERE e2.department = employees.department -); -``` - -**Result**: Employees who earn more than their department's average salary. - -## Types of Subqueries - -### 1. Scalar Subqueries (Single Value) -```sql --- Compare to overall average -SELECT name, salary, - (SELECT AVG(salary) FROM employees) AS company_avg -FROM employees -WHERE salary > (SELECT AVG(salary) FROM employees); -``` - -**Returns**: Single value (one row, one column). - -### 2. Table Subqueries (Multiple Rows) -```sql --- Filter by department conditions -SELECT name, department -FROM employees -WHERE department IN ( - SELECT name - FROM departments - WHERE budget > 500000 -); -``` - -**Returns**: Multiple rows, used with IN, EXISTS, ANY, ALL. 
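-
-`IN` behaves like `= ANY` over the subquery's rows, so the query above can be written either way. A sketch of the `= ANY` form:
-
-```sql
--- Same result as the IN version: match any high-budget department name
-SELECT name, department
-FROM employees
-WHERE department = ANY (
-    SELECT name
-    FROM departments
-    WHERE budget > 500000
-);
-```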
- -## Subquery Placement - -### WHERE Clause - Filtering -```sql --- Employees in high-budget departments -SELECT name, salary -FROM employees -WHERE department IN ( - SELECT name FROM departments WHERE budget > 500000 -); -``` - -### FROM Clause - Data Source -```sql --- Analyze high earners by department -SELECT department, AVG(salary) as avg_salary -FROM ( - SELECT * FROM employees WHERE salary > 70000 -) AS high_earners -GROUP BY department; -``` - -### SELECT Clause - Computed Columns -```sql --- Show salary vs department average -SELECT name, salary, - (SELECT AVG(salary) - FROM employees e2 - WHERE e2.department = e1.department) as dept_avg -FROM employees e1; -``` - -## Correlated vs Uncorrelated - -### Uncorrelated - Independent -```sql --- Same subquery runs once -SELECT name, salary -FROM employees -WHERE salary > (SELECT AVG(salary) FROM employees); -``` - -**Performance**: Subquery executes once, result reused. - -### Correlated - Dependent -```sql --- Subquery runs for each main row -SELECT name, salary, department -FROM employees e1 -WHERE salary > ( - SELECT AVG(salary) - FROM employees e2 - WHERE e2.department = e1.department -); -``` - -**Performance**: Subquery executes for each outer row. - -## Common Patterns - -### EXISTS - Check for Related Data -```sql --- Employees who have projects -SELECT name, department -FROM employees e -WHERE EXISTS ( - SELECT 1 FROM projects p WHERE p.employee_id = e.id -); -``` - -### NOT EXISTS - Check for Missing Data -```sql --- Employees without projects -SELECT name, department -FROM employees e -WHERE NOT EXISTS ( - SELECT 1 FROM projects p WHERE p.employee_id = e.id -); -``` - -### ANY/ALL - Multiple Comparisons -```sql --- Employees earning more than ANY marketing employee -SELECT name, salary -FROM employees -WHERE salary > ANY ( - SELECT salary FROM employees WHERE department = 'Marketing' -); - --- Employees earning more than ALL marketing employees -SELECT name, salary -FROM employees -WHERE salary > ALL ( - SELECT salary FROM employees WHERE department = 'Marketing' -); -``` - -## When to Use Subqueries vs JOINs - -**✅ Use Subqueries when:** -- Filtering based on aggregate conditions -- Checking existence/non-existence -- Need computed values in SELECT -- Logic is clearer as nested steps - -**✅ Use JOINs when:** -- Need columns from multiple tables -- Better performance for large datasets -- Combining data rather than filtering - diff --git a/docs/en/guides/54-query/01-json-search.md b/docs/en/guides/54-query/01-json-search.md new file mode 100644 index 0000000000..0072c101ed --- /dev/null +++ b/docs/en/guides/54-query/01-json-search.md @@ -0,0 +1,140 @@ +--- +title: JSON & Search +--- + +> **Scenario:** EverDrive Smart Vision’s perception services emit JSON payloads for every observed frame, and safety analysts need to search detections without moving the data out of Databend. + +EverDrive’s perception pipeline emits JSON payloads that we query with Elasticsearch-style syntax. By storing payloads as VARIANT and declaring an inverted index during table creation, Databend lets you run Lucene `QUERY` filters directly on the data. + +## 1. CREATE SAMPLE TABLE +Each frame carries structured metadata from perception models (bounding boxes, velocities, classifications). 
+ +```sql +CREATE OR REPLACE TABLE frame_payloads ( + frame_id VARCHAR, + run_stage VARCHAR, + payload VARIANT, + logged_at TIMESTAMP, + INVERTED INDEX idx_frame_payloads(payload) +); + +INSERT INTO frame_payloads VALUES + ('FRAME-0001', 'detection', PARSE_JSON('{ + "objects": [ + {"type":"vehicle","bbox":[545,220,630,380],"confidence":0.94}, + {"type":"pedestrian","bbox":[710,200,765,350],"confidence":0.88} + ], + "ego": {"speed_kmh": 32.5, "accel": -2.1} + }'), '2024-08-01 09:32:16'), + ('FRAME-0002', 'detection', PARSE_JSON('{ + "objects": [ + {"type":"pedestrian","bbox":[620,210,670,360],"confidence":0.91} + ], + "scene": {"lighting":"daytime","weather":"sunny"} + }'), '2024-08-01 09:48:04'), + ('FRAME-0003', 'tracking', PARSE_JSON('{ + "objects": [ + {"type":"vehicle","speed_kmh": 18.0,"distance_m": 6.2}, + {"type":"emergency_vehicle","sirens":true} + ], + "scene": {"lighting":"night","visibility":"low"} + }'), '2024-08-02 20:29:42'); +``` + +## 2. SELECT JSON Paths +Peek into the payload to confirm the structure. + +```sql +SELECT frame_id, + payload['objects'][0]['type']::STRING AS first_object, + payload['ego']['speed_kmh']::DOUBLE AS ego_speed, + payload['scene']['lighting']::STRING AS lighting +FROM frame_payloads +ORDER BY logged_at; +``` + +Casting with `::STRING` / `::DOUBLE` exposes JSON values to regular SQL filters. Databend also supports Elasticsearch-style search on top of this data via the `QUERY` function—reference variant fields by prefixing them with the column name (for example `payload.objects.type`). More tips: [Semi-structured data](/guides/load-data/load-semistructured/load-ndjson). + +--- + +## 3. Elasticsearch-style Search +`QUERY` uses Elasticsearch/Lucene syntax, so you can combine boolean logic, ranges, boosts, and lists. Below are a few patterns on the EverDrive payloads: + +### Array Match +Find frames that detected a pedestrian: + +```sql +SELECT frame_id +FROM frame_payloads +WHERE QUERY('payload.objects.type:pedestrian') +ORDER BY logged_at DESC +LIMIT 10; +``` + +### Boolean AND +Vehicle travelling faster than 30 km/h **and** a pedestrian detected: + +```sql +SELECT frame_id, + payload['ego']['speed_kmh']::DOUBLE AS ego_speed +FROM frame_payloads +WHERE QUERY('payload.objects.type:pedestrian AND payload.ego.speed_kmh:[30 TO *]') +ORDER BY ego_speed DESC; +``` + +### Boolean OR / List +Night drives encountering either an emergency vehicle or a cyclist: + +```sql +SELECT frame_id +FROM frame_payloads +WHERE QUERY('payload.scene.lighting:night AND payload.objects.type:(emergency_vehicle OR cyclist)'); +``` + +### Numeric Ranges +Speed between 10–25 km/h (inclusive) or strictly between 25–40 km/h: + +```sql +SELECT frame_id, + payload['ego']['speed_kmh'] AS speed +FROM frame_payloads +WHERE QUERY('payload.ego.speed_kmh:[10 TO 25] OR payload.ego.speed_kmh:{25 TO 40}') +ORDER BY speed; +``` + +### Boosting +Prioritise frames where both a pedestrian and a vehicle appear, but emphasise the pedestrian term: + +```sql +SELECT frame_id, + SCORE() AS relevance +FROM frame_payloads +WHERE QUERY('payload.objects.type:pedestrian^2 AND payload.objects.type:vehicle') +ORDER BY relevance DESC +LIMIT 10; +``` + +See [Search functions](/sql/sql-functions/search-functions) for complete Elasticsearch syntax supported by `QUERY`, `SCORE()`, and related helpers. + +--- + +## 4. Cross-Reference Frame Events +Join query results back to the frame-level risk scores created in the analytics guide. 
+ +```sql +WITH risky_frames AS ( + SELECT frame_id, + payload['ego']['speed_kmh']::DOUBLE AS ego_speed + FROM frame_payloads + WHERE QUERY('payload.objects.type:pedestrian AND payload.ego.speed_kmh:[30 TO *]') +) +SELECT r.frame_id, + e.event_type, + e.risk_score, + r.ego_speed +FROM risky_frames r +JOIN frame_events e USING (frame_id) +ORDER BY e.risk_score DESC; +``` + +Because `frame_id` is shared across tables, you jump from raw payloads to curated analytics instantly. diff --git a/docs/en/guides/54-query/02-advanced/_category_.json b/docs/en/guides/54-query/02-advanced/_category_.json deleted file mode 100644 index dee56ac072..0000000000 --- a/docs/en/guides/54-query/02-advanced/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Advanced Features", - "position": 3 -} \ No newline at end of file diff --git a/docs/en/guides/54-query/02-advanced/external-function.md b/docs/en/guides/54-query/02-advanced/external-function.md deleted file mode 100644 index 2037940e1f..0000000000 --- a/docs/en/guides/54-query/02-advanced/external-function.md +++ /dev/null @@ -1,224 +0,0 @@ ---- -title: "External Functions in Databend Cloud" -sidebar_label: "External Function" -sidebar_position: 2 ---- - -External functions in Databend allow you to define custom operations for processing data using external servers written in programming languages like Python. These functions enable you to extend Databend's capabilities by integrating custom logic, leveraging external libraries, and handling complex processing tasks. Key features of external functions include: - -- **Scalability**: Ideal for complex and resource-intensive data operations. -- **External Libraries**: Leverage additional functionality through external libraries and dependencies. -- **Advanced Logic**: Implement sophisticated data processing logic for complex scenarios. - -## Supported Programming Languages - -The following table lists the supported languages and the required libraries for creating external functions in Databend: - -| Language | Required Library | -| -------- | ----------------------------------------------------- | -| Python | [databend-udf](https://pypi.org/project/databend-udf) | - -## Managing External Functions - -You can manage external functions using SQL commands such as `CREATE FUNCTION`, `DROP FUNCTION`, and `SHOW FUNCTIONS`. For more details, see [External Function](/sql/sql-commands/ddl/external-function/). - -## Configuring External Functions in Databend Cloud - -To use external functions in Databend Cloud, you need to **allowlist the addresses of your external function servers**. The external function server must be accessible via a domain name over HTTPS. Please contact Databend Cloud support to add your allowed UDF server addresses: - -1. Navigate to **Support** > **Create New Ticket** in the Databend Cloud console. -2. Provide the external server addresses (with HTTPS domain names) you wish to allowlist. -3. Submit the ticket and await confirmation from the support team. - -## Usage Example: Creating an External Function in Python - -This section demonstrates how to create an external function using Python. - -### 1. Install the Required Library - -Install the [databend-udf](https://pypi.org/project/databend-udf) library using `pip`: - -```bash -pip install databend-udf -``` - -### 2. Define Your Function - -Create a Python file (e.g., `external_function.py`) and define your external function. 
The following example defines an external server in Python that exposes a custom function `gcd` for calculating the greatest common divisor of two integers:
-
-```python
-from databend_udf import udf, UDFServer
-
-@udf(
-    input_types=["INT", "INT"],
-    result_type="INT",
-    skip_null=True,
-)
-def gcd(x: int, y: int) -> int:
-    while y != 0:
-        x, y = y, x % y
-    return x
-
-if __name__ == '__main__':
-    # Create an external server listening at '0.0.0.0:8815'
-    server = UDFServer("0.0.0.0:8815")
-    # Add the defined function
-    server.add_function(gcd)
-    # Start the external server
-    server.serve()
-```
-
-**Explanation of `@udf` Decorator Parameters:**
-
-| Parameter     | Description                                                                                                                          |
-| ------------- | ------------------------------------------------------------------------------------------------------------------------------------ |
-| `input_types` | A list of strings specifying the input data types (e.g., `["INT", "VARCHAR"]`).                                                       |
-| `result_type` | A string specifying the return value type (e.g., `"INT"`).                                                                            |
-| `name`        | (Optional) Custom name for the function. If not provided, the original function name is used.                                          |
-| `io_threads`  | Number of I/O threads used per data chunk for I/O-bound functions.                                                                     |
-| `skip_null`   | If set to `True`, NULL values are not passed to the function, and the corresponding return value is set to NULL. Default is `False`.   |
-
-**Data Type Mappings Between Databend and Python:**
-
-| Databend Type       | Python Type         |
-| ------------------- | ------------------- |
-| BOOLEAN             | `bool`              |
-| TINYINT (UNSIGNED)  | `int`               |
-| SMALLINT (UNSIGNED) | `int`               |
-| INT (UNSIGNED)      | `int`               |
-| BIGINT (UNSIGNED)   | `int`               |
-| FLOAT               | `float`             |
-| DOUBLE              | `float`             |
-| DECIMAL             | `decimal.Decimal`   |
-| DATE                | `datetime.date`     |
-| TIMESTAMP           | `datetime.datetime` |
-| VARCHAR             | `str`               |
-| VARIANT             | `any`               |
-| MAP(K,V)            | `dict`              |
-| ARRAY(T)            | `list[T]`           |
-| TUPLE(T,...)        | `tuple(T,...)`      |
-
-### 3. Run the External Server
-
-Run the Python file to start the external server:
-
-```bash
-python3 external_function.py
-```
-
-**Note:** Ensure that the server is accessible from Databend Cloud and that the address is allowlisted. If not already done, contact Databend Cloud support to add the server address to the allowlist.
-
-### 4. Register the Function in Databend Cloud
-
-Register the function `gcd` in Databend using the `CREATE FUNCTION` statement:
-
-```sql
-CREATE FUNCTION gcd (INT, INT)
-    RETURNS INT
-    LANGUAGE PYTHON
-    HANDLER = 'gcd'
-    ADDRESS = '<your-server-address>';
-```
-
-- Replace `<your-server-address>` with the actual address of your external server that has been allowlisted in Databend Cloud (must be an HTTPS domain).
-- The `HANDLER` specifies the name of the function as defined in your Python code.
-- The `ADDRESS` should match the address where your external server is running and must be allowlisted by Databend Cloud.
-
-**Example:**
-
-```sql
-CREATE FUNCTION gcd (INT, INT)
-    RETURNS INT
-    LANGUAGE PYTHON
-    HANDLER = 'gcd'
-    ADDRESS = 'https://your-server-address';
-```
-
-**Important:** Before executing this statement, ensure that `'https://your-server-address'` is allowlisted in Databend Cloud by contacting support.
-
-You can now use the external function `gcd` in your SQL queries:
-
-```sql
-SELECT gcd(48, 18); -- Returns 6
-```
-
-## Load Balancing External Functions
-
-When deploying multiple external function servers, you can implement load balancing based on function names. Databend includes a `X-DATABEND-FUNCTION` header in each UDF request, which contains the lowercased function name being called.
This header can be used to route requests to different backend servers. - -### Using Nginx for Function-Based Routing - -Here's an example of how to configure Nginx to route different UDF requests to specific backend servers: - -```nginx -# Define upstream servers for different UDF functions -upstream udf_default { - server 10.0.0.1:8080; - server 10.0.0.2:8080 backup; -} - -upstream udf_math_functions { - server 10.0.1.1:8080; - server 10.0.1.2:8080 backup; -} - -upstream udf_string_functions { - server 10.0.2.1:8080; - server 10.0.2.2:8080 backup; -} - -upstream udf_database_functions { - server 10.0.3.1:8080; - server 10.0.3.2:8080 backup; -} - -# Map function names to backend servers -map $http_x_databend_function $udf_backend { - default "udf_default"; - gcd "udf_math_functions"; - lcm "udf_math_functions"; - string_* "udf_string_functions"; - *_db "udf_database_functions"; -} - -# Server configuration -server { - listen 443 ssl; - server_name udf.example.com; - - # SSL configuration - ssl_certificate /etc/nginx/ssl/udf.example.com.crt; - ssl_certificate_key /etc/nginx/ssl/udf.example.com.key; - - # Security headers - add_header Strict-Transport-Security "max-age=31536000" always; - - location / { - proxy_pass http://$udf_backend; - proxy_http_version 1.1; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - - # Timeouts - proxy_connect_timeout 60s; - proxy_send_timeout 60s; - proxy_read_timeout 60s; - } -} -``` - -When registering your functions in Databend, use the Nginx server's domain: - -```sql -CREATE FUNCTION gcd (INT, INT) - RETURNS INT - LANGUAGE PYTHON - HANDLER = 'gcd' - ADDRESS = 'https://udf.example.com'; -``` - -## Conclusion - -External functions in Databend Cloud provide a powerful way to extend the functionality of your data processing pipelines by integrating custom code written in languages like Python. By following the steps outlined above, you can create and use external functions to handle complex processing tasks, leverage external libraries, and implement advanced logic. diff --git a/docs/en/guides/54-query/02-advanced/index.md b/docs/en/guides/54-query/02-advanced/index.md deleted file mode 100644 index 33e5c04b9e..0000000000 --- a/docs/en/guides/54-query/02-advanced/index.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Advanced Features ---- - -Extend Databend's capabilities with custom functions, external integrations, stored procedures, and sequences. 
- -## [User-Defined Functions (UDF)](./udf.md) -Create reusable custom functions in SQL, Python, or JavaScript -```sql --- SQL function -CREATE FUNCTION calculate_bonus(salary FLOAT, rating FLOAT) -RETURNS FLOAT AS $$ salary * rating * 0.1 $$; - --- Use it -SELECT name, calculate_bonus(salary, performance_rating) as bonus -FROM employees; -``` - -## [External Functions](./external-function.md) -Integrate with external services and APIs -```sql --- Call external ML model -SELECT customer_id, predict_churn(age, tenure, usage) as churn_risk -FROM customers; -``` - -## [Stored Procedures](./stored-procedure.md) -Multi-step operations with control flow -```sql --- Complex business logic -CALL monthly_report_generation('2023-12'); -``` - -## [Sequences](./sequences.md) -Generate unique identifiers and sequential values -```sql --- Create auto-incrementing ID -CREATE SEQUENCE user_id_seq; -INSERT INTO users VALUES (NEXTVAL(user_id_seq), 'John Doe'); -``` - - -## Best Practices - -### Function Design -- **Keep functions pure** - Same input always produces same output -- **Handle NULLs** - Consider NULL input handling -- **Use appropriate types** - Match input/output types to usage -- **Document well** - Clear parameter and return descriptions - -### Performance Considerations -- **UDF overhead** - SQL functions are fastest, Python/JS slower -- **Batch operations** - Process data in batches when possible -- **Resource limits** - Monitor memory usage for complex functions - -### Security -- **Validate inputs** - Check parameters in UDF code -- **Least privilege** - External connections should have minimal permissions -- **Audit usage** - Monitor UDF and external function calls \ No newline at end of file diff --git a/docs/en/guides/54-query/02-advanced/sequences.md b/docs/en/guides/54-query/02-advanced/sequences.md deleted file mode 100644 index f2b3f8fc40..0000000000 --- a/docs/en/guides/54-query/02-advanced/sequences.md +++ /dev/null @@ -1,134 +0,0 @@ ---- -title: Sequences -sidebar_position: 4 ---- - -Sequences generate unique, sequential numeric values, commonly used for primary keys and auto-incrementing identifiers. - -## What are Sequences? - -Sequences provide a thread-safe way to generate unique numbers across concurrent operations. Unlike auto-increment columns, sequences are database objects that can be used across multiple tables. 
- -## Creating Sequences - -### Basic Sequence -```sql --- Create a simple sequence -CREATE SEQUENCE user_id_seq; - --- Use the sequence -SELECT NEXTVAL(user_id_seq); -- Returns: 1 -SELECT NEXTVAL(user_id_seq); -- Returns: 2 -SELECT NEXTVAL(user_id_seq); -- Returns: 3 -``` - -### Sequence with Options -```sql --- Create sequence with custom settings -CREATE SEQUENCE order_id_seq - START = 1000 - INCREMENT = 1 - MINVALUE = 1000 - MAXVALUE = 999999999 - CACHE = 50; -``` - -## Using Sequences - -### In INSERT Statements -```sql --- Create table with sequence-generated IDs -CREATE TABLE users ( - id BIGINT, - name VARCHAR(100), - email VARCHAR(100) -); - --- Insert with sequence value -INSERT INTO users VALUES - (NEXTVAL(user_id_seq), 'John Doe', 'john@example.com'), - (NEXTVAL(user_id_seq), 'Jane Smith', 'jane@example.com'); -``` - -### Current and Next Values -```sql --- Get next value (advances sequence) -SELECT NEXTVAL(user_id_seq); - --- Get current value (doesn't advance sequence) -SELECT CURRVAL(user_id_seq); -``` - -## Sequence Functions - -| Function | Description | Example | -|----------|-------------|---------| -| `NEXTVAL(seq)` | Get next value and advance sequence | `NEXTVAL(user_id_seq)` | -| `CURRVAL(seq)` | Get current value without advancing | `CURRVAL(user_id_seq)` | - -## Managing Sequences - -### View Sequences -```sql --- Show all sequences -SHOW SEQUENCES; - --- Describe specific sequence -DESC SEQUENCE user_id_seq; -``` - -### Modify Sequences -```sql --- Reset sequence to specific value -ALTER SEQUENCE user_id_seq RESTART = 5000; - --- Change increment value -ALTER SEQUENCE user_id_seq INCREMENT = 10; -``` - -### Drop Sequences -```sql --- Remove sequence -DROP SEQUENCE user_id_seq; -``` - -## Best Practices - -### Performance Optimization -- **Use CACHE** - Improves performance by pre-allocating values -- **Appropriate INCREMENT** - Match your application needs -- **Monitor gaps** - Cached values may create gaps if server restarts - -### Common Patterns -```sql --- Auto-incrementing primary key pattern -CREATE SEQUENCE pk_seq START = 1 INCREMENT = 1 CACHE = 100; - -CREATE TABLE products ( - id BIGINT DEFAULT NEXTVAL(pk_seq), - name VARCHAR(100), - price DECIMAL(10,2) -); - --- Order number pattern (readable IDs) -CREATE SEQUENCE order_seq START = 10000 INCREMENT = 1; - -INSERT INTO orders VALUES - (NEXTVAL(order_seq), customer_id, order_date); -``` - -## vs Auto-Increment Columns - -| Feature | Sequences | Auto-Increment | -|---------|-----------|----------------| -| **Reusability** | ✅ Multiple tables | ❌ Single column | -| **Control** | ✅ Full control | ❌ Limited options | -| **Gaps** | ✅ Predictable | ❌ May have gaps | -| **Performance** | ✅ Cacheable | ✅ Optimized | - -## Common Use Cases - -1. **Primary Keys** - Unique identifiers across tables -2. **Order Numbers** - Sequential business identifiers -3. **Version Numbers** - Document or record versioning -4. 
**Batch IDs** - Processing batch identification \ No newline at end of file diff --git a/docs/en/guides/54-query/02-advanced/stored-procedure.md b/docs/en/guides/54-query/02-advanced/stored-procedure.md deleted file mode 100644 index 37859ab917..0000000000 --- a/docs/en/guides/54-query/02-advanced/stored-procedure.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: Stored Procedure -sidebar_position: 3 ---- - -A stored procedure is a set of executable commands or logic blocks stored within the database, written in SQL or other programming languages, designed to be reusable for efficiently performing specific tasks or operations. - -## Supported Languages - -**Databend currently supports [SQL Scripting](/sql/stored-procedure-scripting/) only**. Using SQL scripting, users can define procedures with control flow constructs like loops (FOR, WHILE, REPEAT) and conditionals (IF, CASE), enabling complex logic and effective multi-step operations. - -## Limitations - -The following limitations apply when working with stored procedures: - -- Stored procedures are an experimental feature. Before working with them, set `enable_experimental_procedure` to 1; - - ```sql - SET enable_experimental_procedure = 1; - ``` - -- Stored procedures return results as strings, regardless of the specified return type, without enforcing the declared type on the returned value. - -## Managing Stored Procedures - -Databend offers a range of commands for managing stored procedures. For more details, see [Stored Procedure](/sql/sql-commands/ddl/procedure/). - -## Usage Example - -Suppose we want to calculate the sum of all even numbers within a given range. This stored procedure accepts a starting value start_val and an ending value end_val and calculates the sum of all even numbers within this range. - -```sql -SET enable_experimental_procedure = 1; - -CREATE PROCEDURE sum_even_numbers(start_val UInt8, end_val UInt8) -RETURNS UInt8 NOT NULL -LANGUAGE SQL -COMMENT='Calculate the sum of all even numbers' -AS $$ -BEGIN - LET sum := 0; - FOR i IN start_val TO end_val DO - IF i % 2 = 0 THEN - sum := sum + i; - END IF; - END FOR; - - RETURN sum; -END; -$$; -``` - -If we want to calculate the sum of all even numbers from 1 to 10, we can call the procedure as follows: - -```sql -CALL PROCEDURE sum_even_numbers(1, 10); - --- Result: 2 + 4 + 6 + 8 + 10 = 30 -┌────────┐ -│ Result │ -├────────┤ -│ 30 │ -└────────┘ -``` diff --git a/docs/en/guides/54-query/02-advanced/udf.md b/docs/en/guides/54-query/02-advanced/udf.md deleted file mode 100644 index f8cbed380c..0000000000 --- a/docs/en/guides/54-query/02-advanced/udf.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: User-Defined Function -sidebar_position: 1 ---- - -# User-Defined Functions (UDFs) in Databend - -User-Defined Functions (UDFs) allow you to create custom operations tailored to your specific data processing needs. Databend uses a **unified `$$` syntax** across all function types for consistency. - -## Quick Start Guide - -Choose your function type based on what you need to return: - -| **Need to Return** | **Function Type** | **Documentation** | -|-------------------|-------------------|-------------------| -| Single value (number, string, etc.) 
| **Scalar SQL** | [CREATE SCALAR FUNCTION](/sql/sql-commands/ddl/udf/ddl-create-function) | -| Multiple rows/columns | **Tabular SQL** | [CREATE TABLE FUNCTION](/sql/sql-commands/ddl/udf/ddl-create-table-function) | -| Complex logic with Python/JS/WASM | **Embedded** | [CREATE EMBEDDED FUNCTION](/sql/sql-commands/ddl/udf/ddl-create-function-embedded) | - -All function types use the same unified syntax pattern: -```sql -CREATE FUNCTION name(params) RETURNS type AS $$ logic $$; -``` - -## Scalar SQL Functions - -Return single values using SQL expressions. Perfect for calculations, formatting, and simple transformations. - -```sql --- Calculate BMI -CREATE FUNCTION calculate_bmi(weight FLOAT, height FLOAT) -RETURNS FLOAT -AS $$ weight / (height * height) $$; - --- Format full name -CREATE FUNCTION full_name(first VARCHAR, last VARCHAR) -RETURNS VARCHAR -AS $$ concat(first, ' ', last) $$; - --- Use the functions -SELECT - full_name('John', 'Doe') AS name, - calculate_bmi(70.0, 1.75) AS bmi; -``` - - -## Table Functions (UDTFs) - -Return result sets with multiple rows and columns. Perfect for encapsulating complex queries with parameters. - -```sql --- Get employees by department -CREATE FUNCTION get_dept_employees(dept_name VARCHAR) -RETURNS TABLE (id INT, name VARCHAR, salary DECIMAL) -AS $$ - SELECT id, name, salary - FROM employees - WHERE department = dept_name -$$; - --- Department statistics -CREATE FUNCTION dept_stats() -RETURNS TABLE (department VARCHAR, count INT, avg_salary DECIMAL) -AS $$ - SELECT department, COUNT(*), AVG(salary) - FROM employees - GROUP BY department -$$; - --- Use table functions -SELECT * FROM get_dept_employees('Engineering'); -SELECT * FROM dept_stats(); -``` - -## Embedded Functions - -Use Python, JavaScript, or WASM for complex logic that can't be easily expressed in SQL. - -| Language | Enterprise Required | Package Support | -|----------|-------------------|-----------------| -| Python | Yes | PyPI packages | -| JavaScript | No | No | -| WASM | No | No | - -### Python Example -```sql --- Simple calculation with type safety -CREATE FUNCTION py_calc(INT, INT) -RETURNS INT -LANGUAGE python HANDLER = 'calculate' -AS $$ -def calculate(x, y): - return x * y + 10 -$$; - -SELECT py_calc(5, 3); -- Returns: 25 -``` - -### JavaScript Example -```sql --- String processing -CREATE FUNCTION js_format(VARCHAR, INT) -RETURNS VARCHAR -LANGUAGE javascript HANDLER = 'formatPerson' -AS $$ -export function formatPerson(name, age) { - return `${name} is ${age} years old`; -} -$$; - -SELECT js_format('Alice', 25); -- Returns: "Alice is 25 years old" -``` - -## Function Management - -| Command | Documentation | -|---------|--------------| -| **CREATE** functions | [Scalar](/sql/sql-commands/ddl/udf/ddl-create-function), [Table](/sql/sql-commands/ddl/udf/ddl-create-table-function), [Embedded](/sql/sql-commands/ddl/udf/ddl-create-function-embedded) | -| **ALTER** functions | [ALTER FUNCTION](/sql/sql-commands/ddl/udf/ddl-alter-function) | -| **DROP** functions | [DROP FUNCTION](/sql/sql-commands/ddl/udf/ddl-drop-function) | -| **SHOW** functions | [SHOW USER FUNCTIONS](/sql/sql-commands/ddl/udf/ddl-show-user-functions) | - -For complete UDF overview and comparison, see [User-Defined Function Commands](/sql/sql-commands/ddl/udf/). 
\ No newline at end of file diff --git a/docs/en/guides/54-query/02-vector-db.md b/docs/en/guides/54-query/02-vector-db.md new file mode 100644 index 0000000000..8632600fa2 --- /dev/null +++ b/docs/en/guides/54-query/02-vector-db.md @@ -0,0 +1,100 @@ +--- +title: Vector Search +--- + +> **Scenario:** EverDrive Smart Vision attaches compact vision embeddings to risky frames so investigation teams can surface similar situations directly inside Databend. + +Every extracted frame also has a vision embedding so perception engineers can discover similar scenarios. This guide shows how to insert those vectors and perform semantic search on top of the same EverDrive IDs. + +## 1. CREATE SAMPLE TABLE +We store a compact example using four-dimensional vectors for readability. In production you might keep 512- or 1536-dim embeddings from CLIP or a self-supervised model. + +```sql +CREATE OR REPLACE TABLE frame_embeddings ( + frame_id VARCHAR, + session_id VARCHAR, + embedding VECTOR(4), + model_version VARCHAR, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + VECTOR INDEX idx_frame_embeddings(embedding) distance='cosine' +); + +INSERT INTO frame_embeddings VALUES + ('FRAME-0001', 'SES-20240801-SEA01', [0.18, 0.42, 0.07, 0.12]::VECTOR(4), 'clip-mini-v1', DEFAULT), + ('FRAME-0002', 'SES-20240801-SEA01', [0.20, 0.38, 0.12, 0.18]::VECTOR(4), 'clip-mini-v1', DEFAULT), + ('FRAME-0003', 'SES-20240802-SEA02', [0.62, 0.55, 0.58, 0.61]::VECTOR(4), 'night-fusion-v2', DEFAULT), + ('FRAME-0004', 'SES-20240802-SEA02', [0.57, 0.49, 0.52, 0.55]::VECTOR(4), 'night-fusion-v2', DEFAULT); +``` + +Docs: [Vector data type](/sql/sql-reference/data-types/vector) and [Vector index](/sql/sql-reference/data-types/vector#vector-indexing). + +--- + +## 2. COSINE_DISTANCE Search +Search for the frames most similar to `FRAME-0001`. + +```sql +WITH query_embedding AS ( + SELECT embedding + FROM frame_embeddings + WHERE frame_id = 'FRAME-0001' + LIMIT 1 +) +SELECT e.frame_id, + e.session_id, + cosine_distance(e.embedding, q.embedding) AS distance +FROM frame_embeddings e +CROSS JOIN query_embedding q +ORDER BY distance +LIMIT 3; +``` + +The cosine distance calculation uses the HNSW index we created earlier, returning the closest frames first. + +--- + +## 3. WHERE Filter + Similarity +Combine similarity search with traditional predicates to narrow the results. + +```sql +WITH query_embedding AS ( + SELECT embedding + FROM frame_embeddings + WHERE frame_id = 'FRAME-0003' + LIMIT 1 +) +SELECT e.frame_id, + cosine_distance(e.embedding, q.embedding) AS distance +FROM frame_embeddings e +CROSS JOIN query_embedding q +WHERE e.session_id = 'SES-20240802-SEA02' +ORDER BY distance; +``` + +--- + +## 4. JOIN Semantic + Risk Metadata +Join the semantic results back to risk scores or detection payloads for richer investigation. + +```sql +WITH query_embedding AS ( + SELECT embedding FROM frame_embeddings WHERE frame_id = 'FRAME-0001' LIMIT 1 + ), + similar_frames AS ( + SELECT frame_id, + cosine_distance(e.embedding, q.embedding) AS distance + FROM frame_embeddings e + CROSS JOIN query_embedding q + ORDER BY distance + LIMIT 5 + ) +SELECT sf.frame_id, + fe.event_type, + fe.risk_score, + sf.distance +FROM similar_frames sf +LEFT JOIN frame_events fe USING (frame_id) +ORDER BY sf.distance; +``` + +This hybrid view surfaces “frames that look like FRAME-0001 and also triggered high-risk events”. 
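+
+Prefer a similarity budget over a fixed top-N? Filter on the distance itself. A minimal sketch, assuming the toy 4-dim vectors above (the `0.05` cutoff is an arbitrary value for these sample rows; tune it for real embeddings, and note that a plain distance predicate may scan rather than use the HNSW index):
+
+```sql
+WITH query_embedding AS (
+    SELECT embedding
+    FROM frame_embeddings
+    WHERE frame_id = 'FRAME-0001'
+    LIMIT 1
+)
+SELECT e.frame_id,
+       e.session_id,
+       cosine_distance(e.embedding, q.embedding) AS distance
+FROM frame_embeddings e
+CROSS JOIN query_embedding q
+WHERE cosine_distance(e.embedding, q.embedding) <= 0.05
+ORDER BY distance;
+```
+
+On the sample data this should keep only `FRAME-0001` itself and its daytime neighbour `FRAME-0002`; the night-drive frames fall outside the cutoff.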
diff --git a/docs/en/guides/54-query/03-geo-analytics.md b/docs/en/guides/54-query/03-geo-analytics.md new file mode 100644 index 0000000000..929caf3be5 --- /dev/null +++ b/docs/en/guides/54-query/03-geo-analytics.md @@ -0,0 +1,139 @@ +--- +title: Geo Analytics +--- + +> **Scenario:** EverDrive Smart Vision logs GPS coordinates for each key frame so operations teams can map risky driving hot spots across the city. + +Every frame is tagged with GPS coordinates so we can map risky situations across the city. This guide adds a geospatial table and demonstrates spatial filters, polygons, and H3 bucketing using the same EverDrive session IDs. + +## 1. CREATE SAMPLE TABLE +Each record represents the ego vehicle at the moment a key frame was captured. Store coordinates as `GEOMETRY` so you can reuse functions like `ST_X`, `ST_Y`, and `HAVERSINE` shown throughout this workload. + +```sql +CREATE OR REPLACE TABLE drive_geo ( + frame_id VARCHAR, + session_id VARCHAR, + location GEOMETRY, + speed_kmh DOUBLE, + heading_deg DOUBLE +); + +INSERT INTO drive_geo VALUES + ('FRAME-0001', 'SES-20240801-SEA01', TO_GEOMETRY('SRID=4326;POINT(-122.3321 47.6062)'), 28.0, 90), + ('FRAME-0002', 'SES-20240801-SEA01', TO_GEOMETRY('SRID=4326;POINT(-122.3131 47.6105)'), 35.4, 120), + ('FRAME-0003', 'SES-20240802-SEA02', TO_GEOMETRY('SRID=4326;POINT(-122.3419 47.6205)'), 18.5, 45), + ('FRAME-0004', 'SES-20240802-SEA02', TO_GEOMETRY('SRID=4326;POINT(-122.3490 47.6138)'), 22.3, 60), + ('FRAME-0005', 'SES-20240803-SEA03', TO_GEOMETRY('SRID=4326;POINT(-122.3610 47.6010)'), 30.1, 210); +``` + +Docs: [Geospatial data types](/sql/sql-reference/data-types/geospatial). + +--- + +## 2. ST_DISTANCE Radius Filter +The `ST_DISTANCE` function measures the distance between geometries. Transform both the frame location and the hotspot into Web Mercator (SRID 3857) so the result is expressed in meters, then filter to 500 m. + +```sql +SELECT g.frame_id, + g.session_id, + e.event_type, + e.risk_score, + ST_DISTANCE( + ST_TRANSFORM(g.location, 3857), + ST_TRANSFORM(TO_GEOMETRY('SRID=4326;POINT(-122.3350 47.6080)'), 3857) + ) AS meters_from_hotspot +FROM drive_geo g +JOIN frame_events e USING (frame_id) +WHERE ST_DISTANCE( + ST_TRANSFORM(g.location, 3857), + ST_TRANSFORM(TO_GEOMETRY('SRID=4326;POINT(-122.3350 47.6080)'), 3857) + ) <= 500 +ORDER BY meters_from_hotspot; +``` + +Need the raw geometry for debugging? Add `ST_ASTEXT(g.location)` to the projection. Prefer direct great-circle math instead? Swap in the `HAVERSINE` function, which operates on `ST_X`/`ST_Y` coordinates. + +--- + +## 3. ST_CONTAINS Polygon Filter +Check whether an event occurred inside a defined safety zone (for example, a school area). + +```sql +WITH school_zone AS ( + SELECT TO_GEOMETRY('SRID=4326;POLYGON(( + -122.3415 47.6150, + -122.3300 47.6150, + -122.3300 47.6070, + -122.3415 47.6070, + -122.3415 47.6150 + ))') AS poly +) +SELECT g.frame_id, + g.session_id, + e.event_type +FROM drive_geo g +JOIN frame_events e USING (frame_id) +CROSS JOIN school_zone +WHERE ST_CONTAINS(poly, g.location); +``` + +--- + +## 4. GEO_TO_H3 Heatmap +Aggregate events by hexagonal cell to build route heatmaps. + +```sql +SELECT GEO_TO_H3(ST_X(location), ST_Y(location), 8) AS h3_cell, + COUNT(*) AS frame_count, + AVG(e.risk_score) AS avg_risk +FROM drive_geo +JOIN frame_events e USING (frame_id) +GROUP BY h3_cell +ORDER BY avg_risk DESC; +``` + +Docs: [H3 functions](/sql/sql-functions/geospatial-functions#h3-indexing--conversion). + +--- + +## 5. 
ST_DISTANCE + JSON QUERY +Combine spatial distance checks with rich detection metadata (from the JSON guide) to build precise alerts. + +```sql +WITH near_intersection AS ( + SELECT frame_id + FROM drive_geo + WHERE ST_DISTANCE( + ST_TRANSFORM(location, 3857), + ST_TRANSFORM(TO_GEOMETRY('SRID=4326;POINT(-122.3410 47.6130)'), 3857) + ) <= 200 +) +SELECT n.frame_id, + p.payload['objects'][0]['type']::STRING AS first_object, + e.event_type, + e.risk_score +FROM near_intersection n +JOIN frame_payloads p USING (frame_id) +JOIN frame_events e USING (frame_id) +WHERE QUERY('payload.objects.type:pedestrian'); +``` + +Spatial filters, JSON operators, and classic SQL all run in one statement. + +--- + +## 6. CREATE VIEW Heatmap +Export hex-level summaries to visualization tools or map layers. + +```sql +CREATE OR REPLACE VIEW v_route_heatmap AS ( + SELECT GEO_TO_H3(ST_X(location), ST_Y(location), 7) AS h3_cell, + COUNT(*) AS frames, + AVG(e.risk_score) AS avg_risk + FROM drive_geo + JOIN frame_events e USING (frame_id) + GROUP BY h3_cell +); +``` + +Downstream systems can query `v_route_heatmap` directly to render risk hot spots on maps without reprocessing raw telemetry. diff --git a/docs/en/guides/54-query/03-optimization/_category_.json b/docs/en/guides/54-query/03-optimization/_category_.json deleted file mode 100644 index 8a21b6cede..0000000000 --- a/docs/en/guides/54-query/03-optimization/_category_.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "label": "Query Optimization", - "position": 4 -} \ No newline at end of file diff --git a/docs/en/guides/54-query/03-optimization/index.md b/docs/en/guides/54-query/03-optimization/index.md deleted file mode 100644 index 25c5c4ae5c..0000000000 --- a/docs/en/guides/54-query/03-optimization/index.md +++ /dev/null @@ -1,201 +0,0 @@ ---- -title: Query Optimization ---- - -Analyze and improve query performance with profiling tools, execution plans, and optimization techniques. 
- -## Performance Analysis Tools - -### [Query Profile](./query-profile.md) -Visual execution plan analysis in Databend Cloud -- **Access**: Monitor → SQL History → Query Profile tab -- **Shows**: Execution nodes, timing, resource usage -- **Use for**: Identifying bottlenecks, understanding query execution - -### [Query Hash](./query-hash.md) -Unique query fingerprinting for performance tracking -```sql --- Get query fingerprint -SELECT query_hash('SELECT * FROM table WHERE id = ?'); -``` - -## Query Optimization Fundamentals - -### Execution Plan Analysis -```sql --- View query execution plan -EXPLAIN SELECT * FROM orders o -JOIN customers c ON o.customer_id = c.id -WHERE o.order_date >= '2023-01-01'; -``` - -**Look for:** -- **Table scans** vs **index usage** -- **Join algorithms** (hash, merge, nested loop) -- **Filter pushdown** effectiveness -- **Resource consumption** estimates - -### Index Strategy -```sql --- Create indexes for common query patterns -CREATE INDEX idx_orders_date ON orders(order_date); -CREATE INDEX idx_orders_customer ON orders(customer_id); -``` - -**Index Guidelines:** -- Index WHERE clause columns -- Index JOIN columns on both sides -- Consider composite indexes for multi-column filters -- Monitor index usage statistics - -## Performance Optimization Techniques - -### Query Rewriting -```sql --- ❌ Inefficient: Function on column prevents index usage -SELECT * FROM orders WHERE YEAR(order_date) = 2023; - --- ✅ Optimized: Range condition can use index -SELECT * FROM orders -WHERE order_date >= '2023-01-01' - AND order_date < '2024-01-01'; -``` - -### Filter Pushdown -```sql --- ❌ Filter after join -SELECT * FROM ( - SELECT o.*, c.name - FROM orders o JOIN customers c ON o.customer_id = c.id -) WHERE order_date >= '2023-01-01'; - --- ✅ Filter before join -SELECT o.*, c.name -FROM orders o -JOIN customers c ON o.customer_id = c.id -WHERE o.order_date >= '2023-01-01'; -``` - -### Aggregation Optimization -```sql --- Use appropriate GROUP BY extensions -SELECT - region, - product_category, - COUNT(*) as sales_count, - SUM(amount) as total_sales -FROM sales -GROUP BY CUBE(region, product_category); -``` - -## Common Performance Issues - -### Issue 1: Large Result Sets -```sql --- ❌ Problem: No limit on large table -SELECT * FROM events ORDER BY timestamp DESC; - --- ✅ Solution: Always limit exploratory queries -SELECT * FROM events ORDER BY timestamp DESC LIMIT 1000; -``` - -### Issue 2: Inefficient Joins -```sql --- ❌ Problem: Cartesian product -SELECT * FROM table1, table2 WHERE condition; - --- ✅ Solution: Explicit join with proper conditions -SELECT * FROM table1 t1 -INNER JOIN table2 t2 ON t1.id = t2.foreign_id -WHERE condition; -``` - -### Issue 3: Unnecessary Complexity -```sql --- ❌ Problem: Nested subqueries -SELECT * FROM ( - SELECT * FROM ( - SELECT col1, col2 FROM table WHERE condition1 - ) WHERE condition2 -) WHERE condition3; - --- ✅ Solution: Combine conditions -SELECT col1, col2 FROM table -WHERE condition1 AND condition2 AND condition3; -``` - -## Monitoring and Metrics - -### Key Performance Indicators -- **Query execution time** -- **Rows scanned vs rows returned** -- **Memory usage** -- **CPU utilization** -- **I/O operations** - -### Performance Monitoring Query -```sql --- Find slow queries from query history -SELECT - query_text, - query_duration_ms, - scan_bytes, - result_bytes, - memory_usage -FROM system.query_log -WHERE query_duration_ms > 10000 -- Queries over 10 seconds -ORDER BY query_duration_ms DESC -LIMIT 10; -``` - -## Optimization 
Checklist - -### Query Design -- [ ] Use appropriate WHERE conditions -- [ ] Minimize data scanned with column selection -- [ ] Apply filters before joins -- [ ] Use proper join types -- [ ] Limit result sets appropriately - -### Indexing -- [ ] Index frequently filtered columns -- [ ] Index join columns -- [ ] Remove unused indexes -- [ ] Monitor index effectiveness - -### Schema Design -- [ ] Choose appropriate data types -- [ ] Normalize appropriately (avoid over-normalization) -- [ ] Consider partitioning for large tables -- [ ] Use clustering keys for sort optimization - -## Advanced Optimization - -### Aggregating Indexes -```sql --- Pre-compute expensive aggregations using Databend's aggregating indexes -CREATE AGGREGATING INDEX daily_sales_agg AS -SELECT - DATE(order_time) as order_date, - product_id, - COUNT(*) as order_count, - SUM(amount) as total_sales -FROM orders -GROUP BY DATE(order_time), product_id; -``` - -### Query Hints -```sql --- Force specific join algorithm when needed -SELECT /*+ USE_HASH_JOIN */ * -FROM large_table l -JOIN small_table s ON l.id = s.foreign_id; -``` - -## Best Practices Summary - -1. **Measure first** - Use Query Profile to identify bottlenecks -2. **Index strategically** - Cover your query patterns -3. **Filter early** - Apply WHERE conditions as soon as possible -4. **Limit appropriately** - Don't fetch more data than needed -5. **Monitor continuously** - Track query performance over time \ No newline at end of file diff --git a/docs/en/guides/54-query/03-optimization/query-hash.md b/docs/en/guides/54-query/03-optimization/query-hash.md deleted file mode 100644 index 0bc841c5ec..0000000000 --- a/docs/en/guides/54-query/03-optimization/query-hash.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Query Hash ---- - -A Query Hash is an identifier used to represent a unique SQL query. It converts the structure and content of the query into a fixed-length value, so even if the query text has slight differences, the hash will be the same as long as the logical structure is identical. This helps in identifying similar queries and frequently executed queries. - -## Query Hash Types - -Databend supports for two types of query hashes: - -- `query_hash`: The query_hash ensures that repeated queries, even with variations in white space or comments, share the same hash. For example, the following queries share the same hash: - - ```sql - SELECT * FROM t1 WHERE name = 'jim' - SELECT * FROM t1 WHERE name = 'jim' - ``` - -- `query_parameterized_hash`: The query_parameterized_hash normalizes queries by handling literals involved in comparison predicates (e.g., `=`, `!=`, `>=`, `<=`), enabling the identification of structurally similar queries regardless of the specific values used. For example, the following queries share the same hash: - - ```sql - SELECT * FROM t1 WHERE name = 'data' - SELECT * FROM t1 WHERE name = 'bend' - ``` - -## Retrieving Hash Values - -Databend stores the hash values of historical queries in the columns named `query_hash` and `query_parameterized_hash` in system table [system.query_log](/sql/sql-reference/system-tables/system-query-log). To retrieve the hash values of a query, you can pull them from the system table using a SELECT statement. 
For example: - -```sql -SELECT * FROM books; - -┌───────────────────────────────────────────────────────────────┐ -│ id │ title │ genre │ -├──────────────────┼─────────────────────────┼──────────────────┤ -│ 1 │ To Kill a Mockingbird │ Fiction │ -│ 2 │ A Brief History of Time │ Science │ -└───────────────────────────────────────────────────────────────┘ - -SELECT query_text, query_hash, query_parameterized_hash -FROM system.query_log -WHERE query_text = 'SELECT * FROM books'; - -┌───────────────────────────────────────────────────────────────────────────────────────────┐ -│ query_text │ query_hash │ query_parameterized_hash │ -├─────────────────────┼──────────────────────────────────┼──────────────────────────────────┤ -│ SELECT * FROM books │ 7e612be4897104109449c74d3970c9e7 │ 7e612be4897104109449c74d3970c9e7 │ -│ SELECT * FROM books │ 7e612be4897104109449c74d3970c9e7 │ 7e612be4897104109449c74d3970c9e7 │ -└───────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -## Examples - -Suppose we have a table containing the following rows: - -```sql -SELECT * FROM books; - -┌───────────────────────────────────────────────────────────────┐ -│ id │ title │ genre │ -├──────────────────┼─────────────────────────┼──────────────────┤ -│ 1 │ To Kill a Mockingbird │ Fiction │ -│ 2 │ A Brief History of Time │ Science │ -└───────────────────────────────────────────────────────────────┘ -``` - -The following queries would share the same hash values: - -```sql -SELECT * FROM books WHERE id = 1; -SELECT * FROM books WHERE id = 1; -``` - -To check them out: - -```sql -SELECT query_text, query_hash, query_parameterized_hash -FROM system.query_log -WHERE query_text = 'SELECT * FROM books WHERE id = 1' - OR query_text = 'SELECT * FROM books WHERE id = 1'; - -┌────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ query_text │ query_hash │ query_parameterized_hash │ -├──────────────────────────────────┼──────────────────────────────────┼──────────────────────────────────┤ -│ SELECT * FROM books WHERE id = 1 │ ae040c4b3a9388c75e10be76ba407b17 │ b68f516c17d3c15b2c070e4af528464c │ -│ SELECT * FROM books WHERE id = 1 │ ae040c4b3a9388c75e10be76ba407b17 │ b68f516c17d3c15b2c070e4af528464c │ -│ SELECT * FROM books WHERE id = 1 │ ae040c4b3a9388c75e10be76ba407b17 │ b68f516c17d3c15b2c070e4af528464c │ -│ SELECT * FROM books WHERE id = 1 │ ae040c4b3a9388c75e10be76ba407b17 │ b68f516c17d3c15b2c070e4af528464c │ -└────────────────────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -The following queries share the same `query_parameterized_hash` value: - -```sql -SELECT * FROM books WHERE id = 1; -SELECT * FROM books WHERE id = 2; - -SELECT query_text, query_hash, query_parameterized_hash -FROM system.query_log -WHERE query_text = 'SELECT * FROM books WHERE id = 1' - OR query_text = 'SELECT * FROM books WHERE id = 2'; - -┌────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ query_text │ query_hash │ query_parameterized_hash │ -├──────────────────────────────────┼──────────────────────────────────┼──────────────────────────────────┤ -│ SELECT * FROM books WHERE id = 1 │ ae040c4b3a9388c75e10be76ba407b17 │ b68f516c17d3c15b2c070e4af528464c │ -│ SELECT * FROM books WHERE id = 1 │ ae040c4b3a9388c75e10be76ba407b17 │ b68f516c17d3c15b2c070e4af528464c │ -│ SELECT * FROM books WHERE id = 1 │ ae040c4b3a9388c75e10be76ba407b17 │ 
b68f516c17d3c15b2c070e4af528464c │ -│ SELECT * FROM books WHERE id = 1 │ ae040c4b3a9388c75e10be76ba407b17 │ b68f516c17d3c15b2c070e4af528464c │ -│ SELECT * FROM books WHERE id = 2 │ 26f135b4936d6a21922074861e5180a4 │ b68f516c17d3c15b2c070e4af528464c │ -│ SELECT * FROM books WHERE id = 2 │ 26f135b4936d6a21922074861e5180a4 │ b68f516c17d3c15b2c070e4af528464c │ -└────────────────────────────────────────────────────────────────────────────────────────────────────────┘ -``` \ No newline at end of file diff --git a/docs/en/guides/54-query/03-optimization/query-profile.md b/docs/en/guides/54-query/03-optimization/query-profile.md deleted file mode 100644 index 326fda8611..0000000000 --- a/docs/en/guides/54-query/03-optimization/query-profile.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: Query Profile ---- - -Query profile refers to the graphical representation or visual breakdown of how a specific SQL statement is executed. It is essentially a graphical version of the [EXPLAIN](/sql/sql-commands/explain-cmds/explain) command, providing insights into the execution plan and performance details of the query. - -## Accessing Query Profiles - -The query profile can be directly accessed in Databend Cloud. To view the query profile of a query, go to **Monitor** > **SQL History**. Select a SQL statement from the history list, then click on the **Query Profile** tab. If you are using a self-hosted Databend, you can use the [EXPLAIN](/sql/sql-commands/explain-cmds/explain) command as an alternative. - -## What Query Profile Includes - -Here is an example of a query profile, comprising a set of three operator nodes in a hierarchical structure. When executing the SQL statement, Databend Cloud processes the nodes in a bottom-to-top sequence. The quantity and types of operator nodes a query profile contains depend on the specifics of your SQL statement. For the common operators and their statistical fields, see [Common Operators & Fields](#common-operators--fields). - -![alt text](/img/cloud/query-profile-1.png) - -*Please note that the bracketed number in each node's title represents the node ID and does *not* indicate the execution step.* - -A query profile comes with a set of information panes that offer more details. The example above includes two information panes: - -| Pane | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| Most Expensive Nodes | Lists the nodes with the longest execution times. | -| Profile Overview | Displays the percentage of time spent on CPU and I/O. Please note that if you select a node, this information pane shows the information specific to the node you select, rather than the whole query. | - -If you click on the `TableScan [4]` node, you will notice that two additional information panes have been added to the right: - -![alt text](/img/cloud/query-profile-2.png) - -| Pane | Description | -| ---------- | ------------------------------------------------------------------------------------------------------------------ | -| Statistics | Includes information such as scan progress, bytes scanned, percentage scanned from cache, partitions scanned, etc. | -| Attributes | Displays details specific to the node. The shown fields vary based on the functionality of the node. 
| - -## Common Operators & Fields - -Explained plans include a variety of operators, depending on the SQL statement you want Databend to EXPLAIN. The following is a list of common operators and their fields: - -* **TableScan**: Reads data from the table. - - table: The full name of the table. For example, `catalog1.database1.table1`. - - read rows: The number of rows to read. - - read bytes: The number of bytes of data to read. - - partition total: The total number of partitions of the table. - - partition scanned: The number of partitions to read. - - push downs: The filters and limits to be pushed down to the storage layer for processing. -* **Filter**: Filters the read data. - - filters: The predicate expression used to filter the data. Data that returns false for the expression evaluation will be filtered out. -* **EvalScalar**: Evaluates scalar expressions. For example, `a+1` in `SELECT a+1 AS b FROM t`. - - expressions: The scalar expressions to evaluate. -* **AggregatePartial** & **AggregateFinal**: Aggregates by keys and returns the result of the aggregation functions. - - group by: The keys used for aggregation. - - aggregate functions: The functions used for aggregation. -* **Sort**: Sorts data by keys. - - sort keys: The expressions used for sorting. -* **Limit**: Limits the number of rows returned. - - limit: The number of rows to return. - - offset: The number of rows to skip before returning any rows. -* **HashJoin**: Uses the Hash Join algorithm to perform Join operations for two tables. The Hash Join algorithm will select one of the two tables as the build side to build the Hash table. It will then use the other table as the probe side to read the matching data from the Hash table to form the result. - - join type: The JOIN type (INNER, LEFT OUTER, RIGHT OUTER, FULL OUTER, CROSS, SINGLE, or MARK). - - build keys: The expressions used by the build side to build the Hash table. - - probe keys: The expressions used by the probe side to read data from the Hash table. - - filters: The non-equivalence JOIN conditions, such as `t.a > t1.a`. -* **Exchange**: Exchanges data between Databend query nodes for distributed parallel computing. - - exchange type: Data repartition type (Hash, Broadcast, or Merge). \ No newline at end of file diff --git a/docs/en/guides/54-query/04-lakehouse-etl.md b/docs/en/guides/54-query/04-lakehouse-etl.md new file mode 100644 index 0000000000..6a2b912a17 --- /dev/null +++ b/docs/en/guides/54-query/04-lakehouse-etl.md @@ -0,0 +1,186 @@ +--- +title: Lakehouse ETL +--- + +> **Scenario:** EverDrive Smart Vision’s data engineering team ships every road-test batch as Parquet files so the unified workloads can load, query, and enrich the same telemetry inside Databend. + +EverDrive’s ingest loop is straightforward: + +``` +Object-store export (Parquet for example) → Stage → COPY INTO → (optional) Stream & Task +``` + +Adjust bucket paths/credentials (and swap Parquet for your actual format if different), then paste the commands below. All syntax mirrors the official [Load Data guides](/guides/load-data/). + +--- + +## 1. Stage +EverDrive’s data engineering team exports four files per batch—sessions, frame events, detection payloads (with nested JSON fields), and frame embeddings—to an S3 bucket. This guide uses Parquet as the example format, but you can plug in CSV, JSON, or other supported formats by adjusting the `FILE_FORMAT` clause. Create a named connection once, then reuse it across stages. 
+
+```sql
+CREATE OR REPLACE CONNECTION everdrive_s3
+  STORAGE_TYPE = 's3'
+  ACCESS_KEY_ID = '<your-access-key-id>'
+  SECRET_ACCESS_KEY = '<your-secret-access-key>';
+
+CREATE OR REPLACE STAGE drive_stage
+  URL = 's3://everdrive-lakehouse/raw/'
+  CONNECTION = (CONNECTION_NAME = 'everdrive_s3')
+  FILE_FORMAT = (TYPE = 'PARQUET');
+```
+
+See [Create Stage](/sql/sql-commands/ddl/stage/ddl-create-stage) for additional options.
+
+List the export folders (Parquet in this walkthrough) to confirm they are visible:
+
+```sql
+LIST @drive_stage/sessions/;
+LIST @drive_stage/frame-events/;
+LIST @drive_stage/payloads/;
+LIST @drive_stage/embeddings/;
+```
+
+---
+
+## 2. Preview
+Before loading anything, peek inside the Parquet files to validate the schema and sample records.
+
+```sql
+SELECT *
+FROM @drive_stage/sessions/session_2024_08_16.parquet
+LIMIT 5;
+
+SELECT *
+FROM @drive_stage/frame-events/frame_events_2024_08_16.parquet
+LIMIT 5;
+```
+
+Repeat the preview for payloads and embeddings as needed. Databend automatically uses the file format specified on the stage.
+
+---
+
+## 3. COPY INTO
+Load each file into the tables used throughout the guides. Use inline casts to map incoming columns to table columns; the projections below assume Parquet, but the same shape applies to other formats.
+
+### Sessions
+```sql
+COPY INTO drive_sessions (session_id, vehicle_id, route_name, start_time, end_time, weather, camera_setup)
+FROM (
+    SELECT session_id::STRING,
+           vehicle_id::STRING,
+           route_name::STRING,
+           start_time::TIMESTAMP,
+           end_time::TIMESTAMP,
+           weather::STRING,
+           camera_setup::STRING
+    FROM @drive_stage/sessions/
+)
+FILE_FORMAT = (TYPE = 'PARQUET');
+```
+
+### Frame Events
+```sql
+COPY INTO frame_events (frame_id, session_id, frame_index, captured_at, event_type, risk_score)
+FROM (
+    SELECT frame_id::STRING,
+           session_id::STRING,
+           frame_index::INT,
+           captured_at::TIMESTAMP,
+           event_type::STRING,
+           risk_score::DOUBLE
+    FROM @drive_stage/frame-events/
+)
+FILE_FORMAT = (TYPE = 'PARQUET');
+```
+
+### Detection Payloads
+The payload files include nested columns (the `payload` column is a JSON object). Use the same projection pattern to copy them into the `frame_payloads` table.
+
+```sql
+COPY INTO frame_payloads (frame_id, run_stage, payload, logged_at)
+FROM (
+    SELECT frame_id::STRING,
+           run_stage::STRING,
+           payload,
+           logged_at::TIMESTAMP
+    FROM @drive_stage/payloads/
+)
+FILE_FORMAT = (TYPE = 'PARQUET');
+```
+
+### Frame Embeddings
+```sql
+COPY INTO frame_embeddings (frame_id, session_id, embedding, model_version, created_at)
+FROM (
+    SELECT frame_id::STRING,
+           session_id::STRING,
+           embedding::VECTOR(4), -- Replace 4 with your actual embedding dimension
+           model_version::STRING,
+           created_at::TIMESTAMP
+    FROM @drive_stage/embeddings/
+)
+FILE_FORMAT = (TYPE = 'PARQUET');
+```
+
+All downstream guides (analytics/search/vector/geo) now see this batch.
+
+---
+
+## 4. Stream (Optional)
+If you want downstream jobs to react to new rows after each `COPY INTO`, create a stream on the key tables (for example `frame_events`). Stream usage follows the [Continuous Pipeline → Streams](/guides/load-data/continuous-data-pipelines/stream) guide.
+
+```sql
+CREATE OR REPLACE STREAM frame_events_stream ON TABLE frame_events;
+
+SELECT * FROM frame_events_stream; -- Shows new rows since the last consumption
+```
+
+After processing, consume the stream in a DML statement (for example, `INSERT INTO your_target_table SELECT * FROM frame_events_stream`) so the offset advances and the next read sees only newer rows.
+
+---
+
+## 5. Task (Optional)
+Tasks execute **one SQL statement** on a schedule.
Create a small task for each table (or call a stored procedure if you prefer a single entry point). + +```sql +CREATE OR REPLACE TASK task_load_sessions + WAREHOUSE = 'default' + SCHEDULE = 5 MINUTE +AS + COPY INTO drive_sessions (session_id, vehicle_id, route_name, start_time, end_time, weather, camera_setup) + FROM ( + SELECT session_id::STRING, + vehicle_id::STRING, + route_name::STRING, + start_time::TIMESTAMP, + end_time::TIMESTAMP, + weather::STRING, + camera_setup::STRING + FROM @drive_stage/sessions/ + ) + FILE_FORMAT = (TYPE = 'PARQUET'); + +ALTER TASK task_load_sessions RESUME; + +CREATE OR REPLACE TASK task_load_frame_events + WAREHOUSE = 'default' + SCHEDULE = 5 MINUTE +AS + COPY INTO frame_events (frame_id, session_id, frame_index, captured_at, event_type, risk_score) + FROM ( + SELECT frame_id::STRING, + session_id::STRING, + frame_index::INT, + captured_at::TIMESTAMP, + event_type::STRING, + risk_score::DOUBLE + FROM @drive_stage/frame-events/ + ) + FILE_FORMAT = (TYPE = 'PARQUET'); + +ALTER TASK task_load_frame_events RESUME; + +-- Repeat for frame_payloads and frame_embeddings +``` + +See [Continuous Pipeline → Tasks](/guides/load-data/continuous-data-pipelines/task) for cron syntax, dependencies, and error handling. diff --git a/docs/en/guides/54-query/_category_.json b/docs/en/guides/54-query/_category_.json index 9ea457a3d1..855b941590 100644 --- a/docs/en/guides/54-query/_category_.json +++ b/docs/en/guides/54-query/_category_.json @@ -1,3 +1,3 @@ { - "label": "Queries" -} \ No newline at end of file + "label": "Unified Workloads" +} diff --git a/docs/en/guides/54-query/index.md b/docs/en/guides/54-query/index.md index d358164e51..b9b28844b2 100644 --- a/docs/en/guides/54-query/index.md +++ b/docs/en/guides/54-query/index.md @@ -1,124 +1,15 @@ --- -title: Query Data in Databend +title: Unified Workloads --- -Databend supports standard SQL with ANSI SQL:2003 analytics extensions. This guide covers essential query techniques from basic to advanced, organized by learning path for optimal understanding. +Databend now serves as a unified engine for SQL analytics, multimodal search, vector similarity, geospatial analysis, and continuous ETL. This mini-series uses the **EverDrive Smart Vision** scenario (session IDs such as `SES-20240801-SEA01`, frame IDs such as `FRAME-0001`) to show how one dataset flows through every workload without copying data between systems. -## Learning Path +| Guide | What it covers | +|-------|----------------| +| [SQL Analytics](./00-sql-analytics.md) | Build shared tables, slice sessions, add window/aggregate speedups | +| [JSON & Search](./01-json-search.md) | Store detection payloads and `QUERY` risky scenes | +| [Vector Search](./02-vector-db.md) | Keep frame embeddings and find semantic neighbors | +| [Geo Analytics](./03-geo-analytics.md) | Map incidents with `HAVERSINE`, polygons, H3 | +| [Lakehouse ETL](./04-lakehouse-etl.md) | Stage files, `COPY INTO` tables, optional stream/task | -**📚 New to SQL?** Start with [Basic Queries](./00-basics/index.md) -**🔗 Joining data?** Go to [Combining Data](./01-combining-data/index.md) -**⚡ Need custom logic?** Check [Advanced Features](./02-advanced/index.md) -**🚀 Performance issues?** Visit [Query Optimization](./03-optimization/index.md) - ---- - -## 📚 [Basic Queries](./00-basics/index.md) - -Master fundamental SQL operations for data selection and aggregation. 
- -### [Filtering & Selection](./00-basics/filtering-selection.md) -```sql --- Select and filter data -SELECT name, salary FROM employees -WHERE department = 'Engineering' -ORDER BY salary DESC; -``` - -### [Aggregating Data](./00-basics/aggregating-data.md) -```sql --- Group and summarize data -SELECT department, - COUNT(*) as emp_count, - AVG(salary) as avg_salary -FROM employees -GROUP BY department; -``` - -### [Advanced Grouping](./00-basics/groupby/index.md) -Multi-dimensional analysis with CUBE, ROLLUP, and GROUPING SETS - ---- - -## 🔗 [Combining Data](./01-combining-data/index.md) - -Connect data from multiple sources using JOINs and CTEs. - -### [JOINs](./01-combining-data/joins.md) -```sql --- Combine related tables -SELECT e.name, d.department_name -FROM employees e -JOIN departments d ON e.department_id = d.id; -``` - -### [Common Table Expressions (CTE)](./01-combining-data/cte.md) -```sql --- Structure complex queries -WITH high_earners AS ( - SELECT * FROM employees WHERE salary > 75000 -) -SELECT department, COUNT(*) as count -FROM high_earners GROUP BY department; -``` - ---- - -## ⚡ [Advanced Features](./02-advanced/index.md) - -Extend capabilities with custom functions and external integrations. - -### [User-Defined Functions](./02-advanced/udf.md) -```sql --- Create reusable functions -CREATE FUNCTION calculate_bonus(salary FLOAT, rating FLOAT) -RETURNS FLOAT AS $$ salary * rating * 0.1 $$; -``` - -### More Advanced Features -- [External Functions](./02-advanced/external-function.md) - Cloud ML integration -- [Stored Procedures](./02-advanced/stored-procedure.md) - Multi-step operations -- [Sequences](./02-advanced/sequences.md) - Unique ID generation - ---- - -## 🚀 [Query Optimization](./03-optimization/index.md) - -Analyze and improve query performance with profiling tools. - -### [Query Profile](./03-optimization/query-profile.md) -Visual execution plan analysis (Databend Cloud: Monitor → SQL History) - -### [Performance Analysis](./03-optimization/query-hash.md) -```sql --- Analyze query execution -EXPLAIN SELECT * FROM orders o -JOIN customers c ON o.customer_id = c.id -WHERE o.order_date >= '2023-01-01'; -``` - ---- - -## Quick Reference - -### Most Common Patterns -```sql --- Top N query -SELECT * FROM employees ORDER BY salary DESC LIMIT 10; - --- Filter and aggregate -SELECT department, AVG(salary) -FROM employees -WHERE hire_date >= '2023-01-01' -GROUP BY department -HAVING AVG(salary) > 70000; - --- Join with CTE -WITH recent_orders AS ( - SELECT * FROM orders WHERE order_date >= '2023-01-01' -) -SELECT c.name, COUNT(*) as order_count -FROM customers c -JOIN recent_orders o ON c.id = o.customer_id -GROUP BY c.name; -``` +Work through them in sequence to see how Databend’s single optimizer powers analytics, search, vector, geo, and loading pipelines on the same fleet data. diff --git a/docs/en/guides/55-performance/03-fulltext-index.md b/docs/en/guides/55-performance/03-fulltext-index.md index 5d562c6a26..66b17e19aa 100644 --- a/docs/en/guides/55-performance/03-fulltext-index.md +++ b/docs/en/guides/55-performance/03-fulltext-index.md @@ -2,6 +2,10 @@ title: Full-Text Index --- +:::info +Looking for a hands-on walkthrough? See [JSON & Search Guide](/guides/query/json-search). 
+::: + # Full-Text Index: Automatic Lightning-Fast Text Search import EEFeature from '@site/src/components/EEFeature'; diff --git a/site-redirects.ts b/site-redirects.ts index 54c7a5256f..10bef71022 100644 --- a/site-redirects.ts +++ b/site-redirects.ts @@ -1116,48 +1116,48 @@ const siteRedirects = [ // Query guide restructuring - moved from flat to hierarchical structure { from: '/guides/query/filtering-selection', - to: '/guides/query/basics/filtering-selection' + to: '/guides/query/sql-analytics' }, { from: '/guides/query/aggregating-data', - to: '/guides/query/basics/aggregating-data' + to: '/guides/query/sql-analytics' }, { from: '/guides/query/joins', - to: '/guides/query/combining-data/joins' + to: '/guides/query/sql-analytics' }, { from: '/guides/query/cte', - to: '/guides/query/combining-data/cte' + to: '/guides/query/sql-analytics' }, { from: '/guides/query/udf', - to: '/guides/query/advanced/udf' + to: '/guides/query/' }, { from: '/guides/query/external-function', - to: '/guides/query/advanced/external-function' + to: '/guides/query/' }, { from: '/guides/query/stored-procedure', - to: '/guides/query/advanced/stored-procedure' + to: '/guides/query/sql-analytics' }, { from: '/guides/query/sequences', - to: '/guides/query/advanced/sequences' + to: '/guides/query/sql-analytics' }, { from: '/guides/query/query-profile', - to: '/guides/query/optimization/query-profile' + to: '/guides/performance/' }, { from: '/guides/query/query-hash', - to: '/guides/query/optimization/query-hash' + to: '/guides/performance/' }, - // Dictionary page removed - redirect to advanced features index + // Dictionary page removed - redirect to unified query index { from: '/guides/query/dictionary', - to: '/guides/query/advanced/' + to: '/guides/query/' }, // ANY function redirect to ANY_VALUE {