From 82acffdd17880a5cd1108e766ba6ce59e27f7665 Mon Sep 17 00:00:00 2001 From: Leto_b Date: Wed, 3 Dec 2025 09:49:21 +0800 Subject: [PATCH 1/4] add row pattern recognition to tablemodel --- .../sidebar_timecho/V2.0.x/en-Table.ts | 1 + .../sidebar_timecho/V2.0.x/zh-Table.ts | 1 + .../SQL-Manual/Row-Pattern-Recognition.md | 999 ++++++++++++++++++ .../SQL-Manual/Row-Pattern-Recognition.md | 999 ++++++++++++++++++ .../SQL-Manual/Row-Pattern-Recognition.md | 907 ++++++++++++++++ .../SQL-Manual/Row-Pattern-Recognition.md | 907 ++++++++++++++++ 6 files changed, 3814 insertions(+) create mode 100644 src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md create mode 100644 src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md create mode 100644 src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md create mode 100644 src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md diff --git a/src/.vuepress/sidebar_timecho/V2.0.x/en-Table.ts b/src/.vuepress/sidebar_timecho/V2.0.x/en-Table.ts index 8dccc2b19..2d123daea 100644 --- a/src/.vuepress/sidebar_timecho/V2.0.x/en-Table.ts +++ b/src/.vuepress/sidebar_timecho/V2.0.x/en-Table.ts @@ -217,6 +217,7 @@ export const enSidebar = { { text: 'ORDER BY Clause', link: 'OrderBy-Clause' }, { text: 'LIMIT&OFFSET Clause', link: 'Limit-Offset-Clause' }, { text: 'Nested Queries', link: 'Nested-Queries' }, + { text: 'Row Pattern Recognition', link: 'Row-Pattern-Recognition' }, ], }, { text: 'Maintenance Statements', link: 'SQL-Maintenance-Statements' }, diff --git a/src/.vuepress/sidebar_timecho/V2.0.x/zh-Table.ts b/src/.vuepress/sidebar_timecho/V2.0.x/zh-Table.ts index 5b22eb6d2..0172defe8 100644 --- a/src/.vuepress/sidebar_timecho/V2.0.x/zh-Table.ts +++ b/src/.vuepress/sidebar_timecho/V2.0.x/zh-Table.ts @@ -206,6 +206,7 @@ export const zhSidebar = { { text: 'ORDER BY子句', link: 'OrderBy-Clause' }, { text: 'LIMIT&OFFSET子句', link: 'Limit-Offset-Clause' }, { text: '嵌套查询', link: 'Nested-Queries' }, + { text: '行模式识别', link: 'Row-Pattern-Recognition' }, ], }, { text: '运维语句', link: 'SQL-Maintenance-Statements' }, diff --git a/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md b/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md new file mode 100644 index 000000000..c029bcaef --- /dev/null +++ b/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md @@ -0,0 +1,999 @@ + + +# Row Pattern Recognition + +## 1. Overview + +IoTDB supports Row Pattern Recognition. This feature enables capturing a segment of continuous data by defining the recognition logic of pattern variables and regular expressions, and performing analysis and calculation on each captured data segment. It is suitable for business scenarios such as identifying specific patterns in time-series data and detecting specific events. If we regard Row Pattern Recognition as grouping processing of data, the core process is roughly as follows: + +* Perform group capture through the PATTERN, DEFINE, and SUBSET clauses +* Conduct computational processing on the captured groups through the MEASURES clause +* Set the output format of groups through the ROWS PER MATCH clause +* Specify how to locate the start position of the next group through the AFTER MATCH SKIP clause + +> Note: This feature is available starting from version V2.0.5. + +## 2. Function Introduction +### 2.1 Syntax Format + +```SQL +MATCH_RECOGNIZE ( + [ PARTITION BY column [, ...] ] + [ ORDER BY column [, ...] ] + [ MEASURES measure_definition [, ...] ] + [ ROWS PER MATCH ] + [ AFTER MATCH skip_to ] + PATTERN ( row_pattern ) + [ SUBSET subset_definition [, ...] ] + DEFINE variable_definition [, ...] +) +``` + +**Note:** + +* PARTITION BY: Optional. Used to group the input table, and each group can perform pattern matching independently. If this clause is not specified, the entire input table will be processed as a single unit. +* ORDER BY: Optional. Used to ensure that input data is processed in a specific order during matching. +* MEASURES: Optional. Used to specify which information to extract from the matched segment of data. +* ROWS PER MATCH: Optional. Used to specify the output method of the result set after successful pattern matching. +* AFTER MATCH SKIP: Optional. Used to specify which row to resume from for the next pattern match after identifying a non-empty match. +* PATTERN: Used to define the row pattern to be matched. +* SUBSET: Optional. Used to merge rows matched by multiple basic pattern variables into a single logical set. +* DEFINE: Used to define the basic pattern variables for the row pattern. + +### 2.2 DEFINE Clause + +Used to specify the judgment condition for each basic pattern variable in pattern recognition. These variables are usually represented by identifiers (e.g., `A`, `B`), and the Boolean expressions in this clause precisely define which rows meet the requirements of the variable. + +* During pattern matching execution, a row is only marked as the variable (and thus included in the current matching group) if the Boolean expression returns TRUE. + +```SQL +-- A row can only be identified as B if its totalprice value is less than the totalprice value of the previous row. +DEFINE B AS totalprice < PREV(totalprice) +``` + +* Variables not **explicitly** defined in this clause have an implicitly set condition of always true (TRUE), meaning they can be successfully matched on any input row. + +### 2.3 SUBSET Clause + +Used to merge rows matched by multiple basic pattern variables (e.g., `A`, `B`) into a combined pattern variable (e.g., `U`), allowing these rows to be treated as a single logical set for operations. It can be used in the `MEASURES`, `DEFINE`, and `AFTER MATCH SKIP` clauses. + +```SQL +SUBSET U = (A, B) +``` +For example, for the pattern `PATTERN ((A | B){5} C+)`, it is impossible to determine whether the 5th repetition matches the basic pattern variable A or B during matching. Therefore: + +1. In the `MEASURES` clause, if you need to reference the last row matched in this phase, you can do so by defining the combined pattern variable `SUBSET U = (A, B)`. At this point, the expression `RPR_LAST(U.totalprice)` will directly return the `totalprice` value of the target row. +2. In the `AFTER MATCH SKIP` clause, if the matching result does not include the basic pattern variable A or B, executing `AFTER MATCH SKIP TO LAST B` or `AFTER MATCH SKIP TO LAST A` will fail to jump due to missing anchors. However, by introducing the combined pattern variable `SUBSET U = (A, B)`, using `AFTER MATCH SKIP TO LAST U` is always valid. + +### 2.4 PATTERN Clause + +Used to define the row pattern to be matched, whose basic building block is a row pattern variable. + +```SQL +PATTERN ( row_pattern ) +``` + +#### 2.4.1 Pattern Types + +| Row Pattern | Syntax Format | Description | +|-----------------------|---------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Pattern Concatenation | `A B+ C+ D+` | Composed of subpatterns without any operators, matching all subpatterns in the declared order sequentially. | +| Pattern Alternation | `A \| B \| C` | Composed of multiple subpatterns separated by `\|`, matching only one of them. If multiple subpatterns can be matched, the leftmost one is selected. | +| Pattern Permutation | `PERMUTE(A, B, C)` | Equivalent to performing alternation matching on all different orders of the subpattern elements. It requires that A, B, and C must all be matched, but their order of appearance is not fixed. If multiple matching orders are possible, the priority is determined by the **lexicographical order** based on the definition sequence of elements in the PERMUTE list. For example, A B C has the highest priority, while C B A has the lowest. | +| Pattern Grouping | `(A B C)` | Encloses subpatterns in parentheses to treat them as a single unit, which can be used with other operators. For example, `(A B C)+` indicates a pattern where a group of `(A B C)` appears consecutively. | +| Empty Pattern | `()` | Represents an empty match that does not contain any rows. | +| Pattern Exclusion | `{- row_pattern -}` | Used to specify the matched part to be excluded from the output. Usually used with the `ALL ROWS PER MATCH` option to output rows of interest. For example, `PATTERN (A {- B+ C+ -} D+)` with ALL ROWS PER MATCH will only output the first row `(corresponding to A)` and the trailing rows `(corresponding to D+)` of the match. | + +#### 2.4.2 Partition Start/End Anchor + +* `^A` indicates matching a pattern that starts with A as the partition beginning + * When the value of the PATTERN clause is `^A`, the match must start from the first row of the partition, and this row must satisfy the definition of `A`. + * When the value of the PATTERN clause is `^A^` or `A^`, the output result is empty. +* `A$` indicates matching a pattern that ends with A as the partition end + * When the value of the PATTERN clause is `A$`, the match must end at the end of the partition, and this row must satisfy the definition of `A`. + * When the value of the PATTERN clause is `$A` or `$A$`, the output result is empty. + +For example illustrations, see[Section 3.3.1](./Row-Pattern-Recognition.md#_3-1-Patter Clause Partition Anchor) + +#### 2.4.3 Quantifiers + +Quantifiers are used to specify the number of times a subpattern repeats, placed after the corresponding subpattern (e.g., `(A | B)*`). + +Common quantifiers are as follows: + +| Quantifier | Description | +| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `*` | Zero or more repetitions | +| `+` | One or more repetitions | +| `?` | Zero or one repetition | +| `{n}` | Exactly n repetitions | +| `{m, n}` | Repetitions between m and n times (m and n are non-negative integers). \* If the left bound is omitted, the default starts from 0; \* If the right bound is omitted, there is no upper limit on the number of repetitions (e.g., {5,} is equivalent to "at least five times"); \* If both left and right bounds are omitted (i.e., {,}), it is equivalent to `*`. | + +* The matching preference can be changed by adding `?` after the quantifier. + * `{3,5}`: Prefers 5 times, least prefers 3 times; `{3,5}?`: Prefers 3 times, least prefers 5 times. + * `?`: Prefers 1 time; `??`: Prefers 0 times. + +### 2.5 AFTER MATCH SKIP Clause + +Used to specify which row to start the next pattern match from after identifying a non-empty match. + +| Jump Strategy | Description | Allows Overlapping Matches? | +| ------------------------------------------------------------- | -------------------------------------------------------------------------------- | ----------------------------- | +| `AFTER MATCH SKIP PAST LAST ROW` | Default behavior. Starts from the row after the last row of the current match. | No | +| `AFTER MATCH SKIP TO NEXT ROW` | Starts from the second row in the current match. | Yes | +| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable` | Jumps to start from the [ first row | last row ] of a pattern variable. | Yes | + +* Among all possible configurations, only when `ALL ROWS PER MATCH WITH UNMATCHED ROWS` is used in combination with `AFTER MATCH SKIP PAST LAST ROW` can the system ensure that exactly one output record is generated for each input row. + +For example illustrations, see [Section 3.3.2](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-Clause) + +### 2.6 ROWS PER MATCH Clause + +Used to specify the output method of the result set after a successful pattern match, including the following two main options: + +| Output Method | Rule Description | Output Result | Handling Logic for **Empty Matches/Unmatched Rows** | +| -------------------- | -------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ONE ROW PER MATCH | Generates one output row for each successful match. | \* Columns in the PARTITION BY clause\* Expressions defined in the MEASURES clause. | Outputs empty matches; skips unmatched rows. | +| ALL ROWS PER MATCH | Each row in a match generates an output record, unless the row is excluded via exclusion syntax. | \* Columns in the PARTITION BY clause\* Columns in the ORDER BY clause\* Expressions defined in the MEASURES clause\* Remaining columns in the input table | \* Default: Outputs empty matches; skips unmatched rows.\* ALL ROWS PER MATCH​**SHOW EMPTY MATCHES**​: Outputs empty matches by default; skips unmatched rows.\* ALL ROWS PER MATCH​**OMIT EMPTY MATCHES**​: Does not output empty matches; skips unmatched rows.\* ALL ROWS PER MATCH​**WITH UNMATCHED ROWS**​: Outputs empty matches and generates an additional output record for each unmatched row. | + +### 2.7 MEASURES Clause + +Used to specify which information to extract from a matched set of data. This clause is optional; if not explicitly specified, some input columns will become the output results of pattern recognition based on the settings of the ROWS PER MATCH clause. + +SQL + +```SQL +MEASURES measure_expression AS measure_name [, ...] +``` + +* A `measure_expression` is a scalar value calculated from the matched set of data. + +| Usage Example | Description | +| ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `A.totalprice AS starting_price` | Returns the price from the first row in the matched group (i.e., the only row associated with variable A) as the starting price. | +| `RPR_LAST(B.totalprice) AS bottom_price` | Returns the price from the last row associated with variable B, representing the lowest price in the "V" shape pattern (corresponding to the end of the downward segment). | +| `RPR_LAST(U.totalprice) AS top_price` | Returns the highest price in the matched group, corresponding to the last row associated with variable C or D (i.e., the end of the entire matched group). [Assuming SUBSET U = (C, D)] | + +* Each `measure_expression` defines an output column, which can be referenced by its specified `measure_name`. + +### 2.8 Row Pattern Recognition Expressions + +Expressions used in the MEASURES and DEFINE clauses are ​**scalar expressions**​, evaluated in the row-level context of the input table. In addition to supporting standard SQL syntax, **scalar expressions** also support special extended functions for row pattern recognition. + +#### 2.8.1 Pattern Variable References + +SQL + +```SQL +A.totalprice +U.orderdate +orderstatus +``` + +* When a column name is prefixed with a **basic pattern variable** or a ​**combined pattern variable**​, it refers to the corresponding column values of all rows matched by that variable. +* If a column name has no prefix, it is equivalent to using the "​**global combined pattern variable**​" (i.e., the union of all basic pattern variables) as the prefix, referring to the column values of all rows in the current match. + +> Using table names as column name prefixes in pattern recognition expressions is not allowed. + +#### 2.8.2 Extended Functions + +| Function Name | Function Syntax | Description | +| ------------------------------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `MATCH_NUMBER` Function | `MATCH_NUMBER()` | Returns the sequence number of the current match within the partition, starting from 1. Empty matches occupy match sequence numbers just like non-empty matches. | +| `CLASSIFIER` Function | `CLASSIFIER(option)` | 1. Returns the name of the basic pattern variable mapped by the current row. 2. `option` is an optional parameter: a basic pattern variable `CLASSIFIER(A)` or a combined pattern variable `CLASSIFIER(U)` can be passed in to limit the function's scope; for rows outside the scope, NULL is returned directly. When used with a combined pattern variable, it can be used to distinguish which basic pattern variable in the union the row is mapped to. | +| Logical Navigation Functions | `RPR_FIRST(expr, k)` | 1. Indicates locating the first row satisfying `expr` in the ​**current match group**​, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the end of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the first row satisfying the condition); if explicitly specified, it must be a non-negative integer. | +| Logical Navigation Functions | `RPR_LAST(expr, k)` | 1. Indicates locating the last row satisfying `expr` in the ​**current match group**​, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the start of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the last row satisfying the condition); if explicitly specified, it must be a non-negative integer. | +| Physical Navigation Functions | `PREV(expr, k)` | 1. Indicates offsetting k rows towards the start from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the ​**partition boundary**​, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. | +| Physical Navigation Functions | `NEXT(expr, k)` | 1. Indicates offsetting k rows towards the end from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the ​**partition boundary**​, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. | +| Aggregate Functions | COUNT, SUM, AVG, MAX, MIN Functions | Can be used to calculate data in the current match. Aggregate functions and navigation functions are not allowed to be nested within each other. (Supported from version V2.0.6) | +| Nested Functions | `PREV/NEXT(CLASSIFIER())` | Nesting of physical navigation functions and the CLASSIFIER function. Used to obtain the pattern variables corresponding to the previous and next matching rows of the current row. | +| Nested Functions | `PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | **Logical functions are allowed to be nested** inside physical functions; **physical functions are not allowed to be nested** inside logical functions. Used to perform logical offset first, then physical offset. | + +For example illustrations, see [Section 3.3.3](./Row-Pattern-Recognition.md#_3-3-Row-Pattern-Expressions-Extended-Functions) + +#### 2.8.3 RUNNING and FINAL Semantics + +1. Definition + +* `RUNNING`: Indicates the calculation scope is from the start row of the current match group to the row currently being processed (i.e., up to the current row). +* `FINAL`: Indicates the calculation scope is from the start row of the current match group to the final row of the group (i.e., the entire match group). + +2. Scope of Application + +* The DEFINE clause uses RUNNING semantics by default. +* The MEASURES clause uses RUNNING semantics by default and supports specifying FINAL semantics. When using the ONE ROW PER MATCH output mode, all expressions are calculated from the last row position of the match group, and at this time, RUNNING semantics are equivalent to FINAL semantics. + +3. Syntax Constraints + +* RUNNING and FINAL need to be written before **logical navigation functions** or aggregate functions, and cannot directly act on **column references.** + * Valid: `RUNNING RPP_LAST(A.totalprice)`, `FINAL RPP_LAST(A.totalprice)` + * Invalid: `RUNNING A.totalprice`, `FINAL A.totalprice`, `RUNNING PREV(A.totalprice)` + +## 3. Syntax Examples + +Original Data + +SQL + +```SQL +IoTDB:database3> select * from t ++-----------------------------+------+----------+ +| time|device|totalprice| ++-----------------------------+------+----------+ +|2025-01-01T00:01:00.000+08:00| d1| 90| +|2025-01-01T00:02:00.000+08:00| d1| 80| +|2025-01-01T00:03:00.000+08:00| d1| 70| +|2025-01-01T00:04:00.000+08:00| d1| 80| +|2025-01-01T00:05:00.000+08:00| d1| 70| +|2025-01-01T00:06:00.000+08:00| d1| 80| ++-----------------------------+------+----------+ + +-- Create Statement +create table t(device tag, totalprice int32 field) + +insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80) +``` + +### 3.1 PATTERN Clause Partition Anchor + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.match, m.price, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER() AS label + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN %s -- PATTERN Clause + DEFINE A AS true +) AS m; +``` + +* Query Results + + * When the PATTERN clause is PATTERN (^A) + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + +-----------------------------+-----+-----+-----+ + Total line number = 1 + ``` + + * When the PATTERN clause is PATTERN (^A^) + + SQL + + ```SQL + +----+-----+-----+-----+ + |time|match|price|label| + +----+-----+-----+-----+ + +----+-----+-----+-----+ + Empty set. + ``` + + * When the PATTERN clause is PATTERN (A\$) + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:06:00.000+08:00| 1| 80| A| + +-----------------------------+-----+-----+-----+ + Total line number = 1 + ``` + + * When the PATTERN clause is PATTERN (\$A\$) + + SQL + + ```SQL + +----+-----+-----+-----+ + |time|match|price|label| + +----+-----+-----+-----+ + +----+-----+-----+-----+ + Empty set. + ``` + +### 3.2 AFTER MATCH SKIP Clause + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.match, m.price, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER() AS label + ALL ROWS PER MATCH + %s -- AFTER MATCH SKIP Clause + PATTERN (A B+ C+ D?) + SUBSET U = (C, D) + DEFINE + B AS B.totalprice < PREV (B.totalprice), + C AS C.totalprice > PREV (C.totalprice), + D AS false -- Never matches successfully +) AS m; +``` + +* Query Results + + * When AFTER MATCH SKIP PAST LAST ROW + * First match: Rows 1, 2, 3, 4 + * Second match: According to the semantics of `AFTER MATCH SKIP PAST LAST ROW`, starting from row 5, no valid match can be found + * This pattern will never have overlapping matches + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 4 + ``` + + * When AFTER MATCH SKIP TO NEXT ROW + * First match: Rows 1, 2, 3, 4 + * Second match: According to the semantics of `AFTER MATCH SKIP TO NEXT ROW`, starting from row 2, matches: Rows 2, 3, 4 + * Third match: Attempts to start from row 3, fails + * Fourth match: Attempts to start from row 4, succeeds, matches rows 4, 5, 6 + * This pattern allows overlapping matches + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:02:00.000+08:00| 2| 80| A| + |2025-01-01T00:03:00.000+08:00| 2| 70| B| + |2025-01-01T00:04:00.000+08:00| 2| 80| C| + |2025-01-01T00:04:00.000+08:00| 3| 80| A| + |2025-01-01T00:05:00.000+08:00| 3| 70| B| + |2025-01-01T00:06:00.000+08:00| 3| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 10 + ``` + + * When AFTER MATCH SKIP TO FIRST C + * First match: Rows 1, 2, 3, 4 + * Second match: Starts from the first C (i.e., row 4), matches rows 4, 5, 6 + * This pattern allows overlapping matches + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * When AFTER MATCH SKIP TO LAST B or AFTER MATCH SKIP TO B + * First match: Rows 1, 2, 3, 4 + * Second match: Attempts to start from the last B (i.e., row 3), fails + * Third match: Attempts to start from row 4, successfully matches rows 4, 5, 6 + * This pattern allows overlapping matches + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * When AFTER MATCH SKIP TO U + * First match: Rows 1, 2, 3, 4 + * Second match: `SKIP TO U` means jumping to the last C or D; D can never match successfully, so it jumps to the last C (i.e., row 4), successfully matching rows 4, 5, 6 + * This pattern allows overlapping matches + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * When AFTER MATCH SKIP TO A, you cannot jump to the first row of the match, otherwise it will cause an infinite loop + + SQL + + ```SQL + Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: cannot skip to first row of match + ``` + + * When AFTER MATCH SKIP TO B, you cannot jump to a pattern variable that does not exist in the match group + + SQL + + ```SQL + Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: pattern variable is not present in match + ``` + +### 3.3 Row Pattern Expressions - Extended Functions + +#### 3.3.1 CLASSIFIER() Function + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.match, m.price, m.lower_or_higher, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER(U) AS lower_or_higher, + CLASSIFIER(W) AS label + ALL ROWS PER MATCH + PATTERN ((L | H) A) + SUBSET + U = (L, H), + W = (A, L, H) + DEFINE + A AS A.totalprice = 80, + L AS L.totalprice < 80, + H AS H.totalprice > 80 +) AS m; +``` + +* Query Results + +SQL + +```SQL ++-----------------------------+-----+-----+---------------+-----+ +| time|match|price|lower_or_higher|label| ++-----------------------------+-----+-----+---------------+-----+ +|2025-01-01T00:01:00.000+08:00| 1| 90| H| H| +|2025-01-01T00:02:00.000+08:00| 1| 80| H| A| +|2025-01-01T00:03:00.000+08:00| 2| 70| L| L| +|2025-01-01T00:04:00.000+08:00| 2| 80| L| A| +|2025-01-01T00:05:00.000+08:00| 3| 70| L| L| +|2025-01-01T00:06:00.000+08:00| 3| 80| L| A| ++-----------------------------+-----+-----+---------------+-----+ +Total line number = 6 +``` + +#### 3.3.2 Logical Navigation Functions + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.measure +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + %s AS measure -- MEASURES Clause + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* Query Results + + * When the value is totalprice, RPR\_LAST(totalprice), RUNNING RPR\_LAST(totalprice) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 90| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 70| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * When the value is FINAL RPR\_LAST(totalprice) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 80| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 80| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * When the value is RPR\_FIRST(totalprice), RUNNING RPR\_FIRST(totalprice), FINAL RPR\_FIRST(totalprice) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 90| + |2025-01-01T00:02:00.000+08:00| 90| + |2025-01-01T00:03:00.000+08:00| 90| + |2025-01-01T00:04:00.000+08:00| 90| + |2025-01-01T00:05:00.000+08:00| 90| + |2025-01-01T00:06:00.000+08:00| 90| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * When the value is RPR\_LAST(totalprice, 2) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| null| + |2025-01-01T00:02:00.000+08:00| null| + |2025-01-01T00:03:00.000+08:00| 90| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * When the value is FINAL RPP\_LAST(totalprice, 2) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 80| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 80| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * When the value is RPR\_FIRST(totalprice, 2) and FINAL RPR\_FIRST(totalprice, 2) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 70| + |2025-01-01T00:02:00.000+08:00| 70| + |2025-01-01T00:03:00.000+08:00| 70| + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 70| + +-----------------------------+-------+ + Total line number = 6 + ``` + +#### 3.3.3 Physical Navigation Functions + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.measure +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + %s AS measure -- MEASURES Clause + ALL ROWS PER MATCH + PATTERN (B) + DEFINE B AS B.totalprice >= PREV(B.totalprice) +) AS m; +``` + +* Query Results + + * When the value is `PREV(totalprice)` + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 70| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * When the value is `PREV(B.totalprice, 2)` + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * When the value is `PREV(B.totalprice, 4)` + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| null| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * When the value is `NEXT(totalprice)` or `NEXT(B.totalprice, 1)` + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| null| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * When the value is `NEXT(B.totalprice, 2)` + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| null| + +-----------------------------+-------+ + Total line number = 2 + ``` + +#### 3.3.4 Aggregate Functions + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.count, m.avg, m.sum, m.min, m.max +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + COUNT(*) AS count, + AVG(totalprice) AS avg, + SUM(totalprice) AS sum, + MIN(totalprice) AS min, + MAX(totalprice) AS max + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* Query Results + +SQL + +```SQL ++-----------------------------+-----+-----------------+-----+---+---+ +| time|count| avg| sum|min|max| ++-----------------------------+-----+-----------------+-----+---+---+ +|2025-01-01T00:01:00.000+08:00| 1| 90.0| 90.0| 90| 90| +|2025-01-01T00:02:00.000+08:00| 2| 85.0|170.0| 80| 90| +|2025-01-01T00:03:00.000+08:00| 3| 80.0|240.0| 70| 90| +|2025-01-01T00:04:00.000+08:00| 4| 80.0|320.0| 70| 90| +|2025-01-01T00:05:00.000+08:00| 5| 78.0|390.0| 70| 90| +|2025-01-01T00:06:00.000+08:00| 6|78.33333333333333|470.0| 70| 90| ++-----------------------------+-----+-----------------+-----+---+---+ +Total line number = 6 +``` + +#### 3.3.5 Nested Functions + +1. Example 1 + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.match, m.price, m.lower_or_higher, m.label, m.prev_label, m.next_label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER(U) AS lower_or_higher, + CLASSIFIER(W) AS label, + PREV(CLASSIFIER(W)) AS prev_label, + NEXT(CLASSIFIER(W)) AS next_label + ALL ROWS PER MATCH + PATTERN ((L | H) A) + SUBSET + U = (L, H), + W = (A, L, H) + DEFINE + A AS A.totalprice = 80, + L AS L.totalprice < 80, + H AS H.totalprice > 80 +) AS m; +``` + +* Query Results + +SQL + +```SQL ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +| time|match|price|lower_or_higher|label|prev_label|next_label| ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +|2025-01-01T00:01:00.000+08:00| 1| 90| H| H| null| A| +|2025-01-01T00:02:00.000+08:00| 1| 80| H| A| H| null| +|2025-01-01T00:03:00.000+08:00| 2| 70| L| L| null| A| +|2025-01-01T00:04:00.000+08:00| 2| 80| L| A| L| null| +|2025-01-01T00:05:00.000+08:00| 3| 70| L| L| null| A| +|2025-01-01T00:06:00.000+08:00| 3| 80| L| A| L| null| ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +Total line number = 6 +``` + +2. Example 2 + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.prev_last_price, m.next_first_price +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + PREV(RPR_LAST(totalprice), 2) AS prev_last_price, + NEXT(RPR_FIRST(totalprice), 2) as next_first_price + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* Query Results + +SQL + +```SQL ++-----------------------------+---------------+----------------+ +| time|prev_last_price|next_first_price| ++-----------------------------+---------------+----------------+ +|2025-01-01T00:01:00.000+08:00| null| 70| +|2025-01-01T00:02:00.000+08:00| null| 70| +|2025-01-01T00:03:00.000+08:00| 90| 70| +|2025-01-01T00:04:00.000+08:00| 80| 70| +|2025-01-01T00:05:00.000+08:00| 70| 70| +|2025-01-01T00:06:00.000+08:00| 80| 70| ++-----------------------------+---------------+----------------+ +Total line number = 6 +``` + +## 4. Scenario Examples + +Using [Sample Data](./Reference/Sample-Data.md) as the source data + +### 4.1 Time Segment Query + +Segment the data in table1 by time intervals less than or equal to 24 hours, and query the total number of data entries in each segment, as well as the start and end times. + +Query SQL + +SQL + +```SQL +SELECT start_time, end_time, cnt +FROM table1 +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + RPR_FIRST(A.time) AS start_time, + RPR_LAST(time) AS end_time, + COUNT() AS cnt + PATTERN (A B*) + DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000 +) AS m +``` + +Query Results + +SQL + +```SQL ++-----------------------------+-----------------------------+---+ +| start_time| end_time|cnt| ++-----------------------------+-----------------------------+---+ +|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2| +|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16| ++-----------------------------+-----------------------------+---+ +Total line number = 2 +``` + +### 4.2 Difference Segment Query + +Segment the data in table2 by humidity value differences less than 0.1, and query the total number of data entries in each segment, as well as the start and end times. + +* Query SQL + +SQL + +```SQL +SELECT start_time, end_time, cnt +FROM table2 +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + RPR_FIRST(A.time) AS start_time, + RPR_LAST(time) AS end_time, + COUNT() AS cnt + PATTERN (A B*) + DEFINE B AS (B.humidity - PREV(B.humidity )) <=0.1 +) AS m; +``` + +* Query Results + +SQL + +```SQL ++-----------------------------+-----------------------------+---+ +| start_time| end_time|cnt| ++-----------------------------+-----------------------------+---+ +|2024-11-26T13:37:00.000+08:00|2024-11-27T00:00:00.000+08:00| 2| +|2024-11-28T08:00:00.000+08:00|2024-11-29T00:00:00.000+08:00| 2| +|2024-11-29T11:00:00.000+08:00|2024-11-30T00:00:00.000+08:00| 2| ++-----------------------------+-----------------------------+---+ +Total line number = 3 +``` + +### 4.3 Event Statistics Query + +Group the data in table1 by device ID, and count the start and end times and maximum humidity value where the humidity in the Shanghai area is greater than 35. + +* Query SQL + +SQL + +```SQL +SELECT m.device_id, m.match, m.event_start, m.event_end, m.max_humidity +FROM table1 +MATCH_RECOGNIZE ( + PARTITION BY device_id + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RPR_FIRST(A.time) AS event_start, + RPR_LAST(A.time) AS event_end, + MAX(A.humidity) AS max_humidity + ONE ROW PER MATCH + PATTERN (A+) + DEFINE + A AS A.region= 'Shanghai' AND A.humidity> 35 +) AS m +``` + +* Query Results + +SQL + +```SQL ++---------+-----+-----------------------------+-----------------------------+------------+ +|device_id|match| event_start| event_end|max_humidity| ++---------+-----+-----------------------------+-----------------------------+------------+ +| 100| 1|2024-11-28T09:00:00.000+08:00|2024-11-29T18:30:00.000+08:00| 45.1| +| 101| 1|2024-11-30T09:30:00.000+08:00|2024-11-30T09:30:00.000+08:00| 35.2| ++---------+-----+-----------------------------+-----------------------------+------------+ +Total line number = 2 +``` diff --git a/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md b/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md new file mode 100644 index 000000000..c029bcaef --- /dev/null +++ b/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md @@ -0,0 +1,999 @@ + + +# Row Pattern Recognition + +## 1. Overview + +IoTDB supports Row Pattern Recognition. This feature enables capturing a segment of continuous data by defining the recognition logic of pattern variables and regular expressions, and performing analysis and calculation on each captured data segment. It is suitable for business scenarios such as identifying specific patterns in time-series data and detecting specific events. If we regard Row Pattern Recognition as grouping processing of data, the core process is roughly as follows: + +* Perform group capture through the PATTERN, DEFINE, and SUBSET clauses +* Conduct computational processing on the captured groups through the MEASURES clause +* Set the output format of groups through the ROWS PER MATCH clause +* Specify how to locate the start position of the next group through the AFTER MATCH SKIP clause + +> Note: This feature is available starting from version V2.0.5. + +## 2. Function Introduction +### 2.1 Syntax Format + +```SQL +MATCH_RECOGNIZE ( + [ PARTITION BY column [, ...] ] + [ ORDER BY column [, ...] ] + [ MEASURES measure_definition [, ...] ] + [ ROWS PER MATCH ] + [ AFTER MATCH skip_to ] + PATTERN ( row_pattern ) + [ SUBSET subset_definition [, ...] ] + DEFINE variable_definition [, ...] +) +``` + +**Note:** + +* PARTITION BY: Optional. Used to group the input table, and each group can perform pattern matching independently. If this clause is not specified, the entire input table will be processed as a single unit. +* ORDER BY: Optional. Used to ensure that input data is processed in a specific order during matching. +* MEASURES: Optional. Used to specify which information to extract from the matched segment of data. +* ROWS PER MATCH: Optional. Used to specify the output method of the result set after successful pattern matching. +* AFTER MATCH SKIP: Optional. Used to specify which row to resume from for the next pattern match after identifying a non-empty match. +* PATTERN: Used to define the row pattern to be matched. +* SUBSET: Optional. Used to merge rows matched by multiple basic pattern variables into a single logical set. +* DEFINE: Used to define the basic pattern variables for the row pattern. + +### 2.2 DEFINE Clause + +Used to specify the judgment condition for each basic pattern variable in pattern recognition. These variables are usually represented by identifiers (e.g., `A`, `B`), and the Boolean expressions in this clause precisely define which rows meet the requirements of the variable. + +* During pattern matching execution, a row is only marked as the variable (and thus included in the current matching group) if the Boolean expression returns TRUE. + +```SQL +-- A row can only be identified as B if its totalprice value is less than the totalprice value of the previous row. +DEFINE B AS totalprice < PREV(totalprice) +``` + +* Variables not **explicitly** defined in this clause have an implicitly set condition of always true (TRUE), meaning they can be successfully matched on any input row. + +### 2.3 SUBSET Clause + +Used to merge rows matched by multiple basic pattern variables (e.g., `A`, `B`) into a combined pattern variable (e.g., `U`), allowing these rows to be treated as a single logical set for operations. It can be used in the `MEASURES`, `DEFINE`, and `AFTER MATCH SKIP` clauses. + +```SQL +SUBSET U = (A, B) +``` +For example, for the pattern `PATTERN ((A | B){5} C+)`, it is impossible to determine whether the 5th repetition matches the basic pattern variable A or B during matching. Therefore: + +1. In the `MEASURES` clause, if you need to reference the last row matched in this phase, you can do so by defining the combined pattern variable `SUBSET U = (A, B)`. At this point, the expression `RPR_LAST(U.totalprice)` will directly return the `totalprice` value of the target row. +2. In the `AFTER MATCH SKIP` clause, if the matching result does not include the basic pattern variable A or B, executing `AFTER MATCH SKIP TO LAST B` or `AFTER MATCH SKIP TO LAST A` will fail to jump due to missing anchors. However, by introducing the combined pattern variable `SUBSET U = (A, B)`, using `AFTER MATCH SKIP TO LAST U` is always valid. + +### 2.4 PATTERN Clause + +Used to define the row pattern to be matched, whose basic building block is a row pattern variable. + +```SQL +PATTERN ( row_pattern ) +``` + +#### 2.4.1 Pattern Types + +| Row Pattern | Syntax Format | Description | +|-----------------------|---------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Pattern Concatenation | `A B+ C+ D+` | Composed of subpatterns without any operators, matching all subpatterns in the declared order sequentially. | +| Pattern Alternation | `A \| B \| C` | Composed of multiple subpatterns separated by `\|`, matching only one of them. If multiple subpatterns can be matched, the leftmost one is selected. | +| Pattern Permutation | `PERMUTE(A, B, C)` | Equivalent to performing alternation matching on all different orders of the subpattern elements. It requires that A, B, and C must all be matched, but their order of appearance is not fixed. If multiple matching orders are possible, the priority is determined by the **lexicographical order** based on the definition sequence of elements in the PERMUTE list. For example, A B C has the highest priority, while C B A has the lowest. | +| Pattern Grouping | `(A B C)` | Encloses subpatterns in parentheses to treat them as a single unit, which can be used with other operators. For example, `(A B C)+` indicates a pattern where a group of `(A B C)` appears consecutively. | +| Empty Pattern | `()` | Represents an empty match that does not contain any rows. | +| Pattern Exclusion | `{- row_pattern -}` | Used to specify the matched part to be excluded from the output. Usually used with the `ALL ROWS PER MATCH` option to output rows of interest. For example, `PATTERN (A {- B+ C+ -} D+)` with ALL ROWS PER MATCH will only output the first row `(corresponding to A)` and the trailing rows `(corresponding to D+)` of the match. | + +#### 2.4.2 Partition Start/End Anchor + +* `^A` indicates matching a pattern that starts with A as the partition beginning + * When the value of the PATTERN clause is `^A`, the match must start from the first row of the partition, and this row must satisfy the definition of `A`. + * When the value of the PATTERN clause is `^A^` or `A^`, the output result is empty. +* `A$` indicates matching a pattern that ends with A as the partition end + * When the value of the PATTERN clause is `A$`, the match must end at the end of the partition, and this row must satisfy the definition of `A`. + * When the value of the PATTERN clause is `$A` or `$A$`, the output result is empty. + +For example illustrations, see[Section 3.3.1](./Row-Pattern-Recognition.md#_3-1-Patter Clause Partition Anchor) + +#### 2.4.3 Quantifiers + +Quantifiers are used to specify the number of times a subpattern repeats, placed after the corresponding subpattern (e.g., `(A | B)*`). + +Common quantifiers are as follows: + +| Quantifier | Description | +| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `*` | Zero or more repetitions | +| `+` | One or more repetitions | +| `?` | Zero or one repetition | +| `{n}` | Exactly n repetitions | +| `{m, n}` | Repetitions between m and n times (m and n are non-negative integers). \* If the left bound is omitted, the default starts from 0; \* If the right bound is omitted, there is no upper limit on the number of repetitions (e.g., {5,} is equivalent to "at least five times"); \* If both left and right bounds are omitted (i.e., {,}), it is equivalent to `*`. | + +* The matching preference can be changed by adding `?` after the quantifier. + * `{3,5}`: Prefers 5 times, least prefers 3 times; `{3,5}?`: Prefers 3 times, least prefers 5 times. + * `?`: Prefers 1 time; `??`: Prefers 0 times. + +### 2.5 AFTER MATCH SKIP Clause + +Used to specify which row to start the next pattern match from after identifying a non-empty match. + +| Jump Strategy | Description | Allows Overlapping Matches? | +| ------------------------------------------------------------- | -------------------------------------------------------------------------------- | ----------------------------- | +| `AFTER MATCH SKIP PAST LAST ROW` | Default behavior. Starts from the row after the last row of the current match. | No | +| `AFTER MATCH SKIP TO NEXT ROW` | Starts from the second row in the current match. | Yes | +| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable` | Jumps to start from the [ first row | last row ] of a pattern variable. | Yes | + +* Among all possible configurations, only when `ALL ROWS PER MATCH WITH UNMATCHED ROWS` is used in combination with `AFTER MATCH SKIP PAST LAST ROW` can the system ensure that exactly one output record is generated for each input row. + +For example illustrations, see [Section 3.3.2](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-Clause) + +### 2.6 ROWS PER MATCH Clause + +Used to specify the output method of the result set after a successful pattern match, including the following two main options: + +| Output Method | Rule Description | Output Result | Handling Logic for **Empty Matches/Unmatched Rows** | +| -------------------- | -------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ONE ROW PER MATCH | Generates one output row for each successful match. | \* Columns in the PARTITION BY clause\* Expressions defined in the MEASURES clause. | Outputs empty matches; skips unmatched rows. | +| ALL ROWS PER MATCH | Each row in a match generates an output record, unless the row is excluded via exclusion syntax. | \* Columns in the PARTITION BY clause\* Columns in the ORDER BY clause\* Expressions defined in the MEASURES clause\* Remaining columns in the input table | \* Default: Outputs empty matches; skips unmatched rows.\* ALL ROWS PER MATCH​**SHOW EMPTY MATCHES**​: Outputs empty matches by default; skips unmatched rows.\* ALL ROWS PER MATCH​**OMIT EMPTY MATCHES**​: Does not output empty matches; skips unmatched rows.\* ALL ROWS PER MATCH​**WITH UNMATCHED ROWS**​: Outputs empty matches and generates an additional output record for each unmatched row. | + +### 2.7 MEASURES Clause + +Used to specify which information to extract from a matched set of data. This clause is optional; if not explicitly specified, some input columns will become the output results of pattern recognition based on the settings of the ROWS PER MATCH clause. + +SQL + +```SQL +MEASURES measure_expression AS measure_name [, ...] +``` + +* A `measure_expression` is a scalar value calculated from the matched set of data. + +| Usage Example | Description | +| ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `A.totalprice AS starting_price` | Returns the price from the first row in the matched group (i.e., the only row associated with variable A) as the starting price. | +| `RPR_LAST(B.totalprice) AS bottom_price` | Returns the price from the last row associated with variable B, representing the lowest price in the "V" shape pattern (corresponding to the end of the downward segment). | +| `RPR_LAST(U.totalprice) AS top_price` | Returns the highest price in the matched group, corresponding to the last row associated with variable C or D (i.e., the end of the entire matched group). [Assuming SUBSET U = (C, D)] | + +* Each `measure_expression` defines an output column, which can be referenced by its specified `measure_name`. + +### 2.8 Row Pattern Recognition Expressions + +Expressions used in the MEASURES and DEFINE clauses are ​**scalar expressions**​, evaluated in the row-level context of the input table. In addition to supporting standard SQL syntax, **scalar expressions** also support special extended functions for row pattern recognition. + +#### 2.8.1 Pattern Variable References + +SQL + +```SQL +A.totalprice +U.orderdate +orderstatus +``` + +* When a column name is prefixed with a **basic pattern variable** or a ​**combined pattern variable**​, it refers to the corresponding column values of all rows matched by that variable. +* If a column name has no prefix, it is equivalent to using the "​**global combined pattern variable**​" (i.e., the union of all basic pattern variables) as the prefix, referring to the column values of all rows in the current match. + +> Using table names as column name prefixes in pattern recognition expressions is not allowed. + +#### 2.8.2 Extended Functions + +| Function Name | Function Syntax | Description | +| ------------------------------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `MATCH_NUMBER` Function | `MATCH_NUMBER()` | Returns the sequence number of the current match within the partition, starting from 1. Empty matches occupy match sequence numbers just like non-empty matches. | +| `CLASSIFIER` Function | `CLASSIFIER(option)` | 1. Returns the name of the basic pattern variable mapped by the current row. 2. `option` is an optional parameter: a basic pattern variable `CLASSIFIER(A)` or a combined pattern variable `CLASSIFIER(U)` can be passed in to limit the function's scope; for rows outside the scope, NULL is returned directly. When used with a combined pattern variable, it can be used to distinguish which basic pattern variable in the union the row is mapped to. | +| Logical Navigation Functions | `RPR_FIRST(expr, k)` | 1. Indicates locating the first row satisfying `expr` in the ​**current match group**​, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the end of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the first row satisfying the condition); if explicitly specified, it must be a non-negative integer. | +| Logical Navigation Functions | `RPR_LAST(expr, k)` | 1. Indicates locating the last row satisfying `expr` in the ​**current match group**​, then searching for the k-th occurrence of the row corresponding to the same pattern variable towards the start of the group, and returning the specified column value of that row. If the k-th matching row is not found in the specified direction, the function returns NULL. 2. `k` is an optional parameter, defaulting to 0 (only locating the last row satisfying the condition); if explicitly specified, it must be a non-negative integer. | +| Physical Navigation Functions | `PREV(expr, k)` | 1. Indicates offsetting k rows towards the start from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the ​**partition boundary**​, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. | +| Physical Navigation Functions | `NEXT(expr, k)` | 1. Indicates offsetting k rows towards the end from the last row matched to the given pattern variable, and returning the corresponding column value. If navigation exceeds the ​**partition boundary**​, the function returns NULL. 2. `k` is an optional parameter, defaulting to 1; if explicitly specified, it must be a non-negative integer. | +| Aggregate Functions | COUNT, SUM, AVG, MAX, MIN Functions | Can be used to calculate data in the current match. Aggregate functions and navigation functions are not allowed to be nested within each other. (Supported from version V2.0.6) | +| Nested Functions | `PREV/NEXT(CLASSIFIER())` | Nesting of physical navigation functions and the CLASSIFIER function. Used to obtain the pattern variables corresponding to the previous and next matching rows of the current row. | +| Nested Functions | `PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | **Logical functions are allowed to be nested** inside physical functions; **physical functions are not allowed to be nested** inside logical functions. Used to perform logical offset first, then physical offset. | + +For example illustrations, see [Section 3.3.3](./Row-Pattern-Recognition.md#_3-3-Row-Pattern-Expressions-Extended-Functions) + +#### 2.8.3 RUNNING and FINAL Semantics + +1. Definition + +* `RUNNING`: Indicates the calculation scope is from the start row of the current match group to the row currently being processed (i.e., up to the current row). +* `FINAL`: Indicates the calculation scope is from the start row of the current match group to the final row of the group (i.e., the entire match group). + +2. Scope of Application + +* The DEFINE clause uses RUNNING semantics by default. +* The MEASURES clause uses RUNNING semantics by default and supports specifying FINAL semantics. When using the ONE ROW PER MATCH output mode, all expressions are calculated from the last row position of the match group, and at this time, RUNNING semantics are equivalent to FINAL semantics. + +3. Syntax Constraints + +* RUNNING and FINAL need to be written before **logical navigation functions** or aggregate functions, and cannot directly act on **column references.** + * Valid: `RUNNING RPP_LAST(A.totalprice)`, `FINAL RPP_LAST(A.totalprice)` + * Invalid: `RUNNING A.totalprice`, `FINAL A.totalprice`, `RUNNING PREV(A.totalprice)` + +## 3. Syntax Examples + +Original Data + +SQL + +```SQL +IoTDB:database3> select * from t ++-----------------------------+------+----------+ +| time|device|totalprice| ++-----------------------------+------+----------+ +|2025-01-01T00:01:00.000+08:00| d1| 90| +|2025-01-01T00:02:00.000+08:00| d1| 80| +|2025-01-01T00:03:00.000+08:00| d1| 70| +|2025-01-01T00:04:00.000+08:00| d1| 80| +|2025-01-01T00:05:00.000+08:00| d1| 70| +|2025-01-01T00:06:00.000+08:00| d1| 80| ++-----------------------------+------+----------+ + +-- Create Statement +create table t(device tag, totalprice int32 field) + +insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80) +``` + +### 3.1 PATTERN Clause Partition Anchor + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.match, m.price, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER() AS label + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN %s -- PATTERN Clause + DEFINE A AS true +) AS m; +``` + +* Query Results + + * When the PATTERN clause is PATTERN (^A) + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + +-----------------------------+-----+-----+-----+ + Total line number = 1 + ``` + + * When the PATTERN clause is PATTERN (^A^) + + SQL + + ```SQL + +----+-----+-----+-----+ + |time|match|price|label| + +----+-----+-----+-----+ + +----+-----+-----+-----+ + Empty set. + ``` + + * When the PATTERN clause is PATTERN (A\$) + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:06:00.000+08:00| 1| 80| A| + +-----------------------------+-----+-----+-----+ + Total line number = 1 + ``` + + * When the PATTERN clause is PATTERN (\$A\$) + + SQL + + ```SQL + +----+-----+-----+-----+ + |time|match|price|label| + +----+-----+-----+-----+ + +----+-----+-----+-----+ + Empty set. + ``` + +### 3.2 AFTER MATCH SKIP Clause + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.match, m.price, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER() AS label + ALL ROWS PER MATCH + %s -- AFTER MATCH SKIP Clause + PATTERN (A B+ C+ D?) + SUBSET U = (C, D) + DEFINE + B AS B.totalprice < PREV (B.totalprice), + C AS C.totalprice > PREV (C.totalprice), + D AS false -- Never matches successfully +) AS m; +``` + +* Query Results + + * When AFTER MATCH SKIP PAST LAST ROW + * First match: Rows 1, 2, 3, 4 + * Second match: According to the semantics of `AFTER MATCH SKIP PAST LAST ROW`, starting from row 5, no valid match can be found + * This pattern will never have overlapping matches + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 4 + ``` + + * When AFTER MATCH SKIP TO NEXT ROW + * First match: Rows 1, 2, 3, 4 + * Second match: According to the semantics of `AFTER MATCH SKIP TO NEXT ROW`, starting from row 2, matches: Rows 2, 3, 4 + * Third match: Attempts to start from row 3, fails + * Fourth match: Attempts to start from row 4, succeeds, matches rows 4, 5, 6 + * This pattern allows overlapping matches + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:02:00.000+08:00| 2| 80| A| + |2025-01-01T00:03:00.000+08:00| 2| 70| B| + |2025-01-01T00:04:00.000+08:00| 2| 80| C| + |2025-01-01T00:04:00.000+08:00| 3| 80| A| + |2025-01-01T00:05:00.000+08:00| 3| 70| B| + |2025-01-01T00:06:00.000+08:00| 3| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 10 + ``` + + * When AFTER MATCH SKIP TO FIRST C + * First match: Rows 1, 2, 3, 4 + * Second match: Starts from the first C (i.e., row 4), matches rows 4, 5, 6 + * This pattern allows overlapping matches + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * When AFTER MATCH SKIP TO LAST B or AFTER MATCH SKIP TO B + * First match: Rows 1, 2, 3, 4 + * Second match: Attempts to start from the last B (i.e., row 3), fails + * Third match: Attempts to start from row 4, successfully matches rows 4, 5, 6 + * This pattern allows overlapping matches + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * When AFTER MATCH SKIP TO U + * First match: Rows 1, 2, 3, 4 + * Second match: `SKIP TO U` means jumping to the last C or D; D can never match successfully, so it jumps to the last C (i.e., row 4), successfully matching rows 4, 5, 6 + * This pattern allows overlapping matches + + SQL + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * When AFTER MATCH SKIP TO A, you cannot jump to the first row of the match, otherwise it will cause an infinite loop + + SQL + + ```SQL + Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: cannot skip to first row of match + ``` + + * When AFTER MATCH SKIP TO B, you cannot jump to a pattern variable that does not exist in the match group + + SQL + + ```SQL + Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: pattern variable is not present in match + ``` + +### 3.3 Row Pattern Expressions - Extended Functions + +#### 3.3.1 CLASSIFIER() Function + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.match, m.price, m.lower_or_higher, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER(U) AS lower_or_higher, + CLASSIFIER(W) AS label + ALL ROWS PER MATCH + PATTERN ((L | H) A) + SUBSET + U = (L, H), + W = (A, L, H) + DEFINE + A AS A.totalprice = 80, + L AS L.totalprice < 80, + H AS H.totalprice > 80 +) AS m; +``` + +* Query Results + +SQL + +```SQL ++-----------------------------+-----+-----+---------------+-----+ +| time|match|price|lower_or_higher|label| ++-----------------------------+-----+-----+---------------+-----+ +|2025-01-01T00:01:00.000+08:00| 1| 90| H| H| +|2025-01-01T00:02:00.000+08:00| 1| 80| H| A| +|2025-01-01T00:03:00.000+08:00| 2| 70| L| L| +|2025-01-01T00:04:00.000+08:00| 2| 80| L| A| +|2025-01-01T00:05:00.000+08:00| 3| 70| L| L| +|2025-01-01T00:06:00.000+08:00| 3| 80| L| A| ++-----------------------------+-----+-----+---------------+-----+ +Total line number = 6 +``` + +#### 3.3.2 Logical Navigation Functions + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.measure +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + %s AS measure -- MEASURES Clause + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* Query Results + + * When the value is totalprice, RPR\_LAST(totalprice), RUNNING RPR\_LAST(totalprice) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 90| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 70| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * When the value is FINAL RPR\_LAST(totalprice) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 80| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 80| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * When the value is RPR\_FIRST(totalprice), RUNNING RPR\_FIRST(totalprice), FINAL RPR\_FIRST(totalprice) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 90| + |2025-01-01T00:02:00.000+08:00| 90| + |2025-01-01T00:03:00.000+08:00| 90| + |2025-01-01T00:04:00.000+08:00| 90| + |2025-01-01T00:05:00.000+08:00| 90| + |2025-01-01T00:06:00.000+08:00| 90| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * When the value is RPR\_LAST(totalprice, 2) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| null| + |2025-01-01T00:02:00.000+08:00| null| + |2025-01-01T00:03:00.000+08:00| 90| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * When the value is FINAL RPP\_LAST(totalprice, 2) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 80| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 80| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * When the value is RPR\_FIRST(totalprice, 2) and FINAL RPR\_FIRST(totalprice, 2) + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 70| + |2025-01-01T00:02:00.000+08:00| 70| + |2025-01-01T00:03:00.000+08:00| 70| + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 70| + +-----------------------------+-------+ + Total line number = 6 + ``` + +#### 3.3.3 Physical Navigation Functions + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.measure +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + %s AS measure -- MEASURES Clause + ALL ROWS PER MATCH + PATTERN (B) + DEFINE B AS B.totalprice >= PREV(B.totalprice) +) AS m; +``` + +* Query Results + + * When the value is `PREV(totalprice)` + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 70| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * When the value is `PREV(B.totalprice, 2)` + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * When the value is `PREV(B.totalprice, 4)` + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| null| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * When the value is `NEXT(totalprice)` or `NEXT(B.totalprice, 1)` + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| null| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * When the value is `NEXT(B.totalprice, 2)` + + SQL + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| null| + +-----------------------------+-------+ + Total line number = 2 + ``` + +#### 3.3.4 Aggregate Functions + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.count, m.avg, m.sum, m.min, m.max +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + COUNT(*) AS count, + AVG(totalprice) AS avg, + SUM(totalprice) AS sum, + MIN(totalprice) AS min, + MAX(totalprice) AS max + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* Query Results + +SQL + +```SQL ++-----------------------------+-----+-----------------+-----+---+---+ +| time|count| avg| sum|min|max| ++-----------------------------+-----+-----------------+-----+---+---+ +|2025-01-01T00:01:00.000+08:00| 1| 90.0| 90.0| 90| 90| +|2025-01-01T00:02:00.000+08:00| 2| 85.0|170.0| 80| 90| +|2025-01-01T00:03:00.000+08:00| 3| 80.0|240.0| 70| 90| +|2025-01-01T00:04:00.000+08:00| 4| 80.0|320.0| 70| 90| +|2025-01-01T00:05:00.000+08:00| 5| 78.0|390.0| 70| 90| +|2025-01-01T00:06:00.000+08:00| 6|78.33333333333333|470.0| 70| 90| ++-----------------------------+-----+-----------------+-----+---+---+ +Total line number = 6 +``` + +#### 3.3.5 Nested Functions + +1. Example 1 + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.match, m.price, m.lower_or_higher, m.label, m.prev_label, m.next_label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER(U) AS lower_or_higher, + CLASSIFIER(W) AS label, + PREV(CLASSIFIER(W)) AS prev_label, + NEXT(CLASSIFIER(W)) AS next_label + ALL ROWS PER MATCH + PATTERN ((L | H) A) + SUBSET + U = (L, H), + W = (A, L, H) + DEFINE + A AS A.totalprice = 80, + L AS L.totalprice < 80, + H AS H.totalprice > 80 +) AS m; +``` + +* Query Results + +SQL + +```SQL ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +| time|match|price|lower_or_higher|label|prev_label|next_label| ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +|2025-01-01T00:01:00.000+08:00| 1| 90| H| H| null| A| +|2025-01-01T00:02:00.000+08:00| 1| 80| H| A| H| null| +|2025-01-01T00:03:00.000+08:00| 2| 70| L| L| null| A| +|2025-01-01T00:04:00.000+08:00| 2| 80| L| A| L| null| +|2025-01-01T00:05:00.000+08:00| 3| 70| L| L| null| A| +|2025-01-01T00:06:00.000+08:00| 3| 80| L| A| L| null| ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +Total line number = 6 +``` + +2. Example 2 + +* Query SQL + +SQL + +```SQL +SELECT m.time, m.prev_last_price, m.next_first_price +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + PREV(RPR_LAST(totalprice), 2) AS prev_last_price, + NEXT(RPR_FIRST(totalprice), 2) as next_first_price + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* Query Results + +SQL + +```SQL ++-----------------------------+---------------+----------------+ +| time|prev_last_price|next_first_price| ++-----------------------------+---------------+----------------+ +|2025-01-01T00:01:00.000+08:00| null| 70| +|2025-01-01T00:02:00.000+08:00| null| 70| +|2025-01-01T00:03:00.000+08:00| 90| 70| +|2025-01-01T00:04:00.000+08:00| 80| 70| +|2025-01-01T00:05:00.000+08:00| 70| 70| +|2025-01-01T00:06:00.000+08:00| 80| 70| ++-----------------------------+---------------+----------------+ +Total line number = 6 +``` + +## 4. Scenario Examples + +Using [Sample Data](./Reference/Sample-Data.md) as the source data + +### 4.1 Time Segment Query + +Segment the data in table1 by time intervals less than or equal to 24 hours, and query the total number of data entries in each segment, as well as the start and end times. + +Query SQL + +SQL + +```SQL +SELECT start_time, end_time, cnt +FROM table1 +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + RPR_FIRST(A.time) AS start_time, + RPR_LAST(time) AS end_time, + COUNT() AS cnt + PATTERN (A B*) + DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000 +) AS m +``` + +Query Results + +SQL + +```SQL ++-----------------------------+-----------------------------+---+ +| start_time| end_time|cnt| ++-----------------------------+-----------------------------+---+ +|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2| +|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16| ++-----------------------------+-----------------------------+---+ +Total line number = 2 +``` + +### 4.2 Difference Segment Query + +Segment the data in table2 by humidity value differences less than 0.1, and query the total number of data entries in each segment, as well as the start and end times. + +* Query SQL + +SQL + +```SQL +SELECT start_time, end_time, cnt +FROM table2 +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + RPR_FIRST(A.time) AS start_time, + RPR_LAST(time) AS end_time, + COUNT() AS cnt + PATTERN (A B*) + DEFINE B AS (B.humidity - PREV(B.humidity )) <=0.1 +) AS m; +``` + +* Query Results + +SQL + +```SQL ++-----------------------------+-----------------------------+---+ +| start_time| end_time|cnt| ++-----------------------------+-----------------------------+---+ +|2024-11-26T13:37:00.000+08:00|2024-11-27T00:00:00.000+08:00| 2| +|2024-11-28T08:00:00.000+08:00|2024-11-29T00:00:00.000+08:00| 2| +|2024-11-29T11:00:00.000+08:00|2024-11-30T00:00:00.000+08:00| 2| ++-----------------------------+-----------------------------+---+ +Total line number = 3 +``` + +### 4.3 Event Statistics Query + +Group the data in table1 by device ID, and count the start and end times and maximum humidity value where the humidity in the Shanghai area is greater than 35. + +* Query SQL + +SQL + +```SQL +SELECT m.device_id, m.match, m.event_start, m.event_end, m.max_humidity +FROM table1 +MATCH_RECOGNIZE ( + PARTITION BY device_id + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RPR_FIRST(A.time) AS event_start, + RPR_LAST(A.time) AS event_end, + MAX(A.humidity) AS max_humidity + ONE ROW PER MATCH + PATTERN (A+) + DEFINE + A AS A.region= 'Shanghai' AND A.humidity> 35 +) AS m +``` + +* Query Results + +SQL + +```SQL ++---------+-----+-----------------------------+-----------------------------+------------+ +|device_id|match| event_start| event_end|max_humidity| ++---------+-----+-----------------------------+-----------------------------+------------+ +| 100| 1|2024-11-28T09:00:00.000+08:00|2024-11-29T18:30:00.000+08:00| 45.1| +| 101| 1|2024-11-30T09:30:00.000+08:00|2024-11-30T09:30:00.000+08:00| 35.2| ++---------+-----+-----------------------------+-----------------------------+------------+ +Total line number = 2 +``` diff --git a/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md b/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md new file mode 100644 index 000000000..87c42704b --- /dev/null +++ b/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md @@ -0,0 +1,907 @@ + + +# 行模式识别 + +## 1. 概述 + +IoTDB 支持行模式识别,该功能支持通过定义模式变量的识别逻辑以及正则表达式来捕获一段连续的数据,并对每一段捕获的数据进行分析计算,适用于识别时序数据中的特定模式、检测特定事件等业务场景。如果将行模式识别看作对数据进行分组处理,则核心流程大致如下: + +* 通过 PATTERN、DEFINE、SUBSET 子句进行分组捕获 +* 通过 MEASURES 子句对捕获的分组进行计算处理 +* 通过 ROWS PER MATCH 子句设定分组的输出形式 +* 通过 AFTER MATCH SKIP 子句设定如何定位下一个分组的开始位置 + +> 注意:该功能从 V 2.0.5 版本开始提供。 + +## 2. 功能介绍 +### 2.1 语法格式 + +```SQL +MATCH_RECOGNIZE ( + [ PARTITION BY column [, ...] ] + [ ORDER BY column [, ...] ] + [ MEASURES measure_definition [, ...] ] + [ ROWS PER MATCH ] + [ AFTER MATCH skip_to ] + PATTERN ( row_pattern ) + [ SUBSET subset_definition [, ...] ] + DEFINE variable_definition [, ...] +) +``` + +**说明:** + +* PARTITION BY : 可选,用于对输入表进行分组,每个分组能独立进行模式匹配。如果未声明该子句,则整个输入表将作为一个整体进行处理。 +* ORDER BY :可选,用于确保输入数据按某种顺序进行匹配处理。 +* MEASURES :可选,用于指定从匹配到的一段数据中提取哪些信息。 +* ROWS PER MATCH :可选,用于指定模式匹配成功后结果集的输出方式。 +* AFTER MATCH SKIP :可选,用于指定在识别到一个非空匹配后,下一次模式匹配应从哪一行继续进行。 +* PATTERN :用于定义需要匹配的行模式。 +* SUBSET :可选,用于将多个基本模式变量所匹配的行合并为一个逻辑集合。 +* DEFINE :用于定义行模式的基本模式变量。 + +### 2.2 DEFINE 子句 + +用于为模式识别中的每个基本模式变量指定其判断条件。这些变量通常由标识符(如 `A`, `B`)代表,并通过该子句中的布尔表达式精确定义哪些行符合该变量的要求。 + +* 在模式匹配执行过程中,仅当布尔表达式返回 TRUE 时,才会将当前行标记为该变量,从而将其纳入到当前匹配分组中。 + +```SQL +-- 只有在当前行的 totalprice 值小于前一行 totalprice 值的情况下,当前行才可以被识别为 B。 +DEFINE B AS totalprice < PREV(totalprice) +``` + +* **未**在子句中**显式**定义的变量,其匹配条件隐含为恒真(TRUE),即可在任何输入行上成功匹配。 + +### 2.3 SUBSET 子句 + +用于将多个基本模式变量(如 `A`、`B`)匹配到的行合并成一个联合模式变量(如 `U`),使这些行可以被视为同一个逻辑集合进行操作。可用于`MEASURES`、`DEFINE `和`AFTER MATCH SKIP`子句。 + +```SQL +SUBSET U = (A, B) +``` + +例如,对于模式 `PATTERN ((A | B){5} C+)` ,在匹配过程中无法确定第五次重复时具体匹配的是基本模式变量 A 还是 B,因此 + +1. 在 `MEASURES `子句中,若需要引用该阶段最后一次匹配到的行,则可通过定义联合模式变量 `SUBSET U = (A, B)`实现。此时表达式 `RPR_LAST(U.totalprice)` 将直接返回该目标行的 `totalprice` 值。 +2. 在 `AFTER MATCH SKIP` 子句中,若匹配结果中未包含基本模式变量 A 或 B 时,执行 `AFTER MATCH SKIP TO LAST B` 或 `AFTER MATCH SKIP TO LAST A` 会因锚点缺失跳转失败;而通过引入联合模式变量 `SUBSET U = (A, B)`,使用 `AFTER MATCH SKIP TO LAST U` 则始终有效。 + +### 2.4 PATTERN 子句 + +用于定义需要匹配的行模式,其基本构成单元是**基本模式变量。** + +```SQL +PATTERN ( row_pattern ) +``` + +#### 2.4.1 模式种类 + +| 行模式 | 语法格式 | 描述 | +| ----------------------------------- |---------------------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| 模式连接(Pattern Concatenation) | `A B+ C+ D+` | 由不带任何运算符的子模式组成,按声明顺序依次匹配所有子模式。| +| 模式选择(Pattern Alternation) | `A \| B \| C` | 由以`\|`分隔的多个子模式组成,仅匹配其中一个。当多个子模式均可匹配时,选择最左侧的子模式匹配。 | +| 模式排列(Pattern Permutation) | `PERMUTE(A, B, C)` | 该模式等价于对所有子模式元素的不同顺序进行选择匹配,即要求 A、B、C 三者均须匹配,但其出现顺序不固定。当多种匹配顺序均可成功时,依据 PERMUTE 列表中元素的定义先后顺序,按**字典序原则**确定优先级。例如,A B C 为最高优先,C B A 则为最低优先。 | +| 模式分组(Pattern Grouping) | `(A B C)` | 用圆括号将子模式括起,视作一个整体对待,可与其他运算符配合使用。如`(A B C)+`表示连续出现一组`(A B C)`的模式。 | +| 空模式(Empty Pattern) | `()` | 表示一个不包含任何行的空匹配 | +| 模式排除(Pattern Exclusion) | `{- row_pattern -}` | 用于指定在输出中需要排除的匹配部分。通常与`ALL ROWS PER MATCH`选项结合使用,用于输出感兴趣的行。如`PATTERN (A {- B+ C+ -} D+)`,并使用`ALL ROWS PER MATCH`时,输出将仅包含匹配的首行(`A`对应行)与尾部行(`D+`对应行)。 | + +#### 2.4.2 分区起始/结束锚点(Partition Start/End Anchor) + +* `^A` 表示匹配以 A 为分区开始的模式 + * 当 PATTERN 子句的取值为 `^A` 时,要求匹配必须从分区的首行开始,且这一行要满足 `A` 的定义 + * 当 PATTERN 子句的取值为 `^A^` 或 `A^` 时,输出结果为空 +* `A$` 表示匹配以 A 为分区结束的模式 + * 当 PATTERN 子句的取值为 `A$` 时,要求必须在分区的结束位置匹配,并且这一行要满足 `A`的定义 + * 当 PATTERN 子句的取值为 `$A` 或 `$A$` 时,输出结果为空 + +示例介绍可见 [3.3.1 小节](./Row-Pattern-Recognition.md#_3-1-Patter 子句分区锚点) + +#### 2.4.3 量词(Quantifiers) + +量词用于指定子模式重复出现的次数,置于相应子模式之后,如 `(A | B)*`。 + +常用量词如下: + +| 量词 | 描述 | +| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `*` | 零次或多次重复 | +| `+` | 一次或多次重复 | +| `?` | 零次或一次重复 | +| `{n}` | 恰好重复 n 次 | +| `{m, n}` | 重复次数在 m 到 n 之间(m、n 为非负整数)。* 若省略左界,则默认从 0 开始;* 若省略右界,则重复次数不设上限(如 {5,} 等同于“至少重复五次”);* 若同时省略左右界,即 {,},则与 \* 等价。 | + +* 可通过在量词后加 `?` 改变匹配偏好。 + * `{3,5}`:偏好 5 次,最不偏好 3 次;`{3,5}?`:偏好 3 次,最不偏好 5 次 + * `?`:偏好 1 次;`??`:偏好 0 次 + +### 2.5 AFTER MATCH SKIP 子句 + +用于指定在识别到一个非空匹配后,下一次模式匹配应从哪一行继续进行。 + +| 跳转策略 | 描述 | 是否允许识别重叠匹配项 | +| ------------------------------------------------------------- | --------------------------------------------------- | ------------------------ | +| `AFTER MATCH SKIP PAST LAST ROW` | 默认行为。在当前匹配的最后一行之后的下一行开始。 | 否 | +| `AFTER MATCH SKIP TO NEXT ROW` | 在当前匹配中的第二行开始。 | 是 | +| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable` | 跳转到某个模式变量的 [ 第一行 | 最后一行 ] 开始。 | 是 | + +* 在所有可能的配置中,仅当 `ALL ROWS PER MATCH WITH UNMATCHED ROWS` 与 `AFTER MATCH SKIP PAST LAST ROW` 联合使用时,系统才能确保对每个输入行恰好生成一条输出记录。 + +示例介绍可见 [3.3.2 小节](./Row-Pattern-Recognition.md#_3-2-AFTER MATCH SKIP 子句) + +### 2.6 ROWS PER MATCH 子句 + +用于指定模式匹配成功后结果集的输出方式,主要包括以下两种选项: + +| 输出方式 | 规则描述 | 输出结果 | **空匹配/未匹配行**处理逻辑 | +| -------------------- | ----------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| ONE ROW PER MATCH | 每一次成功匹配,产生一行输出结果。 | * PARTITION BY 子句中的列* MEASURES 子句中定义的表达式。 | 输出空匹配;跳过未匹配行。 | +| ALL ROWS PER MATCH | 每一次匹配中的每一行都将产生一条输出记录,除非该行通过 exclusion 语法排除。 | * PARTITION BY 子句中的列* ORDER BY 子句中的列* MEASURES 子句中定义的表达式* 输入表中的其余列 | * 默认:输出空匹配;跳过未匹配行。* ALL ROWS PER MATCH​**SHOW EMPTY MATCHES**​:默认输出空匹配,跳过未匹配行* ALL ROWS PER MATCH​**OMIT EMPTY MATCHES**​:不输出空匹配,跳过未匹配行* ALL ROWS PER MATCH​**WITH UNMATCHED ROWS**​:输出空匹配,并为每一条未匹配行额外生成一条输出记录| + +### 2.7 MEASURES 子句 + +用于指定从匹配到的一段数据中提取哪些信息。该子句为可选项,如果未显式指定,则根据 ROWS PER MATCH 子句的设置,部分输入列会成为模式识别的输出结果。 + +```SQL +MEASURES measure_expression AS measure_name [, ...] +``` + +* `measure_expression` 是根据匹配的一段数据计算出的标量值。 + +| 用法示例 | 说明 | +| ---------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | +| `A.totalprice AS starting_price` | 返回匹配分组中第一行(即与变量 A 关联的唯一一行)中的价格,作为起始价格。 | +| `RPR_LAST(B.totalprice) AS bottom_price` | 返回与变量 B 关联的最后一行中的价格,代表“V”形模式中最低点的价格,对应下降区段的末尾。 | +| `RPR_LAST(U.totalprice) AS top_price` | 返回匹配分组中的最高价格,对应变量 C 或 D 所关联的最后一行,即整个匹配分组的末尾。【假设 SUBSET U = (C, D)】 | + +* 每个 `measure_expression `都会定义一个输出列,该列可通过其指定的 `measure_name `进行引用。 + +### 2.8 行模式识别表达式 + +在 MEASURES 与 DEFINE 子句中使用的表达式为​**标量表达式**​,用于在输入表的行级上下文中求值。**标量表达式**除了支持标准 SQL 语法外,还支持针对行模式识别的特殊扩展函数。 + +#### 2.8.1 模式变量引用 + +```SQL +A.totalprice +U.orderdate +orderstatus +``` + +* 当列名前缀为某**基本模式变量**或**联合模式变量**时,表示引用该变量所匹配的所有行的对应列值。 +* 若列名不带前缀,则等同于使用“​**全局联合模式变量**​”(即所有基本模式变量的并集)作前缀,表示引用当前匹配中所有行的该列值。 + +> 不允许在模式识别表达式中使用表名作列名前缀。 + +#### 2.8.2 扩展函数 + +| 函数名 | 函数式 | 描述 | +|------------------| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `MATCH_NUMBER`函数 | `MATCH_NUMBER()` | 返回当前匹配在分区内的序号,从 1 开始计数。空匹配与非空匹配一致,也占用匹配序号。 | +| `CLASSIFIER `函数 | `CLASSIFIER(option)`| 1. 返回当前行所映射的基本模式变量名称。1. `option`是一个可选参数:可以传入基本模式变量`CLASSIFIER(A)`或联合模式变量`CLASSIFIER(U)`,用于限定函数作用范围,对于不在范围内的行,直接返回 NULL。在对联合模式变量使用时,可用于辨别该行究竟映射至并集中哪一个基本模式变量。 | +| 逻辑导航函数 | `RPR_FIRST(expr, k)` | 1. 表示从**当前匹配分组**中,定位至第一个满足 expr 的行,在此基础上再向分组尾部方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至首个满足条件的行;若显式指定,必须为非负整数。 | +| 逻辑导航函数 | `RPR_LAST(expr, k)`| 1. 表示从**当前匹配分组**中,定位至最后一个满足 expr 的行,在此基础上再向分组开头方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至末个满足条件的行;若显式指定,必须为非负整数。 | +| 物理导航函数 | `PREV(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向开头方向偏移 k 行,返回对应列值。若导航超出​**分区边界**​,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 | +| 物理导航函数 |`NEXT(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向尾部方向偏移 k 行,返回对应列值。若导航超出​**分区边界**​,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 | +| 聚合函数 | COUNT、SUM、AVG、MAX、MIN 函数 | 可用于对当前匹配中的数据进行计算。聚合函数与导航函数不允许互相嵌套。(V 2.0.6 版本起支持) | +| 嵌套函数 | `PREV/NEXT(CLASSIFIER())` | 物理导航函数与 CLASSIFIER 函数嵌套。用于获取当前行的前一个和后一个匹配行所对应的模式变量 | +| 嵌套函数 |`PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | 物理函数内部**允许嵌套**逻辑函数,逻辑函数内部**不允许嵌套**物理函数。用于先进行逻辑偏移,再进行物理偏移。 | + +示例介绍可见 [3.3.3 小节](./Row-Pattern-Recognition.md#_3-3-行模式表达式-扩展函数) + +#### 2.8.3 RUNNING 和 FINAL 语义 +1. 定义 + +* `RUNNING`: 表示计算范围为当前匹配分组内,从分组的起始行到当前正在处理的行(即到当前行为止)。 +* `FINAL`: 表示计算范围为当前匹配分组内,从分组的起始行到分组的最终行(即整个匹配分组)。 + +2. 作用范围 + +* DEFINE 子句默认采用 RUNNING 语义。 +* MEASURES 子句默认采用 RUNNING 语义,支持指定 FINAL 语义。当采用 ONE ROW PER MATCH 输出模式时,所有表达式都从匹配分组的末行位置进行计算,此时 RUNNING 语义与 FINAL 语义等价。 + +3. 语法约束 + +* RUNNING 和 FINAL 需要写在**逻辑导航函数**或聚合函数之前,不能直接作用于**列引用。** + * 合法:`RUNNING RPP_LAST(A.totalprice)`、`FINAL RPP_LAST(A.totalprice)` + * 非法:`RUNNING A.totalprice`、`FINAL A.totalprice`、 `RUNNING PREV(A.totalprice)` + +## 3. 语法示例 + +原始数据 + +```SQL +IoTDB:database3> select * from t ++-----------------------------+------+----------+ +| time|device|totalprice| ++-----------------------------+------+----------+ +|2025-01-01T00:01:00.000+08:00| d1| 90| +|2025-01-01T00:02:00.000+08:00| d1| 80| +|2025-01-01T00:03:00.000+08:00| d1| 70| +|2025-01-01T00:04:00.000+08:00| d1| 80| +|2025-01-01T00:05:00.000+08:00| d1| 70| +|2025-01-01T00:06:00.000+08:00| d1| 80| ++-----------------------------+------+----------+ + +-- 创建语句 +create table t(device tag, totalprice int32 field) + +insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80) +``` + +### 3.1 Patter 子句分区锚点 + +* 查询 sql + +```SQL +SELECT m.time, m.match, m.price, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER() AS label + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN %s -- PATTERN 子句 + DEFINE A AS true +) AS m; +``` + +* 查询结果 + * 当 PATTERN 子句为 PATTERN (^A) 时 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + +-----------------------------+-----+-----+-----+ + Total line number = 1 + ``` + + * 当 PATTERN 子句为 PATTERN (^A^) 时 + + ```SQL + +----+-----+-----+-----+ + |time|match|price|label| + +----+-----+-----+-----+ + +----+-----+-----+-----+ + Empty set. + ``` + + * 当 PATTERN 子句为 PATTERN (A\$) 时 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:06:00.000+08:00| 1| 80| A| + +-----------------------------+-----+-----+-----+ + Total line number = 1 + ``` + + * 当 PATTERN 子句为 PATTERN (\$A\$) 时 + + ```SQL + +----+-----+-----+-----+ + |time|match|price|label| + +----+-----+-----+-----+ + +----+-----+-----+-----+ + Empty set. + ``` + +### 3.2 AFTER MATCH SKIP 子句 + +* 查询 sql + +```SQL +SELECT m.time, m.match, m.price, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER() AS label + ALL ROWS PER MATCH + %s -- AFTER MATCH SKIP 子句 + PATTERN (A B+ C+ D?) + SUBSET U = (C, D) + DEFINE + B AS B.totalprice < PREV (B.totalprice), + C AS C.totalprice > PREV (C.totalprice), + D AS false -- 永远不会匹配成功 +) AS m; +``` + +* 查询结果 + * 当 AFTER MATCH SKIP PAST LAST ROW 时 + * 第一次匹配:第 1、2、3、4 行 + * 第二次匹配:根据 `AFTER MATCH SKIP PAST LAST ROW` 语义,从第 5 行开始,无法再找寻到一个合法匹配 + * 此模式一定不会出现重叠匹配 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 4 + ``` + + * 当 AFTER MATCH SKIP TO NEXT ROW 时 + * 第一次匹配:第 1、2、3、4 行 + * 第二次匹配:根据 `AFTER MATCH SKIP TO NEXT ROW` 语义,从第 2 行开始,匹配:第 2、3、4 行 + * 第三次匹配:尝试从第 3 行开始,失败 + * 第三次匹配:尝试从第 4 行开始,成功,匹配第 4、5、6行 + * 此模式允许出现重叠匹配 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:02:00.000+08:00| 2| 80| A| + |2025-01-01T00:03:00.000+08:00| 2| 70| B| + |2025-01-01T00:04:00.000+08:00| 2| 80| C| + |2025-01-01T00:04:00.000+08:00| 3| 80| A| + |2025-01-01T00:05:00.000+08:00| 3| 70| B| + |2025-01-01T00:06:00.000+08:00| 3| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 10 + ``` + + * 当 AFTER MATCH SKIP TO FIRST C 时 + * 第一次匹配:第 1、2、3、4 行 + * 第二次匹配:从第一个 C (也就是第 4 行)处开始,匹配第4、5、6行 + * 此模式允许出现重叠匹配 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * 当 AFTER MATCH SKIP TO LAST B 或 AFTER MATCH SKIP TO B 时 + * 第一次匹配:第 1、2、3、4 行 + * 第二次匹配:尝试从最后一个 B (也就是第 3 行)处开始,失败 + * 第二次匹配:尝试从第 4 行开始,成功匹配第4、5、6行 + * 此模式允许出现重叠匹配 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * 当 AFTER MATCH SKIP TO U 时 + * 第一次匹配:第 1、2、3、4 行 + * 第二次匹配:`SKIP TO U` 表示跳转到最后一个 C 或 D,D 永远不可能匹配成功,所以就是跳转到最后一个 C(也就是第 4 行),成功匹配第4、5、6行 + * 此模式允许出现重叠匹配 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * 当 AFTER MATCH SKIP TO A 时,不能跳转到匹配的第一行, 否则会造成死循环 + + ```SQL + Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: cannot skip to first row of match + ``` + + * 当 AFTER MATCH SKIP TO B 时,不能跳转到匹配分组中不存在的模式变量 + + ```SQL + Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: pattern variable is not present in match + ``` + +### 3.3 行模式表达式-扩展函数 +#### 3.3.1 CLASSIFIER()函数 + +* 查询 sql + +```SQL +SELECT m.time, m.match, m.price, m.lower_or_higher, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER(U) AS lower_or_higher, + CLASSIFIER(W) AS label + ALL ROWS PER MATCH + PATTERN ((L | H) A) + SUBSET + U = (L, H), + W = (A, L, H) + DEFINE + A AS A.totalprice = 80, + L AS L.totalprice < 80, + H AS H.totalprice > 80 +) AS m; +``` + +* 查询结果 + +```SQL ++-----------------------------+-----+-----+---------------+-----+ +| time|match|price|lower_or_higher|label| ++-----------------------------+-----+-----+---------------+-----+ +|2025-01-01T00:01:00.000+08:00| 1| 90| H| H| +|2025-01-01T00:02:00.000+08:00| 1| 80| H| A| +|2025-01-01T00:03:00.000+08:00| 2| 70| L| L| +|2025-01-01T00:04:00.000+08:00| 2| 80| L| A| +|2025-01-01T00:05:00.000+08:00| 3| 70| L| L| +|2025-01-01T00:06:00.000+08:00| 3| 80| L| A| ++-----------------------------+-----+-----+---------------+-----+ +Total line number = 6 +``` + +#### 3.3.2 逻辑导航函数 + +* 查询 sql + +```SQL +SELECT m.time, m.measure +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + %s AS measure -- MEASURES 子句 + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* 查询结果 + * 当取值为 totalprice、RPR\_LAST(totalprice)、RUNNING RPR\_LAST(totalprice) 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 90| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 70| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * 当取值为 FINAL RPR\_LAST(totalprice) 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 80| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 80| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * 当取值为 RPR\_FIRST(totalprice)、 RUNNING RPR\_FIRST(totalprice)、FINAL RPR\_FIRST(totalprice)时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 90| + |2025-01-01T00:02:00.000+08:00| 90| + |2025-01-01T00:03:00.000+08:00| 90| + |2025-01-01T00:04:00.000+08:00| 90| + |2025-01-01T00:05:00.000+08:00| 90| + |2025-01-01T00:06:00.000+08:00| 90| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * 当取值为 RPR\_LAST(totalprice, 2) 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| null| + |2025-01-01T00:02:00.000+08:00| null| + |2025-01-01T00:03:00.000+08:00| 90| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * 当取值为 FINAL RPP\_LAST(totalprice, 2) 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 80| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 80| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * 当取值为 RPR\_FIRST(totalprice, 2) 和 FINAL RPR\_FIRST(totalprice, 2) 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 70| + |2025-01-01T00:02:00.000+08:00| 70| + |2025-01-01T00:03:00.000+08:00| 70| + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 70| + +-----------------------------+-------+ + Total line number = 6 + ``` + +#### 3.3.3 物理导航函数 + +* 查询 sql + +```SQL +SELECT m.time, m.measure +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + %s AS measure -- MEASURES 子句 + ALL ROWS PER MATCH + PATTERN (B) + DEFINE B AS B.totalprice >= PREV(B.totalprice) +) AS m; +``` + +* 查询结果 + * 当取值为 `PREV(totalprice)` 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 70| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * 当取值为 `PREV(B.totalprice, 2)` 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * 当取值为 `PREV(B.totalprice, 4)` 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| null| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * 当取值为 `NEXT(totalprice)` 或 `NEXT(B.totalprice, 1)` 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| null| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * `当取值为 NEXT(B.totalprice, 2)` 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| null| + +-----------------------------+-------+ + Total line number = 2 + ``` + +#### 3.3.4 聚合函数 + +* 查询 sql + +```SQL +SELECT m.time, m.count, m.avg, m.sum, m.min, m.max +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + COUNT(*) AS count, + AVG(totalprice) AS avg, + SUM(totalprice) AS sum, + MIN(totalprice) AS min, + MAX(totalprice) AS max + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* 查询结果 + +```SQL ++-----------------------------+-----+-----------------+-----+---+---+ +| time|count| avg| sum|min|max| ++-----------------------------+-----+-----------------+-----+---+---+ +|2025-01-01T00:01:00.000+08:00| 1| 90.0| 90.0| 90| 90| +|2025-01-01T00:02:00.000+08:00| 2| 85.0|170.0| 80| 90| +|2025-01-01T00:03:00.000+08:00| 3| 80.0|240.0| 70| 90| +|2025-01-01T00:04:00.000+08:00| 4| 80.0|320.0| 70| 90| +|2025-01-01T00:05:00.000+08:00| 5| 78.0|390.0| 70| 90| +|2025-01-01T00:06:00.000+08:00| 6|78.33333333333333|470.0| 70| 90| ++-----------------------------+-----+-----------------+-----+---+---+ +Total line number = 6 +``` + +#### 3.3.5 嵌套函数 +1. 示例一 + +* 查询 sql + +```SQL +SELECT m.time, m.match, m.price, m.lower_or_higher, m.label, m.prev_label, m.next_label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER(U) AS lower_or_higher, + CLASSIFIER(W) AS label, + PREV(CLASSIFIER(W)) AS prev_label, + NEXT(CLASSIFIER(W)) AS next_label + ALL ROWS PER MATCH + PATTERN ((L | H) A) + SUBSET + U = (L, H), + W = (A, L, H) + DEFINE + A AS A.totalprice = 80, + L AS L.totalprice < 80, + H AS H.totalprice > 80 +) AS m; +``` + +* 查询结果 + +```SQL ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +| time|match|price|lower_or_higher|label|prev_label|next_label| ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +|2025-01-01T00:01:00.000+08:00| 1| 90| H| H| null| A| +|2025-01-01T00:02:00.000+08:00| 1| 80| H| A| H| null| +|2025-01-01T00:03:00.000+08:00| 2| 70| L| L| null| A| +|2025-01-01T00:04:00.000+08:00| 2| 80| L| A| L| null| +|2025-01-01T00:05:00.000+08:00| 3| 70| L| L| null| A| +|2025-01-01T00:06:00.000+08:00| 3| 80| L| A| L| null| ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +Total line number = 6 +``` + +2. 示例二 + +* 查询 sql + +```SQL +SELECT m.time, m.prev_last_price, m.next_first_price +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + PREV(RPR_LAST(totalprice), 2) AS prev_last_price, + NEXT(RPR_FIRST(totalprice), 2) as next_first_price + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* 查询结果 + +```SQL ++-----------------------------+---------------+----------------+ +| time|prev_last_price|next_first_price| ++-----------------------------+---------------+----------------+ +|2025-01-01T00:01:00.000+08:00| null| 70| +|2025-01-01T00:02:00.000+08:00| null| 70| +|2025-01-01T00:03:00.000+08:00| 90| 70| +|2025-01-01T00:04:00.000+08:00| 80| 70| +|2025-01-01T00:05:00.000+08:00| 70| 70| +|2025-01-01T00:06:00.000+08:00| 80| 70| ++-----------------------------+---------------+----------------+ +Total line number = 6 +``` + +## 4. 场景示例 + +以[示例数据](../Reference/Sample-Data.md)为源数据 + +### 4.1 时间分段查询 + +将 table1 中的数据按照时间间隔小于等于 24 小时分段,查询每段中的数据总条数,以及开始、结束时间。 + +查询SQL + +```SQL +SELECT start_time, end_time, cnt +FROM table1 +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + RPR_FIRST(A.time) AS start_time, + RPR_LAST(time) AS end_time, + COUNT() AS cnt + PATTERN (A B*) + DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000 +) AS m +``` + +查询结果 + +```SQL ++-----------------------------+-----------------------------+---+ +| start_time| end_time|cnt| ++-----------------------------+-----------------------------+---+ +|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2| +|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16| ++-----------------------------+-----------------------------+---+ +Total line number = 2 +``` + +### 4.2 差值分段查询 + +将 table2 中的数据按照 humidity 湿度值差值小于 0.1 分段,查询每段中的数据总条数,以及开始、结束时间。 + +* 查询sql + +```SQL +SELECT start_time, end_time, cnt +FROM table2 +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + RPR_FIRST(A.time) AS start_time, + RPR_LAST(time) AS end_time, + COUNT() AS cnt + PATTERN (A B*) + DEFINE B AS (B.humidity - PREV(B.humidity )) <=0.1 +) AS m; +``` + +* 查询结果 + +```SQL ++-----------------------------+-----------------------------+---+ +| start_time| end_time|cnt| ++-----------------------------+-----------------------------+---+ +|2024-11-26T13:37:00.000+08:00|2024-11-27T00:00:00.000+08:00| 2| +|2024-11-28T08:00:00.000+08:00|2024-11-29T00:00:00.000+08:00| 2| +|2024-11-29T11:00:00.000+08:00|2024-11-30T00:00:00.000+08:00| 2| ++-----------------------------+-----------------------------+---+ +Total line number = 3 +``` + +### 4.3 事件统计查询 + +将 table1 中数据按照设备号分组,统计上海地区湿度大于 35 的开始、结束时间及最大湿度值。 + +* 查询sql + +```SQL +SELECT m.device_id, m.match, m.event_start, m.event_end, m.max_humidity +FROM table1 +MATCH_RECOGNIZE ( + PARTITION BY device_id + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RPR_FIRST(A.time) AS event_start, + RPR_LAST(A.time) AS event_end, + MAX(A.humidity) AS max_humidity + ONE ROW PER MATCH + PATTERN (A+) + DEFINE + A AS A.region= '上海' AND A.humidity> 35 +) AS m +``` + +* 查询结果 + +```SQL ++---------+-----+-----------------------------+-----------------------------+------------+ +|device_id|match| event_start| event_end|max_humidity| ++---------+-----+-----------------------------+-----------------------------+------------+ +| 100| 1|2024-11-28T09:00:00.000+08:00|2024-11-29T18:30:00.000+08:00| 45.1| +| 101| 1|2024-11-30T09:30:00.000+08:00|2024-11-30T09:30:00.000+08:00| 35.2| ++---------+-----+-----------------------------+-----------------------------+------------+ +Total line number = 2**** +``` diff --git a/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md b/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md new file mode 100644 index 000000000..87c42704b --- /dev/null +++ b/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md @@ -0,0 +1,907 @@ + + +# 行模式识别 + +## 1. 概述 + +IoTDB 支持行模式识别,该功能支持通过定义模式变量的识别逻辑以及正则表达式来捕获一段连续的数据,并对每一段捕获的数据进行分析计算,适用于识别时序数据中的特定模式、检测特定事件等业务场景。如果将行模式识别看作对数据进行分组处理,则核心流程大致如下: + +* 通过 PATTERN、DEFINE、SUBSET 子句进行分组捕获 +* 通过 MEASURES 子句对捕获的分组进行计算处理 +* 通过 ROWS PER MATCH 子句设定分组的输出形式 +* 通过 AFTER MATCH SKIP 子句设定如何定位下一个分组的开始位置 + +> 注意:该功能从 V 2.0.5 版本开始提供。 + +## 2. 功能介绍 +### 2.1 语法格式 + +```SQL +MATCH_RECOGNIZE ( + [ PARTITION BY column [, ...] ] + [ ORDER BY column [, ...] ] + [ MEASURES measure_definition [, ...] ] + [ ROWS PER MATCH ] + [ AFTER MATCH skip_to ] + PATTERN ( row_pattern ) + [ SUBSET subset_definition [, ...] ] + DEFINE variable_definition [, ...] +) +``` + +**说明:** + +* PARTITION BY : 可选,用于对输入表进行分组,每个分组能独立进行模式匹配。如果未声明该子句,则整个输入表将作为一个整体进行处理。 +* ORDER BY :可选,用于确保输入数据按某种顺序进行匹配处理。 +* MEASURES :可选,用于指定从匹配到的一段数据中提取哪些信息。 +* ROWS PER MATCH :可选,用于指定模式匹配成功后结果集的输出方式。 +* AFTER MATCH SKIP :可选,用于指定在识别到一个非空匹配后,下一次模式匹配应从哪一行继续进行。 +* PATTERN :用于定义需要匹配的行模式。 +* SUBSET :可选,用于将多个基本模式变量所匹配的行合并为一个逻辑集合。 +* DEFINE :用于定义行模式的基本模式变量。 + +### 2.2 DEFINE 子句 + +用于为模式识别中的每个基本模式变量指定其判断条件。这些变量通常由标识符(如 `A`, `B`)代表,并通过该子句中的布尔表达式精确定义哪些行符合该变量的要求。 + +* 在模式匹配执行过程中,仅当布尔表达式返回 TRUE 时,才会将当前行标记为该变量,从而将其纳入到当前匹配分组中。 + +```SQL +-- 只有在当前行的 totalprice 值小于前一行 totalprice 值的情况下,当前行才可以被识别为 B。 +DEFINE B AS totalprice < PREV(totalprice) +``` + +* **未**在子句中**显式**定义的变量,其匹配条件隐含为恒真(TRUE),即可在任何输入行上成功匹配。 + +### 2.3 SUBSET 子句 + +用于将多个基本模式变量(如 `A`、`B`)匹配到的行合并成一个联合模式变量(如 `U`),使这些行可以被视为同一个逻辑集合进行操作。可用于`MEASURES`、`DEFINE `和`AFTER MATCH SKIP`子句。 + +```SQL +SUBSET U = (A, B) +``` + +例如,对于模式 `PATTERN ((A | B){5} C+)` ,在匹配过程中无法确定第五次重复时具体匹配的是基本模式变量 A 还是 B,因此 + +1. 在 `MEASURES `子句中,若需要引用该阶段最后一次匹配到的行,则可通过定义联合模式变量 `SUBSET U = (A, B)`实现。此时表达式 `RPR_LAST(U.totalprice)` 将直接返回该目标行的 `totalprice` 值。 +2. 在 `AFTER MATCH SKIP` 子句中,若匹配结果中未包含基本模式变量 A 或 B 时,执行 `AFTER MATCH SKIP TO LAST B` 或 `AFTER MATCH SKIP TO LAST A` 会因锚点缺失跳转失败;而通过引入联合模式变量 `SUBSET U = (A, B)`,使用 `AFTER MATCH SKIP TO LAST U` 则始终有效。 + +### 2.4 PATTERN 子句 + +用于定义需要匹配的行模式,其基本构成单元是**基本模式变量。** + +```SQL +PATTERN ( row_pattern ) +``` + +#### 2.4.1 模式种类 + +| 行模式 | 语法格式 | 描述 | +| ----------------------------------- |---------------------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| 模式连接(Pattern Concatenation) | `A B+ C+ D+` | 由不带任何运算符的子模式组成,按声明顺序依次匹配所有子模式。| +| 模式选择(Pattern Alternation) | `A \| B \| C` | 由以`\|`分隔的多个子模式组成,仅匹配其中一个。当多个子模式均可匹配时,选择最左侧的子模式匹配。 | +| 模式排列(Pattern Permutation) | `PERMUTE(A, B, C)` | 该模式等价于对所有子模式元素的不同顺序进行选择匹配,即要求 A、B、C 三者均须匹配,但其出现顺序不固定。当多种匹配顺序均可成功时,依据 PERMUTE 列表中元素的定义先后顺序,按**字典序原则**确定优先级。例如,A B C 为最高优先,C B A 则为最低优先。 | +| 模式分组(Pattern Grouping) | `(A B C)` | 用圆括号将子模式括起,视作一个整体对待,可与其他运算符配合使用。如`(A B C)+`表示连续出现一组`(A B C)`的模式。 | +| 空模式(Empty Pattern) | `()` | 表示一个不包含任何行的空匹配 | +| 模式排除(Pattern Exclusion) | `{- row_pattern -}` | 用于指定在输出中需要排除的匹配部分。通常与`ALL ROWS PER MATCH`选项结合使用,用于输出感兴趣的行。如`PATTERN (A {- B+ C+ -} D+)`,并使用`ALL ROWS PER MATCH`时,输出将仅包含匹配的首行(`A`对应行)与尾部行(`D+`对应行)。 | + +#### 2.4.2 分区起始/结束锚点(Partition Start/End Anchor) + +* `^A` 表示匹配以 A 为分区开始的模式 + * 当 PATTERN 子句的取值为 `^A` 时,要求匹配必须从分区的首行开始,且这一行要满足 `A` 的定义 + * 当 PATTERN 子句的取值为 `^A^` 或 `A^` 时,输出结果为空 +* `A$` 表示匹配以 A 为分区结束的模式 + * 当 PATTERN 子句的取值为 `A$` 时,要求必须在分区的结束位置匹配,并且这一行要满足 `A`的定义 + * 当 PATTERN 子句的取值为 `$A` 或 `$A$` 时,输出结果为空 + +示例介绍可见 [3.3.1 小节](./Row-Pattern-Recognition.md#_3-1-Patter 子句分区锚点) + +#### 2.4.3 量词(Quantifiers) + +量词用于指定子模式重复出现的次数,置于相应子模式之后,如 `(A | B)*`。 + +常用量词如下: + +| 量词 | 描述 | +| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `*` | 零次或多次重复 | +| `+` | 一次或多次重复 | +| `?` | 零次或一次重复 | +| `{n}` | 恰好重复 n 次 | +| `{m, n}` | 重复次数在 m 到 n 之间(m、n 为非负整数)。* 若省略左界,则默认从 0 开始;* 若省略右界,则重复次数不设上限(如 {5,} 等同于“至少重复五次”);* 若同时省略左右界,即 {,},则与 \* 等价。 | + +* 可通过在量词后加 `?` 改变匹配偏好。 + * `{3,5}`:偏好 5 次,最不偏好 3 次;`{3,5}?`:偏好 3 次,最不偏好 5 次 + * `?`:偏好 1 次;`??`:偏好 0 次 + +### 2.5 AFTER MATCH SKIP 子句 + +用于指定在识别到一个非空匹配后,下一次模式匹配应从哪一行继续进行。 + +| 跳转策略 | 描述 | 是否允许识别重叠匹配项 | +| ------------------------------------------------------------- | --------------------------------------------------- | ------------------------ | +| `AFTER MATCH SKIP PAST LAST ROW` | 默认行为。在当前匹配的最后一行之后的下一行开始。 | 否 | +| `AFTER MATCH SKIP TO NEXT ROW` | 在当前匹配中的第二行开始。 | 是 | +| `AFTER MATCH SKIP TO [ FIRST \| LAST ] pattern_variable` | 跳转到某个模式变量的 [ 第一行 | 最后一行 ] 开始。 | 是 | + +* 在所有可能的配置中,仅当 `ALL ROWS PER MATCH WITH UNMATCHED ROWS` 与 `AFTER MATCH SKIP PAST LAST ROW` 联合使用时,系统才能确保对每个输入行恰好生成一条输出记录。 + +示例介绍可见 [3.3.2 小节](./Row-Pattern-Recognition.md#_3-2-AFTER MATCH SKIP 子句) + +### 2.6 ROWS PER MATCH 子句 + +用于指定模式匹配成功后结果集的输出方式,主要包括以下两种选项: + +| 输出方式 | 规则描述 | 输出结果 | **空匹配/未匹配行**处理逻辑 | +| -------------------- | ----------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| ONE ROW PER MATCH | 每一次成功匹配,产生一行输出结果。 | * PARTITION BY 子句中的列* MEASURES 子句中定义的表达式。 | 输出空匹配;跳过未匹配行。 | +| ALL ROWS PER MATCH | 每一次匹配中的每一行都将产生一条输出记录,除非该行通过 exclusion 语法排除。 | * PARTITION BY 子句中的列* ORDER BY 子句中的列* MEASURES 子句中定义的表达式* 输入表中的其余列 | * 默认:输出空匹配;跳过未匹配行。* ALL ROWS PER MATCH​**SHOW EMPTY MATCHES**​:默认输出空匹配,跳过未匹配行* ALL ROWS PER MATCH​**OMIT EMPTY MATCHES**​:不输出空匹配,跳过未匹配行* ALL ROWS PER MATCH​**WITH UNMATCHED ROWS**​:输出空匹配,并为每一条未匹配行额外生成一条输出记录| + +### 2.7 MEASURES 子句 + +用于指定从匹配到的一段数据中提取哪些信息。该子句为可选项,如果未显式指定,则根据 ROWS PER MATCH 子句的设置,部分输入列会成为模式识别的输出结果。 + +```SQL +MEASURES measure_expression AS measure_name [, ...] +``` + +* `measure_expression` 是根据匹配的一段数据计算出的标量值。 + +| 用法示例 | 说明 | +| ---------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | +| `A.totalprice AS starting_price` | 返回匹配分组中第一行(即与变量 A 关联的唯一一行)中的价格,作为起始价格。 | +| `RPR_LAST(B.totalprice) AS bottom_price` | 返回与变量 B 关联的最后一行中的价格,代表“V”形模式中最低点的价格,对应下降区段的末尾。 | +| `RPR_LAST(U.totalprice) AS top_price` | 返回匹配分组中的最高价格,对应变量 C 或 D 所关联的最后一行,即整个匹配分组的末尾。【假设 SUBSET U = (C, D)】 | + +* 每个 `measure_expression `都会定义一个输出列,该列可通过其指定的 `measure_name `进行引用。 + +### 2.8 行模式识别表达式 + +在 MEASURES 与 DEFINE 子句中使用的表达式为​**标量表达式**​,用于在输入表的行级上下文中求值。**标量表达式**除了支持标准 SQL 语法外,还支持针对行模式识别的特殊扩展函数。 + +#### 2.8.1 模式变量引用 + +```SQL +A.totalprice +U.orderdate +orderstatus +``` + +* 当列名前缀为某**基本模式变量**或**联合模式变量**时,表示引用该变量所匹配的所有行的对应列值。 +* 若列名不带前缀,则等同于使用“​**全局联合模式变量**​”(即所有基本模式变量的并集)作前缀,表示引用当前匹配中所有行的该列值。 + +> 不允许在模式识别表达式中使用表名作列名前缀。 + +#### 2.8.2 扩展函数 + +| 函数名 | 函数式 | 描述 | +|------------------| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `MATCH_NUMBER`函数 | `MATCH_NUMBER()` | 返回当前匹配在分区内的序号,从 1 开始计数。空匹配与非空匹配一致,也占用匹配序号。 | +| `CLASSIFIER `函数 | `CLASSIFIER(option)`| 1. 返回当前行所映射的基本模式变量名称。1. `option`是一个可选参数:可以传入基本模式变量`CLASSIFIER(A)`或联合模式变量`CLASSIFIER(U)`,用于限定函数作用范围,对于不在范围内的行,直接返回 NULL。在对联合模式变量使用时,可用于辨别该行究竟映射至并集中哪一个基本模式变量。 | +| 逻辑导航函数 | `RPR_FIRST(expr, k)` | 1. 表示从**当前匹配分组**中,定位至第一个满足 expr 的行,在此基础上再向分组尾部方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至首个满足条件的行;若显式指定,必须为非负整数。 | +| 逻辑导航函数 | `RPR_LAST(expr, k)`| 1. 表示从**当前匹配分组**中,定位至最后一个满足 expr 的行,在此基础上再向分组开头方向搜索到第 k 次出现的同一模式变量对应行,返回该行的指定列值。如果在指定方向上未能找到第 k 次匹配行,则函数返回 NULL。1. 其中 k 是可选参数,默认为 0,表示仅定位至末个满足条件的行;若显式指定,必须为非负整数。 | +| 物理导航函数 | `PREV(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向开头方向偏移 k 行,返回对应列值。若导航超出​**分区边界**​,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 | +| 物理导航函数 |`NEXT(expr, k)` | 1. 表示从最后一次匹配至给定模式变量的行开始,向尾部方向偏移 k 行,返回对应列值。若导航超出​**分区边界**​,则函数返回 NULL。1. 其中 k 是可选参数,默认为 1;若显式指定,必须为非负整数。 | +| 聚合函数 | COUNT、SUM、AVG、MAX、MIN 函数 | 可用于对当前匹配中的数据进行计算。聚合函数与导航函数不允许互相嵌套。(V 2.0.6 版本起支持) | +| 嵌套函数 | `PREV/NEXT(CLASSIFIER())` | 物理导航函数与 CLASSIFIER 函数嵌套。用于获取当前行的前一个和后一个匹配行所对应的模式变量 | +| 嵌套函数 |`PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | 物理函数内部**允许嵌套**逻辑函数,逻辑函数内部**不允许嵌套**物理函数。用于先进行逻辑偏移,再进行物理偏移。 | + +示例介绍可见 [3.3.3 小节](./Row-Pattern-Recognition.md#_3-3-行模式表达式-扩展函数) + +#### 2.8.3 RUNNING 和 FINAL 语义 +1. 定义 + +* `RUNNING`: 表示计算范围为当前匹配分组内,从分组的起始行到当前正在处理的行(即到当前行为止)。 +* `FINAL`: 表示计算范围为当前匹配分组内,从分组的起始行到分组的最终行(即整个匹配分组)。 + +2. 作用范围 + +* DEFINE 子句默认采用 RUNNING 语义。 +* MEASURES 子句默认采用 RUNNING 语义,支持指定 FINAL 语义。当采用 ONE ROW PER MATCH 输出模式时,所有表达式都从匹配分组的末行位置进行计算,此时 RUNNING 语义与 FINAL 语义等价。 + +3. 语法约束 + +* RUNNING 和 FINAL 需要写在**逻辑导航函数**或聚合函数之前,不能直接作用于**列引用。** + * 合法:`RUNNING RPP_LAST(A.totalprice)`、`FINAL RPP_LAST(A.totalprice)` + * 非法:`RUNNING A.totalprice`、`FINAL A.totalprice`、 `RUNNING PREV(A.totalprice)` + +## 3. 语法示例 + +原始数据 + +```SQL +IoTDB:database3> select * from t ++-----------------------------+------+----------+ +| time|device|totalprice| ++-----------------------------+------+----------+ +|2025-01-01T00:01:00.000+08:00| d1| 90| +|2025-01-01T00:02:00.000+08:00| d1| 80| +|2025-01-01T00:03:00.000+08:00| d1| 70| +|2025-01-01T00:04:00.000+08:00| d1| 80| +|2025-01-01T00:05:00.000+08:00| d1| 70| +|2025-01-01T00:06:00.000+08:00| d1| 80| ++-----------------------------+------+----------+ + +-- 创建语句 +create table t(device tag, totalprice int32 field) + +insert into t(time,device,totalprice) values(2025-01-01T00:01:00, 'd1', 90),(2025-01-01T00:02:00, 'd1', 80),(2025-01-01T00:03:00, 'd1', 70),(2025-01-01T00:04:00, 'd1', 80),(2025-01-01T00:05:00, 'd1', 70),(2025-01-01T00:06:00, 'd1', 80) +``` + +### 3.1 Patter 子句分区锚点 + +* 查询 sql + +```SQL +SELECT m.time, m.match, m.price, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER() AS label + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN %s -- PATTERN 子句 + DEFINE A AS true +) AS m; +``` + +* 查询结果 + * 当 PATTERN 子句为 PATTERN (^A) 时 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + +-----------------------------+-----+-----+-----+ + Total line number = 1 + ``` + + * 当 PATTERN 子句为 PATTERN (^A^) 时 + + ```SQL + +----+-----+-----+-----+ + |time|match|price|label| + +----+-----+-----+-----+ + +----+-----+-----+-----+ + Empty set. + ``` + + * 当 PATTERN 子句为 PATTERN (A\$) 时 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:06:00.000+08:00| 1| 80| A| + +-----------------------------+-----+-----+-----+ + Total line number = 1 + ``` + + * 当 PATTERN 子句为 PATTERN (\$A\$) 时 + + ```SQL + +----+-----+-----+-----+ + |time|match|price|label| + +----+-----+-----+-----+ + +----+-----+-----+-----+ + Empty set. + ``` + +### 3.2 AFTER MATCH SKIP 子句 + +* 查询 sql + +```SQL +SELECT m.time, m.match, m.price, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER() AS label + ALL ROWS PER MATCH + %s -- AFTER MATCH SKIP 子句 + PATTERN (A B+ C+ D?) + SUBSET U = (C, D) + DEFINE + B AS B.totalprice < PREV (B.totalprice), + C AS C.totalprice > PREV (C.totalprice), + D AS false -- 永远不会匹配成功 +) AS m; +``` + +* 查询结果 + * 当 AFTER MATCH SKIP PAST LAST ROW 时 + * 第一次匹配:第 1、2、3、4 行 + * 第二次匹配:根据 `AFTER MATCH SKIP PAST LAST ROW` 语义,从第 5 行开始,无法再找寻到一个合法匹配 + * 此模式一定不会出现重叠匹配 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 4 + ``` + + * 当 AFTER MATCH SKIP TO NEXT ROW 时 + * 第一次匹配:第 1、2、3、4 行 + * 第二次匹配:根据 `AFTER MATCH SKIP TO NEXT ROW` 语义,从第 2 行开始,匹配:第 2、3、4 行 + * 第三次匹配:尝试从第 3 行开始,失败 + * 第三次匹配:尝试从第 4 行开始,成功,匹配第 4、5、6行 + * 此模式允许出现重叠匹配 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:02:00.000+08:00| 2| 80| A| + |2025-01-01T00:03:00.000+08:00| 2| 70| B| + |2025-01-01T00:04:00.000+08:00| 2| 80| C| + |2025-01-01T00:04:00.000+08:00| 3| 80| A| + |2025-01-01T00:05:00.000+08:00| 3| 70| B| + |2025-01-01T00:06:00.000+08:00| 3| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 10 + ``` + + * 当 AFTER MATCH SKIP TO FIRST C 时 + * 第一次匹配:第 1、2、3、4 行 + * 第二次匹配:从第一个 C (也就是第 4 行)处开始,匹配第4、5、6行 + * 此模式允许出现重叠匹配 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * 当 AFTER MATCH SKIP TO LAST B 或 AFTER MATCH SKIP TO B 时 + * 第一次匹配:第 1、2、3、4 行 + * 第二次匹配:尝试从最后一个 B (也就是第 3 行)处开始,失败 + * 第二次匹配:尝试从第 4 行开始,成功匹配第4、5、6行 + * 此模式允许出现重叠匹配 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * 当 AFTER MATCH SKIP TO U 时 + * 第一次匹配:第 1、2、3、4 行 + * 第二次匹配:`SKIP TO U` 表示跳转到最后一个 C 或 D,D 永远不可能匹配成功,所以就是跳转到最后一个 C(也就是第 4 行),成功匹配第4、5、6行 + * 此模式允许出现重叠匹配 + + ```SQL + +-----------------------------+-----+-----+-----+ + | time|match|price|label| + +-----------------------------+-----+-----+-----+ + |2025-01-01T00:01:00.000+08:00| 1| 90| A| + |2025-01-01T00:02:00.000+08:00| 1| 80| B| + |2025-01-01T00:03:00.000+08:00| 1| 70| B| + |2025-01-01T00:04:00.000+08:00| 1| 80| C| + |2025-01-01T00:04:00.000+08:00| 2| 80| A| + |2025-01-01T00:05:00.000+08:00| 2| 70| B| + |2025-01-01T00:06:00.000+08:00| 2| 80| C| + +-----------------------------+-----+-----+-----+ + Total line number = 7 + ``` + + * 当 AFTER MATCH SKIP TO A 时,不能跳转到匹配的第一行, 否则会造成死循环 + + ```SQL + Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: cannot skip to first row of match + ``` + + * 当 AFTER MATCH SKIP TO B 时,不能跳转到匹配分组中不存在的模式变量 + + ```SQL + Msg: org.apache.iotdb.jdbc.IoTDBSQLException: 701: AFTER MATCH SKIP TO failed: pattern variable is not present in match + ``` + +### 3.3 行模式表达式-扩展函数 +#### 3.3.1 CLASSIFIER()函数 + +* 查询 sql + +```SQL +SELECT m.time, m.match, m.price, m.lower_or_higher, m.label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER(U) AS lower_or_higher, + CLASSIFIER(W) AS label + ALL ROWS PER MATCH + PATTERN ((L | H) A) + SUBSET + U = (L, H), + W = (A, L, H) + DEFINE + A AS A.totalprice = 80, + L AS L.totalprice < 80, + H AS H.totalprice > 80 +) AS m; +``` + +* 查询结果 + +```SQL ++-----------------------------+-----+-----+---------------+-----+ +| time|match|price|lower_or_higher|label| ++-----------------------------+-----+-----+---------------+-----+ +|2025-01-01T00:01:00.000+08:00| 1| 90| H| H| +|2025-01-01T00:02:00.000+08:00| 1| 80| H| A| +|2025-01-01T00:03:00.000+08:00| 2| 70| L| L| +|2025-01-01T00:04:00.000+08:00| 2| 80| L| A| +|2025-01-01T00:05:00.000+08:00| 3| 70| L| L| +|2025-01-01T00:06:00.000+08:00| 3| 80| L| A| ++-----------------------------+-----+-----+---------------+-----+ +Total line number = 6 +``` + +#### 3.3.2 逻辑导航函数 + +* 查询 sql + +```SQL +SELECT m.time, m.measure +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + %s AS measure -- MEASURES 子句 + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* 查询结果 + * 当取值为 totalprice、RPR\_LAST(totalprice)、RUNNING RPR\_LAST(totalprice) 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 90| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 70| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * 当取值为 FINAL RPR\_LAST(totalprice) 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 80| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 80| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * 当取值为 RPR\_FIRST(totalprice)、 RUNNING RPR\_FIRST(totalprice)、FINAL RPR\_FIRST(totalprice)时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 90| + |2025-01-01T00:02:00.000+08:00| 90| + |2025-01-01T00:03:00.000+08:00| 90| + |2025-01-01T00:04:00.000+08:00| 90| + |2025-01-01T00:05:00.000+08:00| 90| + |2025-01-01T00:06:00.000+08:00| 90| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * 当取值为 RPR\_LAST(totalprice, 2) 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| null| + |2025-01-01T00:02:00.000+08:00| null| + |2025-01-01T00:03:00.000+08:00| 90| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * 当取值为 FINAL RPP\_LAST(totalprice, 2) 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 80| + |2025-01-01T00:02:00.000+08:00| 80| + |2025-01-01T00:03:00.000+08:00| 80| + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:05:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 6 + ``` + + * 当取值为 RPR\_FIRST(totalprice, 2) 和 FINAL RPR\_FIRST(totalprice, 2) 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:01:00.000+08:00| 70| + |2025-01-01T00:02:00.000+08:00| 70| + |2025-01-01T00:03:00.000+08:00| 70| + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:05:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 70| + +-----------------------------+-------+ + Total line number = 6 + ``` + +#### 3.3.3 物理导航函数 + +* 查询 sql + +```SQL +SELECT m.time, m.measure +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + %s AS measure -- MEASURES 子句 + ALL ROWS PER MATCH + PATTERN (B) + DEFINE B AS B.totalprice >= PREV(B.totalprice) +) AS m; +``` + +* 查询结果 + * 当取值为 `PREV(totalprice)` 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| 70| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * 当取值为 `PREV(B.totalprice, 2)` 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * 当取值为 `PREV(B.totalprice, 4)` 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| null| + |2025-01-01T00:06:00.000+08:00| 80| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * 当取值为 `NEXT(totalprice)` 或 `NEXT(B.totalprice, 1)` 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 70| + |2025-01-01T00:06:00.000+08:00| null| + +-----------------------------+-------+ + Total line number = 2 + ``` + + * `当取值为 NEXT(B.totalprice, 2)` 时 + + ```SQL + +-----------------------------+-------+ + | time|measure| + +-----------------------------+-------+ + |2025-01-01T00:04:00.000+08:00| 80| + |2025-01-01T00:06:00.000+08:00| null| + +-----------------------------+-------+ + Total line number = 2 + ``` + +#### 3.3.4 聚合函数 + +* 查询 sql + +```SQL +SELECT m.time, m.count, m.avg, m.sum, m.min, m.max +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + COUNT(*) AS count, + AVG(totalprice) AS avg, + SUM(totalprice) AS sum, + MIN(totalprice) AS min, + MAX(totalprice) AS max + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* 查询结果 + +```SQL ++-----------------------------+-----+-----------------+-----+---+---+ +| time|count| avg| sum|min|max| ++-----------------------------+-----+-----------------+-----+---+---+ +|2025-01-01T00:01:00.000+08:00| 1| 90.0| 90.0| 90| 90| +|2025-01-01T00:02:00.000+08:00| 2| 85.0|170.0| 80| 90| +|2025-01-01T00:03:00.000+08:00| 3| 80.0|240.0| 70| 90| +|2025-01-01T00:04:00.000+08:00| 4| 80.0|320.0| 70| 90| +|2025-01-01T00:05:00.000+08:00| 5| 78.0|390.0| 70| 90| +|2025-01-01T00:06:00.000+08:00| 6|78.33333333333333|470.0| 70| 90| ++-----------------------------+-----+-----------------+-----+---+---+ +Total line number = 6 +``` + +#### 3.3.5 嵌套函数 +1. 示例一 + +* 查询 sql + +```SQL +SELECT m.time, m.match, m.price, m.lower_or_higher, m.label, m.prev_label, m.next_label +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RUNNING RPR_LAST(totalprice) AS price, + CLASSIFIER(U) AS lower_or_higher, + CLASSIFIER(W) AS label, + PREV(CLASSIFIER(W)) AS prev_label, + NEXT(CLASSIFIER(W)) AS next_label + ALL ROWS PER MATCH + PATTERN ((L | H) A) + SUBSET + U = (L, H), + W = (A, L, H) + DEFINE + A AS A.totalprice = 80, + L AS L.totalprice < 80, + H AS H.totalprice > 80 +) AS m; +``` + +* 查询结果 + +```SQL ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +| time|match|price|lower_or_higher|label|prev_label|next_label| ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +|2025-01-01T00:01:00.000+08:00| 1| 90| H| H| null| A| +|2025-01-01T00:02:00.000+08:00| 1| 80| H| A| H| null| +|2025-01-01T00:03:00.000+08:00| 2| 70| L| L| null| A| +|2025-01-01T00:04:00.000+08:00| 2| 80| L| A| L| null| +|2025-01-01T00:05:00.000+08:00| 3| 70| L| L| null| A| +|2025-01-01T00:06:00.000+08:00| 3| 80| L| A| L| null| ++-----------------------------+-----+-----+---------------+-----+----------+----------+ +Total line number = 6 +``` + +2. 示例二 + +* 查询 sql + +```SQL +SELECT m.time, m.prev_last_price, m.next_first_price +FROM t +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + PREV(RPR_LAST(totalprice), 2) AS prev_last_price, + NEXT(RPR_FIRST(totalprice), 2) as next_first_price + ALL ROWS PER MATCH + PATTERN (A+) + DEFINE A AS true +) AS m; +``` + +* 查询结果 + +```SQL ++-----------------------------+---------------+----------------+ +| time|prev_last_price|next_first_price| ++-----------------------------+---------------+----------------+ +|2025-01-01T00:01:00.000+08:00| null| 70| +|2025-01-01T00:02:00.000+08:00| null| 70| +|2025-01-01T00:03:00.000+08:00| 90| 70| +|2025-01-01T00:04:00.000+08:00| 80| 70| +|2025-01-01T00:05:00.000+08:00| 70| 70| +|2025-01-01T00:06:00.000+08:00| 80| 70| ++-----------------------------+---------------+----------------+ +Total line number = 6 +``` + +## 4. 场景示例 + +以[示例数据](../Reference/Sample-Data.md)为源数据 + +### 4.1 时间分段查询 + +将 table1 中的数据按照时间间隔小于等于 24 小时分段,查询每段中的数据总条数,以及开始、结束时间。 + +查询SQL + +```SQL +SELECT start_time, end_time, cnt +FROM table1 +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + RPR_FIRST(A.time) AS start_time, + RPR_LAST(time) AS end_time, + COUNT() AS cnt + PATTERN (A B*) + DEFINE B AS (cast(B.time as INT64) - cast(PREV(B.time) as INT64)) <= 86400000 +) AS m +``` + +查询结果 + +```SQL ++-----------------------------+-----------------------------+---+ +| start_time| end_time|cnt| ++-----------------------------+-----------------------------+---+ +|2024-11-26T13:37:00.000+08:00|2024-11-26T13:38:00.000+08:00| 2| +|2024-11-27T16:38:00.000+08:00|2024-11-30T14:30:00.000+08:00| 16| ++-----------------------------+-----------------------------+---+ +Total line number = 2 +``` + +### 4.2 差值分段查询 + +将 table2 中的数据按照 humidity 湿度值差值小于 0.1 分段,查询每段中的数据总条数,以及开始、结束时间。 + +* 查询sql + +```SQL +SELECT start_time, end_time, cnt +FROM table2 +MATCH_RECOGNIZE ( + ORDER BY time + MEASURES + RPR_FIRST(A.time) AS start_time, + RPR_LAST(time) AS end_time, + COUNT() AS cnt + PATTERN (A B*) + DEFINE B AS (B.humidity - PREV(B.humidity )) <=0.1 +) AS m; +``` + +* 查询结果 + +```SQL ++-----------------------------+-----------------------------+---+ +| start_time| end_time|cnt| ++-----------------------------+-----------------------------+---+ +|2024-11-26T13:37:00.000+08:00|2024-11-27T00:00:00.000+08:00| 2| +|2024-11-28T08:00:00.000+08:00|2024-11-29T00:00:00.000+08:00| 2| +|2024-11-29T11:00:00.000+08:00|2024-11-30T00:00:00.000+08:00| 2| ++-----------------------------+-----------------------------+---+ +Total line number = 3 +``` + +### 4.3 事件统计查询 + +将 table1 中数据按照设备号分组,统计上海地区湿度大于 35 的开始、结束时间及最大湿度值。 + +* 查询sql + +```SQL +SELECT m.device_id, m.match, m.event_start, m.event_end, m.max_humidity +FROM table1 +MATCH_RECOGNIZE ( + PARTITION BY device_id + ORDER BY time + MEASURES + MATCH_NUMBER() AS match, + RPR_FIRST(A.time) AS event_start, + RPR_LAST(A.time) AS event_end, + MAX(A.humidity) AS max_humidity + ONE ROW PER MATCH + PATTERN (A+) + DEFINE + A AS A.region= '上海' AND A.humidity> 35 +) AS m +``` + +* 查询结果 + +```SQL ++---------+-----+-----------------------------+-----------------------------+------------+ +|device_id|match| event_start| event_end|max_humidity| ++---------+-----+-----------------------------+-----------------------------+------------+ +| 100| 1|2024-11-28T09:00:00.000+08:00|2024-11-29T18:30:00.000+08:00| 45.1| +| 101| 1|2024-11-30T09:30:00.000+08:00|2024-11-30T09:30:00.000+08:00| 35.2| ++---------+-----+-----------------------------+-----------------------------+------------+ +Total line number = 2**** +``` From f0ee8d3d9d61cf9a183336eee9e9a6fad583a8fd Mon Sep 17 00:00:00 2001 From: Leto_b Date: Wed, 3 Dec 2025 09:55:59 +0800 Subject: [PATCH 2/4] add row pattern recognition to tablemodel --- .../Master/Table/SQL-Manual/Row-Pattern-Recognition.md | 2 +- .../latest-Table/SQL-Manual/Row-Pattern-Recognition.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md b/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md index c029bcaef..0fee25601 100644 --- a/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md +++ b/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md @@ -112,7 +112,7 @@ PATTERN ( row_pattern ) * When the value of the PATTERN clause is `A$`, the match must end at the end of the partition, and this row must satisfy the definition of `A`. * When the value of the PATTERN clause is `$A` or `$A$`, the output result is empty. -For example illustrations, see[Section 3.3.1](./Row-Pattern-Recognition.md#_3-1-Patter Clause Partition Anchor) +For example illustrations, see[Section 3.3.1](./Row-Pattern-Recognition.md#_3-1-Patter-Clause-Partition-Anchor) #### 2.4.3 Quantifiers diff --git a/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md b/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md index c029bcaef..0fee25601 100644 --- a/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md +++ b/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md @@ -112,7 +112,7 @@ PATTERN ( row_pattern ) * When the value of the PATTERN clause is `A$`, the match must end at the end of the partition, and this row must satisfy the definition of `A`. * When the value of the PATTERN clause is `$A` or `$A$`, the output result is empty. -For example illustrations, see[Section 3.3.1](./Row-Pattern-Recognition.md#_3-1-Patter Clause Partition Anchor) +For example illustrations, see[Section 3.3.1](./Row-Pattern-Recognition.md#_3-1-Patter-Clause-Partition-Anchor) #### 2.4.3 Quantifiers From 755f8d8692445b828c524a2adf34c8897b84e7f8 Mon Sep 17 00:00:00 2001 From: Leto_b Date: Wed, 3 Dec 2025 10:05:21 +0800 Subject: [PATCH 3/4] add row pattern recognition to tablemodel --- .../Master/Table/SQL-Manual/Row-Pattern-Recognition.md | 6 +++--- .../latest-Table/SQL-Manual/Row-Pattern-Recognition.md | 6 +++--- .../Master/Table/SQL-Manual/Row-Pattern-Recognition.md | 6 +++--- .../latest-Table/SQL-Manual/Row-Pattern-Recognition.md | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md b/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md index 0fee25601..1b8e4187a 100644 --- a/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md +++ b/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md @@ -112,7 +112,7 @@ PATTERN ( row_pattern ) * When the value of the PATTERN clause is `A$`, the match must end at the end of the partition, and this row must satisfy the definition of `A`. * When the value of the PATTERN clause is `$A` or `$A$`, the output result is empty. -For example illustrations, see[Section 3.3.1](./Row-Pattern-Recognition.md#_3-1-Patter-Clause-Partition-Anchor) +For example illustrations, see[Section 3.1](./Row-Pattern-Recognition.md#_3-1-Patter-Clause-Partition-Anchor) #### 2.4.3 Quantifiers @@ -144,7 +144,7 @@ Used to specify which row to start the next pattern match from after identifying * Among all possible configurations, only when `ALL ROWS PER MATCH WITH UNMATCHED ROWS` is used in combination with `AFTER MATCH SKIP PAST LAST ROW` can the system ensure that exactly one output record is generated for each input row. -For example illustrations, see [Section 3.3.2](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-Clause) +For example illustrations, see [Section 3.2](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-Clause) ### 2.6 ROWS PER MATCH Clause @@ -208,7 +208,7 @@ orderstatus | Nested Functions | `PREV/NEXT(CLASSIFIER())` | Nesting of physical navigation functions and the CLASSIFIER function. Used to obtain the pattern variables corresponding to the previous and next matching rows of the current row. | | Nested Functions | `PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | **Logical functions are allowed to be nested** inside physical functions; **physical functions are not allowed to be nested** inside logical functions. Used to perform logical offset first, then physical offset. | -For example illustrations, see [Section 3.3.3](./Row-Pattern-Recognition.md#_3-3-Row-Pattern-Expressions-Extended-Functions) +For example illustrations, see [Section 3.3](./Row-Pattern-Recognition.md#_3-3-Row-Pattern-Expressions-Extended-Functions) #### 2.8.3 RUNNING and FINAL Semantics diff --git a/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md b/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md index 0fee25601..1b8e4187a 100644 --- a/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md +++ b/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md @@ -112,7 +112,7 @@ PATTERN ( row_pattern ) * When the value of the PATTERN clause is `A$`, the match must end at the end of the partition, and this row must satisfy the definition of `A`. * When the value of the PATTERN clause is `$A` or `$A$`, the output result is empty. -For example illustrations, see[Section 3.3.1](./Row-Pattern-Recognition.md#_3-1-Patter-Clause-Partition-Anchor) +For example illustrations, see[Section 3.1](./Row-Pattern-Recognition.md#_3-1-Patter-Clause-Partition-Anchor) #### 2.4.3 Quantifiers @@ -144,7 +144,7 @@ Used to specify which row to start the next pattern match from after identifying * Among all possible configurations, only when `ALL ROWS PER MATCH WITH UNMATCHED ROWS` is used in combination with `AFTER MATCH SKIP PAST LAST ROW` can the system ensure that exactly one output record is generated for each input row. -For example illustrations, see [Section 3.3.2](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-Clause) +For example illustrations, see [Section 3.2](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-Clause) ### 2.6 ROWS PER MATCH Clause @@ -208,7 +208,7 @@ orderstatus | Nested Functions | `PREV/NEXT(CLASSIFIER())` | Nesting of physical navigation functions and the CLASSIFIER function. Used to obtain the pattern variables corresponding to the previous and next matching rows of the current row. | | Nested Functions | `PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | **Logical functions are allowed to be nested** inside physical functions; **physical functions are not allowed to be nested** inside logical functions. Used to perform logical offset first, then physical offset. | -For example illustrations, see [Section 3.3.3](./Row-Pattern-Recognition.md#_3-3-Row-Pattern-Expressions-Extended-Functions) +For example illustrations, see [Section 3.3](./Row-Pattern-Recognition.md#_3-3-Row-Pattern-Expressions-Extended-Functions) #### 2.8.3 RUNNING and FINAL Semantics diff --git a/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md b/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md index 87c42704b..acd684051 100644 --- a/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md +++ b/src/zh/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md @@ -113,7 +113,7 @@ PATTERN ( row_pattern ) * 当 PATTERN 子句的取值为 `A$` 时,要求必须在分区的结束位置匹配,并且这一行要满足 `A`的定义 * 当 PATTERN 子句的取值为 `$A` 或 `$A$` 时,输出结果为空 -示例介绍可见 [3.3.1 小节](./Row-Pattern-Recognition.md#_3-1-Patter 子句分区锚点) +示例介绍可见 [3.1 小节](./Row-Pattern-Recognition.md#_3-1-Patter-子句分区锚点) #### 2.4.3 量词(Quantifiers) @@ -145,7 +145,7 @@ PATTERN ( row_pattern ) * 在所有可能的配置中,仅当 `ALL ROWS PER MATCH WITH UNMATCHED ROWS` 与 `AFTER MATCH SKIP PAST LAST ROW` 联合使用时,系统才能确保对每个输入行恰好生成一条输出记录。 -示例介绍可见 [3.3.2 小节](./Row-Pattern-Recognition.md#_3-2-AFTER MATCH SKIP 子句) +示例介绍可见 [3.2 小节](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-子句) ### 2.6 ROWS PER MATCH 子句 @@ -205,7 +205,7 @@ orderstatus | 嵌套函数 | `PREV/NEXT(CLASSIFIER())` | 物理导航函数与 CLASSIFIER 函数嵌套。用于获取当前行的前一个和后一个匹配行所对应的模式变量 | | 嵌套函数 |`PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | 物理函数内部**允许嵌套**逻辑函数,逻辑函数内部**不允许嵌套**物理函数。用于先进行逻辑偏移,再进行物理偏移。 | -示例介绍可见 [3.3.3 小节](./Row-Pattern-Recognition.md#_3-3-行模式表达式-扩展函数) +示例介绍可见 [3.3 小节](./Row-Pattern-Recognition.md#_3-3-行模式表达式-扩展函数) #### 2.8.3 RUNNING 和 FINAL 语义 1. 定义 diff --git a/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md b/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md index 87c42704b..acd684051 100644 --- a/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md +++ b/src/zh/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md @@ -113,7 +113,7 @@ PATTERN ( row_pattern ) * 当 PATTERN 子句的取值为 `A$` 时,要求必须在分区的结束位置匹配,并且这一行要满足 `A`的定义 * 当 PATTERN 子句的取值为 `$A` 或 `$A$` 时,输出结果为空 -示例介绍可见 [3.3.1 小节](./Row-Pattern-Recognition.md#_3-1-Patter 子句分区锚点) +示例介绍可见 [3.1 小节](./Row-Pattern-Recognition.md#_3-1-Patter-子句分区锚点) #### 2.4.3 量词(Quantifiers) @@ -145,7 +145,7 @@ PATTERN ( row_pattern ) * 在所有可能的配置中,仅当 `ALL ROWS PER MATCH WITH UNMATCHED ROWS` 与 `AFTER MATCH SKIP PAST LAST ROW` 联合使用时,系统才能确保对每个输入行恰好生成一条输出记录。 -示例介绍可见 [3.3.2 小节](./Row-Pattern-Recognition.md#_3-2-AFTER MATCH SKIP 子句) +示例介绍可见 [3.2 小节](./Row-Pattern-Recognition.md#_3-2-AFTER-MATCH-SKIP-子句) ### 2.6 ROWS PER MATCH 子句 @@ -205,7 +205,7 @@ orderstatus | 嵌套函数 | `PREV/NEXT(CLASSIFIER())` | 物理导航函数与 CLASSIFIER 函数嵌套。用于获取当前行的前一个和后一个匹配行所对应的模式变量 | | 嵌套函数 |`PREV/NEXT(RPR_FIRST/RPR_LAST(expr, k)`) | 物理函数内部**允许嵌套**逻辑函数,逻辑函数内部**不允许嵌套**物理函数。用于先进行逻辑偏移,再进行物理偏移。 | -示例介绍可见 [3.3.3 小节](./Row-Pattern-Recognition.md#_3-3-行模式表达式-扩展函数) +示例介绍可见 [3.3 小节](./Row-Pattern-Recognition.md#_3-3-行模式表达式-扩展函数) #### 2.8.3 RUNNING 和 FINAL 语义 1. 定义 From cbdd12bea027aaee82434ce506e4e9518c95dff2 Mon Sep 17 00:00:00 2001 From: Leto_b Date: Wed, 3 Dec 2025 10:09:20 +0800 Subject: [PATCH 4/4] add row pattern recognition to tablemodel --- .../Master/Table/SQL-Manual/Row-Pattern-Recognition.md | 2 +- .../latest-Table/SQL-Manual/Row-Pattern-Recognition.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md b/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md index 1b8e4187a..e428fe52c 100644 --- a/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md +++ b/src/UserGuide/Master/Table/SQL-Manual/Row-Pattern-Recognition.md @@ -883,7 +883,7 @@ Total line number = 6 ## 4. Scenario Examples -Using [Sample Data](./Reference/Sample-Data.md) as the source data +Using [Sample Data](../Reference/Sample-Data.md) as the source data ### 4.1 Time Segment Query diff --git a/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md b/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md index 1b8e4187a..e428fe52c 100644 --- a/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md +++ b/src/UserGuide/latest-Table/SQL-Manual/Row-Pattern-Recognition.md @@ -883,7 +883,7 @@ Total line number = 6 ## 4. Scenario Examples -Using [Sample Data](./Reference/Sample-Data.md) as the source data +Using [Sample Data](../Reference/Sample-Data.md) as the source data ### 4.1 Time Segment Query