From 5efe5d96e556e84a557ff809e0243d45f5caf69a Mon Sep 17 00:00:00 2001 From: Ramin Gharib Date: Fri, 24 Apr 2026 15:14:02 +0200 Subject: [PATCH 1/6] [FLINK-39537][table] Apply conditional SET_SEMANTIC_TABLE trait to FROM_CHANGELOG --- .../docs/sql/reference/queries/changelog.md | 12 +++- .../org/apache/flink/table/api/Table.java | 11 ++++ .../functions/BuiltInFunctionDefinitions.java | 15 +++-- .../strategies/FromChangelogTypeStrategy.java | 56 ++++++++++++---- .../stream/FromChangelogSemanticTests.java | 2 + .../stream/FromChangelogTestPrograms.java | 65 +++++++++++++++++++ .../plan/stream/sql/FromChangelogTest.java | 14 ++++ .../plan/stream/sql/FromChangelogTest.xml | 20 ++++++ .../functions/ptf/FromChangelogFunction.java | 19 +----- 9 files changed, 178 insertions(+), 36 deletions(-) diff --git a/docs/content/docs/sql/reference/queries/changelog.md b/docs/content/docs/sql/reference/queries/changelog.md index 34945439f92b0..4be00afd39df9 100644 --- a/docs/content/docs/sql/reference/queries/changelog.md +++ b/docs/content/docs/sql/reference/queries/changelog.md @@ -45,7 +45,7 @@ Note: This version requires that your CDC data encodes updates using a full imag ```sql SELECT * FROM FROM_CHANGELOG( - input => TABLE source_table, + input => TABLE source_table [PARTITION BY key_col], [op => DESCRIPTOR(op_column_name),] [op_mapping => MAP[ 'c, r', 'INSERT', @@ -61,7 +61,7 @@ SELECT * FROM FROM_CHANGELOG( | Parameter | Required | Description | |:-------------|:---------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `input` | Yes | The input 
table. Must be append-only. | +| `input` | Yes | The input table. Must be append-only. Use `PARTITION BY` to ensure rows for the same key are processed together. This is required when downstream operators are keyed on that column. | | `op` | No | A `DESCRIPTOR` with a single column name for the operation code column. Defaults to `op`. The column must exist in the input table and be of type STRING. | | `op_mapping` | No | A `MAP` mapping user-defined codes to Flink change operation names. Keys are user-defined codes (e.g., `'c'`, `'u'`, `'d'`), values are Flink change operation names (`INSERT`, `UPDATE_BEFORE`, `UPDATE_AFTER`, `DELETE`). Keys can contain comma-separated codes to map multiple codes to the same operation (e.g., `'c, r'`). Each change operation may appear at most once across all entries. | | `error_handling` | No | Controls behavior when an input row's operation code is `NULL` or not present in the `op_mapping`. Valid values: `FAIL` (default) — throw a `TableRuntimeException`, `SKIP` — silently drop the row. | @@ -127,6 +127,14 @@ SELECT * FROM FROM_CHANGELOG( -- The operation column named 'operation' is used instead of 'op' ``` +#### Partitioning by a key + +```sql +SELECT * FROM FROM_CHANGELOG( + input => TABLE cdc_stream PARTITION BY id +) +``` + #### Invalid operation code handling Two `error_handling` modes are supported. The job can either fail upon an invalid or unknown op code, or skip the row and continue processing. 
diff --git a/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/Table.java b/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/Table.java index eb61371329c2f..aa2417d4531ce 100644 --- a/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/Table.java +++ b/flink-table/flink-table-api-java/src/main/java/org/apache/flink/table/api/Table.java @@ -1467,6 +1467,17 @@ default TableResult executeInsert( * TableRuntimeException} when an input row's op code is {@code NULL} or not present in the * mapping; pass {@code error_handling => 'SKIP'} to silently drop those rows instead. * + *

By default, the input is processed with row semantics (each row independently). To + * co-locate rows with the same key in the same parallel operator instance, partition the input + * first via {@link #partitionBy(Expression...)} and invoke the function via {@link + * PartitionedTable#process(String, Object...)}: + * + *

{@code
+     * Table result = cdcStream
+     *     .partitionBy($("id"))
+     *     .process("FROM_CHANGELOG");
+     * }
+ * *

Optional arguments can be passed using named expressions: * *

{@code
diff --git a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/functions/BuiltInFunctionDefinitions.java b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/functions/BuiltInFunctionDefinitions.java
index abe54e6f48ebf..1085afa48b2f3 100644
--- a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/functions/BuiltInFunctionDefinitions.java
+++ b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/functions/BuiltInFunctionDefinitions.java
@@ -839,12 +839,15 @@ ANY, and(logical(LogicalTypeRoot.BOOLEAN), LITERAL)
                     .kind(PROCESS_TABLE)
                     .staticArguments(
                             StaticArgument.table(
-                                    "input",
-                                    Row.class,
-                                    false,
-                                    EnumSet.of(
-                                            StaticArgumentTrait.TABLE,
-                                            StaticArgumentTrait.ROW_SEMANTIC_TABLE)),
+                                            "input",
+                                            Row.class,
+                                            false,
+                                            EnumSet.of(
+                                                    StaticArgumentTrait.TABLE,
+                                                    StaticArgumentTrait.ROW_SEMANTIC_TABLE))
+                                    .withConditionalTrait(
+                                            StaticArgumentTrait.SET_SEMANTIC_TABLE,
+                                            TraitCondition.hasPartitionBy()),
                             StaticArgument.scalar("op", DataTypes.DESCRIPTOR(), true),
                             StaticArgument.scalar(
                                     "op_mapping",
diff --git a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java
index d1d3c1c61dad8..a04ddf61a794a 100644
--- a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java
+++ b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java
@@ -31,12 +31,14 @@
 import org.apache.flink.types.ColumnList;
 import org.apache.flink.types.RowKind;
 
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 
 /** Type strategies for the {@code FROM_CHANGELOG} process table function. */
 @Internal
@@ -50,7 +52,7 @@ public final class FromChangelogTypeStrategy {
     public static final int ARG_OP_MAPPING = 2;
     public static final int ARG_ERROR_HANDLING = 3;
 
-    public static final String DEFAULT_OP_COLUMN_NAME = "op";
+    private static final String DEFAULT_OP_COLUMN_NAME = "op";
 
     private static final Set<String> VALID_ROW_KIND_NAMES =
             Set.of("INSERT", "UPDATE_BEFORE", "UPDATE_AFTER", "DELETE");
@@ -97,7 +99,6 @@ public Optional<List<DataType>> inferInputTypes(
     // Helpers
     // --------------------------------------------------------------------------------------------
 
-    @SuppressWarnings("rawtypes")
     private static Optional> validateInputs(
             final CallContext callContext, final boolean throwOnFailure) {
         Optional> error;
@@ -158,21 +159,21 @@ private static Optional> validateOpColumn(
         final TableSemantics tableSemantics = callContext.getTableSemantics(ARG_TABLE).get();
         final String opColumnName = resolveOpColumnName(callContext);
         final List<Field> inputFields = DataType.getFields(tableSemantics.dataType());
-        final Optional<Field> opField =
-                inputFields.stream().filter(f -> f.getName().equals(opColumnName)).findFirst();
-        if (opField.isEmpty()) {
+        final Integer opIndex = buildFieldNameToIndex(inputFields).get(opColumnName);
+        if (opIndex == null) {
             return callContext.fail(
                     throwOnFailure,
                     String.format(
                             "The op column '%s' does not exist in the input schema.",
                             opColumnName));
         }
-        if (!opField.get().getDataType().getLogicalType().is(LogicalTypeFamily.CHARACTER_STRING)) {
+        final Field opField = inputFields.get(opIndex);
+        if (!opField.getDataType().getLogicalType().is(LogicalTypeFamily.CHARACTER_STRING)) {
             return callContext.fail(
                     throwOnFailure,
                     String.format(
                             "The op column '%s' must be of STRING type, but was '%s'.",
-                            opColumnName, opField.get().getDataType().getLogicalType()));
+                            opColumnName, opField.getDataType().getLogicalType()));
         }
         return Optional.empty();
     }
@@ -273,7 +274,11 @@ private static Optional> validateErrorHandling(
         return Optional.empty();
     }
 
-    private static String resolveOpColumnName(final CallContext callContext) {
+    /**
+     * Resolves the op column name from the {@code op} descriptor argument, falling back to {@link
+     * #DEFAULT_OP_COLUMN_NAME} when the argument is omitted or empty.
+     */
+    public static String resolveOpColumnName(final CallContext callContext) {
         return callContext
                 .getArgumentValue(ARG_OP, ColumnList.class)
                 .filter(cl -> !cl.getNames().isEmpty())
@@ -281,15 +286,42 @@ private static String resolveOpColumnName(final CallContext callContext) {
                 .orElse(DEFAULT_OP_COLUMN_NAME);
     }
 
-    private static List buildOutputFields(
+    /**
+     * Computes the indices of input columns that pass through to the function's output. Excludes
+     * the op column (becomes RowKind) and partition key columns (which the framework prepends
+     * automatically when the input has set semantics).
+     *
+     * 

Used by both the output type strategy and the runtime function so that the declared output + * schema and the actual emitted rows stay in sync. + */ + public static int[] computeOutputIndices( final TableSemantics tableSemantics, final String opColumnName) { final List inputFields = DataType.getFields(tableSemantics.dataType()); + final Set excluded = new HashSet<>(); + for (final int idx : tableSemantics.partitionByColumns()) { + excluded.add(idx); + } + final Integer opIndex = buildFieldNameToIndex(inputFields).get(opColumnName); + if (opIndex != null) { + excluded.add(opIndex); + } + return IntStream.range(0, inputFields.size()).filter(i -> !excluded.contains(i)).toArray(); + } - // Exclude the op column (becomes RowKind), keep all other columns - return inputFields.stream() - .filter(f -> !f.getName().equals(opColumnName)) + private static List buildOutputFields( + final TableSemantics tableSemantics, final String opColumnName) { + final List inputFields = DataType.getFields(tableSemantics.dataType()); + return Arrays.stream(computeOutputIndices(tableSemantics, opColumnName)) + .mapToObj(inputFields::get) .collect(Collectors.toList()); } + /** Builds a field-name → index lookup map for the given input fields. 
*/ + private static Map buildFieldNameToIndex(final List fields) { + return IntStream.range(0, fields.size()) + .boxed() + .collect(Collectors.toMap(i -> fields.get(i).getName(), i -> i)); + } + private FromChangelogTypeStrategy() {} } diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java index 4643a51be52e0..1584df63398c3 100644 --- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java +++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java @@ -42,6 +42,8 @@ public List programs() { FromChangelogTestPrograms.DEFAULT_OP_MAPPING, FromChangelogTestPrograms.CUSTOM_OP_MAPPING, FromChangelogTestPrograms.CUSTOM_OP_NAME, + FromChangelogTestPrograms.SET_SEMANTICS_PARTITION_BY, + FromChangelogTestPrograms.DELETION_FLAG_PARTITION_BY, FromChangelogTestPrograms.SKIP_INVALID_OP_HANDLING, FromChangelogTestPrograms.SKIP_NULL_OP_CODE, FromChangelogTestPrograms.TABLE_API_DEFAULT, diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java index 1858aabeed019..fb715000f511b 100644 --- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java +++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java @@ -172,6 +172,71 @@ public class FromChangelogTestPrograms { + "op => DESCRIPTOR(operation))") .build(); + // 
-------------------------------------------------------------------------------------------- + // Set semantics with PARTITION BY + // -------------------------------------------------------------------------------------------- + + /** + * Verifies that {@code FROM_CHANGELOG(TABLE t PARTITION BY id)} produces the same logical + * output as the row-semantic call. The conditional {@code SET_SEMANTIC_TABLE} trait switches + * the execution to a co-located parallel mode but must not change row-level semantics. + */ + public static final TableTestProgram SET_SEMANTICS_PARTITION_BY = + TableTestProgram.of( + "from-changelog-set-semantics-partition-by", + "PARTITION BY enables set semantics without altering output rows") + .setupTableSource( + SourceTestStep.newBuilder("cdc_stream") + .addSchema(SIMPLE_CDC_SCHEMA) + .producedValues( + Row.of(1, "INSERT", "Alice"), + Row.of(2, "INSERT", "Bob"), + Row.of(1, "UPDATE_BEFORE", "Alice"), + Row.of(1, "UPDATE_AFTER", "Alice2"), + Row.of(2, "DELETE", "Bob")) + .build()) + .setupTableSink( + SinkTestStep.newBuilder("sink") + .addSchema("id INT", "name STRING") + .consumedValues( + Row.ofKind(RowKind.INSERT, 1, "Alice"), + Row.ofKind(RowKind.INSERT, 2, "Bob"), + Row.ofKind(RowKind.UPDATE_BEFORE, 1, "Alice"), + Row.ofKind(RowKind.UPDATE_AFTER, 1, "Alice2"), + Row.ofKind(RowKind.DELETE, 2, "Bob")) + .build()) + .runSql( + "INSERT INTO sink SELECT * FROM FROM_CHANGELOG(" + + "input => TABLE cdc_stream PARTITION BY id)") + .build(); + + public static final TableTestProgram DELETION_FLAG_PARTITION_BY = + TableTestProgram.of( + "from-changelog-deletion-flag-partition-by", + "deletion flag mapping with PARTITION BY: 'false' -> INSERT, 'true' -> DELETE") + .setupTableSource( + SourceTestStep.newBuilder("cdc_stream") + .addSchema("id INT", "deleted STRING", "name STRING") + .producedValues( + Row.of(1, "false", "Alice"), + Row.of(2, "false", "Bob"), + Row.of(2, "true", "Bob")) + .build()) + .setupTableSink( + SinkTestStep.newBuilder("sink") 
+ .addSchema("id INT", "name STRING") + .consumedValues( + Row.ofKind(RowKind.INSERT, 1, "Alice"), + Row.ofKind(RowKind.INSERT, 2, "Bob"), + Row.ofKind(RowKind.DELETE, 2, "Bob")) + .build()) + .runSql( + "INSERT INTO sink SELECT * FROM FROM_CHANGELOG(" + + "input => TABLE cdc_stream PARTITION BY id, " + + "op => DESCRIPTOR(deleted), " + + "op_mapping => MAP['false', 'INSERT', 'true', 'DELETE'])") + .build(); + // -------------------------------------------------------------------------------------------- // Table API test // -------------------------------------------------------------------------------------------- diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.java index ba2c1a5690cb6..392abda3cca4d 100644 --- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.java +++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.java @@ -75,4 +75,18 @@ void testCustomOpMapping() { + "error_handling => 'SKIP')", CHANGELOG_MODE); } + + @Test + void testSetSemanticsWithPartitionBy() { + util.tableEnv() + .executeSql( + "CREATE TABLE cdc_stream (" + + " id INT," + + " op STRING," + + " name STRING" + + ") WITH ('connector' = 'values')"); + util.verifyRelPlan( + "SELECT * FROM FROM_CHANGELOG(input => TABLE cdc_stream PARTITION BY id)", + CHANGELOG_MODE); + } } diff --git a/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.xml b/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.xml index 614eb5456b9aa..107133d104835 100644 --- 
a/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.xml +++ b/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.xml @@ -51,6 +51,26 @@ LogicalProject(id=[$0], name=[$1]) + + + + + TABLE cdc_stream PARTITION BY id)]]> + + + + + + diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/ptf/FromChangelogFunction.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/ptf/FromChangelogFunction.java index cc1f9deb21d11..8530e82526b2d 100644 --- a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/ptf/FromChangelogFunction.java +++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/ptf/FromChangelogFunction.java @@ -40,13 +40,12 @@ import java.util.HashMap; import java.util.Map; import java.util.stream.Collectors; -import java.util.stream.IntStream; import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.ARG_ERROR_HANDLING; -import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.ARG_OP; import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.ARG_OP_MAPPING; import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.ARG_TABLE; -import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.DEFAULT_OP_COLUMN_NAME; +import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.computeOutputIndices; +import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.resolveOpColumnName; /** * Runtime implementation of {@link BuiltInFunctionDefinitions#FROM_CHANGELOG}. 
@@ -90,12 +89,7 @@ public FromChangelogFunction(final SpecializedContext context) { final RowType inputType = (RowType) tableSemantics.dataType().getLogicalType(); final String opColumnName = resolveOpColumnName(callContext); this.opColumnIndex = inputType.getFieldNames().indexOf(opColumnName); - - // Exclude only the op column from output — all other columns pass through - this.outputIndices = - IntStream.range(0, inputType.getFieldCount()) - .filter(i -> i != opColumnIndex) - .toArray(); + this.outputIndices = computeOutputIndices(tableSemantics, opColumnName); this.rawOpMap = buildOpMap(callContext); @@ -114,13 +108,6 @@ public void open(final FunctionContext context) throws Exception { projectedOutput = ProjectedRowData.from(outputIndices); } - private static String resolveOpColumnName(final CallContext callContext) { - return callContext - .getArgumentValue(ARG_OP, ColumnList.class) - .map(cl -> cl.getNames().get(0)) - .orElse(DEFAULT_OP_COLUMN_NAME); - } - /** * Builds a String-to-RowKind map. Keys in the provided mapping may be comma-separated (e.g., * "INSERT, UPDATE_AFTER") to map multiple input codes to the same RowKind. 
From 2dc1bdd348729f1ff1b84db4d328f1f59db2e31c Mon Sep 17 00:00:00 2001 From: Ramin Gharib Date: Wed, 6 May 2026 15:56:17 +0200 Subject: [PATCH 2/6] [FLINK-39537][table] Address feedback and cleanup code --- .../docs/sql/reference/queries/changelog.md | 6 +++- .../strategies/FromChangelogTypeStrategy.java | 36 ++++++++++--------- .../stream/FromChangelogSemanticTests.java | 2 +- .../stream/FromChangelogTestPrograms.java | 28 +++++---------- .../plan/stream/sql/FromChangelogTest.java | 22 ++---------- .../plan/stream/sql/FromChangelogTest.xml | 23 ++---------- 6 files changed, 39 insertions(+), 78 deletions(-) diff --git a/docs/content/docs/sql/reference/queries/changelog.md b/docs/content/docs/sql/reference/queries/changelog.md index 4be00afd39df9..ea8dde63e93c3 100644 --- a/docs/content/docs/sql/reference/queries/changelog.md +++ b/docs/content/docs/sql/reference/queries/changelog.md @@ -61,7 +61,7 @@ SELECT * FROM FROM_CHANGELOG( | Parameter | Required | Description | |:-------------|:---------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `input` | Yes | The input table. Must be append-only. Use `PARTITION BY` to ensure rows for the same key are processed together. This is required when downstream operators are keyed on that column. | +| `input` | Yes | The input table. Must be append-only. Use `PARTITION BY` to ensure rows for the same key are processed together. | | `op` | No | A `DESCRIPTOR` with a single column name for the operation code column. Defaults to `op`. The column must exist in the input table and be of type STRING. 
| | `op_mapping` | No | A `MAP` mapping user-defined codes to Flink change operation names. Keys are user-defined codes (e.g., `'c'`, `'u'`, `'d'`), values are Flink change operation names (`INSERT`, `UPDATE_BEFORE`, `UPDATE_AFTER`, `DELETE`). Keys can contain comma-separated codes to map multiple codes to the same operation (e.g., `'c, r'`). Each change operation may appear at most once across all entries. | | `error_handling` | No | Controls behavior when an input row's operation code is `NULL` or not present in the `op_mapping`. Valid values: `FAIL` (default) — throw a `TableRuntimeException`, `SKIP` — silently drop the row. | @@ -129,6 +129,10 @@ SELECT * FROM FROM_CHANGELOG( #### Partitioning by a key +Prefer row semantics. `PARTITION BY` is currently only necessary if your rows for the same key are spread across partitions. In this case, consider also using `ORDER BY` to fix the ordering within a key. + +If you are producing an upsert table — that is, you are emitting `UPDATE_AFTER` but no `UPDATE_BEFORE` from your CDC input stream — the partition key you select here will be considered both the primary key and the upsert key by the engine. Make sure the `PARTITION BY` key matches your primary key exactly. 
+ ```sql SELECT * FROM FROM_CHANGELOG( input => TABLE cdc_stream PARTITION BY id diff --git a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java index a04ddf61a794a..40c9fab7d6cb8 100644 --- a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java +++ b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java @@ -27,6 +27,7 @@ import org.apache.flink.table.types.inference.CallContext; import org.apache.flink.table.types.inference.InputTypeStrategy; import org.apache.flink.table.types.inference.TypeStrategy; +import org.apache.flink.table.types.logical.LogicalType; import org.apache.flink.table.types.logical.LogicalTypeFamily; import org.apache.flink.types.ColumnList; import org.apache.flink.types.RowKind; @@ -36,6 +37,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalInt; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -159,21 +161,22 @@ private static Optional> validateOpColumn( final TableSemantics tableSemantics = callContext.getTableSemantics(ARG_TABLE).get(); final String opColumnName = resolveOpColumnName(callContext); final List inputFields = DataType.getFields(tableSemantics.dataType()); - final Integer opIndex = buildFieldNameToIndex(inputFields).get(opColumnName); - if (opIndex == null) { + final OptionalInt opIndex = resolveOpColumnIndex(inputFields, opColumnName); + if (opIndex.isEmpty()) { return callContext.fail( throwOnFailure, String.format( "The op column '%s' does not exist in the input schema.", opColumnName)); } - final Field opField = inputFields.get(opIndex); - if 
(!opField.getDataType().getLogicalType().is(LogicalTypeFamily.CHARACTER_STRING)) { + final Field opField = inputFields.get(opIndex.getAsInt()); + final LogicalType opFieldType = opField.getDataType().getLogicalType(); + if (!opFieldType.is(LogicalTypeFamily.CHARACTER_STRING)) { return callContext.fail( throwOnFailure, String.format( "The op column '%s' must be of STRING type, but was '%s'.", - opColumnName, opField.getDataType().getLogicalType())); + opColumnName, opFieldType)); } return Optional.empty(); } @@ -288,8 +291,8 @@ public static String resolveOpColumnName(final CallContext callContext) { /** * Computes the indices of input columns that pass through to the function's output. Excludes - * the op column (becomes RowKind) and partition key columns (which the framework prepends - * automatically when the input has set semantics). + * the op column (becomes RowKind) and partition key columns, if present (which the framework + * prepends automatically when the input has set semantics). * *

Used by both the output type strategy and the runtime function so that the declared output * schema and the actual emitted rows stay in sync. @@ -301,10 +304,7 @@ public static int[] computeOutputIndices( for (final int idx : tableSemantics.partitionByColumns()) { excluded.add(idx); } - final Integer opIndex = buildFieldNameToIndex(inputFields).get(opColumnName); - if (opIndex != null) { - excluded.add(opIndex); - } + resolveOpColumnIndex(inputFields, opColumnName).ifPresent(excluded::add); return IntStream.range(0, inputFields.size()).filter(i -> !excluded.contains(i)).toArray(); } @@ -316,11 +316,15 @@ private static List buildOutputFields( .collect(Collectors.toList()); } - /** Builds a field-name → index lookup map for the given input fields. */ - private static Map buildFieldNameToIndex(final List fields) { - return IntStream.range(0, fields.size()) - .boxed() - .collect(Collectors.toMap(i -> fields.get(i).getName(), i -> i)); + /** + * Returns the index of the column matching {@code opColumnName} within the given input fields, + * or empty if no field matches. 
+ */ + private static OptionalInt resolveOpColumnIndex( + final List inputFields, final String opColumnName) { + return IntStream.range(0, inputFields.size()) + .filter(i -> inputFields.get(i).getName().equals(opColumnName)) + .findFirst(); } private FromChangelogTypeStrategy() {} diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java index 1584df63398c3..5d2d01bab14fc 100644 --- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java +++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java @@ -42,7 +42,7 @@ public List programs() { FromChangelogTestPrograms.DEFAULT_OP_MAPPING, FromChangelogTestPrograms.CUSTOM_OP_MAPPING, FromChangelogTestPrograms.CUSTOM_OP_NAME, - FromChangelogTestPrograms.SET_SEMANTICS_PARTITION_BY, + FromChangelogTestPrograms.RETRACT_PARTITION_BY, FromChangelogTestPrograms.DELETION_FLAG_PARTITION_BY, FromChangelogTestPrograms.SKIP_INVALID_OP_HANDLING, FromChangelogTestPrograms.SKIP_NULL_OP_CODE, diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java index fb715000f511b..8a50f53a6e3c8 100644 --- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java +++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java @@ -146,7 +146,6 @@ public class FromChangelogTestPrograms { + "error_handling => 'SKIP')") 
.build(); - /** Custom op column name via DESCRIPTOR. */ public static final TableTestProgram CUSTOM_OP_NAME = TableTestProgram.of( "from-changelog-custom-op-name", "custom op column name via DESCRIPTOR") @@ -172,28 +171,19 @@ public class FromChangelogTestPrograms { + "op => DESCRIPTOR(operation))") .build(); - // -------------------------------------------------------------------------------------------- - // Set semantics with PARTITION BY - // -------------------------------------------------------------------------------------------- - - /** - * Verifies that {@code FROM_CHANGELOG(TABLE t PARTITION BY id)} produces the same logical - * output as the row-semantic call. The conditional {@code SET_SEMANTIC_TABLE} trait switches - * the execution to a co-located parallel mode but must not change row-level semantics. - */ - public static final TableTestProgram SET_SEMANTICS_PARTITION_BY = + public static final TableTestProgram RETRACT_PARTITION_BY = TableTestProgram.of( - "from-changelog-set-semantics-partition-by", - "PARTITION BY enables set semantics without altering output rows") + "from-changelog-retract-partition-by", + "retract changelog with PARTITION BY") .setupTableSource( SourceTestStep.newBuilder("cdc_stream") - .addSchema(SIMPLE_CDC_SCHEMA) + .addSchema("name STRING", "id INT", "op STRING") .producedValues( - Row.of(1, "INSERT", "Alice"), - Row.of(2, "INSERT", "Bob"), - Row.of(1, "UPDATE_BEFORE", "Alice"), - Row.of(1, "UPDATE_AFTER", "Alice2"), - Row.of(2, "DELETE", "Bob")) + Row.of("Alice", 1, "INSERT"), + Row.of("Bob", 2, "INSERT"), + Row.of("Alice", 1, "UPDATE_BEFORE"), + Row.of("Alice2", 1, "UPDATE_AFTER"), + Row.of("Bob", 2, "DELETE")) .build()) .setupTableSink( SinkTestStep.newBuilder("sink") diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.java index 
392abda3cca4d..17d7f64fa4812 100644 --- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.java +++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.java @@ -46,7 +46,7 @@ void setup() { } @Test - void testInsertOnlySource() { + void testRetract() { util.tableEnv() .executeSql( "CREATE TABLE cdc_stream (" @@ -59,25 +59,7 @@ void testInsertOnlySource() { } @Test - void testCustomOpMapping() { - util.tableEnv() - .executeSql( - "CREATE TABLE cdc_stream (" - + " id INT," - + " __op STRING," - + " name STRING" - + ") WITH ('connector' = 'values')"); - util.verifyRelPlan( - "SELECT * FROM FROM_CHANGELOG(" - + "input => TABLE cdc_stream, " - + "op => DESCRIPTOR(__op), " - + "op_mapping => MAP['c, r', 'INSERT', 'ub', 'UPDATE_BEFORE', 'ua', 'UPDATE_AFTER', 'd', 'DELETE'], " - + "error_handling => 'SKIP')", - CHANGELOG_MODE); - } - - @Test - void testSetSemanticsWithPartitionBy() { + void testRetractPartitionBy() { util.tableEnv() .executeSql( "CREATE TABLE cdc_stream (" diff --git a/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.xml b/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.xml index 107133d104835..2addfe9f5ff5f 100644 --- a/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.xml +++ b/flink-table/flink-table-planner/src/test/resources/org/apache/flink/table/planner/plan/stream/sql/FromChangelogTest.xml @@ -16,26 +16,7 @@ See the License for the specific language governing permissions and limitations under the License. 
--> - - - TABLE cdc_stream, op => DESCRIPTOR(__op), op_mapping => MAP['c, r', 'INSERT', 'ub', 'UPDATE_BEFORE', 'ua', 'UPDATE_AFTER', 'd', 'DELETE'], error_handling => 'SKIP')]]> - - - - - - - - - + TABLE cdc_stream)]]> @@ -54,7 +35,7 @@ ProcessTableFunction(invocation=[FROM_CHANGELOG(TABLE(#0), DEFAULT(), DEFAULT(), ]]> - + TABLE cdc_stream PARTITION BY id)]]> From 3a0aed8c3708f506d04097260c73bfcab3575dcc Mon Sep 17 00:00:00 2001 From: Ramin Gharib Date: Wed, 6 May 2026 16:07:53 +0200 Subject: [PATCH 3/6] [FLINK-39537][table] Rename test --- .../plan/nodes/exec/stream/FromChangelogSemanticTests.java | 2 +- .../plan/nodes/exec/stream/FromChangelogTestPrograms.java | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java index 5d2d01bab14fc..85efd0d48630f 100644 --- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java +++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java @@ -39,7 +39,7 @@ protected void applyDefaultEnvironmentOptions(TableConfig config) { @Override public List programs() { return List.of( - FromChangelogTestPrograms.DEFAULT_OP_MAPPING, + FromChangelogTestPrograms.RETRACT, FromChangelogTestPrograms.CUSTOM_OP_MAPPING, FromChangelogTestPrograms.CUSTOM_OP_NAME, FromChangelogTestPrograms.RETRACT_PARTITION_BY, diff --git a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java index 8a50f53a6e3c8..f7264ec3c9250 
100644 --- a/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java +++ b/flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogTestPrograms.java @@ -35,10 +35,8 @@ public class FromChangelogTestPrograms { // SQL tests // -------------------------------------------------------------------------------------------- - public static final TableTestProgram DEFAULT_OP_MAPPING = - TableTestProgram.of( - "from-changelog-default-op-mapping", - "default mapping with standard op names") + public static final TableTestProgram RETRACT = + TableTestProgram.of("from-changelog-retract", "retract changelog with default mapping") .setupTableSource( SourceTestStep.newBuilder("cdc_stream") .addSchema(SIMPLE_CDC_SCHEMA) From 4cb5146c556917735f6eca41491fec0b1f770d40 Mon Sep 17 00:00:00 2001 From: Ramin Gharib Date: Wed, 6 May 2026 17:41:46 +0200 Subject: [PATCH 4/6] [FLINK-39537][table] Address feedback --- .../ChangelogTypeStrategyUtils.java | 23 ++++++++-- .../strategies/FromChangelogTypeStrategy.java | 42 +++---------------- .../functions/ptf/FromChangelogFunction.java | 8 ++-- 3 files changed, 29 insertions(+), 44 deletions(-) diff --git a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/ChangelogTypeStrategyUtils.java b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/ChangelogTypeStrategyUtils.java index e9608ee779524..d546824b28de8 100644 --- a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/ChangelogTypeStrategyUtils.java +++ b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/ChangelogTypeStrategyUtils.java @@ -21,16 +21,32 @@ import org.apache.flink.annotation.Internal; import org.apache.flink.table.functions.TableSemantics; import 
org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.inference.CallContext; +import org.apache.flink.types.ColumnList; import java.util.Arrays; -import java.util.HashSet; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; +import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.ARG_OP; + /** Shared helpers for changelog-style PTFs ({@code TO_CHANGELOG}, {@code FROM_CHANGELOG}). */ @Internal public final class ChangelogTypeStrategyUtils { + private static final String DEFAULT_OP_COLUMN_NAME = "op"; + + /** + * Resolves the op column name from the {@code op} descriptor argument, falling back to {@link + * #DEFAULT_OP_COLUMN_NAME} when the argument is omitted or empty. + */ + public static String resolveOpColumnName(final CallContext callContext) { + return callContext + .getArgumentValue(ARG_OP, ColumnList.class) + .filter(cl -> !cl.getNames().isEmpty()) + .map(cl -> cl.getNames().get(0)) + .orElse(DEFAULT_OP_COLUMN_NAME); + } /** * Returns the input column indices that pass through to the function's output, excluding the @@ -61,10 +77,9 @@ private static int[] computeOutputIndices( } private static Set collectPartitionKeyIndices(final TableSemantics tableSemantics) { - return new HashSet<>( - Arrays.stream(tableSemantics.partitionByColumns()) + return Arrays.stream(tableSemantics.partitionByColumns()) .boxed() - .collect(Collectors.toSet())); + .collect(Collectors.toSet()); } private ChangelogTypeStrategyUtils() {} diff --git a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java index 40c9fab7d6cb8..576618623ad63 100644 --- a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java +++ 
b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java @@ -54,8 +54,6 @@ public final class FromChangelogTypeStrategy { public static final int ARG_OP_MAPPING = 2; public static final int ARG_ERROR_HANDLING = 3; - private static final String DEFAULT_OP_COLUMN_NAME = "op"; - private static final Set VALID_ROW_KIND_NAMES = Set.of("INSERT", "UPDATE_BEFORE", "UPDATE_AFTER", "DELETE"); @@ -90,7 +88,8 @@ public Optional> inferInputTypes( new ValidationException( "First argument must be a table for FROM_CHANGELOG.")); - final String opColumnName = resolveOpColumnName(callContext); + final String opColumnName = + ChangelogTypeStrategyUtils.resolveOpColumnName(callContext); final List outputFields = buildOutputFields(tableSemantics, opColumnName); @@ -159,7 +158,7 @@ private static Optional> validateOpColumn( final CallContext callContext, final boolean throwOnFailure) { final TableSemantics tableSemantics = callContext.getTableSemantics(ARG_TABLE).get(); - final String opColumnName = resolveOpColumnName(callContext); + final String opColumnName = ChangelogTypeStrategyUtils.resolveOpColumnName(callContext); final List inputFields = DataType.getFields(tableSemantics.dataType()); final OptionalInt opIndex = resolveOpColumnIndex(inputFields, opColumnName); if (opIndex.isEmpty()) { @@ -277,41 +276,12 @@ private static Optional> validateErrorHandling( return Optional.empty(); } - /** - * Resolves the op column name from the {@code op} descriptor argument, falling back to {@link - * #DEFAULT_OP_COLUMN_NAME} when the argument is omitted or empty. - */ - public static String resolveOpColumnName(final CallContext callContext) { - return callContext - .getArgumentValue(ARG_OP, ColumnList.class) - .filter(cl -> !cl.getNames().isEmpty()) - .map(cl -> cl.getNames().get(0)) - .orElse(DEFAULT_OP_COLUMN_NAME); - } - - /** - * Computes the indices of input columns that pass through to the function's output. 
Excludes - * the op column (becomes RowKind) and partition key columns, if present (which the framework - * prepends automatically when the input has set semantics). - * - *

Used by both the output type strategy and the runtime function so that the declared output - * schema and the actual emitted rows stay in sync. - */ - public static int[] computeOutputIndices( - final TableSemantics tableSemantics, final String opColumnName) { - final List inputFields = DataType.getFields(tableSemantics.dataType()); - final Set excluded = new HashSet<>(); - for (final int idx : tableSemantics.partitionByColumns()) { - excluded.add(idx); - } - resolveOpColumnIndex(inputFields, opColumnName).ifPresent(excluded::add); - return IntStream.range(0, inputFields.size()).filter(i -> !excluded.contains(i)).toArray(); - } - private static List buildOutputFields( final TableSemantics tableSemantics, final String opColumnName) { final List inputFields = DataType.getFields(tableSemantics.dataType()); - return Arrays.stream(computeOutputIndices(tableSemantics, opColumnName)) + return Arrays.stream( + ChangelogTypeStrategyUtils.computeOutputIndices( + tableSemantics, opColumnName)) .mapToObj(inputFields::get) .collect(Collectors.toList()); } diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/ptf/FromChangelogFunction.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/ptf/FromChangelogFunction.java index 8530e82526b2d..9a2607fb01bca 100644 --- a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/ptf/FromChangelogFunction.java +++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/ptf/FromChangelogFunction.java @@ -29,6 +29,7 @@ import org.apache.flink.table.functions.SpecializedFunction.SpecializedContext; import org.apache.flink.table.functions.TableSemantics; import org.apache.flink.table.types.inference.CallContext; +import org.apache.flink.table.types.inference.strategies.ChangelogTypeStrategyUtils; import org.apache.flink.table.types.inference.strategies.ErrorHandlingMode; import 
org.apache.flink.table.types.logical.RowType; import org.apache.flink.types.ColumnList; @@ -44,8 +45,6 @@ import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.ARG_ERROR_HANDLING; import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.ARG_OP_MAPPING; import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.ARG_TABLE; -import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.computeOutputIndices; -import static org.apache.flink.table.types.inference.strategies.FromChangelogTypeStrategy.resolveOpColumnName; /** * Runtime implementation of {@link BuiltInFunctionDefinitions#FROM_CHANGELOG}. @@ -87,9 +86,10 @@ public FromChangelogFunction(final SpecializedContext context) { .orElseThrow(() -> new IllegalStateException("Table argument expected.")); final RowType inputType = (RowType) tableSemantics.dataType().getLogicalType(); - final String opColumnName = resolveOpColumnName(callContext); + final String opColumnName = ChangelogTypeStrategyUtils.resolveOpColumnName(callContext); this.opColumnIndex = inputType.getFieldNames().indexOf(opColumnName); - this.outputIndices = computeOutputIndices(tableSemantics, opColumnName); + this.outputIndices = + ChangelogTypeStrategyUtils.computeOutputIndices(tableSemantics, opColumnName); this.rawOpMap = buildOpMap(callContext); From f4af64096bcf7b709ce2c2ba68d4edfcf636b57a Mon Sep 17 00:00:00 2001 From: Ramin Gharib Date: Thu, 7 May 2026 15:41:40 +0200 Subject: [PATCH 5/6] [FLINK-39537][table] Address feedback --- .../docs/sql/reference/queries/changelog.md | 18 +++++++++++---- .../strategies/FromChangelogTypeStrategy.java | 22 +++++-------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/content/docs/sql/reference/queries/changelog.md b/docs/content/docs/sql/reference/queries/changelog.md index ea8dde63e93c3..b713aebf07274 100644 --- 
a/docs/content/docs/sql/reference/queries/changelog.md +++ b/docs/content/docs/sql/reference/queries/changelog.md @@ -129,16 +129,26 @@ SELECT * FROM FROM_CHANGELOG( #### Partitioning by a key -Prefer row semantics. `PARTITION BY` is currently only necessary if your rows for the same key are spread across partitions. In this case, consider also using `ORDER BY` to fix the ordering within a key. - -If you are producing an upsert table — that is, you are emitting `UPDATE_AFTER` but no `UPDATE_BEFORE` from your CDC input stream — the partition key you select here will be considered both the primary key and the upsert key by the engine. Make sure the `PARTITION BY` key matches your primary key exactly. - ```sql +-- Input table 'cdc_stream' with columns (name, id, op, doc) +-- Default output schema: [name, id, doc] +-- Output schema with PARTITION BY: [id, name, doc] + SELECT * FROM FROM_CHANGELOG( input => TABLE cdc_stream PARTITION BY id ) ``` +When `PARTITION BY` is provided, **the output schema changes**. The partition key columns are moved to the front by the engine, and the function emits the remaining input columns (excluding the op column). The order becomes: + +``` +[partition_keys, non_partition_input_columns_excluding_op] +``` + +Prefer row semantics, when possible. `PARTITION BY` is only necessary when downstream operators are keyed on that column and you want to co-locate rows for the same key in the same parallel operator instance. + +If you are producing an upsert table — that is, you are emitting `UPDATE_AFTER` but no `UPDATE_BEFORE` from your CDC input stream — the partition key you select here will be considered both the primary key and the upsert key by the engine. Make sure the `PARTITION BY` key matches your primary key exactly. + #### Invalid operation code handling Two `error_handling` modes are supported. The job can either fail upon an invalid or unknown op code, or skip the row and continue processing. 
diff --git a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java index 576618623ad63..f4d54f276d73c 100644 --- a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java +++ b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java @@ -40,7 +40,6 @@ import java.util.OptionalInt; import java.util.Set; import java.util.stream.Collectors; -import java.util.stream.IntStream; /** Type strategies for the {@code FROM_CHANGELOG} process table function. */ @Internal @@ -159,8 +158,8 @@ private static Optional> validateOpColumn( final TableSemantics tableSemantics = callContext.getTableSemantics(ARG_TABLE).get(); final String opColumnName = ChangelogTypeStrategyUtils.resolveOpColumnName(callContext); - final List inputFields = DataType.getFields(tableSemantics.dataType()); - final OptionalInt opIndex = resolveOpColumnIndex(inputFields, opColumnName); + final OptionalInt opIndex = + ChangelogTypeStrategyUtils.resolveOpColumnIndex(tableSemantics, opColumnName); if (opIndex.isEmpty()) { return callContext.fail( throwOnFailure, @@ -168,8 +167,10 @@ private static Optional> validateOpColumn( "The op column '%s' does not exist in the input schema.", opColumnName)); } - final Field opField = inputFields.get(opIndex.getAsInt()); - final LogicalType opFieldType = opField.getDataType().getLogicalType(); + final LogicalType opFieldType = + DataType.getFieldDataTypes(tableSemantics.dataType()) + .get(opIndex.getAsInt()) + .getLogicalType(); if (!opFieldType.is(LogicalTypeFamily.CHARACTER_STRING)) { return callContext.fail( throwOnFailure, @@ -286,16 +287,5 @@ private static List buildOutputFields( .collect(Collectors.toList()); } - /** - * Returns the 
index of the column matching {@code opColumnName} within the given input fields, - * or empty if no field matches. - */ - private static OptionalInt resolveOpColumnIndex( - final List inputFields, final String opColumnName) { - return IntStream.range(0, inputFields.size()) - .filter(i -> inputFields.get(i).getName().equals(opColumnName)) - .findFirst(); - } - private FromChangelogTypeStrategy() {} } From 24a253a9792ab80ecada0e2103a940b8627714d6 Mon Sep 17 00:00:00 2001 From: Ramin Gharib Date: Fri, 8 May 2026 08:36:15 +0200 Subject: [PATCH 6/6] [FLINK-39537][table] Rebase --- .../strategies/ChangelogTypeStrategyUtils.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/ChangelogTypeStrategyUtils.java b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/ChangelogTypeStrategyUtils.java index d546824b28de8..d4e20c3021003 100644 --- a/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/ChangelogTypeStrategyUtils.java +++ b/flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/ChangelogTypeStrategyUtils.java @@ -25,6 +25,8 @@ import org.apache.flink.types.ColumnList; import java.util.Arrays; +import java.util.List; +import java.util.OptionalInt; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -48,6 +50,18 @@ public static String resolveOpColumnName(final CallContext callContext) { .orElse(DEFAULT_OP_COLUMN_NAME); } + /** + * Returns the index of the column matching {@code opColumnName} within the input schema, or + * empty if no field matches. 
+ */ + public static OptionalInt resolveOpColumnIndex( + final TableSemantics tableSemantics, final String opColumnName) { + final List fieldNames = DataType.getFieldNames(tableSemantics.dataType()); + return IntStream.range(0, fieldNames.size()) + .filter(i -> fieldNames.get(i).equals(opColumnName)) + .findFirst(); + } + /** * Returns the input column indices that pass through to the function's output, excluding the * partition key columns (the PTF framework prepends them when the input has set semantics). @@ -78,8 +92,8 @@ private static int[] computeOutputIndices( private static Set collectPartitionKeyIndices(final TableSemantics tableSemantics) { return Arrays.stream(tableSemantics.partitionByColumns()) - .boxed() - .collect(Collectors.toSet()); + .boxed() + .collect(Collectors.toSet()); } private ChangelogTypeStrategyUtils() {}