-
Notifications
You must be signed in to change notification settings - Fork 13.9k
[FLINK-39349][table] Support TO_CHANGELOG retract/upsert stream to upsert stream with deletion flag (4.2.2.1) #27847
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
097a978
5c04b7f
305a6f0
e47fadb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,6 +35,7 @@ | |
| import org.apache.flink.types.ColumnList; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.HashSet; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Optional; | ||
|
|
@@ -140,21 +141,56 @@ private static Optional<List<DataType>> validateInputs( | |
|
|
||
| final Optional<Map> opMapping = callContext.getArgumentValue(2, Map.class); | ||
| if (opMapping.isPresent()) { | ||
| final boolean hasInvalidMappingKey = | ||
| opMapping.get().keySet().stream() | ||
| .anyMatch( | ||
| key -> | ||
| !(key instanceof String) | ||
| || !VALID_ROW_KIND_NAMES.contains(key)); | ||
| if (hasInvalidMappingKey) { | ||
| return callContext.fail( | ||
| throwOnFailure, "Invalid target mapping for argument 'op_mapping'."); | ||
| final Optional<List<DataType>> validationError = | ||
| validateOpMappingKeys(callContext, opMapping.get(), throwOnFailure); | ||
| if (validationError.isPresent()) { | ||
| return validationError; | ||
| } | ||
| } | ||
|
|
||
| return Optional.of(callContext.getArgumentDataTypes()); | ||
| } | ||
|
|
||
| /** | ||
| * Validates op_mapping keys. Keys may be comma-separated (e.g., {@code "INSERT, UPDATE_AFTER"}) | ||
| * to map multiple change operations to the same output code. Whitespace around names is | ||
| * trimmed. Names are case-sensitive and must match exactly (e.g., {@code INSERT}, not {@code | ||
| * insert}). Each name must be valid and appear at most once across all entries. | ||
| */ | ||
| @SuppressWarnings("rawtypes") | ||
| private static Optional<List<DataType>> validateOpMappingKeys( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Recommend adding comments about whitespace and case handling for better user understanding.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've updated the comment to be more specific about this e8d4956 |
||
| final CallContext callContext, final Map opMapping, final boolean throwOnFailure) { | ||
| final Set<String> allRowKindsSeen = new HashSet<>(); | ||
| for (final Object key : opMapping.keySet()) { | ||
| if (!(key instanceof String)) { | ||
| return callContext.fail( | ||
| throwOnFailure, "Invalid target mapping for argument 'op_mapping'."); | ||
| } | ||
| final String[] rowKindNames = ((String) key).split(","); | ||
| for (final String rawName : rowKindNames) { | ||
| final String rowKindName = rawName.trim(); | ||
| if (!VALID_ROW_KIND_NAMES.contains(rowKindName)) { | ||
| return callContext.fail( | ||
| throwOnFailure, | ||
| String.format( | ||
| "Invalid target mapping for argument 'op_mapping'. " | ||
| + "Unknown change operation: '%s'. Valid values are: %s.", | ||
| rowKindName, VALID_ROW_KIND_NAMES)); | ||
| } | ||
| final boolean isDuplicate = !allRowKindsSeen.add(rowKindName); | ||
| if (isDuplicate) { | ||
| return callContext.fail( | ||
| throwOnFailure, | ||
| String.format( | ||
| "Invalid target mapping for argument 'op_mapping'. " | ||
| + "Duplicate change operation: '%s'.", | ||
| rowKindName)); | ||
| } | ||
| } | ||
| } | ||
| return Optional.empty(); | ||
| } | ||
|
|
||
| private static String resolveOpColumnName(final CallContext callContext) { | ||
| return callContext | ||
| .getArgumentValue(1, ColumnList.class) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -96,7 +96,7 @@ public class ToChangelogTestPrograms { | |
| public static final TableTestProgram CUSTOM_OP_MAPPING = | ||
| TableTestProgram.of( | ||
| "to-changelog-custom-op-mapping", | ||
| "custom op_mapping maps RowKinds to user-defined codes and drops unmapped") | ||
| "custom op_mapping maps change operations to user-defined codes and drops unmapped") | ||
| .setupTableSource( | ||
| SourceTestStep.newBuilder("t") | ||
| .addSchema( | ||
|
|
@@ -322,6 +322,46 @@ public class ToChangelogTestPrograms { | |
| + "FROM orders_changelog") | ||
| .build(); | ||
|
|
||
| // -------------------------------------------------------------------------------------------- | ||
| // Use case: deletion flag pattern (comma-separated change operation keys) | ||
| // -------------------------------------------------------------------------------------------- | ||
|
|
||
| /** | ||
| * Kafka Connect style deletion flag: INSERT and UPDATE_AFTER both produce deleted='false' and | ||
| * DELETE produces deleted='true'. UPDATE_BEFORE is silently dropped. | ||
| */ | ||
| public static final TableTestProgram DELETION_FLAG = | ||
| TableTestProgram.of( | ||
| "to-changelog-deletion-flag", | ||
| "comma-separated change operations produce deletion flag output") | ||
| .setupTableSource( | ||
| SourceTestStep.newBuilder("t") | ||
| .addSchema( | ||
| "name STRING PRIMARY KEY NOT ENFORCED", "score BIGINT") | ||
| .addMode(ChangelogMode.all()) | ||
| .producedValues( | ||
| Row.ofKind(RowKind.INSERT, "Alice", 10L), | ||
| Row.ofKind(RowKind.INSERT, "Bob", 20L), | ||
| Row.ofKind(RowKind.UPDATE_BEFORE, "Alice", 10L), | ||
| Row.ofKind(RowKind.UPDATE_AFTER, "Alice", 30L), | ||
| Row.ofKind(RowKind.DELETE, "Bob", 20L)) | ||
| .build()) | ||
| .setupTableSink( | ||
| SinkTestStep.newBuilder("sink") | ||
| .addSchema("name STRING", "deleted STRING", "score BIGINT") | ||
| .consumedValues( | ||
| "+I[Alice, false, 10]", | ||
| "+I[Bob, false, 20]", | ||
| "+I[Alice, false, 30]", | ||
| "+I[Bob, true, 20]") | ||
| .build()) | ||
| .runSql( | ||
| "INSERT INTO sink SELECT * FROM TO_CHANGELOG(" | ||
| + "input => TABLE t PARTITION BY name, " | ||
| + "op => DESCRIPTOR(deleted), " | ||
| + "op_mapping => MAP['INSERT, UPDATE_AFTER', 'false', 'DELETE', 'true'])") | ||
| .build(); | ||
|
|
||
| // -------------------------------------------------------------------------------------------- | ||
| // Error validation tests | ||
| // -------------------------------------------------------------------------------------------- | ||
|
|
@@ -353,13 +393,26 @@ public class ToChangelogTestPrograms { | |
| public static final TableTestProgram INVALID_OP_MAPPING = | ||
| TableTestProgram.of( | ||
| "to-changelog-invalid-op-mapping", | ||
| "fails when op_mapping has invalid RowKind name") | ||
| "fails when op_mapping has invalid change operation name") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Recommend adding notes on why UPDATE_BEFORE is dropped and practical use cases of deletion flag pattern. |
||
| .setupTableSource(SIMPLE_SOURCE) | ||
| .runFailingSql( | ||
| "SELECT * FROM TO_CHANGELOG(" | ||
| + "input => TABLE t PARTITION BY id, " | ||
| + "op_mapping => MAP['INVALID_KIND', 'X'])", | ||
| ValidationException.class, | ||
| "Invalid target mapping for argument 'op_mapping'.") | ||
| "Unknown change operation: 'INVALID_KIND'") | ||
| .build(); | ||
|
|
||
| public static final TableTestProgram DUPLICATE_ROW_KIND = | ||
| TableTestProgram.of( | ||
| "to-changelog-duplicate-rowkind", | ||
| "fails when a change operation appears in multiple op_mapping entries") | ||
| .setupTableSource(SIMPLE_SOURCE) | ||
| .runFailingSql( | ||
| "SELECT * FROM TO_CHANGELOG(" | ||
| + "input => TABLE t PARTITION BY id, " | ||
| + "op_mapping => MAP['INSERT, DELETE', 'A', 'DELETE', 'B'])", | ||
| ValidationException.class, | ||
| "Duplicate change operation: 'DELETE'") | ||
| .build(); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -109,13 +109,22 @@ private static int[] buildNonPartitionIndices( | |
| return IntStream.range(0, fieldCount).filter(i -> !partitionKeySet.contains(i)).toArray(); | ||
| } | ||
|
|
||
| /** | ||
| * Builds a RowKind-to-output-code map. Keys may be comma-separated (e.g., "INSERT, | ||
| * UPDATE_AFTER") to map multiple RowKinds to the same output code. | ||
| */ | ||
| private static Map<RowKind, String> buildOpMap(@Nullable final Map<String, String> opMapping) { | ||
| if (opMapping == null) { | ||
| return new EnumMap<>(DEFAULT_OP_MAPPING); | ||
| } | ||
| final Map<RowKind, String> map = new EnumMap<>(RowKind.class); | ||
| opMapping.forEach((name, code) -> map.put(RowKind.valueOf(name), code)); | ||
| return map; | ||
| final Map<RowKind, String> result = new EnumMap<>(RowKind.class); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Recommend adding logging for conflicts or invalid mappings to help users debug configuration issues.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hey, for TO_CHANGELOG invalid mappings are already caught at planning time by ToChangelogTypeStrategy, the user will see a validation error |
||
| opMapping.forEach( | ||
| (commaSeparatedRowKinds, outputCode) -> { | ||
| for (final String rawName : commaSeparatedRowKinds.split(",")) { | ||
| result.put(RowKind.valueOf(rawName.trim()), outputCode); | ||
| } | ||
| }); | ||
| return result; | ||
| } | ||
|
|
||
| public void eval( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
uggest enhancing Javadoc to clarify how multiple RowKinds mapping to the same output may affect UPDATE_BEFORE behavior.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the review, Feat Zhang. The existing JavaDoc and documentation already explain that unmapped operations are dropped. Adding UPDATE_BEFORE-specific details to the generic toChangelog() JavaDoc would be too narrow, since it depends on the use case