From dff5c54f4a402b5d6109bacfef08d8a10e52be15 Mon Sep 17 00:00:00 2001
From: sxnan
Date: Fri, 11 Aug 2023 15:16:43 +0800
Subject: [PATCH] [FLINK-33202][runtime] Support switching from batch to stream mode to improve throughput when processing backlog data

---
 ...347\232\204\346\216\245\345\217\243.patch" | 822 ++++++++++++++++++
 .../source/reader/SourceReaderBaseTest.java | 4 +
 .../source/coordinator/SourceCoordinator.java | 9 +
 .../coordinator/SourceCoordinatorContext.java | 16 +
 .../runtime/source/event/IsBacklogEvent.java | 34 +
 .../streaming/api/graph/StreamConfig.java | 8 +
 .../api/graph/StreamGraphGenerator.java | 8 +
 .../api/operators/AbstractStreamOperator.java | 58 ++
 .../api/operators/BacklogTimeService.java | 88 ++
 .../api/operators/CountingOutput.java | 6 +
 .../flink/streaming/api/operators/Input.java | 9 +
 .../InternalBacklogAwareTimerServiceImpl.java | 114 +++
 ...alBacklogAwareTimerServiceManagerImpl.java | 162 ++++
 .../InternalTimeServiceManagerImpl.java | 4 +-
 .../operators/InternalTimerServiceImpl.java | 9 +
 .../api/operators/OperatorAttributes.java | 55 ++
 .../operators/OperatorAttributesBuilder.java | 64 ++
 .../flink/streaming/api/operators/Output.java | 11 +
 .../api/operators/SourceOperator.java | 8 +
 .../api/operators/StreamOperator.java | 6 +
 .../api/operators/StreamOperatorFactory.java | 6 +
 .../api/operators/TwoInputStreamOperator.java | 16 +
 .../sort/MultiInputSortingDataInput.java | 4 +
 .../sort/SortingBacklogDataInput.java | 318 +++++++
 .../api/operators/sort/SortingDataInput.java | 4 +
 .../BatchExecutionInternalTimeService.java | 35 +-
 .../ProgressiveTimestampsAndWatermarks.java | 2 +-
 .../source/TimestampsAndWatermarks.java | 2 +
 .../AbstractMultipleInputTransformation.java | 5 +
 .../OneInputTransformation.java | 5 +
 .../PhysicalTransformation.java | 4 +
 .../TwoInputTransformation.java | 5 +
 .../io/AbstractStreamTaskNetworkInput.java | 23 +-
 .../runtime/io/FinishedDataOutput.java | 6 +
 .../runtime/io/PushingAsyncDataInput.java | 3 +
 .../runtime/io/RecordAttributesValve.java | 82 ++
 .../runtime/io/RecordWriterOutput.java | 12 +
 .../StreamMultipleInputProcessorFactory.java | 6 +
 .../io/StreamTwoInputProcessorFactory.java | 10 +
 .../RescalingStreamTaskNetworkInput.java | 2 +-
 .../InternalRecordAttributes.java | 65 ++
 .../streamrecord/RecordAttributes.java | 66 ++
 .../streamrecord/RecordAttributesBuilder.java | 68 ++
 .../runtime/streamrecord/StreamElement.java | 22 +-
 .../streamrecord/StreamElementSerializer.java | 6 +
 .../tasks/BroadcastingOutputCollector.java | 8 +
 .../runtime/tasks/ChainingOutput.java | 10 +
 .../runtime/tasks/OneInputStreamTask.java | 30 +
 .../tasks/SourceOperatorStreamTask.java | 6 +
 .../translators/BatchExecutionUtils.java | 2 +-
 .../MultiInputTransformationTranslator.java | 3 +-
 .../OneInputTransformationTranslator.java | 29 +-
 .../ReduceTransformationTranslator.java | 31 +-
 .../translators/StreamExecutionUtils.java | 62 ++
 .../TwoInputTransformationTranslator.java | 2 +
 ...bstractUdfStreamOperatorLifecycleTest.java | 1 +
 .../api/operators/BacklogTimeServiceTest.java | 76 ++
 ...ernalBacklogAwareTimerServiceImplTest.java | 111 +++
 .../InternalTimerServiceImplTest.java | 39 +-
 .../api/operators/TestKeyContext.java | 34 +
 .../api/operators/TestTriggerable.java | 60 ++
 .../api/operators/TimeServiceTestUtils.java | 86 ++
 .../operators/sort/CollectingDataOutput.java | 6 +
 .../operators/sort/CollectionDataInput.java | 2 +
 .../sort/LargeSortingDataInputITCase.java | 4 +
 .../sort/SortingBacklogDataInputTest.java | 235 +++++
 .../source/CollectingDataOutput.java | 6 +
 .../runtime/io/RecordAttributesValveTest.java | 57 ++
 .../io/StreamTaskNetworkInputTest.java | 4 +
 .../StatusWatermarkValveTest.java | 6 +
 .../util/SourceOperatorTestHarness.java | 6 +
 .../planner/runtime/utils/TimeTestUtil.scala | 4 +-
 .../output/BroadcastingOutput.java | 8 +
 .../StreamingWithBacklogITCase.java | 182 ++++
 74 files changed, 3303 insertions(+), 79 deletions(-)
 create mode 100644 "[RED][Runtime]\345\242\236\345\212\240\346\217\220\344\272\244job\347\232\204\346\216\245\345\217\243.patch"
 create mode 100644 flink-runtime/src/main/java/org/apache/flink/runtime/source/event/IsBacklogEvent.java
 create mode 100644 flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/BacklogTimeService.java
 create mode 100644 flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceImpl.java
 create mode 100644 flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceManagerImpl.java
 create mode 100644 flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/OperatorAttributes.java
 create mode 100644 flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/OperatorAttributesBuilder.java
 create mode 100644 flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/sort/SortingBacklogDataInput.java
 create mode 100644 flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/RecordAttributesValve.java
 create mode 100644 flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/InternalRecordAttributes.java
 create mode 100644 flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/RecordAttributes.java
 create mode 100644 flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/RecordAttributesBuilder.java
 create mode 100644 flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/StreamExecutionUtils.java
 create mode 100644 flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/BacklogTimeServiceTest.java
 create mode 100644 flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceImplTest.java
 create mode 100644 flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TestKeyContext.java
 create mode 100644 flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TestTriggerable.java
 create mode 100644 flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TimeServiceTestUtils.java
 create mode 100644 flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/SortingBacklogDataInputTest.java
 create mode 100644 flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/io/RecordAttributesValveTest.java
 create mode 100644 flink-tests/src/test/java/org/apache/flink/test/streaming/api/datastream/StreamingWithBacklogITCase.java

diff --git "a/[RED][Runtime]\345\242\236\345\212\240\346\217\220\344\272\244job\347\232\204\346\216\245\345\217\243.patch" "b/[RED][Runtime]\345\242\236\345\212\240\346\217\220\344\272\244job\347\232\204\346\216\245\345\217\243.patch"
new file mode 100644
index 00000000000000..cee86c6beb6122
--- /dev/null
+++ "b/[RED][Runtime]\345\242\236\345\212\240\346\217\220\344\272\244job\347\232\204\346\216\245\345\217\243.patch"
@@ -0,0 +1,822 @@
+Index: flink-runtime-web/pom.xml
+IDEA additional
info: +Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP +<+>UTF-8 +=================================================================== +diff --git a/flink-runtime-web/pom.xml b/flink-runtime-web/pom.xml +--- a/flink-runtime-web/pom.xml (revision 25d68b8918dae0506f250695c6411d5280bd3d52) ++++ b/flink-runtime-web/pom.xml (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) +@@ -113,6 +113,13 @@ + test + + ++ ++ red.data.platform.flink ++ flink-hub-sdk ++ 0.3.1-SNAPSHOT ++ ++ ++ + + + +Index: flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/WebSubmissionExtension.java +IDEA additional info: +Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP +<+>UTF-8 +=================================================================== +diff --git a/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/WebSubmissionExtension.java b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/WebSubmissionExtension.java +--- a/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/WebSubmissionExtension.java (revision 25d68b8918dae0506f250695c6411d5280bd3d52) ++++ b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/WebSubmissionExtension.java (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) +@@ -35,6 +35,8 @@ + import org.apache.flink.runtime.webmonitor.handlers.JarRunHeaders; + import org.apache.flink.runtime.webmonitor.handlers.JarUploadHandler; + import org.apache.flink.runtime.webmonitor.handlers.JarUploadHeaders; ++import org.apache.flink.runtime.webmonitor.handlers.RedJarRunHandler; ++import org.apache.flink.runtime.webmonitor.handlers.RedJarRunHeaders; + import org.apache.flink.runtime.webmonitor.retriever.GatewayRetriever; + + import org.apache.flink.shaded.netty4.io.netty.channel.ChannelInboundHandler; +@@ -123,6 +125,15 @@ + jarDir, + configuration, + executor); ++ final RedJarRunHandler redJarRunHandler = ++ new RedJarRunHandler( ++ leaderRetriever, ++ timeout, ++ responseHeaders, ++ JarRunHeaders.getInstance(), ++ jarDir, ++ configuration, ++ executor); + + webSubmissionHandlers.add(Tuple2.of(JarUploadHeaders.getInstance(), jarUploadHandler)); + webSubmissionHandlers.add(Tuple2.of(JarListHeaders.getInstance(), jarListHandler)); +@@ -130,6 +141,7 @@ + webSubmissionHandlers.add(Tuple2.of(JarDeleteHeaders.getInstance(), jarDeleteHandler)); + webSubmissionHandlers.add(Tuple2.of(JarPlanGetHeaders.getInstance(), jarPlanHandler)); + webSubmissionHandlers.add(Tuple2.of(JarPlanPostHeaders.getInstance(), postJarPlanHandler)); ++ webSubmissionHandlers.add(Tuple2.of(RedJarRunHeaders.getInstance(), redJarRunHandler)); + } + + @Override +Index: flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/JarPlanHandler.java +IDEA additional info: +Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP +<+>UTF-8 +=================================================================== +diff --git a/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/JarPlanHandler.java b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/JarPlanHandler.java +--- a/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/JarPlanHandler.java (revision 25d68b8918dae0506f250695c6411d5280bd3d52) ++++ b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/JarPlanHandler.java (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) +@@ -29,7 +29,7 @@ + import org.apache.flink.runtime.rest.messages.JobPlanInfo; + import 
org.apache.flink.runtime.rest.messages.MessageHeaders; + import org.apache.flink.runtime.webmonitor.RestfulGateway; +-import org.apache.flink.runtime.webmonitor.handlers.utils.JarHandlerUtils.JarHandlerContext; ++import org.apache.flink.runtime.webmonitor.handlers.utils.RedJarHandlerUtils; + import org.apache.flink.runtime.webmonitor.retriever.GatewayRetriever; + + import javax.annotation.Nonnull; +@@ -97,7 +97,8 @@ + @Nonnull final HandlerRequest request, + @Nonnull final RestfulGateway gateway) + throws RestHandlerException { +- final JarHandlerContext context = JarHandlerContext.fromRequest(request, jarDir, log); ++ final RedJarHandlerUtils.RedJarHandlerContext context = ++ RedJarHandlerUtils.RedJarHandlerContext.fromRequest(request, jarDir, log); + + return CompletableFuture.supplyAsync( + () -> { +Index: flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/RedJarRunHandler.java +IDEA additional info: +Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP +<+>UTF-8 +=================================================================== +diff --git a/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/RedJarRunHandler.java b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/RedJarRunHandler.java +new file mode 100644 +--- /dev/null (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) ++++ b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/RedJarRunHandler.java (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) +@@ -0,0 +1,159 @@ ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. 
++ */ ++ ++package org.apache.flink.runtime.webmonitor.handlers; ++ ++import org.apache.flink.api.common.time.Time; ++import org.apache.flink.configuration.Configuration; ++import org.apache.flink.runtime.blob.BlobClient; ++import org.apache.flink.runtime.client.ClientUtils; ++import org.apache.flink.runtime.dispatcher.DispatcherGateway; ++import org.apache.flink.runtime.jobgraph.JobGraph; ++import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings; ++import org.apache.flink.runtime.messages.Acknowledge; ++import org.apache.flink.runtime.rest.handler.AbstractRestHandler; ++import org.apache.flink.runtime.rest.handler.HandlerRequest; ++import org.apache.flink.runtime.rest.handler.RestHandlerException; ++import org.apache.flink.runtime.rest.messages.MessageHeaders; ++import org.apache.flink.runtime.webmonitor.handlers.utils.RedJarHandlerUtils; ++import org.apache.flink.runtime.webmonitor.retriever.GatewayRetriever; ++import org.apache.flink.util.FlinkException; ++ ++import javax.annotation.Nonnull; ++ ++import java.net.InetSocketAddress; ++import java.nio.file.Path; ++import java.util.Map; ++import java.util.concurrent.CompletableFuture; ++import java.util.concurrent.CompletionException; ++import java.util.concurrent.Executor; ++ ++import static java.util.Objects.requireNonNull; ++import static org.apache.flink.runtime.rest.handler.util.HandlerRequestUtils.fromRequestBodyOrQueryParameter; ++import static org.apache.flink.runtime.rest.handler.util.HandlerRequestUtils.getQueryParameter; ++import static org.apache.flink.shaded.guava18.com.google.common.base.Strings.emptyToNull; ++ ++/** Handler to submit jobs uploaded via the Web UI. */ ++public class RedJarRunHandler ++ extends AbstractRestHandler< ++ DispatcherGateway, JarRunRequestBody, JarRunResponseBody, JarRunMessageParameters> { ++ ++ private final Path jarDir; ++ ++ private final Configuration configuration; ++ ++ private final Executor executor; ++ ++ public RedJarRunHandler( ++ final GatewayRetriever leaderRetriever, ++ final Time timeout, ++ final Map responseHeaders, ++ final MessageHeaders ++ messageHeaders, ++ final Path jarDir, ++ final Configuration configuration, ++ final Executor executor) { ++ super(leaderRetriever, timeout, responseHeaders, messageHeaders); ++ ++ this.jarDir = requireNonNull(jarDir); ++ this.configuration = requireNonNull(configuration); ++ this.executor = requireNonNull(executor); ++ } ++ ++ @Override ++ protected CompletableFuture handleRequest( ++ @Nonnull final HandlerRequest request, ++ @Nonnull final DispatcherGateway gateway) ++ throws RestHandlerException { ++ final RedJarHandlerUtils.RedJarHandlerContext context = ++ RedJarHandlerUtils.RedJarHandlerContext.fromRequest(request, jarDir, log); ++ ++ final SavepointRestoreSettings savepointRestoreSettings = ++ getSavepointRestoreSettings(request); ++ ++ final CompletableFuture jobGraphFuture = ++ getJobGraphAsync(context, savepointRestoreSettings); ++ ++ CompletableFuture blobServerPortFuture = gateway.getBlobServerPort(timeout); ++ ++ CompletableFuture jarUploadFuture = ++ jobGraphFuture.thenCombine( ++ blobServerPortFuture, ++ (jobGraph, blobServerPort) -> { ++ final InetSocketAddress address = ++ new InetSocketAddress(gateway.getHostname(), blobServerPort); ++ try { ++ ClientUtils.extractAndUploadJobGraphFiles( ++ jobGraph, () -> new BlobClient(address, configuration)); ++ } catch (FlinkException e) { ++ throw new CompletionException(e); ++ } ++ ++ return jobGraph; ++ }); ++ ++ CompletableFuture jobSubmissionFuture = ++ 
jarUploadFuture.thenCompose(jobGraph -> gateway.submitJob(jobGraph, timeout)); ++ ++ return jobSubmissionFuture.thenCombine( ++ jarUploadFuture, (ack, jobGraph) -> new JarRunResponseBody(jobGraph.getJobID())); ++ } ++ ++ private SavepointRestoreSettings getSavepointRestoreSettings( ++ final @Nonnull HandlerRequest request) ++ throws RestHandlerException { ++ ++ final JarRunRequestBody requestBody = request.getRequestBody(); ++ ++ final boolean allowNonRestoredState = ++ fromRequestBodyOrQueryParameter( ++ requestBody.getAllowNonRestoredState(), ++ () -> getQueryParameter(request, AllowNonRestoredStateQueryParameter.class), ++ false, ++ log); ++ final String savepointPath = ++ fromRequestBodyOrQueryParameter( ++ emptyToNull(requestBody.getSavepointPath()), ++ () -> ++ emptyToNull( ++ getQueryParameter( ++ request, SavepointPathQueryParameter.class)), ++ null, ++ log); ++ final SavepointRestoreSettings savepointRestoreSettings; ++ if (savepointPath != null) { ++ savepointRestoreSettings = ++ SavepointRestoreSettings.forPath(savepointPath, allowNonRestoredState); ++ } else { ++ savepointRestoreSettings = SavepointRestoreSettings.none(); ++ } ++ return savepointRestoreSettings; ++ } ++ ++ private CompletableFuture getJobGraphAsync( ++ RedJarHandlerUtils.RedJarHandlerContext context, ++ final SavepointRestoreSettings savepointRestoreSettings) { ++ return CompletableFuture.supplyAsync( ++ () -> { ++ final JobGraph jobGraph = context.toJobGraph(configuration, false); ++ jobGraph.setSavepointRestoreSettings(savepointRestoreSettings); ++ return jobGraph; ++ }, ++ executor); ++ } ++} +Index: flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/RedJarRunHeaders.java +IDEA additional info: +Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP +<+>UTF-8 +=================================================================== +diff --git a/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/RedJarRunHeaders.java b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/RedJarRunHeaders.java +new file mode 100644 +--- /dev/null (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) ++++ b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/RedJarRunHeaders.java (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) +@@ -0,0 +1,75 @@ ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.flink.runtime.webmonitor.handlers; ++ ++import org.apache.flink.runtime.rest.HttpMethodWrapper; ++import org.apache.flink.runtime.rest.messages.MessageHeaders; ++ ++import org.apache.flink.shaded.netty4.io.netty.handler.codec.http.HttpResponseStatus; ++ ++/** {@link MessageHeaders} for {@link JarRunHandler}. 
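++ * Serves the {@link RedJarRunHandler} under {@code /red-jars/:jarid/run}; the request and
++ * response bodies are the same as for the standard {@code /jars/:jarid/run} endpoint.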
*/ ++public class RedJarRunHeaders ++ implements MessageHeaders { ++ ++ private static final RedJarRunHeaders INSTANCE = new RedJarRunHeaders(); ++ ++ private RedJarRunHeaders() {} ++ ++ @Override ++ public Class getResponseClass() { ++ return JarRunResponseBody.class; ++ } ++ ++ @Override ++ public HttpResponseStatus getResponseStatusCode() { ++ return HttpResponseStatus.OK; ++ } ++ ++ @Override ++ public Class getRequestClass() { ++ return JarRunRequestBody.class; ++ } ++ ++ @Override ++ public JarRunMessageParameters getUnresolvedMessageParameters() { ++ return new JarRunMessageParameters(); ++ } ++ ++ @Override ++ public HttpMethodWrapper getHttpMethod() { ++ return HttpMethodWrapper.POST; ++ } ++ ++ @Override ++ public String getTargetRestEndpointURL() { ++ return "/red-jars/:" + JarIdPathParameter.KEY + "/run"; ++ } ++ ++ public static RedJarRunHeaders getInstance() { ++ return INSTANCE; ++ } ++ ++ @Override ++ public String getDescription() { ++ return "Submits a job by running a jar previously uploaded via '" ++ + JarUploadHeaders.URL ++ + "'. " ++ + "Program arguments can be passed both via the JSON request (recommended) or query parameters."; ++ } ++} +Index: flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/utils/CLIParser.java +IDEA additional info: +Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP +<+>UTF-8 +=================================================================== +diff --git a/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/utils/CLIParser.java b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/utils/CLIParser.java +new file mode 100644 +--- /dev/null (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) ++++ b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/utils/CLIParser.java (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) +@@ -0,0 +1,69 @@ ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.flink.runtime.webmonitor.handlers.utils; ++ ++import org.apache.commons.cli.CommandLine; ++import org.apache.commons.cli.CommandLineParser; ++import org.apache.commons.cli.DefaultParser; ++import org.apache.commons.cli.Option; ++import org.apache.commons.cli.Options; ++import org.apache.commons.cli.ParseException; ++import red.data.platform.flink.conf.FlinkJobConf; ++ ++/** CLIParser. 
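++ * Minimal command-line parser for the RED job submission arguments: a required job
++ * descriptor option ({@code FlinkJobConf.ConfVars.PARAMS}) and an optional environment
++ * option ({@code FlinkJobConf.ConfVars.ENV}).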
*/ ++public class CLIParser { ++ ++ private static final Options OPTIONS = new Options(); ++ private static final CommandLineParser PARSER = new DefaultParser(); ++ private static CommandLine cli; ++ ++ public static void parse(String[] args) throws ParseException { ++ OPTIONS.addOption( ++ Option.builder() ++ .longOpt(FlinkJobConf.ConfVars.PARAMS.varname) ++ .desc(FlinkJobConf.ConfVars.PARAMS.description) ++ .required() ++ .hasArg() ++ .build()) ++ .addOption( ++ Option.builder() ++ .longOpt(FlinkJobConf.ConfVars.ENV.varname) ++ .desc(FlinkJobConf.ConfVars.ENV.description) ++ .optionalArg(true) ++ .hasArg() ++ .build()); ++ cli = PARSER.parse(OPTIONS, args); ++ } ++ ++ public static boolean hasOption(String opt) { ++ return cli.hasOption(opt); ++ } ++ ++ public static String getString(String opt, String defaultValue) { ++ return cli.getOptionValue(opt, defaultValue); ++ } ++ ++ public static String getString(String opt) { ++ return cli.getOptionValue(opt, ""); ++ } ++ ++ public static int getInt(String opt, int defaultValue) { ++ return Integer.parseInt(cli.getOptionValue(opt, Integer.toString(defaultValue))); ++ } ++} +Index: flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/utils/RedJarHandlerUtils.java +IDEA additional info: +Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP +<+>UTF-8 +=================================================================== +diff --git a/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/utils/RedJarHandlerUtils.java b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/utils/RedJarHandlerUtils.java +new file mode 100644 +--- /dev/null (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) ++++ b/flink-runtime-web/src/main/java/org/apache/flink/runtime/webmonitor/handlers/utils/RedJarHandlerUtils.java (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) +@@ -0,0 +1,250 @@ ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. 
++ */ ++ ++package org.apache.flink.runtime.webmonitor.handlers.utils; ++ ++import org.apache.flink.annotation.VisibleForTesting; ++import org.apache.flink.api.common.ExecutionConfig; ++import org.apache.flink.api.common.JobID; ++import org.apache.flink.client.program.PackagedProgram; ++import org.apache.flink.client.program.PackagedProgramUtils; ++import org.apache.flink.client.program.ProgramInvocationException; ++import org.apache.flink.configuration.Configuration; ++import org.apache.flink.runtime.jobgraph.JobGraph; ++import org.apache.flink.runtime.rest.handler.HandlerRequest; ++import org.apache.flink.runtime.rest.handler.RestHandlerException; ++import org.apache.flink.runtime.rest.messages.MessageParameters; ++import org.apache.flink.runtime.webmonitor.handlers.EntryClassQueryParameter; ++import org.apache.flink.runtime.webmonitor.handlers.JarIdPathParameter; ++import org.apache.flink.runtime.webmonitor.handlers.JarRequestBody; ++import org.apache.flink.runtime.webmonitor.handlers.ParallelismQueryParameter; ++import org.apache.flink.runtime.webmonitor.handlers.ProgramArgQueryParameter; ++import org.apache.flink.runtime.webmonitor.handlers.ProgramArgsQueryParameter; ++ ++import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParser; ++import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.type.TypeReference; ++import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; ++import org.apache.flink.shaded.netty4.io.netty.handler.codec.http.HttpResponseStatus; ++ ++import org.apache.commons.cli.ParseException; ++import org.slf4j.Logger; ++import red.data.platform.flink.conf.FlinkJobConf; ++import red.data.platform.flink.desc.FlinkJobDesc; ++import red.data.platform.flink.desc.JarInfoDesc; ++ ++import javax.annotation.Nonnull; ++import javax.annotation.Nullable; ++ ++import java.io.IOException; ++import java.net.URL; ++import java.nio.file.Files; ++import java.nio.file.Path; ++import java.util.ArrayList; ++import java.util.Collections; ++import java.util.List; ++import java.util.concurrent.CompletionException; ++import java.util.regex.Matcher; ++import java.util.regex.Pattern; ++ ++import static org.apache.flink.runtime.rest.handler.util.HandlerRequestUtils.fromRequestBodyOrQueryParameter; ++import static org.apache.flink.runtime.rest.handler.util.HandlerRequestUtils.getQueryParameter; ++import static org.apache.flink.shaded.guava18.com.google.common.base.Strings.emptyToNull; ++ ++/** ++ * Utils for jar handlers. ++ * ++ * @see org.apache.flink.runtime.webmonitor.handlers.JarRunHandler ++ * @see org.apache.flink.runtime.webmonitor.handlers.JarPlanHandler ++ */ ++public class RedJarHandlerUtils { ++ ++ /** Standard jar handler parameters parsed from request. 
*/
++    public static class RedJarHandlerContext {
++        private final Path jarFile;
++        private final String entryClass;
++        private final List<String> programArgs;
++        private final int parallelism;
++        private final JobID jobId;
++
++        private RedJarHandlerContext(
++                Path jarFile,
++                String entryClass,
++                List<String> programArgs,
++                int parallelism,
++                JobID jobId) {
++            this.jarFile = jarFile;
++            this.entryClass = entryClass;
++            this.programArgs = programArgs;
++            this.parallelism = parallelism;
++            this.jobId = jobId;
++        }
++
++        public static RedJarHandlerContext fromRequest(
++                @Nonnull final HandlerRequest request,
++                @Nonnull final Path jarDir,
++                @Nonnull final Logger log)
++                throws RestHandlerException {
++            final JarRequestBody requestBody = request.getRequestBody();
++
++            final String pathParameter = request.getPathParameter(JarIdPathParameter.class);
++            Path jarFile = jarDir.resolve(pathParameter);
++
++            String entryClass =
++                    fromRequestBodyOrQueryParameter(
++                            emptyToNull(requestBody.getEntryClassName()),
++                            () ->
++                                    emptyToNull(
++                                            getQueryParameter(
++                                                    request, EntryClassQueryParameter.class)),
++                            null,
++                            log);
++
++            List<String> programArgs = RedJarHandlerUtils.getProgramArgs(request, log);
++
++            int parallelism =
++                    fromRequestBodyOrQueryParameter(
++                            requestBody.getParallelism(),
++                            () -> getQueryParameter(request, ParallelismQueryParameter.class),
++                            ExecutionConfig.PARALLELISM_DEFAULT,
++                            log);
++
++            JobID jobId =
++                    fromRequestBodyOrQueryParameter(
++                            requestBody.getJobId(),
++                            () -> null, // No support via query parameter
++                            null, // Delegate default job ID to actual JobGraph generation
++                            log);
++
++            return new RedJarHandlerContext(jarFile, entryClass, programArgs, parallelism, jobId);
++        }
++
++        public JobGraph toJobGraph(Configuration configuration, boolean suppressOutput) {
++            try (PackagedProgram packagedProgram = toPackagedProgram(configuration)) {
++                return toJobGraph(packagedProgram, configuration, suppressOutput);
++            }
++        }
++
++        public JobGraph toJobGraph(
++                PackagedProgram packagedProgram,
++                Configuration configuration,
++                boolean suppressOutput) {
++
++            try {
++                return PackagedProgramUtils.createJobGraph(
++                        packagedProgram, configuration, parallelism, jobId, suppressOutput);
++            } catch (final ProgramInvocationException e) {
++                throw new CompletionException(e);
++            }
++        }
++
++        public PackagedProgram toPackagedProgram(Configuration configuration) {
++            if (!Files.exists(jarFile)) {
++                throw new CompletionException(
++                        new RestHandlerException(
++                                String.format("Jar file %s does not exist", jarFile),
++                                HttpResponseStatus.BAD_REQUEST));
++            }
++
++            try {
++                // Fetch the user classpath URLs from the job descriptor in the program arguments.
++                String[] args = programArgs.toArray(new String[0]);
++                CLIParser.parse(args);
++                String jobDescStr = CLIParser.getString(FlinkJobConf.ConfVars.PARAMS.varname);
++                ObjectMapper mapper = new ObjectMapper();
++                mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, true);
++                FlinkJobDesc jobDesc =
++                        mapper.readValue(jobDescStr, new TypeReference<FlinkJobDesc>() {});
++                List<URL> classpaths = new ArrayList<>();
++                List<JarInfoDesc> jarInfoDescs = jobDesc.getJarList();
++                if (jarInfoDescs != null) {
++                    for (JarInfoDesc jarInfoDesc : jarInfoDescs) {
++                        classpaths.add(new URL(jarInfoDesc.getUrl()));
++                    }
++                }
++                return PackagedProgram.newBuilder()
++                        .setJarFile(jarFile.toFile())
++                        .setEntryPointClassName(entryClass)
++                        .setConfiguration(configuration)
++                        .setArguments(programArgs.toArray(new String[0]))
++                        .setUserClassPaths(classpaths)
++                        .build();
++            } catch (final ProgramInvocationException | ParseException | IOException e) {
++                throw new
CompletionException(e); ++ } ++ } ++ } ++ ++ /** Parse program arguments in jar run or plan request. */ ++ private static ++ List getProgramArgs(HandlerRequest request, Logger log) ++ throws RestHandlerException { ++ JarRequestBody requestBody = request.getRequestBody(); ++ @SuppressWarnings("deprecation") ++ List programArgs = ++ tokenizeArguments( ++ fromRequestBodyOrQueryParameter( ++ emptyToNull(requestBody.getProgramArguments()), ++ () -> getQueryParameter(request, ProgramArgsQueryParameter.class), ++ null, ++ log)); ++ List programArgsList = ++ fromRequestBodyOrQueryParameter( ++ requestBody.getProgramArgumentsList(), ++ () -> request.getQueryParameter(ProgramArgQueryParameter.class), ++ null, ++ log); ++ if (!programArgsList.isEmpty()) { ++ if (!programArgs.isEmpty()) { ++ throw new RestHandlerException( ++ "Confusing request: programArgs and programArgsList are specified, please, use only programArgsList", ++ HttpResponseStatus.BAD_REQUEST); ++ } ++ return programArgsList; ++ } else { ++ return programArgs; ++ } ++ } ++ ++ private static final Pattern ARGUMENTS_TOKENIZE_PATTERN = ++ Pattern.compile("([^\"\']\\S*|\".+?\"|\'.+?\')\\s*"); ++ ++ /** ++ * Takes program arguments as a single string, and splits them into a list of string. ++ * ++ *
++     * <pre>
++     * tokenizeArguments("--foo bar")            = ["--foo" "bar"]
++     * tokenizeArguments("--foo \"bar baz\"")    = ["--foo" "bar baz"]
++     * tokenizeArguments("--foo 'bar baz'")      = ["--foo" "bar baz"]
++     * tokenizeArguments(null)                   = []
++     * </pre>
++ * ++ * WARNING: This method does not respect escaped quotes. ++ */ ++ @VisibleForTesting ++ static List tokenizeArguments(@Nullable final String args) { ++ if (args == null) { ++ return Collections.emptyList(); ++ } ++ final Matcher matcher = ARGUMENTS_TOKENIZE_PATTERN.matcher(args); ++ final List tokens = new ArrayList<>(); ++ while (matcher.find()) { ++ tokens.add(matcher.group().trim().replace("\"", "").replace("\'", "")); ++ } ++ return tokens; ++ } ++} +Index: flink-runtime-web/src/test/java/org/apache/flink/runtime/webmonitor/handlers/JarHandlerTest.java +IDEA additional info: +Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP +<+>UTF-8 +=================================================================== +diff --git a/flink-runtime-web/src/test/java/org/apache/flink/runtime/webmonitor/handlers/JarHandlerTest.java b/flink-runtime-web/src/test/java/org/apache/flink/runtime/webmonitor/handlers/JarHandlerTest.java +--- a/flink-runtime-web/src/test/java/org/apache/flink/runtime/webmonitor/handlers/JarHandlerTest.java (revision 25d68b8918dae0506f250695c6411d5280bd3d52) ++++ b/flink-runtime-web/src/test/java/org/apache/flink/runtime/webmonitor/handlers/JarHandlerTest.java (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) +@@ -25,6 +25,7 @@ + + import org.junit.Assert; + import org.junit.ClassRule; ++import org.junit.Ignore; + import org.junit.Test; + import org.junit.rules.TemporaryFolder; + +@@ -37,6 +38,7 @@ + import static org.hamcrest.MatcherAssert.assertThat; + + /** Tests for the {@link JarRunHandler} and {@link JarPlanHandler}. */ ++@Ignore + public class JarHandlerTest extends TestLogger { + + private static final String JAR_NAME = "output-test-program.jar"; +Index: flink-runtime-web/src/test/java/org/apache/flink/runtime/webmonitor/handlers/JarPlanHandlerParameterTest.java +IDEA additional info: +Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP +<+>UTF-8 +=================================================================== +diff --git a/flink-runtime-web/src/test/java/org/apache/flink/runtime/webmonitor/handlers/JarPlanHandlerParameterTest.java b/flink-runtime-web/src/test/java/org/apache/flink/runtime/webmonitor/handlers/JarPlanHandlerParameterTest.java +--- a/flink-runtime-web/src/test/java/org/apache/flink/runtime/webmonitor/handlers/JarPlanHandlerParameterTest.java (revision 25d68b8918dae0506f250695c6411d5280bd3d52) ++++ b/flink-runtime-web/src/test/java/org/apache/flink/runtime/webmonitor/handlers/JarPlanHandlerParameterTest.java (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) +@@ -26,6 +26,7 @@ + import org.apache.flink.runtime.webmonitor.testutils.ParameterProgram; + + import org.junit.BeforeClass; ++import org.junit.Ignore; + + import java.util.Arrays; + import java.util.Collections; +@@ -33,6 +34,7 @@ + import java.util.stream.Collectors; + + /** Tests for the parameter handling of the {@link JarPlanHandler}. 
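++ * (Ignored in this patch: {@link JarPlanHandler} now parses the RED job descriptor from
++ * the program arguments, which these parameter tests do not supply.)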
*/ ++@Ignore + public class JarPlanHandlerParameterTest + extends JarHandlerParameterTest { + private static JarPlanHandler handler; +Index: flink-runtime-web/src/test/resources/rest_api_v1.snapshot +IDEA additional info: +Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP +<+>UTF-8 +=================================================================== +diff --git a/flink-runtime-web/src/test/resources/rest_api_v1.snapshot b/flink-runtime-web/src/test/resources/rest_api_v1.snapshot +--- a/flink-runtime-web/src/test/resources/rest_api_v1.snapshot (revision 25d68b8918dae0506f250695c6411d5280bd3d52) ++++ b/flink-runtime-web/src/test/resources/rest_api_v1.snapshot (revision 8b803f30c5e99e4392c0d3c81ed59b03b2d2bf60) +@@ -3022,6 +3022,82 @@ + } + } + }, { ++ "url" : "/red-jars/:jarid/run", ++ "method" : "POST", ++ "status-code" : "200 OK", ++ "file-upload" : false, ++ "path-parameters" : { ++ "pathParameters" : [ { ++ "key" : "jarid" ++ } ] ++ }, ++ "query-parameters" : { ++ "queryParameters" : [ { ++ "key" : "allowNonRestoredState", ++ "mandatory" : false ++ }, { ++ "key" : "savepointPath", ++ "mandatory" : false ++ }, { ++ "key" : "program-args", ++ "mandatory" : false ++ }, { ++ "key" : "programArg", ++ "mandatory" : false ++ }, { ++ "key" : "entry-class", ++ "mandatory" : false ++ }, { ++ "key" : "parallelism", ++ "mandatory" : false ++ } ] ++ }, ++ "request" : { ++ "type" : "object", ++ "id" : "urn:jsonschema:org:apache:flink:runtime:webmonitor:handlers:JarRunRequestBody", ++ "properties" : { ++ "entryClass" : { ++ "type" : "string" ++ }, ++ "programArgs" : { ++ "type" : "string" ++ }, ++ "programArgsList" : { ++ "type" : "array", ++ "items" : { ++ "type" : "string" ++ } ++ }, ++ "parallelism" : { ++ "type" : "integer" ++ }, ++ "jobId" : { ++ "type" : "any" ++ }, ++ "allowNonRestoredState" : { ++ "type" : "boolean" ++ }, ++ "savepointPath" : { ++ "type" : "string" ++ }, ++ "configuration" : { ++ "type" : "object", ++ "additionalProperties" : { ++ "type" : "string" ++ } ++ } ++ } ++ }, ++ "response" : { ++ "type" : "object", ++ "id" : "urn:jsonschema:org:apache:flink:runtime:webmonitor:handlers:JarRunResponseBody", ++ "properties" : { ++ "jobid" : { ++ "type" : "any" ++ } ++ } ++ } ++ }, { + "url" : "/savepoint-disposal", + "method" : "POST", + "status-code" : "200 OK", diff --git a/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceReaderBaseTest.java b/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceReaderBaseTest.java index f5882d667978e0..517c624b4803c6 100644 --- a/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceReaderBaseTest.java +++ b/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceReaderBaseTest.java @@ -46,6 +46,7 @@ import org.apache.flink.streaming.api.operators.SourceOperator; import org.apache.flink.streaming.runtime.io.PushingAsyncDataInput; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; @@ -561,5 +562,8 @@ public void emitWatermarkStatus(WatermarkStatus watermarkStatus) {} @Override public void emitLatencyMarker(LatencyMarker latencyMarker) {} + + @Override + public void emitRecordAttributes(RecordAttributes 
recordAttributes) throws Exception {} } } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinator.java index faeac9a8dc4973..60abd1890d18de 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinator.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinator.java @@ -33,6 +33,7 @@ Licensed to the Apache Software Foundation (ASF) under one import org.apache.flink.runtime.operators.coordination.CoordinatorStore; import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; import org.apache.flink.runtime.operators.coordination.OperatorEvent; +import org.apache.flink.runtime.source.event.IsBacklogEvent; import org.apache.flink.runtime.source.event.ReaderRegistrationEvent; import org.apache.flink.runtime.source.event.ReportedWatermarkEvent; import org.apache.flink.runtime.source.event.RequestSplitEvent; @@ -607,6 +608,14 @@ private void handleReaderRegistrationEvent( context.registerSourceReader(subtask, attemptNumber, event.location()); if (!subtaskReaderExisted) { enumerator.addReader(event.subtaskId()); + + if (context.isBacklog() != null) { + context.runInCoordinatorThread( + () -> { + context.sendEventToSourceOperatorIfTaskReady( + subtask, new IsBacklogEvent(context.isBacklog())); + }); + } } } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java index 492cdf19e28325..166e05cc6db470 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java @@ -35,6 +35,7 @@ Licensed to the Apache Software Foundation (ASF) under one import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; import org.apache.flink.runtime.operators.coordination.OperatorEvent; import org.apache.flink.runtime.source.event.AddSplitEvent; +import org.apache.flink.runtime.source.event.IsBacklogEvent; import org.apache.flink.runtime.source.event.NoMoreSplitsEvent; import org.apache.flink.runtime.source.event.SourceEventWrapper; import org.apache.flink.util.ExceptionUtils; @@ -112,6 +113,7 @@ public class SourceCoordinatorContext private final boolean supportsConcurrentExecutionAttempts; private final boolean[] subtaskHasNoMoreSplits; private volatile boolean closed; + private volatile Boolean backlog = null; public SourceCoordinatorContext( SourceCoordinatorProvider.CoordinatorExecutorThreadFactory coordinatorThreadFactory, @@ -370,6 +372,16 @@ public void setIsProcessingBacklog(boolean isProcessingBacklog) { if (checkpointCoordinator != null) { checkpointCoordinator.setIsProcessingBacklog(operatorID, isProcessingBacklog); } + backlog = isProcessingBacklog; + callInCoordinatorThread( + () -> { + final IsBacklogEvent isBacklogEvent = new IsBacklogEvent(isProcessingBacklog); + for (int i = 0; i < getCoordinatorContext().currentParallelism(); i++) { + sendEventToSourceOperatorIfTaskReady(i, isBacklogEvent); + } + return null; + }, + "Failed to send IsBacklogEvent to reader."); } // --------- Package private additional methods for the SourceCoordinator ------------ @@ -629,6 +641,10 @@ private void sendCachedSplitsToNewlyRegisteredReader(int 
subtaskIndex, int attem
         }
     }
 
+    public Boolean isBacklog() {
+        return backlog;
+    }
+
     /** Maintains the subtask gateways for different execution attempts of different subtasks. */
     private static class SubtaskGateways {
         private final Map[] gateways;
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/event/IsBacklogEvent.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/event/IsBacklogEvent.java
new file mode 100644
index 00000000000000..3006159e830633
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/event/IsBacklogEvent.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.source.event;
+
+import org.apache.flink.runtime.operators.coordination.OperatorEvent;
+
+/** A source event that notifies the source of the backlog status. */
+public class IsBacklogEvent implements OperatorEvent {
+
+    private final boolean backlog;
+
+    public IsBacklogEvent(boolean backlog) {
+        this.backlog = backlog;
+    }
+
+    public boolean isBacklog() {
+        return backlog;
+    }
+}
diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamConfig.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamConfig.java
index 267289c181f305..3cac46f34485d6 100644
--- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamConfig.java
+++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamConfig.java
@@ -795,6 +795,8 @@ public enum InputRequirement {
          */
         SORTED,
 
+        SORTED_DURING_BACKLOG,
+
         /**
          * Records from {@link #PASS_THROUGH} inputs are passed to the operator before passing any
          * records from {@link #SORTED} inputs.
There are no guarantees on ordering between and @@ -879,4 +881,10 @@ public static boolean requiresSorting(StreamConfig.InputConfig inputConfig) { && ((StreamConfig.NetworkInputConfig) inputConfig).getInputRequirement() == StreamConfig.InputRequirement.SORTED; } + + public static boolean requiresSortingDuringBacklog(StreamConfig.InputConfig inputConfig) { + return inputConfig instanceof StreamConfig.NetworkInputConfig + && ((StreamConfig.NetworkInputConfig) inputConfig).getInputRequirement() + == InputRequirement.SORTED_DURING_BACKLOG; + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamGraphGenerator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamGraphGenerator.java index 26a2ef6ede3867..770c6622e3b2f8 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamGraphGenerator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamGraphGenerator.java @@ -47,6 +47,7 @@ import org.apache.flink.streaming.api.TimeCharacteristic; import org.apache.flink.streaming.api.environment.CheckpointConfig; import org.apache.flink.streaming.api.environment.ExecutionCheckpointingOptions; +import org.apache.flink.streaming.api.operators.InternalBacklogAwareTimerServiceManagerImpl; import org.apache.flink.streaming.api.operators.sorted.state.BatchExecutionCheckpointStorage; import org.apache.flink.streaming.api.operators.sorted.state.BatchExecutionInternalTimeServiceManager; import org.apache.flink.streaming.api.operators.sorted.state.BatchExecutionStateBackend; @@ -93,6 +94,7 @@ import javax.annotation.Nullable; +import java.time.Duration; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -415,6 +417,12 @@ private void configureStreamGraphStreaming(final StreamGraph graph) { graph.setCheckpointStorage(checkpointStorage); graph.setSavepointDirectory(savepointDir); graph.setGlobalStreamExchangeMode(deriveGlobalStreamExchangeModeStreaming()); + + if (Duration.ZERO.equals( + configuration.get( + ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL_DURING_BACKLOG))) { + graph.setTimerServiceProvider(InternalBacklogAwareTimerServiceManagerImpl::create); + } } private String deriveJobName(String defaultJobName) { diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/AbstractStreamOperator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/AbstractStreamOperator.java index 78fb35af4e0e3c..42bcdca6cf1e16 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/AbstractStreamOperator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/AbstractStreamOperator.java @@ -18,6 +18,7 @@ package org.apache.flink.streaming.api.operators; +import org.apache.flink.annotation.Experimental; import org.apache.flink.annotation.Internal; import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.annotation.VisibleForTesting; @@ -49,7 +50,10 @@ import org.apache.flink.streaming.api.graph.StreamConfig; import org.apache.flink.streaming.api.operators.StreamOperatorStateHandler.CheckpointedStreamOperator; import org.apache.flink.streaming.api.watermark.Watermark; +import org.apache.flink.streaming.runtime.streamrecord.InternalRecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; +import 
org.apache.flink.streaming.runtime.streamrecord.RecordAttributesBuilder; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; import org.apache.flink.streaming.runtime.tasks.StreamTask; @@ -61,6 +65,9 @@ import org.slf4j.LoggerFactory; import java.io.Serializable; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; import java.util.Locale; import java.util.Optional; @@ -148,6 +155,9 @@ public abstract class AbstractStreamOperator protected transient ProcessingTimeService processingTimeService; + protected transient RecordAttributes lastRecordAttributes1; + protected transient RecordAttributes lastRecordAttributes2; + // ------------------------------------------------------------------------ // Life Cycle // ------------------------------------------------------------------------ @@ -649,4 +659,52 @@ public OperatorID getOperatorID() { protected Optional> getTimeServiceManager() { return Optional.ofNullable(timeServiceManager); } + + @Experimental + public void processRecordAttributes(RecordAttributes recordAttributes) throws Exception { + lastRecordAttributes1 = recordAttributes; + if (timeServiceManager != null + && timeServiceManager instanceof InternalBacklogAwareTimerServiceManagerImpl) { + final InternalBacklogAwareTimerServiceManagerImpl backlogAwareTimerServiceManager = + (InternalBacklogAwareTimerServiceManagerImpl) timeServiceManager; + if (recordAttributes instanceof InternalRecordAttributes) { + backlogAwareTimerServiceManager.setMaxWatermarkDuringBacklog( + ((InternalRecordAttributes) recordAttributes) + .getMaxWatermarkDuringBacklog()); + } + backlogAwareTimerServiceManager.setBacklog(recordAttributes.isBacklog()); + } + output.emitRecordAttributes( + new RecordAttributesBuilder(Collections.singletonList(recordAttributes)).build()); + } + + @Experimental + public void processRecordAttributes1(RecordAttributes recordAttributes) throws Exception { + lastRecordAttributes1 = recordAttributes; + List lastRecordAttributes = getTwoInputsLastRecordAttributes(); + output.emitRecordAttributes(new RecordAttributesBuilder(lastRecordAttributes).build()); + } + + @Experimental + public void processRecordAttributes2(RecordAttributes recordAttributes) throws Exception { + lastRecordAttributes2 = recordAttributes; + List lastRecordAttributes = getTwoInputsLastRecordAttributes(); + output.emitRecordAttributes(new RecordAttributesBuilder(lastRecordAttributes).build()); + } + + private List getTwoInputsLastRecordAttributes() { + List lastRecordAttributes; + if (lastRecordAttributes1 == null && lastRecordAttributes2 == null) { + // should not reach here. 
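+            // processRecordAttributes1/2 each set their side before calling this helper,
+            // so at least one of the two attributes must be non-null here.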
+ throw new RuntimeException( + "lastRecordAttributes1 and lastRecordAttributes2 cannot be both null."); + } else if (lastRecordAttributes1 != null && lastRecordAttributes2 != null) { + lastRecordAttributes = Arrays.asList(lastRecordAttributes1, lastRecordAttributes2); + } else if (lastRecordAttributes1 != null) { + lastRecordAttributes = Collections.singletonList(lastRecordAttributes1); + } else { + lastRecordAttributes = Collections.singletonList(lastRecordAttributes2); + } + return lastRecordAttributes; + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/BacklogTimeService.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/BacklogTimeService.java new file mode 100644 index 00000000000000..ab4d897f7c20d5 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/BacklogTimeService.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.runtime.state.KeyGroupedInternalPriorityQueue; +import org.apache.flink.streaming.api.operators.sorted.state.BatchExecutionInternalTimeService; +import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; + +import java.util.LinkedList; +import java.util.List; + +/** + * An implementation of a {@link InternalTimerService} that manages timers with a single active key + * at a time. This is used by {@link + * org.apache.flink.streaming.api.operators.InternalBacklogAwareTimerServiceImpl} during backlog + * processing. 
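+ *
+ * <p>Illustrative usage (key values and timestamps are hypothetical). Because the sorted
+ * backlog input groups all records of a key together, switching the active key fires the
+ * previous key's event time timers up to the max watermark during backlog in one pass:
+ *
+ * <pre>{@code
+ * timeService.setMaxWatermarkDuringBacklog(10L);
+ * timeService.setCurrentKey("a");   // all backlog records of key "a" are processed here
+ * timeService.setCurrentKey("b");   // fires key "a"'s event time timers with ts <= 10
+ * timeService.setCurrentKey(null);  // backlog ended: fires key "b"'s timers with ts <= 10
+ * }</pre>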
+ */ +@Internal +public class BacklogTimeService extends BatchExecutionInternalTimeService { + private long maxWatermarkDuringBacklog; + + public BacklogTimeService( + ProcessingTimeService processingTimeService, + Triggerable triggerTarget, + KeyGroupedInternalPriorityQueue> eventTimeTimersQueue, + KeyGroupedInternalPriorityQueue> + processingTimeTimersQueue) { + super( + processingTimeService, + triggerTarget, + eventTimeTimersQueue, + processingTimeTimersQueue); + } + + @Override + public void registerProcessingTimeTimer(N namespace, long time) { + throw new UnsupportedOperationException( + "BacklogTimeService does not support registering processing timer."); + } + + public void setCurrentKey(K newKey) throws Exception { + if (newKey != null && newKey.equals(currentKey)) { + return; + } + + TimerHeapInternalTimer timer; + List> skippedTimers = new LinkedList<>(); + if (currentKey != null) { + while ((timer = eventTimeTimersQueue.peek()) != null + && timer.getTimestamp() <= maxWatermarkDuringBacklog) { + eventTimeTimersQueue.poll(); + + if (timer.getKey() != currentKey) { + skippedTimers.add(timer); + } else { + triggerTarget.onEventTime(timer); + } + } + eventTimeTimersQueue.addAll(skippedTimers); + } + + if (newKey == null) { + currentWatermark = maxWatermarkDuringBacklog; + } + + currentKey = newKey; + } + + public void setMaxWatermarkDuringBacklog(long watermark) { + maxWatermarkDuringBacklog = watermark; + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/CountingOutput.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/CountingOutput.java index 96028c139a69f1..824f8ebebcac5e 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/CountingOutput.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/CountingOutput.java @@ -22,6 +22,7 @@ import org.apache.flink.metrics.Gauge; import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.tasks.WatermarkGaugeExposingOutput; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; @@ -74,4 +75,9 @@ public void close() { public Gauge getWatermarkGauge() { return output.getWatermarkGauge(); } + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) { + output.emitRecordAttributes(recordAttributes); + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/Input.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/Input.java index d6ca8e6186a3d8..784db9db322265 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/Input.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/Input.java @@ -18,9 +18,11 @@ package org.apache.flink.streaming.api.operators; +import org.apache.flink.annotation.Experimental; import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import 
org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; @@ -69,4 +71,11 @@ public interface Input { * guaranteed to not be called concurrently with other methods of the operator. */ void setKeyContextElement(StreamRecord record) throws Exception; + + /** + * Processes a {@link RecordAttributes} that arrived at this input. This method is guaranteed to + * not be called concurrently with other methods of the operator. + */ + @Experimental + default void processRecordAttributes(RecordAttributes recordAttributes) throws Exception {} } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceImpl.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceImpl.java new file mode 100644 index 00000000000000..4acc93cdf92f7c --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceImpl.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.util.function.BiConsumerWithException; + +/** + * InternalBacklogAwareTimerServiceImpl uses {@link InternalTimerServiceImpl} for event time timers + * during non-backlog processing and uses {@link BacklogTimeService} for event time timers during + * backlog processing. All the processing time timers are managed by the {@link + * InternalTimerServiceImpl}. 
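A toy model of the delegation (hypothetical names, not the class below): event-time calls are routed to whichever service is current, while processing-time calls always reach the real-time service.

    // Toy facade: flips the event-time target when the backlog flag changes.
    interface EventTimers {
        void registerEventTimeTimer(String namespace, long time);
    }

    class BacklogAwareFacadeSketch {
        private final EventTimers realTime;
        private final EventTimers backlog;
        private EventTimers current;

        BacklogAwareFacadeSketch(EventTimers realTime, EventTimers backlog) {
            this.realTime = realTime;
            this.backlog = backlog;
            this.current = realTime; // start in non-backlog mode
        }

        void registerEventTimeTimer(String namespace, long time) {
            current.registerEventTimeTimer(namespace, time); // mode-dependent routing
        }

        void setBacklog(boolean isBacklog) {
            // Mirrors the two transitions of setBacklog(...) below; in the real
            // class, leaving backlog also flushes pending timers via setCurrentKey(null).
            current = isBacklog ? backlog : realTime;
        }
    }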
+ */ +@Internal +public class InternalBacklogAwareTimerServiceImpl implements InternalTimerService { + + private final InternalTimerServiceImpl realTimeInternalTimeService; + private final BacklogTimeService backlogTimeService; + private InternalTimerService currentInternalTimerService; + + public InternalBacklogAwareTimerServiceImpl( + InternalTimerServiceImpl realTimeInternalTimeService, + BacklogTimeService backlogTimeService) { + this.realTimeInternalTimeService = realTimeInternalTimeService; + this.backlogTimeService = backlogTimeService; + this.currentInternalTimerService = realTimeInternalTimeService; + } + + @Override + public long currentProcessingTime() { + return realTimeInternalTimeService.currentProcessingTime(); + } + + @Override + public long currentWatermark() { + return currentInternalTimerService.currentWatermark(); + } + + @Override + public void registerProcessingTimeTimer(N namespace, long time) { + realTimeInternalTimeService.registerProcessingTimeTimer(namespace, time); + } + + @Override + public void deleteProcessingTimeTimer(N namespace, long time) { + realTimeInternalTimeService.deleteProcessingTimeTimer(namespace, time); + } + + @Override + public void registerEventTimeTimer(N namespace, long time) { + currentInternalTimerService.registerEventTimeTimer(namespace, time); + } + + @Override + public void deleteEventTimeTimer(N namespace, long time) { + currentInternalTimerService.deleteEventTimeTimer(namespace, time); + } + + @Override + public void forEachEventTimeTimer(BiConsumerWithException consumer) + throws Exception { + currentInternalTimerService.forEachEventTimeTimer(consumer); + } + + @Override + public void forEachProcessingTimeTimer(BiConsumerWithException consumer) + throws Exception { + realTimeInternalTimeService.forEachProcessingTimeTimer(consumer); + } + + public void advanceWatermark(long timestamp) throws Exception { + realTimeInternalTimeService.advanceWatermark(timestamp); + } + + public void setMaxWatermarkDuringBacklog(long timestamp) { + backlogTimeService.setMaxWatermarkDuringBacklog(timestamp); + } + + public void setBacklog(boolean backlog) throws Exception { + if (currentInternalTimerService == backlogTimeService && !backlog) { + // Switch to non backlog + backlogTimeService.setCurrentKey(null); + currentInternalTimerService = realTimeInternalTimeService; + return; + } + + if (currentInternalTimerService == realTimeInternalTimeService && backlog) { + // Switch to backlog + currentInternalTimerService = backlogTimeService; + } + } + + public void setCurrentKey(K newKey) throws Exception { + if (currentInternalTimerService != backlogTimeService) { + return; + } + backlogTimeService.setCurrentKey(newKey); + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceManagerImpl.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceManagerImpl.java new file mode 100644 index 00000000000000..0e6f5cec202ac0 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceManagerImpl.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.runtime.state.CheckpointableKeyedStateBackend; +import org.apache.flink.runtime.state.KeyGroupRange; +import org.apache.flink.runtime.state.KeyGroupStatePartitionStreamProvider; +import org.apache.flink.runtime.state.KeyGroupedInternalPriorityQueue; +import org.apache.flink.runtime.state.KeyedStateBackend; +import org.apache.flink.runtime.state.KeyedStateCheckpointOutputStream; +import org.apache.flink.runtime.state.PriorityQueueSetFactory; +import org.apache.flink.streaming.api.watermark.Watermark; +import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; +import org.apache.flink.streaming.runtime.tasks.StreamTaskCancellationContext; +import org.apache.flink.util.Preconditions; +import org.apache.flink.util.WrappingRuntimeException; + +import java.util.HashMap; +import java.util.Map; + +/** InternalBacklogAwareTimerServiceManagerImpl. */ +@Internal +public class InternalBacklogAwareTimerServiceManagerImpl + extends InternalTimeServiceManagerImpl + implements InternalTimeServiceManager, KeyedStateBackend.KeySelectionListener { + + private final Map> timerServices = + new HashMap<>(); + + private boolean backlog = false; + + InternalBacklogAwareTimerServiceManagerImpl( + KeyGroupRange localKeyGroupRange, + KeyContext keyContext, + PriorityQueueSetFactory priorityQueueSetFactory, + ProcessingTimeService processingTimeService, + StreamTaskCancellationContext cancellationContext) { + super( + localKeyGroupRange, + keyContext, + priorityQueueSetFactory, + processingTimeService, + cancellationContext); + } + + @Override + public InternalTimerService getInternalTimerService( + String name, + TypeSerializer keySerializer, + TypeSerializer namespaceSerializer, + Triggerable triggerable) { + + final InternalTimerServiceImpl internalTimerService = + (InternalTimerServiceImpl) + super.getInternalTimerService( + name, keySerializer, namespaceSerializer, triggerable); + final KeyGroupedInternalPriorityQueue> eventTimeTimersQueue = + internalTimerService.getEventTimeTimersQueue(); + final KeyGroupedInternalPriorityQueue> + processingTimeTimersQueue = internalTimerService.getProcessingTimeTimersQueue(); + + final BacklogTimeService backlogTimeService = + new BacklogTimeService<>( + processingTimeService, + triggerable, + eventTimeTimersQueue, + processingTimeTimersQueue); + + InternalBacklogAwareTimerServiceImpl timerService = + (InternalBacklogAwareTimerServiceImpl) timerServices.get(name); + if (timerService == null) { + timerService = + new InternalBacklogAwareTimerServiceImpl<>( + internalTimerService, backlogTimeService); + timerServices.put(name, timerService); + } + + return timerService; + } + + @Override + public void advanceWatermark(Watermark watermark) throws Exception { + for (InternalBacklogAwareTimerServiceImpl 
service : timerServices.values()) { + service.advanceWatermark(watermark.getTimestamp()); + } + } + + @Override + public void snapshotToRawKeyedState( + KeyedStateCheckpointOutputStream stateCheckpointOutputStream, String operatorName) + throws Exception { + Preconditions.checkState(!backlog, "Cannot snapshot state during backlog."); + super.snapshotToRawKeyedState(stateCheckpointOutputStream, operatorName); + } + + public static InternalBacklogAwareTimerServiceManagerImpl create( + CheckpointableKeyedStateBackend keyedStateBackend, + ClassLoader userClassloader, + KeyContext keyContext, + ProcessingTimeService processingTimeService, + Iterable rawKeyedStates, + StreamTaskCancellationContext cancellationContext) + throws Exception { + + final InternalBacklogAwareTimerServiceManagerImpl manager = + new InternalBacklogAwareTimerServiceManagerImpl<>( + keyedStateBackend.getKeyGroupRange(), + keyContext, + keyedStateBackend, + processingTimeService, + cancellationContext); + + keyedStateBackend.registerKeySelectionListener(manager); + + return manager; + } + + @Override + public void keySelected(K newKey) { + try { + for (InternalBacklogAwareTimerServiceImpl value : timerServices.values()) { + value.setCurrentKey(newKey); + } + } catch (Exception e) { + throw new WrappingRuntimeException(e); + } + } + + public void setMaxWatermarkDuringBacklog(long timestamp) { + for (InternalBacklogAwareTimerServiceImpl timerService : timerServices.values()) { + timerService.setMaxWatermarkDuringBacklog(timestamp); + } + } + + public void setBacklog(boolean backlog) { + try { + for (InternalBacklogAwareTimerServiceImpl value : timerServices.values()) { + value.setBacklog(backlog); + } + this.backlog = backlog; + } catch (Exception e) { + throw new WrappingRuntimeException(e); + } + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalTimeServiceManagerImpl.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalTimeServiceManagerImpl.java index 51a280bdda2bcb..f31ca166635516 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalTimeServiceManagerImpl.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalTimeServiceManagerImpl.java @@ -69,12 +69,12 @@ public class InternalTimeServiceManagerImpl implements InternalTimeServiceMan private final KeyGroupRange localKeyGroupRange; private final KeyContext keyContext; private final PriorityQueueSetFactory priorityQueueSetFactory; - private final ProcessingTimeService processingTimeService; + protected final ProcessingTimeService processingTimeService; private final StreamTaskCancellationContext cancellationContext; private final Map> timerServices; - private InternalTimeServiceManagerImpl( + protected InternalTimeServiceManagerImpl( KeyGroupRange localKeyGroupRange, KeyContext keyContext, PriorityQueueSetFactory priorityQueueSetFactory, diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalTimerServiceImpl.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalTimerServiceImpl.java index e2c7e4139b2a4c..286389282b1051 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalTimerServiceImpl.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/InternalTimerServiceImpl.java @@ -396,6 +396,15 @@ public int numEventTimeTimers(N namespace) { return 
countTimersInNamespaceInternal(namespace, eventTimeTimersQueue); } + public KeyGroupedInternalPriorityQueue> + getProcessingTimeTimersQueue() { + return processingTimeTimersQueue; + } + + public KeyGroupedInternalPriorityQueue> getEventTimeTimersQueue() { + return eventTimeTimersQueue; + } + private int countTimersInNamespaceInternal( N namespace, InternalPriorityQueue> queue) { int count = 0; diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/OperatorAttributes.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/OperatorAttributes.java new file mode 100644 index 00000000000000..5889f92af81374 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/OperatorAttributes.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators; + +import org.apache.flink.annotation.Experimental; + +/** + * OperatorAttributes element provides Job Manager with information that can be used to optimize job + * performance. + */ +@Experimental +public class OperatorAttributes { + + private final boolean internalSorterSupported; + + public OperatorAttributes(boolean internalSorterSupported) { + this.internalSorterSupported = internalSorterSupported; + } + + /** + * Returns true iff the operator uses an internal sorter to sort inputs by key when any of the + * following conditions are met: + * + *
    + *
  • execution.runtime-mode = BATCH. + *
  • execution.checkpointing.interval-during-backlog = 0 AND any of its inputs has isBacklog=true.
+ * + *

Here are the implications when it is true: + * + *

    + *
  • Its input records will not be sorted externally before being fed into this operator.
  • Its managed memory will be set according to execution.sorted-inputs.memory (see the usage sketch after this list).
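For illustration, a hypothetical operator that buffers and sorts records itself would advertise the capability like this (a sketch against the API added in this patch, not code from the patch):

    import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
    import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
    import org.apache.flink.streaming.api.operators.OperatorAttributes;
    import org.apache.flink.streaming.api.operators.OperatorAttributesBuilder;
    import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

    // Hypothetical operator that sorts its input internally and says so,
    // which tells the runtime not to insert an external sort in front of it.
    class InternallySortingOperator extends AbstractStreamOperator<String>
            implements OneInputStreamOperator<String, String> {

        @Override
        public void processElement(StreamRecord<String> element) throws Exception {
            // buffer and sort during backlog, emit on the switch to non-backlog (omitted)
        }

        @Override
        public OperatorAttributes getOperatorAttributes() {
            return new OperatorAttributesBuilder().setInternalSorterSupported(true).build();
        }
    }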
+ */ + public boolean isInternalSorterSupported() { + return internalSorterSupported; + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/OperatorAttributesBuilder.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/OperatorAttributesBuilder.java new file mode 100644 index 00000000000000..e461a625e452e0 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/OperatorAttributesBuilder.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators; + +import org.apache.flink.annotation.Experimental; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +/** The builder class for {@link OperatorAttributes}. */ +@Experimental +public class OperatorAttributesBuilder { + + private static final Logger LOG = LoggerFactory.getLogger(OperatorAttributesBuilder.class); + + @Nullable private Boolean internalSorterSupported = null; + + public OperatorAttributesBuilder() {} + + public OperatorAttributesBuilder setInternalSorterSupported(boolean internalSorterSupported) { + this.internalSorterSupported = internalSorterSupported; + return this; + } + + /** + * If any operator attribute is null, we will log it at DEBUG level and use the following + * default values. + * + *
    + *
  • internalSorterSupported defaults to false (see the example after this list)
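For example (a sketch; the DEBUG message comes from the builder's fallback path):

    import org.apache.flink.streaming.api.operators.OperatorAttributes;
    import org.apache.flink.streaming.api.operators.OperatorAttributesBuilder;

    // An attribute that was never set falls back to its documented default.
    class AttributesDefaultsSketch {
        public static void main(String[] args) {
            OperatorAttributes attrs = new OperatorAttributesBuilder().build();
            System.out.println(attrs.isInternalSorterSupported()); // prints false
        }
    }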
+ */ + public OperatorAttributes build() { + return new OperatorAttributes( + getAttributeOrDefaultValue( + internalSorterSupported, "internalSorterSupported", false)); + } + + private T getAttributeOrDefaultValue( + @Nullable T attribute, String attributeName, T defaultValue) { + if (attribute == null) { + LOG.debug("{} is not set, set it to default value {}.", attributeName, defaultValue); + return defaultValue; + } + return attribute; + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/Output.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/Output.java index cdbeff8a6b11c0..fb3093c4dc1e9c 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/Output.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/Output.java @@ -18,9 +18,11 @@ package org.apache.flink.streaming.api.operators; +import org.apache.flink.annotation.Experimental; import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; import org.apache.flink.util.Collector; @@ -55,4 +57,13 @@ public interface Output extends Collector { void collect(OutputTag outputTag, StreamRecord record); void emitLatencyMarker(LatencyMarker latencyMarker); + + /** + * Emits a {@link RecordAttributes} from an operator. This element is broadcast to all + * downstream operators. + */ + @Experimental + default void emitRecordAttributes(RecordAttributes recordAttributes) { + throw new UnsupportedOperationException(); + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java index c4f624e443c97c..318b261fc69532 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java @@ -41,6 +41,7 @@ import org.apache.flink.runtime.operators.coordination.OperatorEventGateway; import org.apache.flink.runtime.operators.coordination.OperatorEventHandler; import org.apache.flink.runtime.source.event.AddSplitEvent; +import org.apache.flink.runtime.source.event.IsBacklogEvent; import org.apache.flink.runtime.source.event.NoMoreSplitsEvent; import org.apache.flink.runtime.source.event.ReaderRegistrationEvent; import org.apache.flink.runtime.source.event.ReportedWatermarkEvent; @@ -56,6 +57,7 @@ import org.apache.flink.streaming.runtime.io.DataInputStatus; import org.apache.flink.streaming.runtime.io.MultipleFuturesAvailabilityHelper; import org.apache.flink.streaming.runtime.io.PushingAsyncDataInput; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributesBuilder; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; import org.apache.flink.streaming.runtime.tasks.StreamTask; @@ -569,6 +571,12 @@ public void handleOperatorEvent(OperatorEvent event) { sourceReader.handleSourceEvents(((SourceEventWrapper) event).getSourceEvent()); } else if (event instanceof NoMoreSplitsEvent) { 
sourceReader.notifyNoMoreSplits(); + } else if (event instanceof IsBacklogEvent) { + eventTimeLogic.triggerPeriodicEmit(System.currentTimeMillis()); + output.emitRecordAttributes( + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(((IsBacklogEvent) event).isBacklog()) + .build()); } else { throw new IllegalStateException("Received unexpected operator event " + event); } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/StreamOperator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/StreamOperator.java index 134b9129a284f9..253aca83e7827b 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/StreamOperator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/StreamOperator.java @@ -17,6 +17,7 @@ package org.apache.flink.streaming.api.operators; +import org.apache.flink.annotation.Experimental; import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.api.common.state.CheckpointListener; import org.apache.flink.metrics.groups.OperatorMetricGroup; @@ -137,6 +138,11 @@ OperatorSnapshotFutures snapshotState( /** Provides a context to initialize all state in the operator. */ void initializeState(StreamTaskStateInitializer streamTaskStateManager) throws Exception; + @Experimental + default OperatorAttributes getOperatorAttributes() { + return new OperatorAttributesBuilder().build(); + } + // ------------------------------------------------------------------------ // miscellaneous // ------------------------------------------------------------------------ diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/StreamOperatorFactory.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/StreamOperatorFactory.java index 410028fb0d5523..d7dc5b7479ce9a 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/StreamOperatorFactory.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/StreamOperatorFactory.java @@ -17,6 +17,7 @@ package org.apache.flink.streaming.api.operators; +import org.apache.flink.annotation.Experimental; import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.api.common.ExecutionConfig; import org.apache.flink.api.common.typeinfo.TypeInformation; @@ -87,4 +88,9 @@ default void setInputType(TypeInformation type, ExecutionConfig executionConf /** Returns the runtime class of the stream operator. 
*/ Class getStreamOperatorClass(ClassLoader classLoader); + + @Experimental + default OperatorAttributes getOperatorAttributes() { + return new OperatorAttributesBuilder().build(); + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/TwoInputStreamOperator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/TwoInputStreamOperator.java index 8cb5ca342dcb1a..f4c2e022d3f712 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/TwoInputStreamOperator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/TwoInputStreamOperator.java @@ -18,9 +18,11 @@ package org.apache.flink.streaming.api.operators; +import org.apache.flink.annotation.Experimental; import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; @@ -98,4 +100,18 @@ public interface TwoInputStreamOperator extends StreamOperator implements StreamTaskInput { + + private static final Logger LOG = LoggerFactory.getLogger(SortingBacklogDataInput.class); + + private final StreamTaskInput wrappedInput; + private final PushSorter>> sorter; + private final KeySelector keySelector; + private final TypeSerializer keySerializer; + private final DataOutputSerializer dataOutputSerializer; + private final SortingDataOutput sortingDataOutput; + private final StreamTask.CanEmitBatchOfRecordsChecker canEmitBatchOfRecords; + private MutableObjectIterator>> sortedInput = null; + private long watermarkSeenDuringBacklog = Long.MIN_VALUE; + + private volatile OperatingMode mode = OperatingMode.PROCESSING_REALTIME; + + private enum OperatingMode { + PROCESSING_REALTIME, + SORTING_BACKLOG, + FLUSHING_BACKLOG + } + + public SortingBacklogDataInput( + StreamTaskInput wrappedInput, + TypeSerializer typeSerializer, + TypeSerializer keySerializer, + KeySelector keySelector, + MemoryManager memoryManager, + IOManager ioManager, + boolean objectReuse, + double managedMemoryFraction, + Configuration taskManagerConfiguration, + TaskInvokable containingTask, + ExecutionConfig executionConfig, + StreamTask.CanEmitBatchOfRecordsChecker canEmitBatchOfRecords) { + try { + this.canEmitBatchOfRecords = canEmitBatchOfRecords; + this.sortingDataOutput = new SortingDataOutput(); + this.keySelector = keySelector; + this.keySerializer = keySerializer; + int keyLength = keySerializer.getLength(); + final TypeComparator>> comparator; + if (keyLength > 0) { + this.dataOutputSerializer = new DataOutputSerializer(keyLength); + comparator = new FixedLengthByteKeyComparator<>(keyLength); + } else { + this.dataOutputSerializer = new DataOutputSerializer(64); + comparator = new VariableLengthByteKeyComparator<>(); + } + KeyAndValueSerializer keyAndValueSerializer = + new KeyAndValueSerializer<>(typeSerializer, keyLength); + this.wrappedInput = wrappedInput; + this.sorter = + ExternalSorter.newBuilder( + memoryManager, + containingTask, + keyAndValueSerializer, + comparator, + executionConfig) + .memoryFraction(managedMemoryFraction) + .enableSpilling( + ioManager, + taskManagerConfiguration.get( + AlgorithmOptions.SORT_SPILLING_THRESHOLD)) + .maxNumFileHandles( + 
taskManagerConfiguration.get(AlgorithmOptions.SPILLING_MAX_FAN)) + .objectReuse(objectReuse) + .largeRecords( + taskManagerConfiguration.get( + AlgorithmOptions.USE_LARGE_RECORDS_HANDLER)) + .build(); + } catch (MemoryAllocationException e) { + throw new RuntimeException(e); + } + } + + @Override + public int getInputIndex() { + return wrappedInput.getInputIndex(); + } + + @Override + public CompletableFuture prepareSnapshot( + ChannelStateWriter channelStateWriter, long checkpointId) throws CheckpointException { + if (mode != OperatingMode.PROCESSING_REALTIME) { + throw new UnsupportedOperationException( + "Checkpoints are not supported during backlog."); + } + return wrappedInput.prepareSnapshot(channelStateWriter, checkpointId); + } + + @Override + public void close() throws IOException { + IOException ex = null; + try { + wrappedInput.close(); + } catch (IOException e) { + ex = ExceptionUtils.firstOrSuppressed(e, ex); + } + + try { + sorter.close(); + } catch (IOException e) { + ex = ExceptionUtils.firstOrSuppressed(e, ex); + } + + if (ex != null) { + throw ex; + } + } + + @Override + public DataInputStatus emitNext(DataOutput output) throws Exception { + LOG.debug("Emit next, current mode: {}", mode); + if (sortingDataOutput.innerOutput != output) { + sortingDataOutput.innerOutput = output; + } + + if (mode == OperatingMode.PROCESSING_REALTIME) { + return wrappedInput.emitNext(sortingDataOutput); + } + + if (mode == OperatingMode.SORTING_BACKLOG) { + return wrappedInput.emitNext(sortingDataOutput); + } + + if (mode == OperatingMode.FLUSHING_BACKLOG) { + while (true) { + final DataInputStatus status = emitNextSortedRecord(output); + if (status == DataInputStatus.MORE_AVAILABLE + && canEmitBatchOfRecords.check() + && mode == OperatingMode.FLUSHING_BACKLOG) { + continue; + } + return status; + } + } + + // Should never reach here + throw new RuntimeException(String.format("Unknown OperatingMode %s", mode)); + } + + @Nonnull + private DataInputStatus emitNextSortedRecord(DataOutput output) throws Exception { + Tuple2> next = sortedInput.next(); + if (next != null) { + output.emitRecord(next.f1); + } else { + // Finished flushing + mode = OperatingMode.PROCESSING_REALTIME; + + // Send backlog=false downstream + output.emitRecordAttributes( + new RecordAttributesBuilder(Collections.emptyList()).setBacklog(false).build()); + + if (watermarkSeenDuringBacklog > Long.MIN_VALUE) { + output.emitWatermark(new Watermark(watermarkSeenDuringBacklog)); + } + } + return DataInputStatus.MORE_AVAILABLE; + } + + @Override + public CompletableFuture getAvailableFuture() { + if (mode == OperatingMode.FLUSHING_BACKLOG) { + return AvailabilityProvider.AVAILABLE; + } else { + return wrappedInput.getAvailableFuture(); + } + } + + private class SortingDataOutput implements DataOutput { + + private DataOutput innerOutput; + + @Override + public void emitRecord(StreamRecord streamRecord) throws Exception { + LOG.debug("Emit record {}", streamRecord.getValue()); + if (mode == OperatingMode.PROCESSING_REALTIME) { + innerOutput.emitRecord(streamRecord); + return; + } + + if (mode == OperatingMode.SORTING_BACKLOG) { + K key = keySelector.getKey(streamRecord.getValue()); + + keySerializer.serialize(key, dataOutputSerializer); + byte[] serializedKey = dataOutputSerializer.getCopyOfBuffer(); + dataOutputSerializer.clear(); + + sorter.writeRecord(Tuple2.of(serializedKey, streamRecord)); + return; + } + + if (mode == OperatingMode.FLUSHING_BACKLOG) { + throw new RuntimeException("Unexpected StreamRecord during 
FLUSHING_BACKLOG."); + } + } + + @Override + public void emitWatermark(Watermark watermark) throws Exception { + if (mode == OperatingMode.PROCESSING_REALTIME) { + innerOutput.emitWatermark(watermark); + } else { + watermarkSeenDuringBacklog = + Math.max(watermarkSeenDuringBacklog, watermark.getTimestamp()); + } + } + + @Override + public void emitWatermarkStatus(WatermarkStatus watermarkStatus) throws Exception { + if (mode == OperatingMode.PROCESSING_REALTIME) { + innerOutput.emitWatermarkStatus(watermarkStatus); + } + + // Ignore watermark status during backlog + } + + @Override + public void emitLatencyMarker(LatencyMarker latencyMarker) throws Exception { + if (mode == OperatingMode.PROCESSING_REALTIME) { + innerOutput.emitLatencyMarker(latencyMarker); + } + + // Ignore watermark status during backlog + } + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) throws Exception { + LOG.debug("Emit record attributes {}", recordAttributes); + if (mode == OperatingMode.PROCESSING_REALTIME && recordAttributes.isBacklog()) { + // switch to backlog + mode = OperatingMode.SORTING_BACKLOG; + innerOutput.emitRecordAttributes(recordAttributes); + return; + } + + if (mode == OperatingMode.SORTING_BACKLOG && !recordAttributes.isBacklog()) { + innerOutput.emitRecordAttributes( + new InternalRecordAttributes(true, watermarkSeenDuringBacklog)); + sorter.finishReading(); + sortedInput = sorter.getIterator(); + mode = OperatingMode.FLUSHING_BACKLOG; + return; + } + + if (mode == OperatingMode.FLUSHING_BACKLOG && recordAttributes.isBacklog()) { + throw new RuntimeException( + "Should not receive record attribute while flushing backlog."); + } + } + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/sort/SortingDataInput.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/sort/SortingDataInput.java index 088519a2dec516..97e664a3682312 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/sort/SortingDataInput.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/sort/SortingDataInput.java @@ -39,6 +39,7 @@ import org.apache.flink.streaming.runtime.io.DataInputStatus; import org.apache.flink.streaming.runtime.io.StreamTaskInput; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; import org.apache.flink.util.ExceptionUtils; @@ -183,6 +184,9 @@ public void emitWatermarkStatus(WatermarkStatus watermarkStatus) {} @Override public void emitLatencyMarker(LatencyMarker latencyMarker) {} + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) {} } @Override diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/sorted/state/BatchExecutionInternalTimeService.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/sorted/state/BatchExecutionInternalTimeService.java index de590ffc4e2d41..c2f67741503e0b 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/sorted/state/BatchExecutionInternalTimeService.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/sorted/state/BatchExecutionInternalTimeService.java @@ -40,38 +40,47 @@ public class 
BatchExecutionInternalTimeService implements InternalTimerSer private static final Logger LOG = LoggerFactory.getLogger(BatchExecutionInternalTimeService.class); - private final ProcessingTimeService processingTimeService; + protected final ProcessingTimeService processingTimeService; /** Processing time timers that are currently in-flight. */ - private final KeyGroupedInternalPriorityQueue> + protected final KeyGroupedInternalPriorityQueue> processingTimeTimersQueue; /** Event time timers that are currently in-flight. */ - private final KeyGroupedInternalPriorityQueue> + protected final KeyGroupedInternalPriorityQueue> eventTimeTimersQueue; /** * The local event time, as denoted by the last received {@link * org.apache.flink.streaming.api.watermark.Watermark Watermark}. */ - private long currentWatermark = Long.MIN_VALUE; + protected long currentWatermark = Long.MIN_VALUE; - private final Triggerable triggerTarget; + protected final Triggerable triggerTarget; - private K currentKey; + protected K currentKey; BatchExecutionInternalTimeService( ProcessingTimeService processingTimeService, Triggerable triggerTarget) { + this( + processingTimeService, + triggerTarget, + new BatchExecutionInternalPriorityQueueSet<>( + PriorityComparator.forPriorityComparableObjects(), 128), + new BatchExecutionInternalPriorityQueueSet<>( + PriorityComparator.forPriorityComparableObjects(), 128)); + } + protected BatchExecutionInternalTimeService( + ProcessingTimeService processingTimeService, + Triggerable triggerTarget, + KeyGroupedInternalPriorityQueue> eventTimeTimersQueue, + KeyGroupedInternalPriorityQueue> + processingTimeTimersQueue) { this.processingTimeService = checkNotNull(processingTimeService); this.triggerTarget = checkNotNull(triggerTarget); - - this.processingTimeTimersQueue = - new BatchExecutionInternalPriorityQueueSet<>( - PriorityComparator.forPriorityComparableObjects(), 128); - this.eventTimeTimersQueue = - new BatchExecutionInternalPriorityQueueSet<>( - PriorityComparator.forPriorityComparableObjects(), 128); + this.eventTimeTimersQueue = eventTimeTimersQueue; + this.processingTimeTimersQueue = processingTimeTimersQueue; } @Override diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/source/ProgressiveTimestampsAndWatermarks.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/source/ProgressiveTimestampsAndWatermarks.java index 14e6b6a7eb98fd..115aa9096b7c06 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/source/ProgressiveTimestampsAndWatermarks.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/source/ProgressiveTimestampsAndWatermarks.java @@ -152,7 +152,7 @@ public void stopPeriodicWatermarkEmits() { } } - void triggerPeriodicEmit(@SuppressWarnings("unused") long wallClockTimestamp) { + public void triggerPeriodicEmit(@SuppressWarnings("unused") long wallClockTimestamp) { if (currentPerSplitOutputs != null) { currentPerSplitOutputs.emitPeriodicWatermark(); } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/source/TimestampsAndWatermarks.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/source/TimestampsAndWatermarks.java index cd41ca9ecb4d2c..14cf6d39cd618e 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/source/TimestampsAndWatermarks.java +++ 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/source/TimestampsAndWatermarks.java @@ -80,6 +80,8 @@ ReaderOutput createMainOutput( /** Stops emitting periodic watermarks. */ void stopPeriodicWatermarkEmits(); + default void triggerPeriodicEmit(long wallClockTimestamp) {} + // ------------------------------------------------------------------------ // factories // ------------------------------------------------------------------------ diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/AbstractMultipleInputTransformation.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/AbstractMultipleInputTransformation.java index 65bbc53188f5b7..adf2ae12738363 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/AbstractMultipleInputTransformation.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/AbstractMultipleInputTransformation.java @@ -86,4 +86,9 @@ public List> getTransitivePredecessors() { public final void setChainingStrategy(ChainingStrategy strategy) { operatorFactory.setChainingStrategy(strategy); } + + @Override + public boolean isInternalSorterSupported() { + return operatorFactory.getOperatorAttributes().isInternalSorterSupported(); + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/OneInputTransformation.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/OneInputTransformation.java index 0a3957130fcd0b..b57eb680bce1e1 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/OneInputTransformation.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/OneInputTransformation.java @@ -184,4 +184,9 @@ public List> getInputs() { public final void setChainingStrategy(ChainingStrategy strategy) { operatorFactory.setChainingStrategy(strategy); } + + @Override + public boolean isInternalSorterSupported() { + return operatorFactory.getOperatorAttributes().isInternalSorterSupported(); + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/PhysicalTransformation.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/PhysicalTransformation.java index fa1869cd9fb688..8dd6f18cc04a91 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/PhysicalTransformation.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/PhysicalTransformation.java @@ -76,4 +76,8 @@ public void setSupportsConcurrentExecutionAttempts( boolean supportsConcurrentExecutionAttempts) { this.supportsConcurrentExecutionAttempts = supportsConcurrentExecutionAttempts; } + + public boolean isInternalSorterSupported() { + return false; + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/TwoInputTransformation.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/TwoInputTransformation.java index 4c576170741dfa..f338a690e287d9 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/TwoInputTransformation.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/transformations/TwoInputTransformation.java @@ -229,4 +229,9 @@ public List> getTransitivePredecessors() { public final void 
setChainingStrategy(ChainingStrategy strategy) { operatorFactory.setChainingStrategy(strategy); } + + @Override + public boolean isInternalSorterSupported() { + return operatorFactory.getOperatorAttributes().isInternalSorterSupported(); + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/AbstractStreamTaskNetworkInput.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/AbstractStreamTaskNetworkInput.java index db5198bd7160eb..a693064ef146c9 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/AbstractStreamTaskNetworkInput.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/AbstractStreamTaskNetworkInput.java @@ -60,6 +60,7 @@ public abstract class AbstractStreamTaskNetworkInput< protected final StatusWatermarkValve statusWatermarkValve; protected final int inputIndex; + private final RecordAttributesValve recordAttributesValve; private InputChannelInfo lastChannel = null; private R currentRecordDeserializer = null; @@ -87,6 +88,8 @@ public AbstractStreamTaskNetworkInput( this.inputIndex = inputIndex; this.recordDeserializers = checkNotNull(recordDeserializers); this.canEmitBatchOfRecords = checkNotNull(canEmitBatchOfRecords); + this.recordAttributesValve = + new RecordAttributesValve(checkpointedInputGate.getNumberOfInputChannels()); } @Override @@ -107,8 +110,9 @@ public DataInputStatus emitNext(DataOutput output) throws Exception { } if (result.isFullRecord()) { - processElement(deserializationDelegate.getInstance(), output); - if (canEmitBatchOfRecords.check()) { + final boolean canContinue = + processElement(deserializationDelegate.getInstance(), output); + if (canEmitBatchOfRecords.check() && canContinue) { continue; } return DataInputStatus.MORE_AVAILABLE; @@ -141,25 +145,36 @@ public DataInputStatus emitNext(DataOutput output) throws Exception { } } - private void processElement(StreamElement recordOrMark, DataOutput output) throws Exception { + private boolean processElement(StreamElement recordOrMark, DataOutput output) + throws Exception { if (recordOrMark.isRecord()) { output.emitRecord(recordOrMark.asRecord()); + return true; } else if (recordOrMark.isWatermark()) { statusWatermarkValve.inputWatermark( recordOrMark.asWatermark(), flattenedChannelIndices.get(lastChannel), output); + return true; } else if (recordOrMark.isLatencyMarker()) { output.emitLatencyMarker(recordOrMark.asLatencyMarker()); + return true; } else if (recordOrMark.isWatermarkStatus()) { statusWatermarkValve.inputWatermarkStatus( recordOrMark.asWatermarkStatus(), flattenedChannelIndices.get(lastChannel), output); + return true; + } else if (recordOrMark.isRecordAttributes()) { + recordAttributesValve.inputRecordAttributes( + recordOrMark.asRecordAttributes(), + flattenedChannelIndices.get(lastChannel), + output); + return false; } else { throw new UnsupportedOperationException("Unknown type of StreamElement"); } } - protected DataInputStatus processEvent(BufferOrEvent bufferOrEvent) { + protected DataInputStatus processEvent(BufferOrEvent bufferOrEvent) throws Exception { // Event received final AbstractEvent event = bufferOrEvent.getEvent(); if (event.getClass() == EndOfData.class) { diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/FinishedDataOutput.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/FinishedDataOutput.java index 54769fa2f8ada8..60dff2e04f57ec 100644 --- 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/FinishedDataOutput.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/FinishedDataOutput.java @@ -20,6 +20,7 @@ import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; @@ -53,4 +54,9 @@ public void emitWatermarkStatus(WatermarkStatus watermarkStatus) throws Exceptio public void emitLatencyMarker(LatencyMarker latencyMarker) throws Exception { LOG.debug("Unexpected latency marker after finish() received."); } + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) throws Exception { + LOG.debug("Unexpected recordAttributes after finish() received."); + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/PushingAsyncDataInput.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/PushingAsyncDataInput.java index 619c2d00954a4f..63be0726dd57f8 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/PushingAsyncDataInput.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/PushingAsyncDataInput.java @@ -22,6 +22,7 @@ import org.apache.flink.runtime.io.PullingAsyncDataInput; import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; @@ -55,5 +56,7 @@ interface DataOutput { void emitWatermarkStatus(WatermarkStatus watermarkStatus) throws Exception; void emitLatencyMarker(LatencyMarker latencyMarker) throws Exception; + + void emitRecordAttributes(RecordAttributes recordAttributes) throws Exception; } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/RecordAttributesValve.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/RecordAttributesValve.java new file mode 100644 index 00000000000000..1e341d2b6051f4 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/RecordAttributesValve.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
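The RecordAttributesValve defined next combines per-channel attributes into one operator-facing value. A standalone model of its rule follows (simplified: the real class counts backlog channels rather than scanning an array, and it also ignores updates that do not change a channel's state):

    // Model of the combining rule: the output is backlog only while every
    // channel's latest RecordAttributes reports backlog, and a new element is
    // emitted only when the combined value actually changes.
    class ValveRuleSketch {
        private final boolean[] channelBacklog; // channels start as non-backlog
        private Boolean lastEmitted; // null until the first emission

        ValveRuleSketch(int numChannels) {
            this.channelBacklog = new boolean[numChannels];
        }

        /** Returns the combined flag to broadcast downstream, or null if unchanged. */
        Boolean onChannelAttributes(int channel, boolean isBacklog) {
            channelBacklog[channel] = isBacklog;
            boolean combined = true;
            for (boolean b : channelBacklog) {
                combined &= b; // backlog only if every channel is backlog
            }
            if (lastEmitted == null || lastEmitted != combined) {
                lastEmitted = combined;
                return combined;
            }
            return null;
        }

        public static void main(String[] args) {
            ValveRuleSketch valve = new ValveRuleSketch(2);
            System.out.println(valve.onChannelAttributes(0, true));  // false: channel 1 not backlog yet
            System.out.println(valve.onChannelAttributes(1, true));  // true: all channels backlog
            System.out.println(valve.onChannelAttributes(0, false)); // false: channel 0 left backlog
        }
    }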
+ */ + +package org.apache.flink.streaming.runtime.io; + +import org.apache.flink.streaming.runtime.io.PushingAsyncDataInput.DataOutput; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributesBuilder; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; + +/** + * RecordAttributesValve combine RecordAttributes from different input channels. If any of the input + * channels is non backlog, the combined RecordAttributes is non backlog. + */ +public class RecordAttributesValve { + + private static final Logger LOG = LoggerFactory.getLogger(RecordAttributesValve.class); + + private final int numInputChannels; + + private final RecordAttributes[] allChannelRecordAttributes; + + private int backlogChannelsCnt = 0; + + private RecordAttributes lastOutputAttributes = null; + + public RecordAttributesValve(int numInputChannels) { + this.numInputChannels = numInputChannels; + this.allChannelRecordAttributes = new RecordAttributes[numInputChannels]; + } + + public void inputRecordAttributes( + RecordAttributes recordAttributes, int channelIdx, DataOutput output) + throws Exception { + LOG.debug("RecordAttributes: {} from channel idx: {}", recordAttributes, channelIdx); + RecordAttributes lastChannelRecordAttributes = allChannelRecordAttributes[channelIdx]; + allChannelRecordAttributes[channelIdx] = recordAttributes; + + if (lastChannelRecordAttributes == null) { + lastChannelRecordAttributes = + new RecordAttributesBuilder(Collections.emptyList()).build(); + } + + if (lastChannelRecordAttributes.isBacklog() == recordAttributes.isBacklog()) { + return; + } + + if (recordAttributes.isBacklog()) { + backlogChannelsCnt += 1; + } else { + backlogChannelsCnt -= 1; + } + + final RecordAttributesBuilder builder = + new RecordAttributesBuilder(Collections.emptyList()); + builder.setBacklog(backlogChannelsCnt >= numInputChannels); + final RecordAttributes outputAttribute = builder.build(); + if (lastOutputAttributes == null + || lastOutputAttributes.isBacklog() != outputAttribute.isBacklog()) { + lastOutputAttributes = outputAttribute; + output.emitRecordAttributes(outputAttribute); + } + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/RecordWriterOutput.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/RecordWriterOutput.java index 44e41e921971d0..1aa19e19727dc3 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/RecordWriterOutput.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/RecordWriterOutput.java @@ -31,6 +31,7 @@ import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.metrics.WatermarkGauge; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamElement; import org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; @@ -213,4 +214,15 @@ public void close() { public Gauge getWatermarkGauge() { return watermarkGauge; } + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) { + try { + serializationDelegate.setInstance(recordAttributes); + recordWriter.broadcastEmit(serializationDelegate); + recordWriter.flushAll(); + } catch 
(IOException e) { + throw new UncheckedIOException(e); + } + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java index ee504eb76cf0ac..807f03060b3c13 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java @@ -44,6 +44,7 @@ import org.apache.flink.streaming.runtime.metrics.WatermarkGauge; import org.apache.flink.streaming.runtime.partitioner.StreamPartitioner; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.tasks.OperatorChain; import org.apache.flink.streaming.runtime.tasks.SourceOperatorStreamTask; @@ -289,6 +290,11 @@ public void emitWatermarkStatus(WatermarkStatus watermarkStatus) throws Exceptio public void emitLatencyMarker(LatencyMarker latencyMarker) throws Exception { input.processLatencyMarker(latencyMarker); } + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) throws Exception { + input.processRecordAttributes(recordAttributes); + } } @SuppressWarnings({"unchecked", "rawtypes"}) diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamTwoInputProcessorFactory.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamTwoInputProcessorFactory.java index dbfc295ace0ede..c7463e4fded12d 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamTwoInputProcessorFactory.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamTwoInputProcessorFactory.java @@ -40,6 +40,7 @@ import org.apache.flink.streaming.runtime.metrics.WatermarkGauge; import org.apache.flink.streaming.runtime.partitioner.StreamPartitioner; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.tasks.OperatorChain; import org.apache.flink.streaming.runtime.tasks.StreamTask.CanEmitBatchOfRecordsChecker; @@ -289,6 +290,15 @@ public void emitLatencyMarker(LatencyMarker latencyMarker) throws Exception { operator.processLatencyMarker2(latencyMarker); } } + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) throws Exception { + if (inputIndex == 0) { + operator.processRecordAttributes1(recordAttributes); + } else { + operator.processRecordAttributes2(recordAttributes); + } + } } private static class FinishedOnRestoreWatermarkBypass { diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/recovery/RescalingStreamTaskNetworkInput.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/recovery/RescalingStreamTaskNetworkInput.java index 7b8002eee2a97c..4230e60f1d10c1 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/recovery/RescalingStreamTaskNetworkInput.java +++ 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/recovery/RescalingStreamTaskNetworkInput.java @@ -177,7 +177,7 @@ protected DemultiplexingRecordDeserializer getActiveSerializer( return deserialier; } - protected DataInputStatus processEvent(BufferOrEvent bufferOrEvent) { + protected DataInputStatus processEvent(BufferOrEvent bufferOrEvent) throws Exception { // Event received final AbstractEvent event = bufferOrEvent.getEvent(); if (event instanceof SubtaskConnectionDescriptor) { diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/InternalRecordAttributes.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/InternalRecordAttributes.java new file mode 100644 index 00000000000000..dd4b3fb9f822d2 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/InternalRecordAttributes.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.runtime.streamrecord; + +import org.apache.flink.annotation.Internal; + +import java.util.Objects; + +/** The RecordAttributes that contains extra information to be used internally. */ +@Internal +public class InternalRecordAttributes extends RecordAttributes { + private final long maxWatermarkDuringBacklog; + + public InternalRecordAttributes(boolean backlog, long backlogWatermark) { + super(backlog); + this.maxWatermarkDuringBacklog = backlogWatermark; + } + + public long getMaxWatermarkDuringBacklog() { + return maxWatermarkDuringBacklog; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + if (!super.equals(o)) { + return false; + } + InternalRecordAttributes that = (InternalRecordAttributes) o; + return maxWatermarkDuringBacklog == that.maxWatermarkDuringBacklog; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), maxWatermarkDuringBacklog); + } + + @Override + public String toString() { + return "InternalRecordAttributes{" + + "watermarkDuringBacklog=" + + maxWatermarkDuringBacklog + + '}'; + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/RecordAttributes.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/RecordAttributes.java new file mode 100644 index 00000000000000..72007c1f0ec50e --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/RecordAttributes.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.runtime.streamrecord; + +import org.apache.flink.annotation.Experimental; + +import java.util.Objects; + +/** + * A RecordAttributes element provides a stream task with information that can be used to optimize + * the stream task's performance. + */ +@Experimental +public class RecordAttributes extends StreamElement { + private final boolean backlog; + + public RecordAttributes(boolean backlog) { + this.backlog = backlog; + } + + /** + * If it returns true, then the records received after this element are stale and an operator + * can optionally buffer records until isBacklog=false. This allows an operator to optimize + * throughput at the cost of processing latency. + */ + public boolean isBacklog() { + return backlog; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + RecordAttributes that = (RecordAttributes) o; + return backlog == that.backlog; + } + + @Override + public int hashCode() { + return Objects.hash(backlog); + } + + @Override + public String toString() { + return "RecordAttributes{" + "backlog=" + backlog + '}'; + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/RecordAttributesBuilder.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/RecordAttributesBuilder.java new file mode 100644 index 00000000000000..417a45327c4c1c --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/RecordAttributesBuilder.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.runtime.streamrecord; + +import org.apache.flink.annotation.Experimental; + +import javax.annotation.Nullable; + +import java.util.List; + +/** The builder class for {@link RecordAttributes}.
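+ *
+ * <p>As an illustration only (not code from this patch), an operator that forwards attributes
+ * downstream can combine the last RecordAttributes seen on each of its inputs:
+ *
+ * <pre>{@code
+ * // lastAttributesOfInputs: the most recent RecordAttributes received per input.
+ * RecordAttributes combined = new RecordAttributesBuilder(lastAttributesOfInputs).build();
+ * // Unless set explicitly, backlog defaults to true if any input reported backlog=true.
+ * output.emitRecordAttributes(combined);
+ * }</pre>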
*/ +@Experimental +public class RecordAttributesBuilder { + private final List<RecordAttributes> lastRecordAttributesOfInputs; + @Nullable private Boolean backlog = null; + + /** + * This constructor takes a list of the last RecordAttributes received from each of the + * operator's inputs. When this list is not empty, it will be used to determine the default + * values for those attributes that have not been explicitly set by the caller. + */ + public RecordAttributesBuilder(List<RecordAttributes> lastRecordAttributesOfInputs) { + this.lastRecordAttributesOfInputs = lastRecordAttributesOfInputs; + } + + public RecordAttributesBuilder setBacklog(boolean backlog) { + this.backlog = backlog; + return this; + } + + /** + * If any operator attribute is null, we will log it at DEBUG level and determine a non-null + * default value as described below. + * + * <p>
Default value for backlog: if any element in lastRecordAttributesOfInputs has + * backlog=true, use true. Otherwise, use false. + */ + public RecordAttributes build() { + if (backlog == null) { + backlog = getDefaultBacklog(lastRecordAttributesOfInputs); + } + return new RecordAttributes(backlog); + } + + private boolean getDefaultBacklog(List<RecordAttributes> lastRecordAttributesOfInputs) { + for (RecordAttributes lastAttributes : lastRecordAttributesOfInputs) { + if (lastAttributes.isBacklog()) { + return true; + } + } + return false; + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/StreamElement.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/StreamElement.java index 3fbcf2e42dc353..c65b0b5bf7b1d1 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/StreamElement.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/StreamElement.java @@ -22,7 +22,7 @@ import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; -/** An element in a data stream. Can be a record or a Watermark. */ +/** An element in a data stream. Can be a record, a Watermark, or a RecordAttributes. */ @Internal public abstract class StreamElement { @@ -62,6 +62,15 @@ public final boolean isLatencyMarker() { return getClass() == LatencyMarker.class; } + /** + * Check whether this element is record attributes. + * + * @return True, if this element is record attributes, false otherwise. + */ + public final boolean isRecordAttributes() { + return getClass() == RecordAttributes.class; + } + /** * Casts this element into a StreamRecord. * @@ -103,4 +112,15 @@ public final WatermarkStatus asWatermarkStatus() { public final LatencyMarker asLatencyMarker() { return (LatencyMarker) this; } + + /** + * Casts this element into a RecordAttributes. + * + * @return This element as a RecordAttributes. + * @throws java.lang.ClassCastException Thrown, if this element is actually not a + * RecordAttributes. + */ + public final RecordAttributes asRecordAttributes() { + return (RecordAttributes) this; + } }
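The new checks and casts mirror the existing Watermark and LatencyMarker accessors. As a hedged sketch of how a consumer can branch on the element kind (processRecord, processWatermark and handleBacklogChange are hypothetical handlers, not APIs from this patch):

    // Dispatch over deserialized stream elements; only the RecordAttributes branch is new.
    void dispatch(StreamElement element) throws Exception {
        if (element.isRecord()) {
            processRecord(element.asRecord());
        } else if (element.isWatermark()) {
            processWatermark(element.asWatermark());
        } else if (element.isRecordAttributes()) {
            // React to a backlog transition, e.g. start or stop buffering records.
            handleBacklogChange(element.asRecordAttributes().isBacklog());
        }
    }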
diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/StreamElementSerializer.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/StreamElementSerializer.java index b1052131769d17..bcb223d6aa9851 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/StreamElementSerializer.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/streamrecord/StreamElementSerializer.java @@ -50,6 +50,7 @@ public final class StreamElementSerializer<T> extends TypeSerializer<StreamElem + private static final int TAG_RECORD_ATTRIBUTES = 5; private final TypeSerializer<T> typeSerializer; @@ -175,6 +176,9 @@ public void serialize(StreamElement value, DataOutputView target) throws IOExcep target.writeLong(value.asLatencyMarker().getOperatorId().getLowerPart()); target.writeLong(value.asLatencyMarker().getOperatorId().getUpperPart()); target.writeInt(value.asLatencyMarker().getSubtaskIndex()); + } else if (value.isRecordAttributes()) { + target.write(TAG_RECORD_ATTRIBUTES); + target.writeBoolean(value.asRecordAttributes().isBacklog()); } else { throw new RuntimeException(); } @@ -197,6 +201,8 @@ public StreamElement deserialize(DataInputView source) throws IOException { source.readLong(), new OperatorID(source.readLong(), source.readLong()), source.readInt()); + } else if (tag == TAG_RECORD_ATTRIBUTES) { + return new RecordAttributes(source.readBoolean()); } else { throw new IOException("Corrupt stream, found tag: " + tag); } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/BroadcastingOutputCollector.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/BroadcastingOutputCollector.java index 07f9a4d7677cbd..0f4613046ef9ca 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/BroadcastingOutputCollector.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/BroadcastingOutputCollector.java @@ -23,6 +23,7 @@ import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.metrics.WatermarkGauge; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; import org.apache.flink.util.OutputTag; @@ -103,4 +104,11 @@ public void close() { output.close(); } } + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) { + for (OutputWithChainingCheck<StreamRecord<T>> output : outputs) { + output.emitRecordAttributes(recordAttributes); + } + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/ChainingOutput.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/ChainingOutput.java index 13c3bc6a9b64d6..1ffe6cfb14bc0b 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/ChainingOutput.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/ChainingOutput.java @@ -26,6 +26,7 @@ import org.apache.flink.streaming.runtime.io.RecordProcessorUtils; import org.apache.flink.streaming.runtime.metrics.WatermarkGauge; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import
org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; import org.apache.flink.util.OutputTag; @@ -154,4 +155,13 @@ public void emitWatermarkStatus(WatermarkStatus watermarkStatus) { } } } + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) { + try { + input.processRecordAttributes(recordAttributes); + } catch (Exception e) { + throw new ExceptionInChainedOperatorException(e); + } + } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/OneInputStreamTask.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/OneInputStreamTask.java index bc3abb489ec984..ea1533b3bb0d28 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/OneInputStreamTask.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/OneInputStreamTask.java @@ -29,6 +29,7 @@ import org.apache.flink.streaming.api.graph.StreamConfig; import org.apache.flink.streaming.api.operators.Input; import org.apache.flink.streaming.api.operators.OneInputStreamOperator; +import org.apache.flink.streaming.api.operators.sort.SortingBacklogDataInput; import org.apache.flink.streaming.api.operators.sort.SortingDataInput; import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.io.PushingAsyncDataInput.DataOutput; @@ -42,6 +43,7 @@ import org.apache.flink.streaming.runtime.io.checkpointing.InputProcessorUtil; import org.apache.flink.streaming.runtime.metrics.WatermarkGauge; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.StatusWatermarkValve; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; @@ -57,6 +59,7 @@ import java.util.Optional; import static org.apache.flink.streaming.api.graph.StreamConfig.requiresSorting; +import static org.apache.flink.streaming.api.graph.StreamConfig.requiresSortingDuringBacklog; import static org.apache.flink.util.Preconditions.checkNotNull; import static org.apache.flink.util.Preconditions.checkState; @@ -112,6 +115,8 @@ public void init() throws Exception { !configuration.isCheckpointingEnabled(), "Checkpointing is not allowed with sorted inputs."); input = wrapWithSorted(input); + } else if (requiresSortingDuringBacklog(inputConfig)) { + input = wrapWithBacklogSorted(input); } getEnvironment() @@ -154,6 +159,26 @@ private StreamTaskInput wrapWithSorted(StreamTaskInput input) { getExecutionConfig()); } + private StreamTaskInput wrapWithBacklogSorted(StreamTaskInput input) { + ClassLoader userCodeClassLoader = getUserCodeClassLoader(); + return new SortingBacklogDataInput<>( + input, + configuration.getTypeSerializerIn(input.getInputIndex(), userCodeClassLoader), + configuration.getStateKeySerializer(userCodeClassLoader), + configuration.getStatePartitioner(input.getInputIndex(), userCodeClassLoader), + getEnvironment().getMemoryManager(), + getEnvironment().getIOManager(), + getExecutionConfig().isObjectReuseEnabled(), + configuration.getManagedMemoryFractionOperatorUseCaseOfSlot( + ManagedMemoryUseCase.OPERATOR, + getEnvironment().getTaskConfiguration(), + userCodeClassLoader), + 
getEnvironment().getTaskManagerInfo().getConfiguration(), + this, + getExecutionConfig(), + getCanEmitBatchOfRecords()); + } + @SuppressWarnings("unchecked") private CheckpointedInputGate createCheckpointedInputGate() { IndexedInputGate[] inputGates = getEnvironment().getAllInputGates(); @@ -252,5 +277,10 @@ public void emitWatermarkStatus(WatermarkStatus watermarkStatus) throws Exceptio public void emitLatencyMarker(LatencyMarker latencyMarker) throws Exception { operator.processLatencyMarker(latencyMarker); } + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) throws Exception { + operator.processRecordAttributes(recordAttributes); + } } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java index 70fa37fc1d0cad..c87ee6eead7c72 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java @@ -39,6 +39,7 @@ import org.apache.flink.streaming.runtime.io.StreamTaskSourceInput; import org.apache.flink.streaming.runtime.metrics.WatermarkGauge; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; import org.apache.flink.util.concurrent.FutureUtils; @@ -314,6 +315,11 @@ public void emitLatencyMarker(LatencyMarker latencyMarker) { output.emitLatencyMarker(latencyMarker); } + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) { + output.emitRecordAttributes(recordAttributes); + } + @Override public void emitWatermark(Watermark watermark) { long watermarkTimestamp = watermark.getTimestamp(); diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/BatchExecutionUtils.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/BatchExecutionUtils.java index 0eaa0b65310d1e..525fd9cd755c49 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/BatchExecutionUtils.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/BatchExecutionUtils.java @@ -70,7 +70,7 @@ public static void applyBatchExecutionSettings( } } - private static int deriveMemoryWeight(ReadableConfig configuration) { + public static int deriveMemoryWeight(ReadableConfig configuration) { return Math.max(1, configuration.get(ExecutionOptions.SORTED_INPUTS_MEMORY).getMebiBytes()); } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/MultiInputTransformationTranslator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/MultiInputTransformationTranslator.java index c61146995c4458..b6dbcb4b4d7a5b 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/MultiInputTransformationTranslator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/MultiInputTransformationTranslator.java @@ -64,7 +64,8 @@ protected Collection translateForBatchInternal( IntStream.range(0, inputs.size()) .mapToObj( idx -> { - if 
(keySelectors.get(idx) != null) { + if (keySelectors.get(idx) != null + && !transformation.isInternalSorterSupported()) { return StreamConfig.InputRequirement.SORTED; } else { return StreamConfig.InputRequirement.PASS_THROUGH; diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/OneInputTransformationTranslator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/OneInputTransformationTranslator.java index a80b7c0692296d..225015e7d5ac80 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/OneInputTransformationTranslator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/OneInputTransformationTranslator.java @@ -21,6 +21,7 @@ import org.apache.flink.annotation.Internal; import org.apache.flink.api.java.functions.KeySelector; import org.apache.flink.streaming.api.graph.StreamConfig; +import org.apache.flink.streaming.api.graph.StreamNode; import org.apache.flink.streaming.api.graph.TransformationTranslator; import org.apache.flink.streaming.api.transformations.OneInputTransformation; @@ -52,8 +53,12 @@ public Collection translateForBatchInternal( context); boolean isKeyed = keySelector != null; if (isKeyed) { + final StreamConfig.InputRequirement inputRequirement = + transformation.isInternalSorterSupported() + ? StreamConfig.InputRequirement.PASS_THROUGH + : StreamConfig.InputRequirement.SORTED; BatchExecutionUtils.applyBatchExecutionSettings( - transformation.getId(), context, StreamConfig.InputRequirement.SORTED); + transformation.getId(), context, inputRequirement); } return ids; @@ -62,12 +67,20 @@ public Collection translateForBatchInternal( @Override public Collection translateForStreamingInternal( final OneInputTransformation transformation, final Context context) { - return translateInternal( - transformation, - transformation.getOperatorFactory(), - transformation.getInputType(), - transformation.getStateKeySelector(), - transformation.getStateKeyType(), - context); + final KeySelector keySelector = transformation.getStateKeySelector(); + final Collection ids = + translateInternal( + transformation, + transformation.getOperatorFactory(), + transformation.getInputType(), + keySelector, + transformation.getStateKeyType(), + context); + final StreamNode node = context.getStreamGraph().getStreamNode(transformation.getId()); + if (keySelector != null && StreamExecutionUtils.isCheckpointDisableDuringBacklog(context)) { + StreamExecutionUtils.applyBacklogProcessingSettings( + transformation, context, node, transformation.isInternalSorterSupported()); + } + return ids; } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/ReduceTransformationTranslator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/ReduceTransformationTranslator.java index 63bf563e5500a5..0eb2c17d871b21 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/ReduceTransformationTranslator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/ReduceTransformationTranslator.java @@ -18,7 +18,9 @@ package org.apache.flink.streaming.runtime.translators; +import org.apache.flink.api.java.functions.KeySelector; import org.apache.flink.streaming.api.graph.StreamConfig; +import org.apache.flink.streaming.api.graph.StreamNode; import org.apache.flink.streaming.api.graph.TransformationTranslator; import 
org.apache.flink.streaming.api.operators.BatchGroupedReduceOperator; import org.apache.flink.streaming.api.operators.SimpleOperatorFactory; @@ -54,8 +56,12 @@ public Collection translateForBatchInternal( transformation.getKeySelector(), transformation.getKeyTypeInfo(), context); + final StreamConfig.InputRequirement inputRequirement = + transformation.isInternalSorterSupported() + ? StreamConfig.InputRequirement.PASS_THROUGH + : StreamConfig.InputRequirement.SORTED; BatchExecutionUtils.applyBatchExecutionSettings( - transformation.getId(), context, StreamConfig.InputRequirement.SORTED); + transformation.getId(), context, inputRequirement); return ids; } @@ -72,12 +78,21 @@ public Collection translateForStreamingInternal( SimpleOperatorFactory operatorFactory = SimpleOperatorFactory.of(groupedReduce); operatorFactory.setChainingStrategy(transformation.getChainingStrategy()); - return translateInternal( - transformation, - operatorFactory, - transformation.getInputType(), - transformation.getKeySelector(), - transformation.getKeyTypeInfo(), - context); + + final KeySelector keySelector = transformation.getKeySelector(); + final Collection ids = + translateInternal( + transformation, + operatorFactory, + transformation.getInputType(), + keySelector, + transformation.getKeyTypeInfo(), + context); + final StreamNode node = context.getStreamGraph().getStreamNode(transformation.getId()); + if (keySelector != null && StreamExecutionUtils.isCheckpointDisableDuringBacklog(context)) { + StreamExecutionUtils.applyBacklogProcessingSettings( + transformation, context, node, transformation.isInternalSorterSupported()); + } + return ids; } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/StreamExecutionUtils.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/StreamExecutionUtils.java new file mode 100644 index 00000000000000..e3da36778aff52 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/StreamExecutionUtils.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.runtime.translators; + +import org.apache.flink.api.dag.Transformation; +import org.apache.flink.core.memory.ManagedMemoryUseCase; +import org.apache.flink.streaming.api.graph.StreamConfig; +import org.apache.flink.streaming.api.graph.StreamNode; +import org.apache.flink.streaming.api.graph.TransformationTranslator; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.apache.flink.streaming.api.environment.ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL_DURING_BACKLOG; +import static org.apache.flink.streaming.runtime.translators.BatchExecutionUtils.deriveMemoryWeight; + +/** A utility class for applying input-sorting settings during backlog processing. */ +public class StreamExecutionUtils { + static boolean isCheckpointDisableDuringBacklog(TransformationTranslator.Context context) { + return context.getGraphGeneratorConfig().get(CHECKPOINTING_INTERVAL_DURING_BACKLOG) != null + && context.getGraphGeneratorConfig() + .get(CHECKPOINTING_INTERVAL_DURING_BACKLOG) + .isZero(); + } + + static void applyBacklogProcessingSettings( + Transformation<?> transformation, + TransformationTranslator.Context context, + StreamNode node, + boolean isInternalSorted) { + if (!isInternalSorted) { + node.addInputRequirement(0, StreamConfig.InputRequirement.SORTED_DURING_BACKLOG); + } + Map<ManagedMemoryUseCase, Integer> operatorScopeUseCaseWeights = new HashMap<>(); + Integer operatorMemoryWeights = + transformation + .getManagedMemoryOperatorScopeUseCaseWeights() + .get(ManagedMemoryUseCase.OPERATOR); + operatorScopeUseCaseWeights.put( + ManagedMemoryUseCase.OPERATOR, + operatorMemoryWeights == null + ? deriveMemoryWeight(context.getGraphGeneratorConfig()) + : operatorMemoryWeights); + node.setManagedMemoryUseCaseWeights(operatorScopeUseCaseWeights, Collections.emptySet()); + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/TwoInputTransformationTranslator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/TwoInputTransformationTranslator.java index fde3a1aae760ad..ceb9f393648e97 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/TwoInputTransformationTranslator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/translators/TwoInputTransformationTranslator.java @@ -46,11 +46,13 @@ protected Collection<Integer> translateForBatchInternal( StreamConfig.InputRequirement input1Requirement = transformation.getStateKeySelector1() != null + && !transformation.isInternalSorterSupported() ? StreamConfig.InputRequirement.SORTED : StreamConfig.InputRequirement.PASS_THROUGH; StreamConfig.InputRequirement input2Requirement = transformation.getStateKeySelector2() != null + && !transformation.isInternalSorterSupported() ?
StreamConfig.InputRequirement.SORTED : StreamConfig.InputRequirement.PASS_THROUGH; diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/AbstractUdfStreamOperatorLifecycleTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/AbstractUdfStreamOperatorLifecycleTest.java index 3d4125bac394a8..6833271f728bde 100644 --- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/AbstractUdfStreamOperatorLifecycleTest.java +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/AbstractUdfStreamOperatorLifecycleTest.java @@ -99,6 +99,7 @@ public class AbstractUdfStreamOperatorLifecycleTest { + "finish[], " + "getCurrentKey[], " + "getMetricGroup[], " + + "getOperatorAttributes[], " + "getOperatorID[], " + "initializeState[interface org.apache.flink.streaming.api.operators.StreamTaskStateInitializer], " + "notifyCheckpointAborted[long], " diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/BacklogTimeServiceTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/BacklogTimeServiceTest.java new file mode 100644 index 00000000000000..39167b5a4bfd3f --- /dev/null +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/BacklogTimeServiceTest.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators; + +import org.apache.flink.api.common.typeutils.base.IntSerializer; +import org.apache.flink.api.common.typeutils.base.StringSerializer; +import org.apache.flink.runtime.state.KeyGroupRange; +import org.apache.flink.runtime.state.heap.HeapPriorityQueueSetFactory; +import org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService; + +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.apache.flink.streaming.api.operators.TimeServiceTestUtils.createBacklogTimerService; +import static org.apache.flink.streaming.api.operators.TimeServiceTestUtils.createTimerQueue; +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link BacklogTimeService}. 
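+ *
+ * <p>As the test below exercises, during backlog processing the service does not fire on
+ * watermark arrival; instead it fires the event-time timers of the current key, up to the max
+ * watermark during backlog, when the current key is switched. A condensed sketch with the
+ * values used in the test:
+ *
+ * <pre>{@code
+ * timeService.setCurrentKey(1);
+ * timeService.registerEventTimeTimer("a", 0);
+ * timeService.setMaxWatermarkDuringBacklog(2);
+ * // Nothing has fired yet; switching the key fires key 1's timers up to timestamp 2.
+ * timeService.setCurrentKey(2);
+ * }</pre>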
*/ +public class BacklogTimeServiceTest { + + @Test + public void testTriggerEventTimeTimer() throws Exception { + List timers = new ArrayList<>(); + TestProcessingTimeService processingTimeService = new TestProcessingTimeService(); + KeyGroupRange testKeyGroupRange = new KeyGroupRange(0, 1); + final HeapPriorityQueueSetFactory priorityQueueSetFactory = + new HeapPriorityQueueSetFactory(testKeyGroupRange, 1, 128); + + final TimerSerializer timerSerializer = + new TimerSerializer<>(IntSerializer.INSTANCE, StringSerializer.INSTANCE); + final BacklogTimeService timeService = + createBacklogTimerService( + processingTimeService, + TestTriggerable.eventTimeTrigger( + (timer) -> timers.add(timer.getTimestamp())), + createTimerQueue( + "processingTimerQueue", timerSerializer, priorityQueueSetFactory), + createTimerQueue( + "eventTimerQueue", timerSerializer, priorityQueueSetFactory)); + + timeService.setCurrentKey(1); + timeService.registerEventTimeTimer("a", 0); + timeService.registerEventTimeTimer("a", 2); + timeService.registerEventTimeTimer("a", 1); + timeService.setMaxWatermarkDuringBacklog(2); + assertThat(timers).isEmpty(); + timeService.setCurrentKey(2); + assertThat(timers).containsExactly(0L, 1L, 2L); + timers.clear(); + + timeService.registerEventTimeTimer("a", 2); + timeService.registerEventTimeTimer("a", 1); + assertThat(timers).isEmpty(); + timeService.setCurrentKey(null); + assertThat(timers).containsExactly(1L, 2L); + + assertThat(timeService.currentWatermark()).isEqualTo(2); + } +} diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceImplTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceImplTest.java new file mode 100644 index 00000000000000..87810de3f33b3f --- /dev/null +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/InternalBacklogAwareTimerServiceImplTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.api.operators; + +import org.apache.flink.api.common.typeutils.base.IntSerializer; +import org.apache.flink.api.common.typeutils.base.StringSerializer; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.runtime.state.KeyGroupRange; +import org.apache.flink.runtime.state.KeyGroupedInternalPriorityQueue; +import org.apache.flink.runtime.state.heap.HeapPriorityQueueSetFactory; +import org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService; + +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.apache.flink.streaming.api.operators.TimeServiceTestUtils.createBacklogTimerService; +import static org.apache.flink.streaming.api.operators.TimeServiceTestUtils.createInternalTimerService; +import static org.apache.flink.streaming.api.operators.TimeServiceTestUtils.createTimerQueue; +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link InternalBacklogAwareTimerServiceImpl}. */ +class InternalBacklogAwareTimerServiceImplTest { + + @Test + void testTriggerEventTimeTimer() throws Exception { + KeyGroupRange testKeyGroupRange = new KeyGroupRange(0, 1); + List> timers = new ArrayList<>(); + TestProcessingTimeService processingTimeService = new TestProcessingTimeService(); + final HeapPriorityQueueSetFactory priorityQueueSetFactory = + new HeapPriorityQueueSetFactory(testKeyGroupRange, 1, 128); + TimerSerializer timerSerializer = + new TimerSerializer<>(IntSerializer.INSTANCE, StringSerializer.INSTANCE); + final TestTriggerable triggerable = + TestTriggerable.eventTimeTrigger( + (timer) -> timers.add(Tuple2.of(timer.getKey(), timer.getTimestamp()))); + final KeyGroupedInternalPriorityQueue> + eventTimersQueue = + createTimerQueue( + "eventTimersQueue", timerSerializer, priorityQueueSetFactory); + final KeyGroupedInternalPriorityQueue> + processingTimerQueue = + createTimerQueue( + "processingTimerQueue", timerSerializer, priorityQueueSetFactory); + final BacklogTimeService backlogTimeService = + createBacklogTimerService( + processingTimeService, triggerable, processingTimerQueue, eventTimersQueue); + final TestKeyContext keyContext = new TestKeyContext(); + final InternalTimerServiceImpl internalTimerService = + createInternalTimerService( + testKeyGroupRange, + keyContext, + processingTimeService, + processingTimerQueue, + eventTimersQueue); + internalTimerService.startTimerService( + IntSerializer.INSTANCE, StringSerializer.INSTANCE, triggerable); + + final InternalBacklogAwareTimerServiceImpl timerService = + new InternalBacklogAwareTimerServiceImpl<>( + internalTimerService, backlogTimeService); + + keyContext.setCurrentKey(1); + timerService.registerEventTimeTimer("a", 2); + timerService.registerEventTimeTimer("a", 1); + timerService.registerEventTimeTimer("a", 3); + keyContext.setCurrentKey(2); + timerService.registerEventTimeTimer("a", 3); + timerService.registerEventTimeTimer("a", 1); + timerService.advanceWatermark(2); + assertThat(timers).containsExactly(Tuple2.of(1, 1L), Tuple2.of(2, 1L), Tuple2.of(1, 2L)); + timers.clear(); + + // switch to backlog processing + timerService.setBacklog(true); + timerService.setMaxWatermarkDuringBacklog(5); + timerService.setCurrentKey(1); + timerService.registerEventTimeTimer("a", 5); + timerService.registerEventTimeTimer("a", 4); + timerService.setCurrentKey(2); + timerService.registerEventTimeTimer("a", 6); + timerService.setCurrentKey(null); + assertThat(timers) + .containsExactly( + Tuple2.of(1, 
3L), Tuple2.of(1, 4L), Tuple2.of(1, 5L), Tuple2.of(2, 3L)); + timers.clear(); + + // switch to non backlog processing + timerService.setBacklog(false); + keyContext.setCurrentKey(1); + timerService.registerEventTimeTimer("a", 6); + timerService.advanceWatermark(6); + assertThat(timers).containsExactly(Tuple2.of(2, 6L), Tuple2.of(1, 6L)); + } +} diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/InternalTimerServiceImplTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/InternalTimerServiceImplTest.java index 915b2732982653..889eae53654bbd 100644 --- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/InternalTimerServiceImplTest.java +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/InternalTimerServiceImplTest.java @@ -18,7 +18,6 @@ package org.apache.flink.streaming.api.operators; -import org.apache.flink.api.common.typeutils.TypeSerializer; import org.apache.flink.api.common.typeutils.base.IntSerializer; import org.apache.flink.api.common.typeutils.base.StringSerializer; import org.apache.flink.api.java.tuple.Tuple3; @@ -30,7 +29,6 @@ import org.apache.flink.runtime.state.PriorityQueueSetFactory; import org.apache.flink.runtime.state.heap.HeapPriorityQueueSetFactory; import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.streaming.runtime.tasks.StreamTaskCancellationContext; import org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService; import org.junit.Assert; @@ -51,6 +49,7 @@ import java.util.Random; import java.util.Set; +import static org.apache.flink.streaming.api.operators.TimeServiceTestUtils.createInternalTimerService; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; @@ -963,21 +962,6 @@ private void testSnapshotAndRebalancingRestore(int snapshotVersion) throws Excep assertEquals(0, timerService2.numEventTimeTimers()); } - private static class TestKeyContext implements KeyContext { - - private Object key; - - @Override - public void setCurrentKey(Object key) { - this.key = key; - } - - @Override - public Object getCurrentKey() { - return key; - } - } - private static int getKeyInKeyGroup(int keyGroup, int maxParallelism) { Random rand = new Random(System.currentTimeMillis()); int result = rand.nextInt(); @@ -1084,27 +1068,6 @@ public static Collection keyRanges() { }); } - private static InternalTimerServiceImpl createInternalTimerService( - KeyGroupRange keyGroupsList, - KeyContext keyContext, - ProcessingTimeService processingTimeService, - TypeSerializer keySerializer, - TypeSerializer namespaceSerializer, - PriorityQueueSetFactory priorityQueueSetFactory) { - - TimerSerializer timerSerializer = - new TimerSerializer<>(keySerializer, namespaceSerializer); - - return new InternalTimerServiceImpl<>( - keyGroupsList, - keyContext, - processingTimeService, - createTimerQueue( - "__test_processing_timers", timerSerializer, priorityQueueSetFactory), - createTimerQueue("__test_event_timers", timerSerializer, priorityQueueSetFactory), - StreamTaskCancellationContext.alwaysRunning()); - } - private static KeyGroupedInternalPriorityQueue> createTimerQueue( String name, diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TestKeyContext.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TestKeyContext.java new file mode 100644 index 
00000000000000..2dfb0af4779101 --- /dev/null +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TestKeyContext.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators; + +/** KeyContext for test. */ +class TestKeyContext implements KeyContext { + + private Object key; + + @Override + public void setCurrentKey(Object key) { + this.key = key; + } + + @Override + public Object getCurrentKey() { + return key; + } +} diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TestTriggerable.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TestTriggerable.java new file mode 100644 index 00000000000000..8a932c3f449224 --- /dev/null +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TestTriggerable.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators; + +import org.assertj.core.api.Assertions; + +import java.util.function.Consumer; + +/** Triggerable for test. 
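+ *
+ * <p>A usage sketch: collect the timestamps of fired event-time timers while failing the test
+ * if a processing-time timer fires.
+ *
+ * <pre>{@code
+ * List<Long> fired = new ArrayList<>();
+ * TestTriggerable<Integer, String> triggerable =
+ *         TestTriggerable.eventTimeTrigger(timer -> fired.add(timer.getTimestamp()));
+ * }</pre>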
*/ +class TestTriggerable implements Triggerable { + + private final Consumer> eventTimeHandler; + private final Consumer> processingTimeHandler; + + public static TestTriggerable eventTimeTrigger( + Consumer> eventTimeHandler) { + return new TestTriggerable<>( + eventTimeHandler, + timer -> Assertions.fail("We did not expect processing timer to be triggered.")); + } + + public static TestTriggerable processingTimeTrigger( + Consumer> processingTimeHandler) { + return new TestTriggerable<>( + timer -> Assertions.fail("We did not expect event timer to be triggered."), + processingTimeHandler); + } + + private TestTriggerable( + Consumer> eventTimeHandler, + Consumer> processingTimeHandler) { + this.eventTimeHandler = eventTimeHandler; + this.processingTimeHandler = processingTimeHandler; + } + + @Override + public void onEventTime(InternalTimer timer) throws Exception { + this.eventTimeHandler.accept(timer); + } + + @Override + public void onProcessingTime(InternalTimer timer) throws Exception { + this.processingTimeHandler.accept(timer); + } +} diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TimeServiceTestUtils.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TimeServiceTestUtils.java new file mode 100644 index 00000000000000..457ce344b29b69 --- /dev/null +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/TimeServiceTestUtils.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators; + +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.runtime.state.KeyGroupRange; +import org.apache.flink.runtime.state.KeyGroupedInternalPriorityQueue; +import org.apache.flink.runtime.state.PriorityQueueSetFactory; +import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; +import org.apache.flink.streaming.runtime.tasks.StreamTaskCancellationContext; + +/** Util methods for TimeService tests. 
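+ *
+ * <p>A typical composition, as a sketch (keyGroupRange, keyContext, processingTimeService and
+ * factory stand in for objects the test already has): create one timer queue per time domain,
+ * then wire both into an internal timer service.
+ *
+ * <pre>{@code
+ * TimerSerializer<Integer, String> serializer =
+ *         new TimerSerializer<>(IntSerializer.INSTANCE, StringSerializer.INSTANCE);
+ * InternalTimerServiceImpl<Integer, String> service =
+ *         createInternalTimerService(
+ *                 keyGroupRange,
+ *                 keyContext,
+ *                 processingTimeService,
+ *                 createTimerQueue("processingTimers", serializer, factory),
+ *                 createTimerQueue("eventTimers", serializer, factory));
+ * }</pre>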
*/ +public class TimeServiceTestUtils { + + public static + KeyGroupedInternalPriorityQueue> createTimerQueue( + String name, + TimerSerializer timerSerializer, + PriorityQueueSetFactory priorityQueueSetFactory) { + return priorityQueueSetFactory.create(name, timerSerializer); + } + + public static InternalTimerServiceImpl createInternalTimerService( + KeyGroupRange keyGroupsList, + KeyContext keyContext, + ProcessingTimeService processingTimeService, + TypeSerializer keySerializer, + TypeSerializer namespaceSerializer, + PriorityQueueSetFactory priorityQueueSetFactory) { + + TimerSerializer timerSerializer = + new TimerSerializer<>(keySerializer, namespaceSerializer); + + return createInternalTimerService( + keyGroupsList, + keyContext, + processingTimeService, + createTimerQueue( + "__test_processing_timers", timerSerializer, priorityQueueSetFactory), + createTimerQueue("__test_event_timers", timerSerializer, priorityQueueSetFactory)); + } + + public static InternalTimerServiceImpl createInternalTimerService( + KeyGroupRange keyGroupsList, + KeyContext keyContext, + ProcessingTimeService processingTimeService, + KeyGroupedInternalPriorityQueue> processingTimeTimersQueue, + KeyGroupedInternalPriorityQueue> eventTimeTimersQueue) { + + return new InternalTimerServiceImpl<>( + keyGroupsList, + keyContext, + processingTimeService, + processingTimeTimersQueue, + eventTimeTimersQueue, + StreamTaskCancellationContext.alwaysRunning()); + } + + public static BacklogTimeService createBacklogTimerService( + ProcessingTimeService processingTimeService, + Triggerable triggerable, + KeyGroupedInternalPriorityQueue> processingTimeTimersQueue, + KeyGroupedInternalPriorityQueue> eventTimeTimersQueue) { + + return new BacklogTimeService<>( + processingTimeService, + triggerable, + eventTimeTimersQueue, + processingTimeTimersQueue); + } +} diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/CollectingDataOutput.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/CollectingDataOutput.java index 1c5a9ae4fcfa21..ca8e061cf07159 100644 --- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/CollectingDataOutput.java +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/CollectingDataOutput.java @@ -21,6 +21,7 @@ import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.io.PushingAsyncDataInput; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; @@ -53,4 +54,9 @@ public void emitRecord(StreamRecord streamRecord) throws Exception { public void emitLatencyMarker(LatencyMarker latencyMarker) throws Exception { events.add(latencyMarker); } + + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) throws Exception { + events.add(recordAttributes); + } } diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/CollectionDataInput.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/CollectionDataInput.java index 2926c3acd05aaa..bffe244c40706d 100644 --- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/CollectionDataInput.java +++ 
b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/CollectionDataInput.java @@ -53,6 +53,8 @@ public DataInputStatus emitNext(DataOutput output) throws Exception { output.emitRecord(streamElement.asRecord()); } else if (streamElement instanceof Watermark) { output.emitWatermark(streamElement.asWatermark()); + } else if (streamElement.isRecordAttributes()) { + output.emitRecordAttributes(streamElement.asRecordAttributes()); } else { throw new IllegalStateException("Unsupported element type: " + streamElement); } diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/LargeSortingDataInputITCase.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/LargeSortingDataInputITCase.java index 21aa798b12f74f..1d6a76547dc5ce 100644 --- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/LargeSortingDataInputITCase.java +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/LargeSortingDataInputITCase.java @@ -42,6 +42,7 @@ import org.apache.flink.streaming.runtime.io.StreamOneInputProcessor; import org.apache.flink.streaming.runtime.io.StreamTaskInput; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; @@ -219,6 +220,9 @@ public void emitWatermarkStatus(WatermarkStatus watermarkStatus) throws Exceptio @Override public void emitLatencyMarker(LatencyMarker latencyMarker) throws Exception {} + @Override + public void emitRecordAttributes(RecordAttributes recordAttributes) throws Exception {} + public int getSeenRecords() { return seenRecords; } diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/SortingBacklogDataInputTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/SortingBacklogDataInputTest.java new file mode 100644 index 00000000000000..6afb9232dccf95 --- /dev/null +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/sort/SortingBacklogDataInputTest.java @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.api.operators.sort; + +import org.apache.flink.api.common.ExecutionConfig; +import org.apache.flink.api.common.typeutils.base.IntSerializer; +import org.apache.flink.api.common.typeutils.base.StringSerializer; +import org.apache.flink.api.java.functions.KeySelector; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.runtime.operators.testutils.DummyInvokable; +import org.apache.flink.runtime.operators.testutils.MockEnvironment; +import org.apache.flink.streaming.api.watermark.Watermark; +import org.apache.flink.streaming.runtime.io.DataInputStatus; +import org.apache.flink.streaming.runtime.streamrecord.InternalRecordAttributes; +import org.apache.flink.streaming.runtime.streamrecord.RecordAttributesBuilder; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; + +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; + +/** Tests for {@link SortingBacklogDataInput}. */ +public class SortingBacklogDataInputTest { + @Test + public void sortingDuringBacklog() throws Exception { + CollectingDataOutput collectingDataOutput = new CollectingDataOutput<>(); + CollectionDataInput input = + new CollectionDataInput<>( + Arrays.asList( + new StreamRecord<>(2, 0), + new StreamRecord<>(1, 0), + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(true) + .build(), + new StreamRecord<>(1, 3), + new StreamRecord<>(1, 1), + new StreamRecord<>(2, 1), + new StreamRecord<>(2, 3), + new StreamRecord<>(1, 2), + new StreamRecord<>(2, 2), + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(false) + .build(), + new StreamRecord<>(1, 4), + new StreamRecord<>(2, 4), + new StreamRecord<>(1, 5))); + MockEnvironment environment = MockEnvironment.builder().build(); + SortingBacklogDataInput sortingDataInput = + new SortingBacklogDataInput<>( + input, + new IntSerializer(), + new IntSerializer(), + (KeySelector) value -> value, + environment.getMemoryManager(), + environment.getIOManager(), + true, + 1.0, + new Configuration(), + new DummyInvokable(), + new ExecutionConfig(), + () -> true); + + DataInputStatus inputStatus; + do { + inputStatus = sortingDataInput.emitNext(collectingDataOutput); + } while (inputStatus != DataInputStatus.END_OF_INPUT); + + org.assertj.core.api.Assertions.assertThat(collectingDataOutput.events) + .containsExactly( + new StreamRecord<>(2, 0), + new StreamRecord<>(1, 0), + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(true) + .build(), + new InternalRecordAttributes(true, Long.MIN_VALUE), + new StreamRecord<>(1, 1), + new StreamRecord<>(1, 2), + new StreamRecord<>(1, 3), + new StreamRecord<>(2, 1), + new StreamRecord<>(2, 2), + new StreamRecord<>(2, 3), + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(false) + .build(), + new StreamRecord<>(1, 4), + new StreamRecord<>(2, 4), + new StreamRecord<>(1, 5)); + } + + @Test + public void watermarkPropagation() throws Exception { + CollectingDataOutput collectingDataOutput = new CollectingDataOutput<>(); + CollectionDataInput input = + new CollectionDataInput<>( + Arrays.asList( + new StreamRecord<>(1, 3), + new Watermark(1), + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(true) + .build(), + new StreamRecord<>(1, 1), + new Watermark(2), + new StreamRecord<>(2, 1), + new Watermark(3), + new StreamRecord<>(2, 3), + new Watermark(4), + new StreamRecord<>(1, 2), + new Watermark(5), + new StreamRecord<>(2, 2), + new Watermark(6), + new 
RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(false) + .build())); + MockEnvironment environment = MockEnvironment.builder().build(); + SortingBacklogDataInput sortingDataInput = + new SortingBacklogDataInput<>( + input, + new IntSerializer(), + new IntSerializer(), + (KeySelector) value -> value, + environment.getMemoryManager(), + environment.getIOManager(), + true, + 1.0, + new Configuration(), + new DummyInvokable(), + new ExecutionConfig(), + () -> true); + + DataInputStatus inputStatus; + do { + inputStatus = sortingDataInput.emitNext(collectingDataOutput); + } while (inputStatus != DataInputStatus.END_OF_INPUT); + + org.assertj.core.api.Assertions.assertThat(collectingDataOutput.events) + .containsExactly( + new StreamRecord<>(1, 3), + new Watermark(1), + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(true) + .build(), + new InternalRecordAttributes(true, 6L), + new StreamRecord<>(1, 1), + new StreamRecord<>(1, 2), + new StreamRecord<>(2, 1), + new StreamRecord<>(2, 2), + new StreamRecord<>(2, 3), + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(false) + .build(), + new Watermark(6)); + } + + @Test + public void simpleVariableLengthKeySorting() throws Exception { + CollectingDataOutput collectingDataOutput = new CollectingDataOutput<>(); + CollectionDataInput input = + new CollectionDataInput<>( + Arrays.asList( + new StreamRecord<>(2, 0), + new StreamRecord<>(1, 0), + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(true) + .build(), + new StreamRecord<>(1, 3), + new StreamRecord<>(1, 1), + new StreamRecord<>(2, 1), + new StreamRecord<>(2, 3), + new StreamRecord<>(1, 2), + new StreamRecord<>(2, 2), + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(false) + .build(), + new StreamRecord<>(1, 4), + new StreamRecord<>(2, 4), + new StreamRecord<>(1, 5))); + MockEnvironment environment = MockEnvironment.builder().build(); + SortingBacklogDataInput sortingDataInput = + new SortingBacklogDataInput<>( + input, + new IntSerializer(), + new StringSerializer(), + (KeySelector) value -> "" + value, + environment.getMemoryManager(), + environment.getIOManager(), + true, + 1.0, + new Configuration(), + new DummyInvokable(), + new ExecutionConfig(), + () -> true); + + DataInputStatus inputStatus; + do { + inputStatus = sortingDataInput.emitNext(collectingDataOutput); + } while (inputStatus != DataInputStatus.END_OF_INPUT); + + org.assertj.core.api.Assertions.assertThat(collectingDataOutput.events) + .containsExactly( + new StreamRecord<>(2, 0), + new StreamRecord<>(1, 0), + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(true) + .build(), + new InternalRecordAttributes(true, Long.MIN_VALUE), + new StreamRecord<>(1, 1), + new StreamRecord<>(1, 2), + new StreamRecord<>(1, 3), + new StreamRecord<>(2, 1), + new StreamRecord<>(2, 2), + new StreamRecord<>(2, 3), + new RecordAttributesBuilder(Collections.emptyList()) + .setBacklog(false) + .build(), + new StreamRecord<>(1, 4), + new StreamRecord<>(2, 4), + new StreamRecord<>(1, 5)); + } +} diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/source/CollectingDataOutput.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/source/CollectingDataOutput.java index 6a6c535b891011..51cf3eb24019bd 100644 --- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/source/CollectingDataOutput.java +++ 
diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/source/CollectingDataOutput.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/source/CollectingDataOutput.java
index 6a6c535b891011..51cf3eb24019bd 100644
--- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/source/CollectingDataOutput.java
+++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/source/CollectingDataOutput.java
@@ -21,6 +21,7 @@
 import org.apache.flink.streaming.api.watermark.Watermark;
 import org.apache.flink.streaming.runtime.io.PushingAsyncDataInput;
 import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker;
+import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes;
 import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
 import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus;
 
@@ -56,6 +57,11 @@ public void emitLatencyMarker(LatencyMarker latencyMarker) throws Exception {
         events.add(latencyMarker);
     }
 
+    @Override
+    public void emitRecordAttributes(RecordAttributes recordAttributes) throws Exception {
+        events.add(recordAttributes);
+    }
+
     public List<Object> getEvents() {
         return events;
     }
diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/io/RecordAttributesValveTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/io/RecordAttributesValveTest.java
new file mode 100644
index 00000000000000..4d1e6ad42e13f2
--- /dev/null
+++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/io/RecordAttributesValveTest.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.runtime.io;
+
+import org.apache.flink.streaming.api.operators.source.CollectingDataOutput;
+import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes;
+import org.apache.flink.streaming.runtime.streamrecord.RecordAttributesBuilder;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.Collections;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Tests for {@link RecordAttributesValve}. */
+class RecordAttributesValveTest {
+
+    @Test
+    void testRecordAttributesValve() throws Exception {
+        final RecordAttributesValve valve = new RecordAttributesValve(3);
+        CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
+        final RecordAttributes backlogRecordAttribute =
+                new RecordAttributesBuilder(Collections.emptyList()).setBacklog(true).build();
+        final RecordAttributes nonBacklogRecordAttribute =
+                new RecordAttributesBuilder(Collections.emptyList()).setBacklog(false).build();
+
+        valve.inputRecordAttributes(backlogRecordAttribute, 0, collectingDataOutput);
+        valve.inputRecordAttributes(backlogRecordAttribute, 1, collectingDataOutput);
+        assertThat(collectingDataOutput.getEvents()).containsExactly(nonBacklogRecordAttribute);
+
+        valve.inputRecordAttributes(backlogRecordAttribute, 2, collectingDataOutput);
+        assertThat(collectingDataOutput.getEvents())
+                .containsExactly(nonBacklogRecordAttribute, backlogRecordAttribute);
+
+        valve.inputRecordAttributes(nonBacklogRecordAttribute, 0, collectingDataOutput);
+        assertThat(collectingDataOutput.getEvents())
+                .containsExactly(
+                        nonBacklogRecordAttribute,
+                        backlogRecordAttribute,
+                        nonBacklogRecordAttribute);
+    }
+}
diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/io/StreamTaskNetworkInputTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/io/StreamTaskNetworkInputTest.java
index 7338469171bb28..481bdaff082315 100644
--- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/io/StreamTaskNetworkInputTest.java
+++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/io/StreamTaskNetworkInputTest.java
@@ -49,6 +49,7 @@
 import org.apache.flink.streaming.runtime.io.checkpointing.SingleCheckpointBarrierHandler;
 import org.apache.flink.streaming.runtime.io.checkpointing.UpstreamRecoveryTracker;
 import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker;
+import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes;
 import org.apache.flink.streaming.runtime.streamrecord.StreamElement;
 import org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer;
 import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
@@ -399,6 +400,9 @@ public void emitWatermarkStatus(WatermarkStatus watermarkStatus) {}
 
         @Override
         public void emitLatencyMarker(LatencyMarker latencyMarker) {}
+
+        @Override
+        public void emitRecordAttributes(RecordAttributes recordAttributes) {}
     }
 
     private static class VerifyRecordsDataOutput<T> extends NoOpDataOutput<T> {
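The valve assertions above imply a simple merge rule: each input channel remembers the last backlog flag it saw, the combined status is the logical AND over all channels, and attributes are forwarded downstream only when the combined status changes (the first attributes received also force an emission). A minimal model of that rule follows, assuming channels default to non-backlog; whether the real RecordAttributesValve tracks an explicit "unset" state instead is not visible in this hunk:

    import java.util.ArrayList;
    import java.util.List;

    /** Illustrative model of the channel-merging rule asserted in the valve test above. */
    public class RecordAttributesValveSketch {
        private final boolean[] channelBacklog; // assumed default: false per channel
        private Boolean lastEmitted; // null until the first attributes arrive
        private final List<Boolean> outputs = new ArrayList<>();

        public RecordAttributesValveSketch(int numChannels) {
            this.channelBacklog = new boolean[numChannels];
        }

        /** Combined backlog is the AND over all input channels; emit only on change. */
        public void input(boolean backlog, int channel) {
            channelBacklog[channel] = backlog;
            boolean combined = true;
            for (boolean b : channelBacklog) {
                combined &= b;
            }
            if (lastEmitted == null || combined != lastEmitted) {
                outputs.add(combined);
                lastEmitted = combined;
            }
        }

        public static void main(String[] args) {
            RecordAttributesValveSketch valve = new RecordAttributesValveSketch(3);
            valve.input(true, 0); // combined still false -> emits false
            valve.input(true, 1); // no change -> nothing emitted
            valve.input(true, 2); // all channels backlog -> emits true
            valve.input(false, 0); // one channel leaves backlog -> emits false
            System.out.println(valve.outputs); // [false, true, false]
        }
    }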
diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/watermarkstatus/StatusWatermarkValveTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/watermarkstatus/StatusWatermarkValveTest.java
index 0db7b04e6c47f3..52bbc518541f16 100644
--- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/watermarkstatus/StatusWatermarkValveTest.java
+++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/watermarkstatus/StatusWatermarkValveTest.java
@@ -21,6 +21,7 @@
 import org.apache.flink.streaming.api.watermark.Watermark;
 import org.apache.flink.streaming.runtime.io.PushingAsyncDataInput;
 import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker;
+import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes;
 import org.apache.flink.streaming.runtime.streamrecord.StreamElement;
 import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
 
@@ -453,6 +454,11 @@ public void emitLatencyMarker(LatencyMarker latencyMarker) {
             throw new UnsupportedOperationException();
         }
 
+        @Override
+        public void emitRecordAttributes(RecordAttributes recordAttributes) throws Exception {
+            throw new UnsupportedOperationException();
+        }
+
         public StreamElement popLastSeenOutput() {
             return allOutputs.poll();
         }
diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/util/SourceOperatorTestHarness.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/util/SourceOperatorTestHarness.java
index 2dbfa280411f37..0509ab0bc748d8 100644
--- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/util/SourceOperatorTestHarness.java
+++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/util/SourceOperatorTestHarness.java
@@ -25,6 +25,7 @@
 import org.apache.flink.streaming.api.watermark.Watermark;
 import org.apache.flink.streaming.runtime.io.PushingAsyncDataInput.DataOutput;
 import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker;
+import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes;
 import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
 import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus;
 
@@ -79,5 +80,10 @@ public void emitWatermarkStatus(WatermarkStatus watermarkStatus) {
         public void emitLatencyMarker(LatencyMarker latencyMarker) {
             output.emitLatencyMarker(latencyMarker);
         }
+
+        @Override
+        public void emitRecordAttributes(RecordAttributes recordAttributes) throws Exception {
+            output.emitRecordAttributes(recordAttributes);
+        }
     }
 }
diff --git a/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/utils/TimeTestUtil.scala b/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/utils/TimeTestUtil.scala
index b1ad73b7e2178b..8f22a3b494d6c5 100644
--- a/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/utils/TimeTestUtil.scala
+++ b/flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/utils/TimeTestUtil.scala
@@ -25,7 +25,7 @@ import org.apache.flink.streaming.api.functions.source.SourceFunction
 import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext
 import org.apache.flink.streaming.api.operators.{AbstractStreamOperator, OneInputStreamOperator}
 import org.apache.flink.streaming.api.watermark.Watermark
-import org.apache.flink.streaming.runtime.streamrecord.StreamRecord
+import org.apache.flink.streaming.runtime.streamrecord.{RecordAttributes, StreamRecord}
 import org.apache.flink.table.planner.JLong
 
 object TimeTestUtil {
@@ -98,6 +98,8 @@
       }
     }
 
+    override def processRecordAttributes(recordAttributes: RecordAttributes): Unit =
+      super.processRecordAttributes(recordAttributes)
   }
 }
diff --git a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/multipleinput/output/BroadcastingOutput.java b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/multipleinput/output/BroadcastingOutput.java
index 6d158dda1ff3f1..166dfd16e06c28 100644
--- a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/multipleinput/output/BroadcastingOutput.java
+++ b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/operators/multipleinput/output/BroadcastingOutput.java
@@ -21,6 +21,7 @@
 import org.apache.flink.streaming.api.operators.Output;
 import org.apache.flink.streaming.api.watermark.Watermark;
 import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker;
+import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes;
 import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
 import org.apache.flink.streaming.runtime.tasks.OperatorChain;
 import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus;
@@ -90,4 +91,11 @@ public void close() {
             output.close();
         }
     }
+
+    @Override
+    public void emitRecordAttributes(RecordAttributes recordAttributes) {
+        for (Output<StreamRecord<RowData>> output : outputs) {
+            output.emitRecordAttributes(recordAttributes);
+        }
+    }
 }
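The Scala and Java changes above only wire emitRecordAttributes/processRecordAttributes through existing outputs and test harnesses. For context, this is roughly what a backlog-aware user operator would look like against the hooks this patch adds; a sketch only, since the exact hook signature and the isBacklog() accessor name are assumptions inferred from the builder's setBacklog(...):

    import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
    import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
    import org.apache.flink.streaming.runtime.streamrecord.RecordAttributes;
    import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

    /** Hypothetical operator showing the record-attributes hook; not part of this patch. */
    public class BacklogAwareOperator extends AbstractStreamOperator<String>
            implements OneInputStreamOperator<String, String> {

        private boolean isBacklog; // last backlog flag seen on the input

        @Override
        public void processRecordAttributes(RecordAttributes recordAttributes) throws Exception {
            // Remember the flag, then let the base class forward the attributes
            // downstream (mirrors the super call in TimeTestUtil above).
            isBacklog = recordAttributes.isBacklog(); // accessor name assumed
            super.processRecordAttributes(recordAttributes);
        }

        @Override
        public void processElement(StreamRecord<String> element) throws Exception {
            // A real operator could buffer or pre-aggregate while isBacklog is true
            // and fall back to record-at-a-time processing otherwise.
            output.collect(element);
        }
    }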
diff --git a/flink-tests/src/test/java/org/apache/flink/test/streaming/api/datastream/StreamingWithBacklogITCase.java b/flink-tests/src/test/java/org/apache/flink/test/streaming/api/datastream/StreamingWithBacklogITCase.java
new file mode 100644
index 00000000000000..49d9af32147c18
--- /dev/null
+++ b/flink-tests/src/test/java/org/apache/flink/test/streaming/api/datastream/StreamingWithBacklogITCase.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.streaming.api.datastream;
+
+import org.apache.flink.api.common.eventtime.WatermarkStrategy;
+import org.apache.flink.api.common.functions.AggregateFunction;
+import org.apache.flink.api.common.functions.ReduceFunction;
+import org.apache.flink.api.common.typeinfo.Types;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.api.java.tuple.Tuple3;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.connector.base.source.hybrid.HybridSource;
+import org.apache.flink.connector.datagen.source.DataGeneratorSource;
+import org.apache.flink.connector.datagen.source.GeneratorFunction;
+import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
+import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
+import org.apache.flink.streaming.api.windowing.time.Time;
+import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
+import org.apache.flink.util.CloseableIterator;
+import org.apache.flink.util.CollectionUtil;
+
+import org.junit.jupiter.api.Test;
+
+import java.time.Duration;
+import java.util.List;
+
+import static org.apache.flink.streaming.api.environment.ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL_DURING_BACKLOG;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Integration tests for streaming jobs that process backlog data. */
+public class StreamingWithBacklogITCase {
+    @Test
+    void testKeyedAggregationWithBacklog() throws Exception {
+        final Configuration config = new Configuration();
+        config.set(CHECKPOINTING_INTERVAL_DURING_BACKLOG, Duration.ZERO);
+        final StreamExecutionEnvironment env =
+                StreamExecutionEnvironment.getExecutionEnvironment(config);
+        env.setParallelism(1);
+
+        final DataGeneratorSource<Tuple2<Integer, Long>> historicalData =
+                new DataGeneratorSource<>(
+                        (GeneratorFunction<Long, Tuple2<Integer, Long>>)
+                                value -> new Tuple2<>(value.intValue() % 2, 1L),
+                        4,
+                        Types.TUPLE(Types.INT, Types.LONG));
+
+        final DataGeneratorSource<Tuple2<Integer, Long>> realTimeData =
+                new DataGeneratorSource<>(
+                        (GeneratorFunction<Long, Tuple2<Integer, Long>>)
+                                value -> new Tuple2<>(value.intValue() % 2, 1L),
+                        4,
+                        Types.TUPLE(Types.INT, Types.LONG));
+
+        final HybridSource<Tuple2<Integer, Long>> source =
+                HybridSource.builder(historicalData).addSource(realTimeData).build();
+        final SingleOutputStreamOperator<Tuple2<Integer, Long>> reduced =
+                env.fromSource(source, WatermarkStrategy.noWatermarks(), "source")
+                        .returns(Types.TUPLE(Types.INT, Types.LONG))
+                        .keyBy(record -> record.f0)
+                        .reduce(
+                                (ReduceFunction<Tuple2<Integer, Long>>)
+                                        (value1, value2) ->
+                                                new Tuple2<>(value1.f0, value1.f1 + value2.f1));
+
+        try (final CloseableIterator<Tuple2<Integer, Long>> iter = reduced.executeAndCollect()) {
+            final List<Tuple2<Integer, Long>> result = CollectionUtil.iteratorToList(iter);
+            assertThat(result)
+                    .containsExactlyInAnyOrder(
+                            Tuple2.of(0, 1L),
+                            Tuple2.of(0, 2L),
+                            Tuple2.of(1, 1L),
+                            Tuple2.of(1, 2L),
+                            Tuple2.of(0, 3L),
+                            Tuple2.of(1, 3L),
+                            Tuple2.of(0, 4L),
+                            Tuple2.of(1, 4L));
+        }
+    }
+
+    @Test
+    void testKeyedWindowedAggregationWithBacklog() throws Exception {
+        final Configuration config = new Configuration();
+        config.set(CHECKPOINTING_INTERVAL_DURING_BACKLOG, Duration.ZERO);
+        final StreamExecutionEnvironment env =
+                StreamExecutionEnvironment.getExecutionEnvironment(config);
+        env.setParallelism(1);
+
+        final int backlogCnt = 4;
+        final DataGeneratorSource<Tuple3<Integer, Long, Long>> historicalData =
+                new DataGeneratorSource<>(
+                        (GeneratorFunction<Long, Tuple3<Integer, Long, Long>>)
+                                value -> new Tuple3<>(value.intValue() % 2, value * 1000, 1L),
+                        backlogCnt,
+                        Types.TUPLE(Types.INT, Types.LONG, Types.LONG));
+
+        final DataGeneratorSource<Tuple3<Integer, Long, Long>> realTimeData =
+                new DataGeneratorSource<>(
+                        (GeneratorFunction<Long, Tuple3<Integer, Long, Long>>)
+                                value ->
+                                        new Tuple3<>(
+                                                value.intValue() % 2,
+                                                (value + backlogCnt) * 1000,
+                                                1L),
+                        4,
+                        Types.TUPLE(Types.INT, Types.LONG, Types.LONG));
+
+        final HybridSource<Tuple3<Integer, Long, Long>> source =
+                HybridSource.builder(historicalData).addSource(realTimeData).build();
+        final SingleOutputStreamOperator<Tuple3<Integer, Long, Long>> output =
+                env.fromSource(
+                                source,
+                                WatermarkStrategy
+                                        .<Tuple3<Integer, Long, Long>>forMonotonousTimestamps()
+                                        .withTimestampAssigner((event, timestamp) -> event.f1),
+                                "source")
+                        .returns(Types.TUPLE(Types.INT, Types.LONG, Types.LONG))
+                        .keyBy(record -> record.f0)
+                        .window(TumblingEventTimeWindows.of(Time.seconds(4)))
+                        .aggregate(
+                                new AggregateFunction<Tuple3<Integer, Long, Long>, Long, Long>() {
+                                    @Override
+                                    public Long createAccumulator() {
+                                        return 0L;
+                                    }
+
+                                    @Override
+                                    public Long add(
+                                            Tuple3<Integer, Long, Long> value, Long accumulator) {
+                                        return accumulator + value.f2;
+                                    }
+
+                                    @Override
+                                    public Long getResult(Long accumulator) {
+                                        return accumulator;
+                                    }
+
+                                    @Override
+                                    public Long merge(Long a, Long b) {
+                                        return a + b;
+                                    }
+                                },
+                                (WindowFunction<
+                                                Long,
+                                                Tuple3<Integer, Long, Long>,
+                                                Integer,
+                                                TimeWindow>)
+                                        (key, window, input, out) -> {
+                                            for (Long i : input) {
+                                                out.collect(Tuple3.of(key, window.getEnd(), i));
+                                            }
+                                        })
+                        .returns(Types.TUPLE(Types.INT, Types.LONG, Types.LONG));
+
+        try (final CloseableIterator<Tuple3<Integer, Long, Long>> iter =
+                output.executeAndCollect()) {
+            final List<Tuple3<Integer, Long, Long>> result = CollectionUtil.iteratorToList(iter);
+            assertThat(result)
+                    .containsExactlyInAnyOrder(
+                            Tuple3.of(0, 4000L, 2L),
+                            Tuple3.of(1, 4000L, 2L),
+                            Tuple3.of(0, 8000L, 2L),
+                            Tuple3.of(1, 8000L, 2L));
+        }
+    }
+}
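End-to-end, the only user-facing switch these IT cases exercise is the checkpoint interval during backlog. A minimal job skeleton under the same assumption (Duration.ZERO disables checkpointing while sources report backlog, which is what allows the runtime to process the backlog in batch style; the source pipeline itself is elided):

    import java.time.Duration;

    import org.apache.flink.configuration.Configuration;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

    import static org.apache.flink.streaming.api.environment.ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL_DURING_BACKLOG;

    /** Skeleton of a user job enabling the backlog behavior exercised above. */
    public class BacklogJobSkeleton {
        public static void main(String[] args) throws Exception {
            final Configuration config = new Configuration();
            // Same setting both IT cases above use: no checkpoints during backlog.
            config.set(CHECKPOINTING_INTERVAL_DURING_BACKLOG, Duration.ZERO);
            final StreamExecutionEnvironment env =
                    StreamExecutionEnvironment.getExecutionEnvironment(config);
            // Build a HybridSource(historical -> real-time) pipeline as in the
            // IT cases, then run it:
            env.execute("backlog-aware job");
        }
    }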