46 changes: 46 additions & 0 deletions .test-infra/jenkins/job_PostCommit_Java11_Dataflow_Examples.groovy
@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import PostcommitJobBuilder
import CommonJobProperties as commonJobProperties

PostcommitJobBuilder.postCommitJob('beam_PostCommit_Java11_Examples_Dataflow',
'Run Java examples on Dataflow with Java 11', 'Google Cloud Dataflow Runner Examples Java 11', this) {

description('Runs the Java Examples suite on the Java 11 enabled Dataflow runner.')

commonJobProperties.setTopLevelMainJobProperties(delegate, 'master', 180)

publishers {
archiveJunit('**/build/test-results/**/*.xml')
}

steps {
gradle {
rootBuildScriptDir(commonJobProperties.checkoutDir)
tasks(':beam-runners-google-cloud-dataflow-java-examples:java11PostCommit')

// Increase parallel worker threads above processor limit since most time is
// spent waiting on Dataflow jobs. ValidatesRunner tests on Dataflow are slow
// because each one launches a Dataflow job with about 3 mins of overhead.
// 3 x num_cores strikes a good balance between maxing out parallelism without
// overloading the machines.
commonJobProperties.setGradleSwitches(delegate, 3 * Runtime.runtime.availableProcessors())
}
}
}
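
An aside on the sizing comment above: the 3 x num_cores multiplier works because the worker threads spend most of their time blocked on remote Dataflow jobs, not on CPU. A minimal Groovy sketch of the arithmetic, assuming (this is not shown in the PR) that setGradleSwitches ultimately forwards the count as Gradle's --max-workers switch:

def cores = Runtime.runtime.availableProcessors()
def workers = 3 * cores  // oversubscribe: most threads just wait on Dataflow jobs
println "--max-workers=${workers}"  // e.g. 12 on a 4-core Jenkins executor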
@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import PostcommitJobBuilder
import CommonJobProperties as commonJobProperties

PostcommitJobBuilder.postCommitJob('beam_PostCommit_Java11_Examples_Dataflow_Portability',
'Run Java Portability examples on Dataflow with Java 11', 'Google Cloud Dataflow Portability Runner Examples Java 11', this) {

description('Runs the Java Examples suite on the Java 11 enabled Dataflow runner with Portability API.')

commonJobProperties.setTopLevelMainJobProperties(delegate, 'master', 180)

publishers {
archiveJunit('**/build/test-results/**/*.xml')
}

steps {
gradle {
rootBuildScriptDir(commonJobProperties.checkoutDir)
tasks(':beam-runners-google-cloud-dataflow-java-examples:verifyPortabilityApi')
switches('-Pdockerfile=Dockerfile-java11')

// Increase parallel worker threads above processor limit since most time is
// spent waiting on Dataflow jobs. ValidatesRunner tests on Dataflow are slow
// because each one launches a Dataflow job with about 3 mins of overhead.
// 3 x num_cores strikes a good balance between maxing out parallelism without
// overloading the machines.
commonJobProperties.setGradleSwitches(delegate, 3 * Runtime.runtime.availableProcessors())
}
}
}
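
The only difference from the previous job, besides the Gradle task, is the '-Pdockerfile=Dockerfile-java11' switch. The container build task itself is not part of this diff; a hedged sketch of how such a project property is typically consumed in a build.gradle (the task name and default are illustrative, not from the Beam build):

// '-Pdockerfile=...' on the command line overrides the default below
def dockerfileName = project.findProperty('dockerfile') ?: 'Dockerfile'
task showDockerfile {
    doLast { println "Building worker container from ${dockerfileName}" }
}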
2 changes: 1 addition & 1 deletion build.gradle
@@ -145,7 +145,7 @@ task javaPreCommit() {

task javaPreCommitPortabilityApi() {
dependsOn ":beam-runners-google-cloud-dataflow-java-fn-api-worker:build"
dependsOn ":beam-runners-google-cloud-dataflow-java-examples:preCommitPortabilityApi"
dependsOn ":beam-runners-google-cloud-dataflow-java-examples:verifyPortabilityApi"
}

task javaPostCommit() {
70 changes: 36 additions & 34 deletions runners/google-cloud-dataflow-java/examples/build.gradle
@@ -42,57 +42,59 @@ def dockerImageName = project(':beam-runners-google-cloud-dataflow-java').ext.do
// If -PuseExecutableStage is set, the use_executable_stage_bundle_execution experiment will be enabled.
def fnapiExperiments = project.hasProperty('useExecutableStage') ? 'beam_fn_api,use_executable_stage_bundle_execution' : "beam_fn_api"

def commonConfig = { dataflowWorkerJar, workerHarnessContainerImage = '', additionalOptions = [] ->
Contributor: Yep - this improves this code a lot. Great!

// return the pre-evaluated configuration closure
return {
testClassesDirs = files(project(":beam-examples-java").sourceSets.test.output.classesDirs)
include "**/WordCountIT.class"
include "**/WindowedWordCountIT.class"
forkEvery 1
maxParallelForks 4

def preCommitBeamTestPipelineOptions = [
"--project=${gcpProject}",
"--tempRoot=${gcsTempRoot}",
"--runner=TestDataflowRunner",
"--dataflowWorkerJar=${dataflowWorkerJar}",
workerHarnessContainerImage.isEmpty() ? '' : "--workerHarnessContainerImage=${workerHarnessContainerImage}"
Contributor: When dataflowWorkerJar is provided, the workerHarnessContainerImage should be set to empty explicitly; otherwise, the Dataflow service will still try to pull the harness image rather than using the custom-built worker jar.

Contributor: Fix here: #10635

] + additionalOptions
systemProperty "beamTestPipelineOptions", JsonOutput.toJson(preCommitBeamTestPipelineOptions)
}
}
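
To make the reviewer's point above concrete: with the ternary as written, an empty image adds an empty string to the options list, so no --workerHarnessContainerImage= flag reaches the Dataflow service at all. A small illustration in plain Groovy (values made up; per the reviewer, #10635 is where the explicit-empty behavior was fixed):

def image = ''
def asWritten = image.isEmpty() ? '' : "--workerHarnessContainerImage=${image}"
assert asWritten == ''  // the flag is silently dropped
def explicitlyEmpty = "--workerHarnessContainerImage=${image}"
assert explicitlyEmpty == '--workerHarnessContainerImage='  // forces use of the staged worker jar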

task preCommitLegacyWorker(type: Test) {
dependsOn ":beam-runners-google-cloud-dataflow-java-legacy-worker:shadowJar"
def dataflowWorkerJar = project.findProperty('dataflowWorkerJar') ?: project(":beam-runners-google-cloud-dataflow-java-legacy-worker").shadowJar.archivePath

//Set workerHarnessContainerImage to empty to make Dataflow pick up the non-versioned container
//image, which handles a staged worker jar.
def preCommitBeamTestPipelineOptions = [
"--project=${gcpProject}",
"--tempRoot=${gcsTempRoot}",
"--runner=TestDataflowRunner",
"--dataflowWorkerJar=${dataflowWorkerJar}",
"--workerHarnessContainerImage=",
]
testClassesDirs = files(project(":beam-examples-java").sourceSets.test.output.classesDirs)
include "**/WordCountIT.class"
include "**/WindowedWordCountIT.class"
forkEvery 1
maxParallelForks 4
systemProperty "beamTestPipelineOptions", JsonOutput.toJson(preCommitBeamTestPipelineOptions)
with commonConfig(dataflowWorkerJar)
}

task preCommitFnApiWorker(type: Test) {
task verifyFnApiWorker(type: Test) {
dependsOn ":beam-runners-google-cloud-dataflow-java-fn-api-worker:shadowJar"
dependsOn ":beam-runners-google-cloud-dataflow-java:buildAndPushDockerContainer"

def dataflowWorkerJar = project.findProperty('dataflowWorkerJar') ?: project(":beam-runners-google-cloud-dataflow-java-fn-api-worker").shadowJar.archivePath
def preCommitBeamTestPipelineOptions = [
"--project=${gcpProject}",
"--tempRoot=${gcsTempRoot}",
"--runner=TestDataflowRunner",
"--dataflowWorkerJar=${dataflowWorkerJar}",
"--workerHarnessContainerImage=${dockerImageName}",
"--experiments=${fnapiExperiments}",
Contributor: Is this lost now?

Member Author: Right, I missed it. Added it back.

]
testClassesDirs = files(project(":beam-examples-java").sourceSets.test.output.classesDirs)
include "**/WordCountIT.class"
include "**/WindowedWordCountIT.class"
forkEvery 1
maxParallelForks 4
systemProperty "beamTestPipelineOptions", JsonOutput.toJson(preCommitBeamTestPipelineOptions)
with commonConfig(dataflowWorkerJar, dockerImageName, ["--experiments=${fnapiExperiments}"])
useJUnit {
excludeCategories 'org.apache.beam.sdk.testing.StreamingIT'
}
}

task postCommitLegacyWorkerJava11(type: Test) {
Contributor: Similarly to above, could we perform the same refactoring?

Member Author: Unfortunately it wasn't possible. I couldn't find a way of extracting the common configuration to another element. Setting system properties requires the Test task type, and those properties are restricted in scope to a single task - I experimented with setting a property in a task with the dependsOn directive, but it wasn't effective. Do you know of another way of extracting this?
@adude3141, this is also a reply to your general remark: the solution you linked didn't work. I believe that currently the best way is to duplicate those tasks.

Contributor: Maybe something like this will do?

diff --git a/runners/google-cloud-dataflow-java/examples/build.gradle b/runners/google-cloud-dataflow-java/examples/build.gradle
index 72059bb064..f591e5009b 100644
--- a/runners/google-cloud-dataflow-java/examples/build.gradle
+++ b/runners/google-cloud-dataflow-java/examples/build.gradle
@@ -42,25 +42,33 @@ def dockerImageName = project(':beam-runners-google-cloud-dataflow-java').ext.do
 // If -PuseExecutableStage is set, the use_executable_stage_bundle_execution experiment will be enabled.
 def fnapiExperiments = project.hasProperty('useExecutableStage') ? 'beam_fn_api,use_executable_stage_bundle_execution' : "beam_fn_api"
 
+// we require dataflowWorker, optional workerHarnessContainerImage and optional additionalOptions
+def commonConfig = { dataflowWorkerJar, workerHarnessContainerImage = '', additionalOptions = [] ->
+  // return the pre-evaluated configuration closure
+  return {
+    testClassesDirs = files(project(":beam-examples-java").sourceSets.test.output.classesDirs)
+    include "**/WordCountIT.class"
+    include "**/WindowedWordCountIT.class"
+    forkEvery 1
+    maxParallelForks 4
+
+    def preCommitBeamTestPipelineOptions = [
+            "--project=${gcpProject}",
+            "--tempRoot=${gcsTempRoot}",
+            "--runner=TestDataflowRunner",
+            "--dataflowWorkerJar=${dataflowWorkerJar}",
+            "--workerHarnessContainerImage=${workerHarnessContainerImage}"
+    ] + additionalOptions
+    systemProperty "beamTestPipelineOptions", JsonOutput.toJson(preCommitBeamTestPipelineOptions)
+  }
+}
+
 task preCommitLegacyWorker(type: Test) {
   dependsOn ":beam-runners-google-cloud-dataflow-java-legacy-worker:shadowJar"
   def dataflowWorkerJar = project.findProperty('dataflowWorkerJar') ?: project(":beam-runners-google-cloud-dataflow-java-legacy-worker").shadowJar.archivePath
-
   //Set workerHarnessContainerImage to empty to make Dataflow pick up the non-versioned container
   //image, which handles a staged worker jar.
-  def preCommitBeamTestPipelineOptions = [
-     "--project=${gcpProject}",
-     "--tempRoot=${gcsTempRoot}",
-     "--runner=TestDataflowRunner",
-     "--dataflowWorkerJar=${dataflowWorkerJar}",
-     "--workerHarnessContainerImage=",
-  ]
-  testClassesDirs = files(project(":beam-examples-java").sourceSets.test.output.classesDirs)
-  include "**/WordCountIT.class"
-  include "**/WindowedWordCountIT.class"
-  forkEvery 1
-  maxParallelForks 4
-  systemProperty "beamTestPipelineOptions", JsonOutput.toJson(preCommitBeamTestPipelineOptions)
+  with commonConfig(dataflowWorkerJar)
 }
 
 task verifyFnApiWorker(type: Test) {
@@ -68,20 +76,7 @@ task verifyFnApiWorker(type: Test) {
   dependsOn ":beam-runners-google-cloud-dataflow-java:buildAndPushDockerContainer"
 
   def dataflowWorkerJar = project.findProperty('dataflowWorkerJar') ?: project(":beam-runners-google-cloud-dataflow-java-fn-api-worker").shadowJar.archivePath
-  def preCommitBeamTestPipelineOptions = [
-          "--project=${gcpProject}",
-          "--tempRoot=${gcsTempRoot}",
-          "--runner=TestDataflowRunner",
-          "--dataflowWorkerJar=${dataflowWorkerJar}",
-          "--workerHarnessContainerImage=${dockerImageName}",
-          "--experiments=${fnapiExperiments}",
-  ]
-  testClassesDirs = files(project(":beam-examples-java").sourceSets.test.output.classesDirs)
-  include "**/WordCountIT.class"
-  include "**/WindowedWordCountIT.class"
-  forkEvery 1
-  maxParallelForks 4
-  systemProperty "beamTestPipelineOptions", JsonOutput.toJson(preCommitBeamTestPipelineOptions)
+  with commonConfig(dataflowWorkerJar, dockerImageName, "--experiments=${fnapiExperiments}")
   useJUnit {
     excludeCategories 'org.apache.beam.sdk.testing.StreamingIT'
   }
@@ -90,24 +85,10 @@ task verifyFnApiWorker(type: Test) {
 task postCommitLegacyWorkerJava11(type: Test) {
   dependsOn ":beam-runners-google-cloud-dataflow-java-legacy-worker:shadowJar"
   def dataflowWorkerJar = project.findProperty('dataflowWorkerJar') ?: project(":beam-runners-google-cloud-dataflow-java-legacy-worker").shadowJar.archivePath
-
   //Set workerHarnessContainerImage to empty to make Dataflow pick up the non-versioned container
   //image, which handles a staged worker jar.
-  def preCommitBeamTestPipelineOptions = [
-          "--project=${gcpProject}",
-          "--tempRoot=${gcsTempRoot}",
-          "--runner=TestDataflowRunner",
-          "--dataflowWorkerJar=${dataflowWorkerJar}",
-          "--workerHarnessContainerImage=",
-  ]
-  
-  testClassesDirs = files(project(":beam-examples-java").sourceSets.test.output.classesDirs)
-  include "**/WordCountIT.class"
-  include "**/WindowedWordCountIT.class"
-  forkEvery 1
-  maxParallelForks 4
+  with commonConfig(dataflowWorkerJar)
   systemProperty "java.specification.version", "11"
-  systemProperty "beamTestPipelineOptions", JsonOutput.toJson(preCommitBeamTestPipelineOptions)
 }
 
 task java11PostCommit() {

But please bear with me. It is a fast hack and I did not run it. Also, if you decide to use it, double-check that I did not accidentally delete some required configuration.

Member Author: Thanks for the suggestion, it worked with minimal modifications ;)

Contributor: Nice. Glad I could help here!

Contributor: 👍

dependsOn ":beam-runners-google-cloud-dataflow-java-legacy-worker:shadowJar"
def dataflowWorkerJar = project.findProperty('dataflowWorkerJar') ?: project(":beam-runners-google-cloud-dataflow-java-legacy-worker").shadowJar.archivePath
systemProperty "java.specification.version", "11"
with commonConfig(dataflowWorkerJar)
}
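
Reduced to a self-contained sketch, the pattern this thread converged on is a parameterized closure that returns a configuration closure; Groovy's with applies the returned closure against the Test task's delegate, so calls like systemProperty and include resolve correctly, and task-specific settings can still be layered on afterwards. Names below are illustrative, not from the Beam build:

def commonSettings = { String suiteFilter, List extraArgs = [] ->
    return {  // evaluated later, with the consuming task as delegate
        forkEvery 1
        maxParallelForks 4
        include suiteFilter
        systemProperty 'suite.args', extraArgs.join(' ')
    }
}

task exampleSuite(type: Test) {
    with commonSettings('**/WordCountIT.class', ['--runner=TestDataflowRunner'])
    systemProperty 'java.specification.version', '11'  // per-task extras still compose
}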

task java11PostCommit() {
dependsOn postCommitLegacyWorkerJava11
}

task preCommit() {
dependsOn preCommitLegacyWorker
}

task preCommitPortabilityApi() {
dependsOn preCommitFnApiWorker
task verifyPortabilityApi() {
dependsOn verifyFnApiWorker
}

afterEvaluate {