From c787e317cf42b21e41cccdf4f2abfeb28f5ab7e3 Mon Sep 17 00:00:00 2001
From: Dariusz Aniszewski
Date: Tue, 7 Nov 2017 17:25:55 +0100
Subject: [PATCH 1/4] Dataflow and PerfKit profiles; hash for 100.000.000 lines
---
sdks/java/io/file-based-io-tests/pom.xml | 106 ++++++++++++++++++
.../org/apache/beam/sdk/io/text/TextIOIT.java | 5 +-
2 files changed, 109 insertions(+), 2 deletions(-)
diff --git a/sdks/java/io/file-based-io-tests/pom.xml b/sdks/java/io/file-based-io-tests/pom.xml
index ae7527c79ecf..1d1ff6b2ad1b 100644
--- a/sdks/java/io/file-based-io-tests/pom.xml
+++ b/sdks/java/io/file-based-io-tests/pom.xml
@@ -31,6 +31,112 @@
Apache Beam :: SDKs :: Java :: IO :: File-based-io-tests
Integration tests for reading/writing using file-based sources/sinks.
+
+
+
+ dataflow-runner
+
+
+ integrationTestRunner
+ dataflow
+
+
+
+
+ org.apache.beam
+ beam-runners-google-cloud-dataflow-java
+ runtime
+
+
+
+
+
+
+ io-it-suite
+
+ io-it-suite
+
+
+
+ ${project.parent.parent.parent.parent.basedir}
+
+
+
+
+ org.codehaus.gmaven
+ groovy-maven-plugin
+ ${groovy-maven-plugin.version}
+
+
+ find-supported-python-for-compile
+ initialize
+
+ execute
+
+
+ ${beamRootProjectDir}/sdks/python/findSupportedPython.groovy
+
+
+
+
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+ ${maven-exec-plugin.version}
+
+
+ verify
+
+ exec
+
+
+
+
+ ${python.interpreter.bin}
+
+ ${pkbLocation}
+ -benchmarks=beam_integration_benchmark
+ -beam_it_profile=io-it
+ -beam_location=${beamRootProjectDir}
+ -beam_prebuilt=true
+ -beam_sdk=java
+
+ ${pkbBeamRunnerProfile}
+ ${pkbBeamRunnerOption}
+
+ -beam_it_module=sdks/java/io/file-based-io-tests
+ -beam_it_class=org.apache.beam.sdk.io.text.TextIOIT
+
+ -beam_it_options=${integrationTestPipelineOptions}
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ ${surefire-plugin.version}
+
+ true
+
+
+
+
+
+
+
org.apache.beam
diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
index ecab1d864971..fc7bec690272 100644
--- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
+++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
@@ -54,7 +54,7 @@
*
* Run this test using the command below. Pass in connection information via PipelineOptions:
*
- * mvn -e -Pio-it verify -pl sdks/java/io/text -DintegrationTestPipelineOptions='[
+ * mvn -e -Pio-it verify -pl sdks/java/io/file-based-io-tests -DintegrationTestPipelineOptions='[
* "--numberOfRecords=100000",
* "--filenamePrefix=TEXTIOIT"
* ]'
@@ -107,7 +107,8 @@ public void writeThenReadAll() {
private static String getExpectedHashForLineCount(Long lineCount) {
Map expectedHashes = ImmutableMap.of(
100_000L, "4c8bb3b99dcc59459b20fefba400d446",
- 1_000_000L, "9796db06e7a7960f974d5a91164afff1"
+ 1_000_000L, "9796db06e7a7960f974d5a91164afff1",
+ 100_000_000L, "6ce05f456e2fdc846ded2abd0ec1de95"
);
String hash = expectedHashes.get(lineCount);
From 5dbd278c440ad877a58ffb4c84098bcc33e94b10 Mon Sep 17 00:00:00 2001
From: Dariusz Aniszewski
Date: Fri, 17 Nov 2017 18:09:01 +0100
Subject: [PATCH 2/4] modified io-it-suite description
---
sdks/java/io/file-based-io-tests/pom.xml | 3 +++
1 file changed, 3 insertions(+)
diff --git a/sdks/java/io/file-based-io-tests/pom.xml b/sdks/java/io/file-based-io-tests/pom.xml
index 1d1ff6b2ad1b..5b97ec07f19e 100644
--- a/sdks/java/io/file-based-io-tests/pom.xml
+++ b/sdks/java/io/file-based-io-tests/pom.xml
@@ -60,6 +60,9 @@
mvn verify -Dio-it-suite -pl sdks/java/io/file-based-io-tests
-DpkbLocation="path-to-pkb.py" \
-DintegrationTestPipelineOptions='["––numberOfRecords=100000"]'
+
+ For DirectRunner, please use -DforceDirectRunner=true argument
+ For other runners please check doc in BEAM-3060 and https://beam.apache.org/documentation/io/testing/
-->
io-it-suite
From 3aee69e3cb672292e365a905ae9ef4cec840db03 Mon Sep 17 00:00:00 2001
From: Dariusz Aniszewski
Date: Mon, 20 Nov 2017 17:18:26 +0100
Subject: [PATCH 3/4] parametrizable PerfKit profile
---
sdks/java/io/file-based-io-tests/pom.xml | 5 +++--
.../src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java | 3 +++
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/sdks/java/io/file-based-io-tests/pom.xml b/sdks/java/io/file-based-io-tests/pom.xml
index 5b97ec07f19e..6c3a7e3718b6 100644
--- a/sdks/java/io/file-based-io-tests/pom.xml
+++ b/sdks/java/io/file-based-io-tests/pom.xml
@@ -59,7 +59,8 @@
mvn verify -Dio-it-suite -pl sdks/java/io/file-based-io-tests
-DpkbLocation="path-to-pkb.py" \
- -DintegrationTestPipelineOptions='["––numberOfRecords=100000"]'
+ -DintegrationTestPipelineOptions='["––numberOfRecords=100000"]' \
+ -DfileBasedIoItClass=file-based IO IT class, e.g. org.apache.beam.sdk.io.text.TextIOIT
For DirectRunner, please use -DforceDirectRunner=true argument
For other runners please check doc in BEAM-3060 and https://beam.apache.org/documentation/io/testing/
@@ -120,7 +121,7 @@
${pkbBeamRunnerOption}
-beam_it_module=sdks/java/io/file-based-io-tests
- -beam_it_class=org.apache.beam.sdk.io.text.TextIOIT
+ -beam_it_class=${fileBasedIoItClass}
-beam_it_options=${integrationTestPipelineOptions}
diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
index fc7bec690272..522febea9e59 100644
--- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
+++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
@@ -59,6 +59,9 @@
* "--filenamePrefix=TEXTIOIT"
* ]'
*
+ *
+ * Please see 'sdks/java/io/file-based-io-tests/pom.xml' for instructions regarding
+ * running this test using the Beam performance testing framework.
* */
@RunWith(JUnit4.class)
public class TextIOIT {
From e65cb9a6bef4a4e67745c13f90c9d976805b8be4 Mon Sep 17 00:00:00 2001
From: Dariusz Aniszewski
Date: Mon, 20 Nov 2017 17:25:42 +0100
Subject: [PATCH 4/4] modified TextIOIT description to invoke only this test
---
.../src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
index 522febea9e59..d741f95df5fc 100644
--- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
+++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
@@ -54,7 +54,9 @@
*
* Run this test using the command below. Pass in connection information via PipelineOptions:
*
- * mvn -e -Pio-it verify -pl sdks/java/io/file-based-io-tests -DintegrationTestPipelineOptions='[
+ * mvn -e -Pio-it verify -pl sdks/java/io/file-based-io-tests
+ * -Dit.test=org.apache.beam.sdk.io.text.TextIOIT
+ * -DintegrationTestPipelineOptions='[
* "--numberOfRecords=100000",
* "--filenamePrefix=TEXTIOIT"
* ]'