From c787e317cf42b21e41cccdf4f2abfeb28f5ab7e3 Mon Sep 17 00:00:00 2001 From: Dariusz Aniszewski Date: Tue, 7 Nov 2017 17:25:55 +0100 Subject: [PATCH 1/4] Dataflow and PerfKit profiles; hash for 100.000.000 lines --- sdks/java/io/file-based-io-tests/pom.xml | 106 ++++++++++++++++++ .../org/apache/beam/sdk/io/text/TextIOIT.java | 5 +- 2 files changed, 109 insertions(+), 2 deletions(-) diff --git a/sdks/java/io/file-based-io-tests/pom.xml b/sdks/java/io/file-based-io-tests/pom.xml index ae7527c79ecf..1d1ff6b2ad1b 100644 --- a/sdks/java/io/file-based-io-tests/pom.xml +++ b/sdks/java/io/file-based-io-tests/pom.xml @@ -31,6 +31,112 @@ Apache Beam :: SDKs :: Java :: IO :: File-based-io-tests Integration tests for reading/writing using file-based sources/sinks. + + + + dataflow-runner + + + integrationTestRunner + dataflow + + + + + org.apache.beam + beam-runners-google-cloud-dataflow-java + runtime + + + + + + + io-it-suite + + io-it-suite + + + + ${project.parent.parent.parent.parent.basedir} + + + + + org.codehaus.gmaven + groovy-maven-plugin + ${groovy-maven-plugin.version} + + + find-supported-python-for-compile + initialize + + execute + + + ${beamRootProjectDir}/sdks/python/findSupportedPython.groovy + + + + + + + org.codehaus.mojo + exec-maven-plugin + ${maven-exec-plugin.version} + + + verify + + exec + + + + + ${python.interpreter.bin} + + ${pkbLocation} + -benchmarks=beam_integration_benchmark + -beam_it_profile=io-it + -beam_location=${beamRootProjectDir} + -beam_prebuilt=true + -beam_sdk=java + + ${pkbBeamRunnerProfile} + ${pkbBeamRunnerOption} + + -beam_it_module=sdks/java/io/file-based-io-tests + -beam_it_class=org.apache.beam.sdk.io.text.TextIOIT + + -beam_it_options=${integrationTestPipelineOptions} + + + + + + org.apache.maven.plugins + maven-surefire-plugin + ${surefire-plugin.version} + + true + + + + + + + org.apache.beam diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java 
b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java index ecab1d864971..fc7bec690272 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java @@ -54,7 +54,7 @@ * *

Run this test using the command below. Pass in connection information via PipelineOptions: *

- *  mvn -e -Pio-it verify -pl sdks/java/io/text -DintegrationTestPipelineOptions='[
+ *  mvn -e -Pio-it verify -pl sdks/java/io/file-based-io-tests -DintegrationTestPipelineOptions='[
  *  "--numberOfRecords=100000",
  *  "--filenamePrefix=TEXTIOIT"
  *  ]'
@@ -107,7 +107,8 @@ public void writeThenReadAll() {
   private static String getExpectedHashForLineCount(Long lineCount) {
     Map expectedHashes = ImmutableMap.of(
         100_000L, "4c8bb3b99dcc59459b20fefba400d446",
-        1_000_000L, "9796db06e7a7960f974d5a91164afff1"
+        1_000_000L, "9796db06e7a7960f974d5a91164afff1",
+        100_000_000L, "6ce05f456e2fdc846ded2abd0ec1de95"
     );
 
     String hash = expectedHashes.get(lineCount);

From 5dbd278c440ad877a58ffb4c84098bcc33e94b10 Mon Sep 17 00:00:00 2001
From: Dariusz Aniszewski 
Date: Fri, 17 Nov 2017 18:09:01 +0100
Subject: [PATCH 2/4] modified io-it-suite description

---
 sdks/java/io/file-based-io-tests/pom.xml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sdks/java/io/file-based-io-tests/pom.xml b/sdks/java/io/file-based-io-tests/pom.xml
index 1d1ff6b2ad1b..5b97ec07f19e 100644
--- a/sdks/java/io/file-based-io-tests/pom.xml
+++ b/sdks/java/io/file-based-io-tests/pom.xml
@@ -60,6 +60,9 @@
             mvn verify -Dio-it-suite -pl sdks/java/io/file-based-io-tests
                 -DpkbLocation="path-to-pkb.py" \
                 -DintegrationTestPipelineOptions='["––numberOfRecords=100000"]'
+
+            For DirectRunner, please use -DforceDirectRunner=true argument
+            For other runners please check doc in BEAM-3060 and https://beam.apache.org/documentation/io/testing/
         -->
         
             io-it-suite

From 3aee69e3cb672292e365a905ae9ef4cec840db03 Mon Sep 17 00:00:00 2001
From: Dariusz Aniszewski 
Date: Mon, 20 Nov 2017 17:18:26 +0100
Subject: [PATCH 3/4] parametrizable PerfKit profile

---
 sdks/java/io/file-based-io-tests/pom.xml                     | 5 +++--
 .../src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java  | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/sdks/java/io/file-based-io-tests/pom.xml b/sdks/java/io/file-based-io-tests/pom.xml
index 5b97ec07f19e..6c3a7e3718b6 100644
--- a/sdks/java/io/file-based-io-tests/pom.xml
+++ b/sdks/java/io/file-based-io-tests/pom.xml
@@ -59,7 +59,8 @@
 
             mvn verify -Dio-it-suite -pl sdks/java/io/file-based-io-tests
                 -DpkbLocation="path-to-pkb.py" \
-                -DintegrationTestPipelineOptions='["––numberOfRecords=100000"]'
+                -DintegrationTestPipelineOptions='["––numberOfRecords=100000"]' \
+                -DfileBasedIoItClass=the file-based IO IT class, e.g. org.apache.beam.sdk.io.text.TextIOIT
 
             For DirectRunner, please use -DforceDirectRunner=true argument
             For other runners please check doc in BEAM-3060 and https://beam.apache.org/documentation/io/testing/
@@ -120,7 +121,7 @@
                                 ${pkbBeamRunnerOption}
                                 
                                 -beam_it_module=sdks/java/io/file-based-io-tests
-                                -beam_it_class=org.apache.beam.sdk.io.text.TextIOIT
+                                -beam_it_class=${fileBasedIoItClass}
                                 
                                 -beam_it_options=${integrationTestPipelineOptions}
                             
diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
index fc7bec690272..522febea9e59 100644
--- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
+++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
@@ -59,6 +59,9 @@
  *  "--filenamePrefix=TEXTIOIT"
  *  ]'
  * 
+ *

+ * <p>Please see 'sdks/java/io/file-based-io-tests/pom.xml' for instructions regarding
+ * running this test using the Beam performance testing framework.

* */ @RunWith(JUnit4.class) public class TextIOIT { From e65cb9a6bef4a4e67745c13f90c9d976805b8be4 Mon Sep 17 00:00:00 2001 From: Dariusz Aniszewski Date: Mon, 20 Nov 2017 17:25:42 +0100 Subject: [PATCH 4/4] modified TextIOIT description to invoke only this test --- .../src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java index 522febea9e59..d741f95df5fc 100644 --- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java +++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java @@ -54,7 +54,9 @@ * *

Run this test using the command below. Pass in connection information via PipelineOptions: *

- *  mvn -e -Pio-it verify -pl sdks/java/io/file-based-io-tests -DintegrationTestPipelineOptions='[
+ *  mvn -e -Pio-it verify -pl sdks/java/io/file-based-io-tests
+ *  -Dit.test=org.apache.beam.sdk.io.text.TextIOIT
+ *  -DintegrationTestPipelineOptions='[
  *  "--numberOfRecords=100000",
  *  "--filenamePrefix=TEXTIOIT"
  *  ]'