diff --git a/.test-infra/jenkins/job_PostCommit_Java_Tpcds_Spark.groovy b/.test-infra/jenkins/job_PostCommit_Java_Tpcds_Spark.groovy
new file mode 100644
index 0000000000000..859d5110cffd3
--- /dev/null
+++ b/.test-infra/jenkins/job_PostCommit_Java_Tpcds_Spark.groovy
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import CommonJobProperties as commonJobProperties
+import InfluxDBCredentialsHelper
+
+// Defines the 'beam_PostCommit_Java_Tpcds_Spark' job, which invokes the
+// :sdks:java:testing:tpcds:run Gradle task with --runner=SparkRunner.
+PhraseTriggeringPostCommitBuilder.postCommitJob('beam_PostCommit_Java_Tpcds_Spark',
+    'Run Spark Runner Tpcds Tests', 'Spark Runner Tpcds Tests', this) {
+
+  description('Runs the Tpcds suite on the Spark runner against a Pull Request, on demand.')
+
+  // Set common parameters.
+  commonJobProperties.setTopLevelMainJobProperties(delegate, 'master', 240, true, 'beam-perf')
+  InfluxDBCredentialsHelper.useCredentials(delegate)
+
+  // Gradle goals for this job.
+  steps {
+    shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK RUNNER ***"')
+    gradle {
+      rootBuildScriptDir(commonJobProperties.checkoutDir)
+      tasks(':sdks:java:testing:tpcds:run')
+      commonJobProperties.setGradleSwitches(delegate)
+      // The pipeline options are joined with spaces into one double-quoted
+      // -Ptpcds.args value; the final '"' closes the quote opened by ' -Ptpcds.args="'.
+      switches('-Ptpcds.runner=":runners:spark:2"' +
+          ' -Ptpcds.args="' +
+          [
+            '--dataSize=1GB',
+            '--sourceType=PARQUET',
+            '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+            '--resultsDirectory=gs://beam-tpcds/results/',
+            '--tpcParallel=1',
+            '--runner=SparkRunner',
+            '--queries=3'
+          ].join(' ') + '"')
+    }
+  }
+}
diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java
index 4a2ed544c96a9..09f6376344ff8 100644
--- a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java
+++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java
@@ -30,7 +30,8 @@ public class TpcdsParametersReader {
 
   /** The data sizes that have been supported. */
   private static final Set supportedDataSizes =
-      Stream.of("1G", "10G", "100G").collect(Collectors.toCollection(HashSet::new));
+      Stream.of("1G", "1GB", "10G", "10GB", "100G", "100GB")
+          .collect(Collectors.toCollection(HashSet::new));
 
   private static final String QUERY_PREFIX = "query";
 