diff --git a/cloudbuild_CI.yaml b/cloudbuild_CI.yaml index 4375decde..c7bbf2ab3 100644 --- a/cloudbuild_CI.yaml +++ b/cloudbuild_CI.yaml @@ -50,6 +50,7 @@ steps: - '--run_unit_tests' - '--run_presubmit_tests' - '--run_preprocessor_tests' + - '--run_bq_to_vcf_tests' id: 'test-gcp-variant-transforms-docker' # By default the script uses a GS bucket of gcp-variant-transforms-test # project. For other projects we should either use the following option diff --git a/docs/bigquery_to_vcf.md b/docs/bigquery_to_vcf.md index b258736d8..d01d4ffac 100644 --- a/docs/bigquery_to_vcf.md +++ b/docs/bigquery_to_vcf.md @@ -89,6 +89,11 @@ In addition, the following optional flags are provided: You may change this flag if you have a dataset that is very dense and variants in each shard cannot be sorted in memory. +For large datasets, using +[Cloud Dataflow Shuffle](https://cloud.google.com/dataflow/service/dataflow-service-desc#cloud-dataflow-shuffle) +can speed up the pipeline by specifying the parameter: +`--experiments shuffle_mode=service`. + ### Running from github In addition to using the docker image, you may run the pipeline directly from