diff --git a/.gitattributes b/.gitattributes
index 49c97c85f..9fcf38aa0 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,6 +1,3 @@
-# This file can always be added to, line additions should never collide.
-CHANGELOG.MD merge=union
-
# These files are text and should be normalized (Convert crlf => lf)
*.scala text
*.MD text
diff --git a/.gitignore b/.gitignore
index f9bb0a4f5..21f16ea18 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,19 @@
-.idea
-log
-target
+# common scala config
+*~
+.DS_Store
.artifactory
+.idea/*
+!/.idea/inspectionProfiles/
+.idea/inspectionProfiles/*
+!/.idea/inspectionProfiles/Project_Default.xml
+target
+
+# custom config
cromwell-executions
cromwell-test-executions
-cromwell-workflow-logs
cromwell-test-workflow-logs
+cromwell-workflow-logs
local-cromwell-executions
+log
native
+scripts/docker-compose-mysql/compose/mysql/data
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 000000000..de31e99ed
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/.pullapprove.yml b/.pullapprove.yml
index a1b902b1c..c26c94ba2 100644
--- a/.pullapprove.yml
+++ b/.pullapprove.yml
@@ -1,18 +1,28 @@
-approve_by_comment: true
-approve_regex: ':\+1:'
-reset_on_push: false
-author_approval: ignored
-reviewers:
+# enabling version 2 turns github reviews on by default
+version: 2
+group_defaults:
+ approve_by_comment:
+ enabled: true
+ approve_regex: ':\+1:'
+ reset_on_push:
+ enabled: false
+groups:
+ reviewers:
required: 2
- members:
- - cjllanwarne
+ github_reviews:
+ enabled: true
+ author_approval:
+ ignored: true
+ users:
- Horneth
- - scottfrazer
- - mcovarr
- - geoffjentry
- - kshakir
+ - cjllanwarne
- francares
- gauravs90
- - jainh
+ - geoffjentry
+ - jsotobroad
+ - katevoss
- kcibul
+ - kshakir
+ - mcovarr
- ruchim
+ - danbills
diff --git a/.travis.yml b/.travis.yml
index 5784eaf62..7b8dfaa2f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,21 +1,51 @@
sudo: required
dist: trusty
+services:
+ - docker
language: scala
scala:
- - 2.11.8
+ - 2.12.2
jdk:
- oraclejdk8
+cache:
+ # md5deep - https://github.com/travis-ci/travis-ci/issues/3122
+ branch: md5deep
+ directories:
+ - $HOME/.ivy2/cache
+ - $HOME/.sbt/boot/
+before_cache:
+ # Tricks to avoid unnecessary cache updates
+ - find $HOME/.ivy2 -name "ivydata-*.properties" -delete
+ - find $HOME/.sbt -name "*.lock" -delete
+before_install:
+ # https://github.com/travis-ci/travis-ci/issues/7940#issuecomment-310759657
+ - sudo rm -f /etc/boto.cfg
env:
- # Setting this variable twice will cause the 'script' section to run twice with the respective env var invoked
- - BUILD_TYPE=sbt
- - BUILD_TYPE=centaurJes
- - BUILD_TYPE=centaurLocal
+ global:
+ - CENTAUR_BRANCH=develop
+ - INTEGRATION_TESTS_DIR=src/main/resources/integrationTestCases
+ matrix:
+ # Setting this variable twice will cause the 'script' section to run twice with the respective env var invoked
+ - BUILD_TYPE=sbt
+ - BUILD_TYPE=checkPublish
+ - BUILD_TYPE=centaurJes
+ - BUILD_TYPE=centaurLocal
+ - BUILD_TYPE=centaurTes
script:
- src/bin/travis/test.sh
after_success:
- - src/bin/travis/publishSnapshot.sh
+ - src/bin/travis/afterSuccess.sh
deploy:
provider: script
script: src/bin/travis/publishRelease.sh
on:
tags: true
+notifications:
+ slack:
+ rooms:
+ - secure: B5KYcnhk/ujAUWlHsjzP7ROLm6MtYhaGikdYf6JYINovhMbVKnZCTlZEy7rqT3L2T5uJ25iefD500VQGk1Gn7puQ1sNq50wqjzQaj20PWEiBwoWalcV/nKBcQx1TyFT13LJv8fbFnVPxFCkC3YXoHedx8qAhDs8GH/tT5J8XOC8=
+ template:
+ - "Build <%{build_url}|#%{build_number}> (<%{compare_url}|%{commit}>) of %{repository}@%{branch} by %{author} %{result} in %{duration}"
+ on_success: change
+ on_failure: change
+ on_pull_requests: false
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ff214c55..794a378b3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,22 +1,531 @@
# Cromwell Change Log
-## 0.20
+## 29
-* The default per-upload bytes size for GCS is now the minumum 256K
-instead of 64M. There is also an undocumented config key
-`google.upload-buffer-bytes` that allows adjusting this internal value.
+### Breaking Changes
-* Updated Docker Hub hash retriever to parse json with [custom media
-types](https://github.com/docker/distribution/blob/05b0ab0/docs/spec/manifest-v2-1.md).
+* Request timeouts for HTTP requests on the REST API now return a 503 status code instead of 500. The response for a request timeout is no longer in JSON format.
+* The metadata endpoint no longer returns gzipped responses by default. This now needs to be explicitly requested with an `Accept-Encoding: gzip` header
-* Added a `/batch` submit endpoint that accepts a single wdl with
-multiple input files.
+* Command line usage has been extensively revised for Cromwell 29. Please see the
+[README](https://github.com/broadinstitute/cromwell#command-line-usage) for details.
-* The `/query` endpoint now supports querying by `id`, and submitting
-parameters as a HTTP POST.
+* The engine endpoints are now served under `/engine`. Previously engine endpoints were available under
+`/api/engine`. Workflow endpoints are still served under `/api/workflows`. The setting `api.routeUnwrapped` has been
+retired at the same time.
-## 0.21
+* The response format of the [callcaching/diff](https://github.com/broadinstitute/cromwell#get-apiworkflowsversioncallcachingdiff) endpoint has been updated.
+
+### Cromwell Server
+
+* Cromwell now attempts to gracefully shutdown when running in server mode and receiving a `SIGINT` (`Ctrl-C`) or `SIGTERM` (`kill`) signal. This includes waiting for all pending Database writes before exiting.
+A detailed explanation and information about how to configure this feature can be found in the [Cromwell Wiki](https://github.com/broadinstitute/cromwell/wiki/DevZone#graceful-server-shutdown).
+
+## 28
+
+### Bug Fixes
+
+#### WDL write_* functions add a final newline
+
+The following WDL functions now add a newline after the final line of output (the previous behavior of not adding this
+newline was inadvertent):
+- `write_lines`
+- `write_map`
+- `write_object`
+- `write_objects`
+- `write_tsv`
+
+For example:
+
+```
+task writer {
+ Array[String] a = ["foo", "bar"]
+ command {
+ # used to output: "foo\nbar"
+ # now outputs: "foo\nbar\n"
+ cat write_lines(a)
+ }
+}
+```
+
+#### `ContinueWhilePossible`
+
+A workflow utilizing the WorkflowFailureMode Workflow Option `ContinueWhilePossible` will now successfully reach a terminal state once all runnable jobs have completed.
+#### `FailOnStderr`
+When `FailOnStderr` is set to false, Cromwell no longer checks for the existence of a stderr file for that task.
+
+### WDL Functions
+
+#### New functions: floor, ceil and round:
+
+Enables the `floor`, `ceil` and `round` functions in WDL to convert floating point numbers to integers.
+
+For example we can now use the size of an input file to influence the amount of memory the task is given. In the example below a 500MB input file will result in a request for a VM with 2GB of memory:
+
+```
+task foo {
+ File in_file
+ command { ... }
+ runtime {
+ docker: "..."
+ memory: ceil(size(in_file)) * 4
+ }
+}
+```
+
+### Call Caching
+
+* Hash values calculated by Cromwell for a call when call caching is enabled are now published to the metadata.
+It is published even if the call failed. However if the call is attempted multiple times (because it has been preempted for example),
+since hash values are strictly identical for all attempts, they will only be published in the last attempt section of the metadata for this call.
+If the hashes fail to be calculated, the reason is indicated in a `hashFailures` field in the `callCaching` section of the call metadata.
+*Important*: Hashes are not retroactively published to the metadata, which means only workflows run on Cromwell 28+ will have hashes in their metadata.
+
+See the [README](https://github.com/broadinstitute/cromwell#get-apiworkflowsversionidmetadata) for an example metadata response.
+
+* New endpoint returning the hash differential for 2 calls.
+
+`GET /api/workflows/:version/callcaching/diff`
+
+See the [README](https://github.com/broadinstitute/cromwell#get-apiworkflowsversioncallcachingdiff) for more details.
+
+### Workflow Submission
+
+* The workflow submission parameters `wdlSource` and `wdlDependencies` have been deprecated in favor of `workflowSource` and
+`workflowDependencies` respectively. The older names are still supported in Cromwell 28 with deprecation warnings but will
+be removed in a future version of Cromwell.
+
+### Labels
+* A new `/labels` endpoint has been added to update labels for an existing workflow. See the [README](README.md#patch-apiworkflowsversionidlabels) for more information.
+* Label formatting requirements have been updated, please check the [README](README.md#label-format) for more detailed documentation.
+
+
+### JES Backend
+
+The JES backend now supports a `filesystems.gcs.caching.duplication-strategy` configuration entry.
+It can be set to specify the desired behavior of Cromwell regarding call outputs when a call finds a hit in the cache.
+The default value is `copy` which will copy all output files to the new call directory.
+A second value is allowed, `reference`, that will instead point to the original output files, without copying them.
+
+
+```hocon
+filesystems {
+ gcs {
+ auth = "application-default"
+
+ caching {
+ duplication-strategy = "reference"
+ }
+ }
+}
+```
+
+A placeholder file will be placed in the execution folder of the cached call to explain the absence of output files and point to the location of the original ones.
+
+
+### Metadata Write Batching
+
+Metadata write batching works the same as in previous versions of Cromwell, but the default batch size has been changed from 1 to 200. It's possible that 200 is too high in some environments, but 200 is more likely to be an appropriate value
+than the previous default.
+
+
+## 27
+
+### Migration
+
+* Call Caching has been improved in this version of Cromwell, specifically the time needed to determine whether or not a job can be cached
+ has drastically decreased. To achieve that the database schema has been modified and a migration is required in order to preserve the pre-existing cached jobs.
+ This migration is relatively fast compared to previous migrations. To get an idea of the time needed, look at the size of your `CALL_CACHING_HASH_ENTRY` table.
+ As a benchmark, it takes 1 minute for a table with 6 million rows.
+ The migration will only be executed on MySQL. Other databases will lose their previous cached jobs.
+ In order to run properly on MySQL, **the following flag needs to be adjusted**: https://dev.mysql.com/doc/refman/5.5/en/server-system-variables.html#sysvar_group_concat_max_len
+ The following query will give you a minimum to set the group_concat_max_len value to:
+
+ ```sql
+SELECT MAX(aggregated) as group_concat_max_len FROM
+ (
+ SELECT cche.CALL_CACHING_ENTRY_ID, SUM(LENGTH(CONCAT(cche.HASH_KEY, cche.HASH_VALUE))) AS aggregated
+ FROM CALL_CACHING_HASH_ENTRY cche
+ GROUP BY cche.CALL_CACHING_ENTRY_ID
+ ) aggregation
+ ```
+
+ Here is the SQL command to run to set the group_concat_max_len flag to the proper value:
+
+ ```sql
+SET GLOBAL group_concat_max_len = value
+ ```
+
+ Where `value` is replaced with the value you want to set it to.
+
+ Note that the migration will fail if the flag is not set properly.
+
+### Breaking Changes
+
+* The update to Slick 3.2 requires a database stanza to
+[switch](http://slick.lightbend.com/doc/3.2.0/upgrade.html#profiles-vs-drivers) from using `driver` to `profile`.
+
+```hocon
+database {
+ #driver = "slick.driver.MySQLDriver$" #old
+ profile = "slick.jdbc.MySQLProfile$" #new
+ db {
+ driver = "com.mysql.jdbc.Driver"
+ url = "jdbc:mysql://host/cromwell?rewriteBatchedStatements=true"
+ user = "user"
+ password = "pass"
+ connectionTimeout = 5000
+ }
+}
+```
+
+### Call Caching
+
+Cromwell now supports call caching with floating Docker tags (e.g. `docker: "ubuntu:latest"`). Note it is still considered
+a best practice to specify Docker images as hashes where possible, especially for production usages.
+
+Within a single workflow Cromwell will attempt to resolve all floating tags to the same Docker hash, even if Cromwell is restarted
+during the execution of a workflow. In call metadata the `docker` runtime attribute is now the same as the
+value that actually appeared in the WDL:
+
+```
+ "runtimeAttributes": {
+ "docker": "ubuntu:latest",
+ "failOnStderr": "false",
+ "continueOnReturnCode": "0"
+ }
+```
+
+Previous versions of Cromwell rewrote the `docker` value to the hash of the Docker image.
+
+There is a new call-level metadata value `dockerImageUsed` which captures the hash of the Docker image actually used to
+run the call:
+
+```
+ "dockerImageUsed": "library/ubuntu@sha256:382452f82a8bbd34443b2c727650af46aced0f94a44463c62a9848133ecb1aa8"
+```
+
+### Docker
+
+* The Docker section of the configuration has been slightly reworked.
+An option to specify how a Docker hash should be looked up has been added. Two methods are available.
+ "local" will try to look for the image on the machine where cromwell is running. If it can't be found, Cromwell will try to `pull` the image and use the hash from the retrieved image.
+ "remote" will try to look up the image hash directly on the remote repository where the image is located (Docker Hub and GCR are supported)
+Note that the "local" option will require docker to be installed on the machine running cromwell, in order for it to call the docker CLI.
+* Adds hash lookup support for public [quay.io](https://quay.io/) images.
+
+### WDL Feature Support
+* Added support for the new WDL `basename` function. Allows WDL authors to get just the file name from a File (i.e. removing the directory path)
+* Allows coercion of `Map` objects into `Array`s of `Pair`s. This also allows WDL authors to directly scatter over WDL `Map`s.
+
+### Miscellaneous
+* Adds support for JSON file format for google service account credentials. As of Cromwell 27, PEM credentials for PAPI are deprecated and support might be removed in a future version.
+
+```
+google {
+
+ application-name = "cromwell"
+
+ auths = [
+ {
+ name = "service-account"
+ scheme = "service_account"
+ json-file = "/path/to/file.json"
+ }
+ ]
+}
+```
+
+### General Changes
+
+* The `/query` endpoint now supports querying by `label`. See the [README](README.md#get-apiworkflowsversionquery) for more information.
+* The `read_X` standard library functions limit accepted filesizes. These differ by type, e.g. read_bool has a smaller limit than read_string. See reference.conf for default settings.
+
+## 26
+
+### Breaking Changes
+
+* Failure metadata for calls and workflows was being displayed inconsistently, with different formats depending on the originating Cromwell version. Failures will now always present as an array of JSON objects each representing a failure. Each failure will have a message and a causedBy field. The causedBy field will be an array of similar failure objects. An example is given below:
+```
+failures: [{
+ message: "failure1",
+ causedBy: [{
+ message: "cause1",
+ causedBy: []
+ }, {
+ message: "cause2",
+ causedBy: []
+ }]
+ }, {
+ message: "failure2",
+ causedBy: []
+}]
+```
+
+### Additional Upgrade Time
+
+* Upgrading to Cromwell 26 will take additional time due to the migration of failure metadata. Cromwell will automatically run a database query during the upgrade which appears to be roughly linear to the number of rows in the METADATA_ENTRY table. You can estimate upgrade time using the following equation: `time to migrate (in seconds) ~= (rows in METADATA_ENTRY) / 65000` Note that due to differences in hardware and database speed, this is only a rough estimate.
+
+### Config Changes
+
+* Added a configuration option under `system.io` to throttle the number of I/O queries that Cromwell makes, as well as configure retry parameters.
+ This is mostly useful for the JES backend and should be updated to match the GCS quota available for the project.
+
+```
+system.io {
+ # Global Throttling - This is mostly useful for GCS and can be adjusted to match
+ # the quota available on the GCS API
+ number-of-requests = 100000
+ per = 100 seconds
+
+ # Number of times an I/O operation should be attempted before giving up and failing it.
+ number-of-attempts = 5
+}
+```
+
+## 25
+
+### External Contributors
+* A special thank you to @adamstruck, @antonkulaga and @delocalizer for their contributions to Cromwell.
+### Breaking Changes
+
+* Metadata keys for call caching are changed. All call caching keys are now in a `callCaching` stanza. `Call cache read result` has moved here and is now `result`. The `allowResultReuse` and `effectiveCallCachingMode` have moved here. The `hit` boolean is a simple indication of whether or not it was a hit, with no additional information. An example using the new format is:
+```
+"callCaching": {
+ "hit": false,
+ "effectiveCallCachingMode": "ReadAndWriteCache",
+ "result": "Cache Miss",
+ "allowResultReuse": true
+}
+```
+
+### Config Changes
+
+* Added a field `insert-batch-size` to the `database` stanza which defines how many values from a batch insert will be processed at a time. This value defaults to 2000.
+* Moved the config value `services.MetadataService.metadata-summary-refresh-interval` to `services.MetadataService.config.metadata-summary-refresh-interval`
+* Added ability to override the default zone(s) used by JES via the config structure by setting `genomics.default-zones` in the JES configuration
+* The cromwell server TCP binding timeout is now configurable via the config key `webservice.binding-timeout`, defaulted
+ to the previous value `5s` (five seconds) via the reference.conf.
+* For MySQL users, a massive scalability improvement via batched DB writing of internal metadata events. Note that one must add `rewriteBatchedStatements=true` to their JDBC URL in their config in order to take advantage of this
+
+### General Changes
+
+* Cromwell's WDL parser now recognizes empty array literals correctly, e.g. `Array[String] emptyArray = []`.
+* Cromwell now applies default labels automatically to JES pipeline runs.
+* Added support for new WDL functions:
+ * `length: (Array[X]) => Integer` - report the length of the specified array
+ * `prefix: (String, Array[X]) => Array[String]` - generate an array consisting of each element of the input array prefixed
+ by a specified `String`. The input array can have elements of any primitive type, the return array will always have
+ type `Array[String]`.
+ * `defined: (Any) => Boolean` - Will return false if the provided value is an optional that is not defined. Returns true in all other cases.
+* Cromwell's Config (Shared Filesystem) backend now supports invocation of commands which run in a Docker image as a non-root user.
+ The non-root user could either be the default user for a given Docker image (e.g. specified in a Dockerfile via a `USER` directive),
+ or the Config backend could pass an optional `"-u username"` as part of the `submit-docker` command.
+* In some cases the SFS backend, used for Local, SGE, etc., coerced `WdlFile` to `WdlString` by using `.toUri`. This
+resulted in strings prepended with `file:///path/to/file`. Now absolute file paths will not contain the uri scheme.
+* Launch jobs on servers that support the GA4GH Task Execution Schema using the TES backend.
+* **Call caching: Cromwell will no longer try to use the cache for WDL tasks that contain a floating docker tag.**
+ Call caching will still behave the same for tasks having a docker image with a specific hash.
+ See https://github.com/broadinstitute/cromwell#call-caching-docker-tags for more details.
+* Added docker hash lookup. Cromwell will try to lookup the hash for a docker image with a floating tag, and use that hash when executing the job.
+ This will be reflected in the metadata where the docker runtime attribute will contain the hash that was used.
+ If Cromwell is unable to lookup the docker hash, the job will be run with the original user defined floating tag.
+ Cromwell is currently able to lookup public and private docker hashes for images on Docker Hub and Google Container Engine for jobs running on the JES backend.
+ For other backends, cromwell is able to lookup public docker hashes for Docker Hub and Google Container Engine.
+ See https://github.com/broadinstitute/cromwell#call-caching-docker-tags for more details.
+
+### Database schema changes
+* Added CUSTOM_LABELS as a field of WORKFLOW_STORE_ENTRY, to store workflow store entries.
+
+## 24
+
+* When emitting workflow outputs to the Cromwell log only the first 1000 characters per output will be printed
+* Added support for conditional (`if`) statements.
+* Globs for Shared File System (SFS) backends, such as local or SGE, now use bash globbing instead of Java globbing, consistent with the JES backend.
+
+## 23
+
+* The `meta` and `parameter_meta` blocks are now valid within `workflow` blocks, not just `task`
+* The JES backend configuration now has an option `genomics-api-queries-per-100-seconds` to help tune the rate of batch polling against the JES servers. Users with quotas larger than default should make sure to set this value.
+* Added an option `call-caching.invalidate-bad-cache-results` (default: `true`). If true, Cromwell will invalidate cached results which have failed to copy as part of a cache hit.
+* Timing diagrams and metadata now receive more fine grained workflow states between submission and Running.
+* Support for the Pair WDL type (e.g. `Pair[Int, File] floo = (3, "gs://blar/blaz/qlux.txt")`)
+* Added support for new WDL functions:
+ * `zip: (Array[X], Array[Y]) => Array[Pair[X, Y]]` - align items in the two arrays by index and return them as WDL pairs
+ * `cross: (Array[X], Array[Y]) => Array[Pair[X, Y]]` - create every possible pair from the two input arrays and return them all as WDL pairs
+ * `transpose: (Array[Array[X]]) => Array[Array[X]]` compute the matrix transpose for a 2D array. Assumes each inner array has the same length.
+* By default, `system.abort-jobs-on-terminate` is false when running `java -jar cromwell.jar server`, and true when running `java -jar cromwell.jar run `.
+* Enable WDL imports when running in Single Workflow Runner Mode.
+* Both batch and non-batch REST workflow submissions now require a multipart/form-data encoded body.
+* Support for sub workflows (see [Annex A](#annex-a---workflow-outputs))
+* Enable WDL imports when running in Single Workflow Runner Mode as well as Server Mode
+* Support for WDL imports through an additional imports.zip parameter
+* Support for sub workflows
+* Corrected file globbing in JES to correctly report all generated files. Additionally, file globbing in JES now uses bash-style glob syntax instead of python style glob syntax
+* Support declarations as graph nodes
+* Added the ability to override the default service account that the compute VM is started with via the configuration option `JES.config.genomics.compute-service-account` or through the workflow options parameter `google_compute_service_account`. More details can be found in the README.md
+* Fix bugs related to the behavior of Cromwell in Single Workflow Runner Mode. Cromwell will now exit once a workflow completes in Single Workflow Runner Mode. Additionally, when restarting Cromwell in Single Workflow Runner Mode, Cromwell will no longer restart incomplete workflows from a previous session.
+
+### Annex A - Workflow outputs
+
+The WDL specification has changed regarding [workflow outputs](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#outputs) to accommodate sub workflows.
+This change is backward compatible in terms of runnable WDLs (WDL files using the deprecated workflow outputs syntax will still run the same).
+The only visible change lies in the metadata (as well as the console output in single workflow mode, when workflow outputs are printed out at the end of a successful workflow).
+
+TL;DR Unless you are parsing or manipulating the "key" by which workflow outputs are referenced in the metadata (and/or the console output for single workflow mode), you can skip the following explanation.
+
+*Metadata Response*
+```
+{
+ ...
+ outputs {
+ "task_output_1": "hello",
+ "task_output_2": "world"
+ ^
+ If you don't manipulate this part of the metadata, then skip this section
+ }
+}
+```
+
+In order to maintain backward compatibility, workflow outputs expressed with the deprecated syntax are "expanded" to the new syntax. Here is an example:
+
+```
+task t {
+ command {
+ #do something
+ }
+ output {
+ String out1 = "hello"
+ String out2 = "world"
+ }
+}
+```
+
+```
+ workflow old_syntax {
+ call t
+ output {
+ t.*
+ }
+ }
+```
+
+```
+ workflow new_syntax {
+ call t
+ output {
+ String wf_out1 = t.out1
+ String wf_out2 = t.out2
+ }
+ }
+```
+
+The new syntax allows for type checking of the outputs as well as expressions. It also allows for explicitly naming the outputs.
+The old syntax doesn't give the ability to name workflow outputs. For consistency reasons, Cromwell will generate a "new syntax" workflow output for each task output, and name them.
+Their name will be generated using their FQN, which would give
+
+```
+output {
+ String w.t.out1 = t.out1
+ String w.t.out2 = t.out2
+}
+```
+
+However as the FQN separator is `.`, the name itself cannot contain any `.`.
+For that reason, `.` are replaced with `_` :
+
+*Old syntax expanded to new syntax*
+```
+output {
+ String w_t_out1 = t.out1
+ String w_t_out2 = t.out2
+}
+```
+
+The consequence is that the workflow outputs section of the metadata for `old_syntax` would previously look like
+
+ ```
+ outputs {
+ "w.t.out1": "hello",
+ "w.t.out2": "world"
+ }
+ ```
+
+but it will now look like
+
+```
+ outputs {
+ "w_t_out1": "hello",
+ "w_t_out2": "world"
+ }
+```
+
+The same applies for the console output of a workflow run in single workflow mode.
+
+
+## 0.22
+
+* Improved retries for Call Caching and general bug fixes.
+* Users will experience better scalability of status polling for Google JES.
+* Now there are configurable caching strategies for a SharedFileSystem backend (i.e. Local, SFS) in the backend's stanza:
+ See below for detailed descriptions of each configurable key.
+
+```
+backend {
+ ...
+ providers {
+ SFS_BackendName {
+ actor-factory = ...
+ config {
+ ...
+ filesystems {
+ local {
+ localization: [
+ ...
+ ]
+ caching {
+ duplication-strategy: [
+ "hard-link", "soft-link", "copy"
+ ]
+ # Possible values: file, path
+ # "file" will compute an md5 hash of the file content.
+ # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link",
+ # in order to allow for the original file path to be hashed.
+ hashing-strategy: "file"
+
+ # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash.
+ # If false or the md5 does not exist, will proceed with the above-defined hashing strategy.
+ check-sibling-md5: false
+ }
+```
+* Multiple Input JSON files can now be submitted in server mode through the existing submission endpoint: /api/workflows/:version.
+ This endpoint accepts a POST request with a multipart/form-data encoded body. You can now include multiple keys for workflow inputs.
+
+ Each key below can contain an optional JSON file of the workflow inputs. A skeleton file can be generated from wdltool using the "inputs" subcommand.
+ NOTE: In case of key conflicts between multiple JSON files, higher values of x in workflowInputs_x override lower values. For example, an input
+ specified in workflowInputs_3 will override an input with the same name that was given in workflowInputs or workflowInputs_2. Similarly, an input
+ specified in workflowInputs_5 will override an input with the same name in any other input file.
+
+ workflowInputs
+ workflowInputs_2
+ workflowInputs_3
+ workflowInputs_4
+ workflowInputs_5
+
+* You can now limit the number of concurrent jobs for a backend by specifying the following option in the backend's config stanza:
+```
+backend {
+ ...
+ providers {
+ BackendName {
+ actor-factory = ...
+ config {
+ concurrent-job-limit = 5
+```
+
+
+## 0.21
* Warning: Significant database updates when you switch from version 0.19 to 0.21 of Cromwell.
There may be a long wait period for the migration to finish for large databases.
@@ -71,7 +580,7 @@ task {
command {
echo "I'm private !"
}
-
+
runtime {
docker: "ubuntu:latest"
noAddress: true
@@ -94,7 +603,7 @@ passed absolute paths for input `File`s.
* Override the default database configuration by setting the keys
`database.driver`, `database.db.driver`, `database.db.url`, etc.
* Override the default database configuration by setting the keys
-`database.driver`, `database.db.driver`, `database.db.url`, etc.
+`database.driver`, `database.db.driver`, `database.db.url`, etc.
For example:
```
@@ -111,3 +620,18 @@ database {
}
```
+## 0.20
+
+* The default per-upload bytes size for GCS is now the minimum 256K
+instead of 64M. There is also an undocumented config key
+`google.upload-buffer-bytes` that allows adjusting this internal value.
+
+* Updated Docker Hub hash retriever to parse json with [custom media
+types](https://github.com/docker/distribution/blob/05b0ab0/docs/spec/manifest-v2-1.md).
+
+* Added a `/batch` submit endpoint that accepts a single wdl with
+multiple input files.
+
+* The `/query` endpoint now supports querying by `id`, and submitting
+parameters as a HTTP POST.
+
diff --git a/NOTICE b/NOTICE
deleted file mode 100644
index 15d3c2b54..000000000
--- a/NOTICE
+++ /dev/null
@@ -1,4 +0,0 @@
-cromwell.webservice/PerRequest.scala (https://github.com/NET-A-PORTER/spray-actor-per-request)
-is distributed with this software under the Apache License, Version 2.0 (see the LICENSE-ASL file). In accordance
-with that license, that software comes with the following notices:
- Copyright (C) 2011-2012 Ian Forsey
diff --git a/README.md b/README.md
index 3c5efc009..97040254e 100644
--- a/README.md
+++ b/README.md
@@ -18,37 +18,43 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management
* [Installing](#installing)
* [Upgrading from 0.19 to 0.21](#upgrading-from-019-to-021)
* [Command Line Usage](#command-line-usage)
- * [run](#run)
- * [server](#server)
* [Getting Started with WDL](#getting-started-with-wdl)
+ * [WDL Support](#wdl-support)
* [Configuring Cromwell](#configuring-cromwell)
* [Workflow Submission](#workflow-submission)
* [Database](#database)
* [SIGINT abort handler](#sigint-abort-handler)
+* [Security](#security)
* [Backends](#backends)
* [Backend Filesystems](#backend-filesystems)
* [Shared Local Filesystem](#shared-local-filesystem)
* [Google Cloud Storage Filesystem](#google-cloud-storage-filesystem)
* [Local Backend](#local-backend)
+ * [Google JES Backend](#google-jes-backend)
+ * [Configuring Google Project](#configuring-google-project)
+ * [Configuring Authentication](#configuring-authentication)
+ * [Application Default Credentials](#application-default-credentials)
+ * [Service Account](#service-account)
+ * [Refresh Token](#refresh-token)
+ * [Docker](#docker)
+ * [Monitoring](#monitoring)
+ * [GA4GH TES Backend](#ga4gh-tes-backend)
+ * [Configuring](#configuring)
+ * [Supported File Systems](#supported-file-systems)
+ * [Docker](#docker)
+ * [CPU, Memory and Disk](#cpu-memory-and-disk)
* [Sun GridEngine Backend](#sun-gridengine-backend)
* [HtCondor Backend](#htcondor-backend)
* [Caching configuration](#caching-configuration)
* [Docker](#docker)
* [CPU, Memory and Disk](#cpu-memory-and-disk)
+ * [Native Specifications](#native-specifications)
* [Spark Backend](#spark-backend)
* [Configuring Spark Project](#configuring-spark-project)
* [Configuring Spark Master and Deploy Mode](#configuring-spark-master-and-deploy-mode)
* [Spark runtime attributes](#spark-runtime-attributes)
* [Spark Environment](#spark-environment)
* [Sample Wdl](#sample-wdl)
- * [Google JES Backend](#google-jes-backend)
- * [Configuring Google Project](#configuring-google-project)
- * [Configuring Authentication](#configuring-authentication)
- * [Application Default Credentials](#application-default-credentials)
- * [Service Account](#service-account)
- * [Refresh Token](#refresh-token)
- * [Docker](#docker)
- * [Monitoring](#monitoring)
* [Runtime Attributes](#runtime-attributes)
* [Specifying Default Values](#specifying-default-values)
* [continueOnReturnCode](#continueonreturncode)
@@ -62,13 +68,24 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management
* [preemptible](#preemptible)
* [Logging](#logging)
* [Workflow Options](#workflow-options)
+* [Labels](#labels)
+ * [Custom Labels File](#custom-labels-file)
+ * [Label Format](#label-format)
* [Call Caching](#call-caching)
+ * [Configuring Call Caching](#configuring-call-caching)
+ * [Call Caching Workflow Options](#call-caching-workflow-options)
+ * [Local Filesystem Options](#local-filesystem-options)
+* [Imports](#imports)
+* [Sub Workflows](#sub-workflows)
+ * [Execution](#execution)
+ * [Metadata](#metadata)
* [REST API](#rest-api)
* [REST API Versions](#rest-api-versions)
* [POST /api/workflows/:version](#post-apiworkflowsversion)
* [POST /api/workflows/:version/batch](#post-apiworkflowsversionbatch)
* [GET /api/workflows/:version/query](#get-apiworkflowsversionquery)
* [POST /api/workflows/:version/query](#post-apiworkflowsversionquery)
+ * [PATCH /api/workflows/:version/:id/labels](#patch-apiworkflowsversionidlabels)
* [GET /api/workflows/:version/:id/status](#get-apiworkflowsversionidstatus)
* [GET /api/workflows/:version/:id/outputs](#get-apiworkflowsversionidoutputs)
* [GET /api/workflows/:version/:id/timing](#get-apiworkflowsversionidtiming)
@@ -76,7 +93,9 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management
* [GET /api/workflows/:version/:id/metadata](#get-apiworkflowsversionidmetadata)
* [POST /api/workflows/:version/:id/abort](#post-apiworkflowsversionidabort)
* [GET /api/workflows/:version/backends](#get-apiworkflowsversionbackends)
- * [GET /api/workflows/:version/stats](#get-apiworkflowsversionstats)
+ * [GET /api/workflows/:version/callcaching/diff](#get-apiworkflowsversioncallcachingdiff)
+ * [GET /engine/:version/stats](#get-engineversionstats)
+ * [GET /engine/:version/version](#get-engineversionversion)
* [Error handling](#error-handling)
* [Developer](#developer)
* [Generating table of contents on Markdown files](#generating-table-of-contents-on-markdown-files)
@@ -101,13 +120,13 @@ There is a [Cromwell gitter channel](https://gitter.im/broadinstitute/cromwell)
The following is the toolchain used for development of Cromwell. Other versions may work, but these are recommended.
-* [Scala 2.11.7](http://www.scala-lang.org/news/2.11.7/)
-* [SBT 0.13.8](https://github.com/sbt/sbt/releases/tag/v0.13.8)
+* [Scala 2.12.2](http://www.scala-lang.org/news/2.12.2)
+* [SBT 0.13.12](https://github.com/sbt/sbt/releases/tag/v0.13.12)
* [Java 8](http://www.oracle.com/technetwork/java/javase/overview/java8-2100321.html)
# Building
-`sbt assembly` will build a runnable JAR in `target/scala-2.11/`
+`sbt assembly` will build a runnable JAR in `target/scala-2.12/`
Tests are run via `sbt test`. Note that the tests do require Docker to be running. To test this out while downloading the Ubuntu image that is required for tests, run `docker pull ubuntu:latest` prior to running `sbt test`
@@ -121,128 +140,325 @@ See the [migration document](MIGRATION.md) for more details.
# Command Line Usage
-Run the JAR file with no arguments to get the usage message:
+For built-in documentation of Cromwell command line usage, run the Cromwell JAR file with no arguments:
```
-$ java -jar cromwell.jar
-java -jar cromwell.jar
-
-Actions:
-run [ [
- [