diff --git a/.gitattributes b/.gitattributes index 49c97c85f..9fcf38aa0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,6 +1,3 @@ -# This file can always be added to, line additions should never collide. -CHANGELOG.MD merge=union - # These files are text and should be normalized (Convert crlf => lf) *.scala text *.MD text diff --git a/.gitignore b/.gitignore index f9bb0a4f5..21f16ea18 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,19 @@ -.idea -log -target +# common scala config +*~ +.DS_Store .artifactory +.idea/* +!/.idea/inspectionProfiles/ +.idea/inspectionProfiles/* +!/.idea/inspectionProfiles/Project_Default.xml +target + +# custom config cromwell-executions cromwell-test-executions -cromwell-workflow-logs cromwell-test-workflow-logs +cromwell-workflow-logs local-cromwell-executions +log native +scripts/docker-compose-mysql/compose/mysql/data diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 000000000..de31e99ed --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,6 @@ + + + + diff --git a/.pullapprove.yml b/.pullapprove.yml index a1b902b1c..c26c94ba2 100644 --- a/.pullapprove.yml +++ b/.pullapprove.yml @@ -1,18 +1,28 @@ -approve_by_comment: true -approve_regex: ':\+1:' -reset_on_push: false -author_approval: ignored -reviewers: +# enabling version 2 turns github reviews on by default +version: 2 +group_defaults: + approve_by_comment: + enabled: true + approve_regex: ':\+1:' + reset_on_push: + enabled: false +groups: + reviewers: required: 2 - members: - - cjllanwarne + github_reviews: + enabled: true + author_approval: + ignored: true + users: - Horneth - - scottfrazer - - mcovarr - - geoffjentry - - kshakir + - cjllanwarne - francares - gauravs90 - - jainh + - geoffjentry + - jsotobroad + - katevoss - kcibul + - kshakir + - mcovarr - ruchim + - danbills diff --git a/.travis.yml b/.travis.yml index 5784eaf62..7b8dfaa2f 100644 --- 
a/.travis.yml +++ b/.travis.yml @@ -1,21 +1,51 @@ sudo: required dist: trusty +services: + - docker language: scala scala: - - 2.11.8 + - 2.12.2 jdk: - oraclejdk8 +cache: + # md5deep - https://github.com/travis-ci/travis-ci/issues/3122 + branch: md5deep + directories: + - $HOME/.ivy2/cache + - $HOME/.sbt/boot/ +before_cache: + # Tricks to avoid unnecessary cache updates + - find $HOME/.ivy2 -name "ivydata-*.properties" -delete + - find $HOME/.sbt -name "*.lock" -delete +before_install: + # https://github.com/travis-ci/travis-ci/issues/7940#issuecomment-310759657 + - sudo rm -f /etc/boto.cfg env: - # Setting this variable twice will cause the 'script' section to run twice with the respective env var invoked - - BUILD_TYPE=sbt - - BUILD_TYPE=centaurJes - - BUILD_TYPE=centaurLocal + global: + - CENTAUR_BRANCH=develop + - INTEGRATION_TESTS_DIR=src/main/resources/integrationTestCases + matrix: + # Setting this variable twice will cause the 'script' section to run twice with the respective env var invoked + - BUILD_TYPE=sbt + - BUILD_TYPE=checkPublish + - BUILD_TYPE=centaurJes + - BUILD_TYPE=centaurLocal + - BUILD_TYPE=centaurTes script: - src/bin/travis/test.sh after_success: - - src/bin/travis/publishSnapshot.sh + - src/bin/travis/afterSuccess.sh deploy: provider: script script: src/bin/travis/publishRelease.sh on: tags: true +notifications: + slack: + rooms: + - secure: B5KYcnhk/ujAUWlHsjzP7ROLm6MtYhaGikdYf6JYINovhMbVKnZCTlZEy7rqT3L2T5uJ25iefD500VQGk1Gn7puQ1sNq50wqjzQaj20PWEiBwoWalcV/nKBcQx1TyFT13LJv8fbFnVPxFCkC3YXoHedx8qAhDs8GH/tT5J8XOC8= + template: + - "Build <%{build_url}|#%{build_number}> (<%{compare_url}|%{commit}>) of %{repository}@%{branch} by %{author} %{result} in %{duration}" + on_success: change + on_failure: change + on_pull_requests: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ff214c55..794a378b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,22 +1,531 @@ # Cromwell Change Log -## 0.20 +## 29 -* The default per-upload bytes size for GCS is 
now the minumum 256K -instead of 64M. There is also an undocumented config key -`google.upload-buffer-bytes` that allows adjusting this internal value. +### Breaking Changes -* Updated Docker Hub hash retriever to parse json with [custom media -types](https://github.com/docker/distribution/blob/05b0ab0/docs/spec/manifest-v2-1.md). +* Request timeouts for HTTP requests on the REST API now return a 503 status code instead of 500. The response for a request timeout is no longer in JSON format. +* The metadata endpoint no longer returns gzipped responses by default. This now needs to be explicitly requested with an `Accept-Encoding: gzip` header -* Added a `/batch` submit endpoint that accepts a single wdl with -multiple input files. +* Command line usage has been extensively revised for Cromwell 29. Please see the +[README](https://github.com/broadinstitute/cromwell#command-line-usage) for details. -* The `/query` endpoint now supports querying by `id`, and submitting -parameters as a HTTP POST. +* The engine endpoints are now served under `/engine`. Previousely engine endpoints were available under +`/api/engine`. Workflow endpoints are still served under `/api/workflows`. The setting `api.routeUnwrapped` has been +retired at the same time. -## 0.21 +* The response format of the [callcaching/diff](https://github.com/broadinstitute/cromwell#get-apiworkflowsversioncallcachingdiff) endpoint has been updated. + +### Cromwell Server + +* Cromwell now attempts to gracefully shutdown when running in server mode and receiving a `SIGINT` (`Ctrl-C`) or `SIGTERM` (`kill`) signal. This includes waiting for all pending Database writes before exiting. +A detailed explanation and information about how to configure this feature can be found in the [Cromwell Wiki](https://github.com/broadinstitute/cromwell/wiki/DevZone#graceful-server-shutdown). 
+ +## 28 + +### Bug Fixes + +#### WDL write_* functions add a final newline + +The following WDL functions now add a newline after the final line of output (the previous behavior of not adding this +newline was inadvertent): +- `write_lines` +- `write_map` +- `write_object` +- `write_objects` +- `write_tsv` + +For example: + +``` +task writer { + Array[String] a = ["foo", "bar"] + command { + # used to output: "foo\nbar" + # now outputs: "foo\nbar\n" + cat write_lines(a) + } +} +``` + +#### `ContinueWhilePossible` + +A workflow utilizing the WorkflowFailureMode Workflow Option `ContinueWhilePossible` will now successfully reach a terminal state once all runnable jobs have completed. +#### `FailOnStderr` +When `FailOnStderr` is set to false, Cromwell no longer checks for the existence of a stderr file for that task. + +### WDL Functions + +#### New functions: floor, ceil and round: + +Enables the `floor`, `ceil` and `round` functions in WDL to convert floating point numbers to integers. + +For example we can now use the size of an input file to influence the amount of memory the task is given. In the example below a 500MB input file will result in a request for a VM with 2GB of memory: + +``` +task foo { + File in_file + command { ... } + runtime { + docker: "..." + memory: ceil(size(in_file)) * 4 + } +} +``` + +### Call Caching + +* Hash values calculated by Cromwell for a call when call caching is enabled are now published to the metadata. +It is published even if the call failed. However if the call is attempted multiple times (because it has been preempted for example), +since hash values are strictly identical for all attempts, they will only be published in the last attempt section of the metadata for this call. +If the hashes fail to be calculated, the reason is indicated in a `hashFailures` field in the `callCaching` section of the call metadata. +*Important*: Hashes are not retroactively published to the metadata. 
Which means only workflows run on Cromwell 28+ will have hashes in their metadata. + +See the [README](https://github.com/broadinstitute/cromwell#get-apiworkflowsversionidmetadata) for an example metadata response. + +* New endpoint returning the hash differential for 2 calls. + +`GET /api/workflows/:version/callcaching/diff` + +See the [README](https://github.com/broadinstitute/cromwell#get-apiworkflowsversioncallcachingdiff) for more details. + +### Workflow Submission + +* The workflow submission parameters `wdlSource` and `wdlDependencies` have been deprecated in favor of `workflowSource` and +`workflowDependencies` respectively. The older names are still supported in Cromwell 28 with deprecation warnings but will +be removed in a future version of Cromwell. + +### Labels +* A new `/labels` endpoint has been added to update labels for an existing workflow. See the [README](README.md#patch-apiworkflowsversionidlabels) for more information. +* Label formatting requirements have been updated, please check the [README](README.md#label-format) for more detailed documentation. + + +### JES Backend + +The JES backend now supports a `filesystems.gcs.caching.duplication-strategy` configuration entry. +It can be set to specify the desired behavior of Cromwell regarding call outputs when a call finds a hit in the cache. +The default value is `copy` which will copy all output files to the new call directory. +A second value is allowed, `reference`, that will instead point to the original output files, without copying them. + + +```hocon +filesystems { + gcs { + auth = "application-default" + + caching { + duplication-strategy = "reference" + } + } +} +``` + +A placeholder file will be placed in the execution folder of the cached call to explain the absence of output files and point to the location of the original ones. 
+ + +### Metadata Write Batching + +Metadata write batching works the same as in previous versions of Cromwell, but the default batch size has been changed from 1 to 200. It's possible that 200 is too high in some environments, but 200 is more likely to be an appropriate value +than the previous default. + + +## 27 + +### Migration + +* Call Caching has been improved in this version of Cromwell, specifically the time needed to determine whether or not a job can be cached + has drastically decreased. To achieve that the database schema has been modified and a migration is required in order to preserve the pre-existing cached jobs. + This migration is relatively fast compared to previous migrations. To get an idea of the time needed, look at the size of your `CALL_CACHING_HASH_ENTRY` table. + As a benchmark, it takes 1 minute for a table with 6 million rows. + The migration will only be executed on MySQL. Other databases will lose their previous cached jobs. + In order to run properly on MySQL, **the following flag needs to be adjusted**: https://dev.mysql.com/doc/refman/5.5/en/server-system-variables.html#sysvar_group_concat_max_len + The following query will give you a minimum to set the group_concat_max_len value to: + + ```sql +SELECT MAX(aggregated) as group_concat_max_len FROM + ( + SELECT cche.CALL_CACHING_ENTRY_ID, SUM(LENGTH(CONCAT(cche.HASH_KEY, cche.HASH_VALUE))) AS aggregated + FROM CALL_CACHING_HASH_ENTRY cche + GROUP BY cche.CALL_CACHING_ENTRY_ID + ) aggregation + ``` + + Here is the SQL command to run to set the group_concat_max_len flag to the proper value: + + ```sql +SET GLOBAL group_concat_max_len = value + ``` + + Where `value` is replaced with the value you want to set it to. + + Note that the migration will fail if the flag is not set properly. + +### Breaking Changes + +* The update to Slick 3.2 requires a database stanza to +[switch](http://slick.lightbend.com/doc/3.2.0/upgrade.html#profiles-vs-drivers) from using `driver` to `profile`. 
+ +```hocon +database { + #driver = "slick.driver.MySQLDriver$" #old + profile = "slick.jdbc.MySQLProfile$" #new + db { + driver = "com.mysql.jdbc.Driver" + url = "jdbc:mysql://host/cromwell?rewriteBatchedStatements=true" + user = "user" + password = "pass" + connectionTimeout = 5000 + } +} +``` + +### Call Caching + +Cromwell now supports call caching with floating Docker tags (e.g. `docker: "ubuntu:latest"`). Note it is still considered +a best practice to specify Docker images as hashes where possible, especially for production usages. + +Within a single workflow Cromwell will attempt to resolve all floating tags to the same Docker hash, even if Cromwell is restarted +during the execution of a workflow. In call metadata the `docker` runtime attribute is now the same as the +value that actually appeared in the WDL: + +``` + "runtimeAttributes": { + "docker": "ubuntu:latest", + "failOnStderr": "false", + "continueOnReturnCode": "0" + } +``` + +Previous versions of Cromwell rewrote the `docker` value to the hash of the Docker image. + +There is a new call-level metadata value `dockerImageUsed` which captures the hash of the Docker image actually used to +run the call: + +``` + "dockerImageUsed": "library/ubuntu@sha256:382452f82a8bbd34443b2c727650af46aced0f94a44463c62a9848133ecb1aa8" +``` + +### Docker + +* The Docker section of the configuration has been slightly reworked +An option to specify how a Docker hash should be looked up has been added. Two methods are available. + "local" will try to look for the image on the machine where cromwell is running. If it can't be found, Cromwell will try to `pull` the image and use the hash from the retrieved image. + "remote" will try to look up the image hash directly on the remote repository where the image is located (Docker Hub and GCR are supported) +Note that the "local" option will require docker to be installed on the machine running cromwell, in order for it to call the docker CLI. 
+* Adds hash lookup support for public [quay.io](https://quay.io/) images. + +### WDL Feature Support +* Added support for the new WDL `basename` function. Allows WDL authors to get just the file name from a File (i.e. removing the directory path) +* Allows coercion of `Map` objects into `Array`s of `Pair`s. This also allows WDL authors to directly scatter over WDL `Map`s. + +### Miscellaneous +* Adds support for JSON file format for google service account credentials. As of Cromwell 27, PEM credentials for PAPI are deprecated and support might be removed in a future version. + +``` +google { + + application-name = "cromwell" + + auths = [ + { + name = "service-account" + scheme = "service_account" + json-file = "/path/to/file.json" + } + ] +} +``` + +### General Changes + +* The `/query` endpoint now supports querying by `label`. See the [README](README.md#get-apiworkflowsversionquery) for more information. +* The `read_X` standard library functions limit accepted filesizes. These differ by type, e.g. read_bool has a smaller limit than read_string. See reference.conf for default settings. + +## 26 + +### Breaking Changes + +* Failure metadata for calls and workflows was being displayed inconsistently, with different formats depending on the originating Cromwell version. Failures will now always present as an array of JSON objects each representing a failure. Each failure will have a message and a causedBy field. The causedBy field will be an array of similar failure objects. An example is given below: +``` +failures: [{ + message: "failure1", + causedBy: [{ + message: "cause1", + causedBy: [] + }, { + message: "cause2", + causedBy: [] + }] + }, { + message: "failure2", + causedBy: [] +}] +``` + +### Additional Upgrade Time + +* Upgrading to Cromwell 26 will take additional time due to the migration of failure metadata. 
Cromwell will automatically run a database query during the upgrade which appears to be roughly linear to the number of rows in the METADATA_ENTRY table. You can estimate upgrade time using the following equation: `time to migrate (in seconds) ~= (rows in METADATA_ENTRY) / 65000` Note that due to differences in hardware and database speed, this is only a rough estimate. + +### Config Changes + +* Added a configuration option under `system.io` to throttle the number of I/O queries that Cromwell makes, as well as configure retry parameters. + This is mostly useful for the JES backend and should be updated to match the GCS quota available for the project. + +``` +system.io { + # Global Throttling - This is mostly useful for GCS and can be adjusted to match + # the quota availble on the GCS API + number-of-requests = 100000 + per = 100 seconds + + # Number of times an I/O operation should be attempted before giving up and failing it. + number-of-attempts = 5 +} +``` + +## 25 + +### External Contributors +* A special thank you to @adamstruck, @antonkulaga and @delocalizer for their contributions to Cromwell. +### Breaking Changes + +* Metadata keys for call caching are changed. All call caching keys are now in a `callCaching` stanza. `Call cache read result` has moved here and is now `result`. The `allowResultReuse` and `effectiveCallCachingMode` have moved here. The `hit` boolean is a simple indication of whether or not it was a hit, with no additional information. An example using the new format is: +``` +"callCaching": { + "hit": false, + "effectiveCallCachingMode": "ReadAndWriteCache", + "result": "Cache Miss", + "allowResultReuse": true +} +``` + +### Config Changes + +* Added a field `insert-batch-size` to the `database` stanza which defines how many values from a batch insert will be processed at a time. This value defaults to 2000. 
+* Moved the config value `services.MetadataService.metadata-summary-refresh-interval` to `services.MetadataService.config.metadata-summary-refresh-interval` +* Added ability to override the default zone(s) used by JES via the config structure by setting `genomics.default-zones` in the JES configuration +* The cromwell server TCP binding timeout is now configurable via the config key `webservice.binding-timeout`, defaulted + to the previous value `5s` (five seconds) via the reference.conf. +* For MySQL users, a massive scalability improvement via batched DB writing of internal metadata events. Note that one must add `rewriteBatchedStatements=true` to their JDBC URL in their config in order to take advantage of this + +### General Changes + +* Cromwell's WDL parser now recognizes empty array literals correctly, e.g. `Array[String] emptyArray = []`. +* Cromwell now applies default labels automatically to JES pipeline runs. +* Added support for new WDL functions: + * `length: (Array[X]) => Integer` - report the length of the specified array + * `prefix: (String, Array[X]) => Array[String]` - generate an array consisting of each element of the input array prefixed + by a specified `String`. The input array can have elements of any primitive type, the return array will always have + type `Array[String]`. + * `defined: (Any) => Boolean` - Will return false if the provided value is an optional that is not defined. Returns true in all other cases. +* Cromwell's Config (Shared Filesystem) backend now supports invocation of commands which run in a Docker image as a non-root user. + The non-root user could either be the default user for a given Docker image (e.g. specified in a Dockerfile via a `USER` directive), + or the Config backend could pass an optional `"-u username"` as part of the `submit-docker` command. +* In some cases the SFS backend, used for Local, SGE, etc., coerced `WdlFile` to `WdlString` by using `.toUri`. 
This +resulted in strings prepended with `file:///path/to/file`. Now absolute file paths will not contain the uri scheme. +* Launch jobs on servers that support the GA4GH Task Execution Schema using the TES backend. +* **Call caching: Cromwell will no longer try to use the cache for WDL tasks that contain a floating docker tag.** + Call caching will still behave the same for tasks having a docker image with a specific hash. + See https://github.com/broadinstitute/cromwell#call-caching-docker-tags for more details. +* Added docker hash lookup. Cromwell will try to lookup the hash for a docker image with a floating tag, and use that hash when executing the job. + This will be reflected in the metadata where the docker runtime attribute will contains the hash that was used. + If Cromwell is unable to lookup the docker hash, the job will be run with the original user defined floating tag. + Cromwell is currently able to lookup public and private docker hashes for images on Docker Hub and Google Container Engine for job running on the JES backend. + For other backends, cromwell is able to lookup public docker hashes for Docker Hub and Google Container Engine. + See https://github.com/broadinstitute/cromwell#call-caching-docker-tags for more details. + +### Database schema changes +* Added CUSTOM_LABELS as a field of WORKFLOW_STORE_ENTRY, to store workflow store entries. + +## 24 + +* When emitting workflow outputs to the Cromwell log only the first 1000 characters per output will be printed +* Added support for conditional (`if`) statements. +* Globs for Shared File System (SFS) backends, such as local or SGE, now use bash globbing instead of Java globbing, consistent with the JES backend. + +## 23 + +* The `meta` and `parameter_meta` blocks are now valid within `workflow` blocks, not just `task` +* The JES backend configuration now has an option `genomics-api-queries-per-100-seconds` to help tune the rate of batch polling against the JES servers. 
Users with quotas larger than default should make sure to set this value. +* Added an option `call-caching.invalidate-bad-cache-results` (default: `true`). If true, Cromwell will invalidate cached results which have failed to copy as part of a cache hit. +* Timing diagrams and metadata now receive more fine grained workflow states between submission and Running. +* Support for the Pair WDL type (e.g. `Pair[Int, File] floo = (3, "gs://blar/blaz/qlux.txt")`) +* Added support for new WDL functions: + * `zip: (Array[X], Array[Y]) => Array[Pair[X, Y]]` - align items in the two arrays by index and return them as WDL pairs + * `cross: (Array[X], Array[Y]) => Array[Pair[X, Y]]` - create every possible pair from the two input arrays and return them all as WDL pairs + * `transpose: (Array[Array[X]]) => Array[Array[X]]` compute the matrix transpose for a 2D array. Assumes each inner array has the same length. +* By default, `system.abort-jobs-on-terminate` is false when running `java -jar cromwell.jar server`, and true when running `java -jar cromwell.jar run `. +* Enable WDL imports when running in Single Workflow Runner Mode. +* Both batch and non-batch REST workflow submissions now require a multipart/form-data encoded body. +* Support for sub workflows (see [Annex A](#annex-a---workflow-outputs)) +* Enable WDL imports when running in Single Workflow Runner Mode as well as Server Mode +* Support for WDL imports through an additional imports.zip parameter +* Support for sub workflows +* Corrected file globbing in JES to correctly report all generated files. Additionally, file globbing in JES now uses bash-style glob syntax instead of python style glob syntax +* Support declarations as graph nodes +* Added the ability to override the default service account that the compute VM is started with via the configuration option `JES.config.genomics.compute-service-account` or through the workflow options parameter `google_compute_service_account`. 
More details can be found in the README.md +* Fix bugs related to the behavior of Cromwell in Single Workflow Runner Mode. Cromwell will now exit once a workflow completes in Single Workflow Runner Mode. Additionally, when restarting Cromwell in Single Workflow Runner Mode, Cromwell will no longer restart incomplete workflows from a previous session. + +### Annex A - Workflow outputs + +The WDL specification has changed regarding [workflow outputs](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#outputs) to accommodate sub workflows. +This change is backward compatible in terms of runnable WDLs (WDL files using the deprecated workflow outputs syntax will still run the same). +The only visible change lies in the metadata (as well as the console output in single workflow mode, when workflow outputs are printed out at the end of a successful workflow). + +TL;DR Unless you are parsing or manipulating the "key" by which workflow outputs are referenced in the metadata (and/or the console output for single workflow mode), you can skip the following explanation. + +*Metadata Response* +``` +{ + ... + outputs { + "task_output_1": "hello", + "task_output_2": "world" + ^ + If you don't manipulate this part of the metadata, then skip this section + } +} +``` + +In order to maintain backward compatibility, workflow outputs expressed with the deprecated syntax are "expanded" to the new syntax. Here is an example: + +``` +task t { + command { + #do something + } + output { + String out1 = "hello" + String out2 = "world" + } +} +``` + +``` + workflow old_syntax { + call t + output { + t.* + } + } +``` + +``` + workflow new_syntax { + call t + output { + String wf_out1 = t.out1 + String wf_out2 = t.out2 + } + } +``` + +The new syntax allows for type checking of the outputs as well as expressions. It also allows for explicitly naming to the outputs. +The old syntax doesn't give the ability to name workflow outputs. 
For consistency reasons, Cromwell will generate a "new syntax" workflow output for each task output, and name them. +Their name will be generated using their FQN, which would give + +``` +output { + String w.t.out1 = t.out1 + String w.t.out2 = t.out2 +} +``` + +However as the FQN separator is `.`, the name itself cannot contain any `.`. +For that reason, `.` are replaced with `_` : + +*Old syntax expanded to new syntax* +``` +output { + String w_t_out1 = t.out1 + String w_t_out2 = t.out2 +} +``` + +The consequence is that the workflow outputs section of the metadata for `old_syntax` would previously look like + + ``` + outputs { + "w.t.out1": "hello", + "w.t.out2": "hello" + } + ``` + +but it will now look like + +``` + outputs { + "w_t_out1": "hello", + "w_t_out2": "hello" + } +``` + +The same applies for the console output of a workflow run in single workflow mode. + + +## 0.22 + +* Improved retries for Call Caching and general bug fixes. +* Users will experience better scalability of status polling for Google JES. +* Now there are configurable caching strategies for a SharedFileSystem backend (i.e. Local, SFS) in the backend's stanza: + See below for detailed descriptions of each configurable key. + +``` +backend { + ... + providers { + SFS_BackendName { + actor-factory = ... + config { + ... + filesystems { + local { + localization: [ + ... + ] + caching { + duplication-strategy: [ + "hard-link", "soft-link", "copy" + ] + # Possible values: file, path + # "file" will compute an md5 hash of the file content. + # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link", + # in order to allow for the original file path to be hashed. + hashing-strategy: "file" + + # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash. 
+ # If false or the md5 does not exist, will proceed with the above-defined hashing strategy. + check-sibling-md5: false + } +``` +* Multiple Input JSON files can now be submitted in server mode through the existing submission endpoint: /api/workflows/:version. + This endpoint accepts a POST request with a multipart/form-data encoded body. You can now include multiple keys for workflow inputs. + + Each key below can contain an optional JSON file of the workflow inputs. A skeleton file can be generated from wdltool using the "inputs" subcommand. + NOTE: In case of key conflicts between multiple JSON files, higher values of x in workflowInputs_x override lower values. For example, an input + specified in workflowInputs_3 will override an input with the same name that was given in workflowInputs or workflowInputs_2. Similarly, an input + specified in workflowInputs_5 will override an input with the same name in any other input file. + + workflowInputs + workflowInputs_2 + workflowInputs_3 + workflowInputs_4 + workflowInputs_5 + +* You can now limit the number of concurrent jobs for a backend by specifying the following option in the backend's config stanza: +``` +backend { + ... + providers { + BackendName { + actor-factory = ... + config { + concurrent-job-limit = 5 +``` + + +## 0.21 * Warning: Significant database updates when you switch from version 0.19 to 0.21 of Cromwell. There may be a long wait period for the migration to finish for large databases. @@ -71,7 +580,7 @@ task { command { echo "I'm private !" } - + runtime { docker: "ubuntu:latest" noAddress: true @@ -94,7 +603,7 @@ passed absolute paths for input `File`s. * Override the default database configuration by setting the keys `database.driver`, `database.db.driver`, `database.db.url`, etc. * Override the default database configuration by setting the keys -`database.driver`, `database.db.driver`, `database.db.url`, etc. +`database.driver`, `database.db.driver`, `database.db.url`, etc. 
For example: ``` @@ -111,3 +620,18 @@ database { } ``` +## 0.20 + +* The default per-upload bytes size for GCS is now the minimum 256K +instead of 64M. There is also an undocumented config key +`google.upload-buffer-bytes` that allows adjusting this internal value. + +* Updated Docker Hub hash retriever to parse json with [custom media +types](https://github.com/docker/distribution/blob/05b0ab0/docs/spec/manifest-v2-1.md). + +* Added a `/batch` submit endpoint that accepts a single wdl with +multiple input files. + +* The `/query` endpoint now supports querying by `id`, and submitting +parameters as a HTTP POST. + diff --git a/NOTICE b/NOTICE deleted file mode 100644 index 15d3c2b54..000000000 --- a/NOTICE +++ /dev/null @@ -1,4 +0,0 @@ -cromwell.webservice/PerRequest.scala (https://github.com/NET-A-PORTER/spray-actor-per-request) -is distributed with this software under the Apache License, Version 2.0 (see the LICENSE-ASL file). In accordance -with that license, that software comes with the following notices: -    Copyright (C) 2011-2012 Ian Forsey diff --git a/README.md b/README.md index 3c5efc009..97040254e 100644 --- a/README.md +++ b/README.md @@ -18,37 +18,43 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [Installing](#installing) * [Upgrading from 0.19 to 0.21](#upgrading-from-019-to-021) * [Command Line Usage](#command-line-usage) - * [run](#run) - * [server](#server) * [Getting Started with WDL](#getting-started-with-wdl) + * [WDL Support](#wdl-support) * [Configuring Cromwell](#configuring-cromwell) * [Workflow Submission](#workflow-submission) * [Database](#database) * [SIGINT abort handler](#sigint-abort-handler) +* [Security](#security) * [Backends](#backends) * [Backend Filesystems](#backend-filesystems) * [Shared Local Filesystem](#shared-local-filesystem) * [Google Cloud Storage Filesystem](#google-cloud-storage-filesystem) * [Local Backend](#local-backend) + * [Google JES Backend](#google-jes-backend) + * 
[Configuring Google Project](#configuring-google-project) + * [Configuring Authentication](#configuring-authentication) + * [Application Default Credentials](#application-default-credentials) + * [Service Account](#service-account) + * [Refresh Token](#refresh-token) + * [Docker](#docker) + * [Monitoring](#monitoring) + * [GA4GH TES Backend](#ga4gh-tes-backend) + * [Configuring](#configuring) + * [Supported File Systems](#supported-file-systems) + * [Docker](#docker) + * [CPU, Memory and Disk](#cpu-memory-and-disk) * [Sun GridEngine Backend](#sun-gridengine-backend) * [HtCondor Backend](#htcondor-backend) * [Caching configuration](#caching-configuration) * [Docker](#docker) * [CPU, Memory and Disk](#cpu-memory-and-disk) + * [Native Specifications](#native-specifications) * [Spark Backend](#spark-backend) * [Configuring Spark Project](#configuring-spark-project) * [Configuring Spark Master and Deploy Mode](#configuring-spark-master-and-deploy-mode) * [Spark runtime attributes](#spark-runtime-attributes) * [Spark Environment](#spark-environment) * [Sample Wdl](#sample-wdl) - * [Google JES Backend](#google-jes-backend) - * [Configuring Google Project](#configuring-google-project) - * [Configuring Authentication](#configuring-authentication) - * [Application Default Credentials](#application-default-credentials) - * [Service Account](#service-account) - * [Refresh Token](#refresh-token) - * [Docker](#docker) - * [Monitoring](#monitoring) * [Runtime Attributes](#runtime-attributes) * [Specifying Default Values](#specifying-default-values) * [continueOnReturnCode](#continueonreturncode) @@ -62,13 +68,24 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [preemptible](#preemptible) * [Logging](#logging) * [Workflow Options](#workflow-options) +* [Labels](#labels) + * [Custom Labels File](#custom-labels-file) + * [Label Format](#label-format) * [Call Caching](#call-caching) + * [Configuring Call Caching](#configuring-call-caching) + * 
[Call Caching Workflow Options](#call-caching-workflow-options) + * [Local Filesystem Options](#local-filesystem-options) +* [Imports](#imports) +* [Sub Workflows](#sub-workflows) + * [Execution](#execution) + * [Metadata](#metadata) * [REST API](#rest-api) * [REST API Versions](#rest-api-versions) * [POST /api/workflows/:version](#post-apiworkflowsversion) * [POST /api/workflows/:version/batch](#post-apiworkflowsversionbatch) * [GET /api/workflows/:version/query](#get-apiworkflowsversionquery) * [POST /api/workflows/:version/query](#post-apiworkflowsversionquery) + * [PATCH /api/workflows/:version/:id/labels](#patch-apiworkflowsversionidlabels) * [GET /api/workflows/:version/:id/status](#get-apiworkflowsversionidstatus) * [GET /api/workflows/:version/:id/outputs](#get-apiworkflowsversionidoutputs) * [GET /api/workflows/:version/:id/timing](#get-apiworkflowsversionidtiming) @@ -76,7 +93,9 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [GET /api/workflows/:version/:id/metadata](#get-apiworkflowsversionidmetadata) * [POST /api/workflows/:version/:id/abort](#post-apiworkflowsversionidabort) * [GET /api/workflows/:version/backends](#get-apiworkflowsversionbackends) - * [GET /api/workflows/:version/stats](#get-apiworkflowsversionstats) + * [GET /api/workflows/:version/callcaching/diff](#get-apiworkflowsversioncallcachingdiff) + * [GET /engine/:version/stats](#get-engineversionstats) + * [GET /engine/:version/version](#get-engineversionversion) * [Error handling](#error-handling) * [Developer](#developer) * [Generating table of contents on Markdown files](#generating-table-of-contents-on-markdown-files) @@ -101,13 +120,13 @@ There is a [Cromwell gitter channel](https://gitter.im/broadinstitute/cromwell) The following is the toolchain used for development of Cromwell. Other versions may work, but these are recommended. 
-* [Scala 2.11.7](http://www.scala-lang.org/news/2.11.7/) -* [SBT 0.13.8](https://github.com/sbt/sbt/releases/tag/v0.13.8) +* [Scala 2.12.2](http://www.scala-lang.org/news/2.12.1#scala-212-notes) +* [SBT 0.13.12](https://github.com/sbt/sbt/releases/tag/v0.13.12) * [Java 8](http://www.oracle.com/technetwork/java/javase/overview/java8-2100321.html) # Building -`sbt assembly` will build a runnable JAR in `target/scala-2.11/` +`sbt assembly` will build a runnable JAR in `target/scala-2.12/` Tests are run via `sbt test`. Note that the tests do require Docker to be running. To test this out while downloading the Ubuntu image that is required for tests, run `docker pull ubuntu:latest` prior to running `sbt test` @@ -121,128 +140,325 @@ See the [migration document](MIGRATION.md) for more details. # Command Line Usage -Run the JAR file with no arguments to get the usage message: +For built-in documentation of Cromwell command line usage, run the Cromwell JAR file with no arguments: ``` -$ java -jar cromwell.jar -java -jar cromwell.jar - -Actions: -run [ [ - []]] +$ java -jar cromwell-.jar +``` - Given a WDL file and JSON file containing the value of the - workflow inputs, this will run the workflow locally and - print out the outputs in JSON format. The workflow - options file specifies some runtime configuration for the - workflow (see README for details). The workflow metadata - output is an optional file path to output the metadata. - Use a single dash ("-") to skip optional files. Ex: - run noinputs.wdl - - metadata.json +For example, `$ java -jar cromwell-29.jar`. You will get a usage message like the following: -server +``` +cromwell 29 +Usage: java -jar /path/to/cromwell.jar [server|run] [options] ... - Starts a web server on port 8000. See the web server - documentation for more details about the API endpoints. + --help Cromwell - Workflow Execution Engine + --version +Command: server +Starts a web server on port 8000. 
See the web server documentation for more details about the API endpoints. +Command: run [options] workflow-source +Run the workflow and print out the outputs in JSON format. + workflow-source Workflow source file. + -i, --inputs Workflow inputs file. + -o, --options Workflow options file. + -t, --type Workflow type. + -v, --type-version + Workflow type version. + -l, --labels Workflow labels file. + -p, --imports A directory or zipfile to search for workflow imports. + -m, --metadata-output + An optional directory path to output metadata. ``` +## --version + +The `--version` option prints the version of Cromwell and exits. + +## --help + +The `--help` option prints the full help text above and exits. + +## server + +The `server` command runs Cromwell as a web server. No arguments are accepted. +See the documentation for Cromwell's REST endpoints [here](#rest-api). + ## run -Given a WDL file and a JSON inputs file (see `inputs` subcommand), Run the workflow and print the outputs: +The `run` command executes a single workflow in Cromwell. -``` -$ java -jar cromwell.jar run 3step.wdl inputs.json -... play-by-play output ... -{ - "three_step.ps.procs": "/var/folders/kg/c7vgxnn902lc3qvc2z2g81s89xhzdz/T/stdout1272284837004786003.tmp", - "three_step.cgrep.count": 0, - "three_step.wc.count": 13 -} -``` +### workflow-source +The `run` command requires a single argument for the workflow source file. + +### --inputs +An optional file of workflow inputs. Although optional, it is a best practice to use an inputs file to satisfy workflow +requirements rather than hardcoding inputs directly into a workflow source file. -The JSON inputs can be left off if there's a file with the same name as the WDL file but with a `.inputs` extension. For example, this will assume that `3step.inputs` exists: +### --options +An optional file of workflow options. Some options are global (supported by all backends), while others are backend-specific. 
+See the [workflow options](#workflow-options) documentation for more details. + +### --type +An optional parameter to specify the language for the workflow source. Any value specified for this parameter is currently +ignored and internally the value `WDL` is used. + +### --type-version +An optional parameter to specify the version of the language for the workflow source. Currently any specified value is ignored. + +### --labels +An optional parameter to specify a file of JSON key-value label pairs to associate with the workflow. + +### --imports +You have the option of importing WDL workflows or tasks to use within your workflow, known as sub-workflows. +If you use sub-workflows within your primary workflow then you must include a zip file with the WDL import files. + +For example, say you have a directory of WDL files: ``` -$ java -jar cromwell.jar run 3step.wdl +wdl_library +└──cgrep.wdl +└──ps.wdl +└──wc.wdl ``` -If your workflow has no inputs, you can specify `-` as the value for the inputs parameter: +If you zip that directory into `wdl_library.zip`, then you can reference and use these WDLs within your primary WDL. + +This could be your primary WDL: ``` -$ java -jar cromwell.jar run my_workflow.wdl - +import "ps.wdl" as ps +import "cgrep.wdl" +import "wc.wdl" as wordCount + +workflow my_wf { + +call ps.ps as getStatus +call cgrep.cgrep { input: str = getStatus.x } +call wordCount { input: str = ... } + +} ``` -The third, optional parameter to the 'run' subcommand is a JSON file of workflow options. By default, the command line will look for a file with the same name as the WDL file but with the extension `.options`. But one can also specify a value of `-` manually to specify that there are no workflow options. 
+Then to run this WDL without any inputs, workflow options, or metadata files, you would enter: + +`$ java -jar cromwell-.jar run my_wf.wdl --imports /path/to/wdl_library.zip` + +### --metadata-output -See the section [workflow options](#workflow-options) for more details. +You can include a path where Cromwell will write the workflow metadata JSON, such as start/end timestamps, status, inputs, and outputs. By default, Cromwell does not write workflow metadata. + +This example includes a metadata path called `/path/to/my_wf.metadata`: ``` -$ java -jar cromwell.jar run my_jes_wf.wdl my_jes_wf.json wf_options.json +$ java -jar cromwell-.jar run my_wf.wdl --metadata-output /path/to/my_wf.metadata ``` -The fourth, optional parameter to the 'run' subcommand is a path where the workflow metadata will be written. By default, no workflow metadata will be written. +Again, Cromwell is very verbose. Here is the metadata output in my_wf.metadata: ``` -$ java -jar cromwell.jar run my_wf.wdl - - my_wf.metadata.json -... play-by-play output ... 
-$ cat my_wf.metadata.json { - "workflowName": "w", + "workflowName": "my_wf", + "submittedFiles": { + "inputs": "{\"my_wf.hello.addressee\":\"m'Lord\"}", + "workflow": "\ntask hello {\n String addressee\n command {\n echo \"Hello ${addressee}!\"\n }\n output {\n String salutation = read_string(stdout())\n }\n runtime {\n +\n }\n}\n\nworkflow my_wf {\n call hello\n output {\n hello.salutation\n }\n}\n", + "options": "{\n\n}" + }, "calls": { - "w.x": [{ - "executionStatus": "Done", - "stdout": "/Users/jdoe/projects/cromwell/cromwell-executions/w/a349534f-137b-4809-9425-1893ac272084/call-x/stdout", - "shardIndex": -1, - "outputs": { - "o": "local\nremote" - }, - "runtimeAttributes": { - "failOnStderr": "false", - "continueOnReturnCode": "0" - }, - "cache": { - "allowResultReuse": true - }, - "inputs": { - "remote": "/Users/jdoe/remote.txt", - "local": "local.txt" - }, - "returnCode": 0, - "backend": "Local", - "end": "2016-07-11T10:27:56.074-04:00", - "stderr": "/Users/jdoe/projects/cromwell/cromwell-executions/w/a349534f-137b-4809-9425-1893ac272084/call-x/stderr", - "callRoot": "cromwell-executions/w/a349534f-137b-4809-9425-1893ac272084/call-x", - "attempt": 1, - "start": "2016-07-11T10:27:55.992-04:00" - }] + "my_wf.hello": [ + { + "executionStatus": "Done", + "stdout": "/Users/jdoe/Documents/cromwell-executions/my_wf/cd0fe94a-984e-4a19-ab4c-8f7f07038068/call-hello/execution/stdout", + "backendStatus": "Done", + "shardIndex": -1, + "outputs": { + "salutation": "Hello m'Lord!" 
+ }, + "runtimeAttributes": { + "continueOnReturnCode": "0", + "failOnStderr": "false" + }, + "callCaching": { + "allowResultReuse": false, + "effectiveCallCachingMode": "CallCachingOff" + }, + "inputs": { + "addressee": "m'Lord" + }, + "returnCode": 0, + "jobId": "28955", + "backend": "Local", + "end": "2017-04-19T10:53:25.045-04:00", + "stderr": "/Users/jdoe/Documents/cromwell-executions/my_wf/cd0fe94a-984e-4a19-ab4c-8f7f07038068/call-hello/execution/stderr", + "callRoot": "/Users/jdoe/Documents/cromwell-executions/my_wf/cd0fe94a-984e-4a19-ab4c-8f7f07038068/call-hello", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2017-04-19T10:53:23.570-04:00", + "description": "PreparingJob", + "endTime": "2017-04-19T10:53:23.573-04:00" + }, + { + "startTime": "2017-04-19T10:53:23.569-04:00", + "description": "Pending", + "endTime": "2017-04-19T10:53:23.570-04:00" + }, + { + "startTime": "2017-04-19T10:53:25.040-04:00", + "description": "UpdatingJobStore", + "endTime": "2017-04-19T10:53:25.045-04:00" + }, + { + "startTime": "2017-04-19T10:53:23.570-04:00", + "description": "RequestingExecutionToken", + "endTime": "2017-04-19T10:53:23.570-04:00" + }, + { + "startTime": "2017-04-19T10:53:23.573-04:00", + "description": "RunningJob", + "endTime": "2017-04-19T10:53:25.040-04:00" + } + ], + "start": "2017-04-19T10:53:23.569-04:00" + } + ] }, "outputs": { - "w.x.o": "local\nremote" + "my_wf.hello.salutation": "Hello m'Lord!" 
}, - "workflowRoot": "cromwell-executions/w/a349534f-137b-4809-9425-1893ac272084", - "id": "a349534f-137b-4809-9425-1893ac272084", + "workflowRoot": "/Users/jdoe/Documents/cromwell-executions/my_wf/cd0fe94a-984e-4a19-ab4c-8f7f07038068", + "id": "cd0fe94a-984e-4a19-ab4c-8f7f07038068", "inputs": { - "w.x.remote": "/Users/jdoe/remote.txt", - "w.x.local": "local.txt" + "my_wf.hello.addressee": "m'Lord" }, - "submission": "2016-07-11T10:27:54.907-04:00", + "submission": "2017-04-19T10:53:19.565-04:00", "status": "Succeeded", - "end": "2016-07-11T10:27:56.108-04:00", - "start": "2016-07-11T10:27:54.919-04:00" + "end": "2017-04-19T10:53:25.063-04:00", + "start": "2017-04-19T10:53:23.535-04:00" } ``` -## server - -Start a server on port 8000, the API for the server is described in the [REST API](#rest-api) section. - # Getting Started with WDL For many examples on how to use WDL see [the WDL site](https://github.com/broadinstitute/wdl#getting-started-with-wdl) +## WDL Support + +:pig2: Cromwell supports the following subset of WDL features: + +* [Language Specification](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#language-specification) + * [Whitespace, Strings, Identifiers, Constants](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#whitespace-strings-identifiers-constants) + * [Types](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#types) + * [Fully Qualified Names & Namespaced Identifiers](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#fully-qualified-names--namespaced-identifiers) + * [Declarations](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#declarations) + * [Expressions](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#expressions) + * [Operator Precedence Table](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#operator-precedence-table) + * [Member Access](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#member-access) + * [Map and Array 
Indexing](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#map-and-array-indexing) + * [Function Calls](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#function-calls) + * [Array Literals](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#array-literals) + * [Map Literals](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#map-literals) +* [Document](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#document) +* [Task Definition](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#task-definition) + * [Sections](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#sections) + * [Command Section](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#command-section) + * [Command Parts](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#command-parts) + * [Command Part Options](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#command-part-options) + * [sep](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#sep) + * [true and false](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#true-and-false) + * [default](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#default) + * [Alternative heredoc syntax](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#alternative-heredoc-syntax) + * [Stripping Leading Whitespace](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#stripping-leading-whitespace) + * [Outputs Section](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#outputs-section) + * [String Interpolation](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#string-interpolation) + * [Runtime Section](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#runtime-section) + * [docker](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#docker) + * [memory](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#memory) + * [Parameter Metadata 
Section](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#parameter-metadata-section) + * [Metadata Section](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#metadata-section) +* [Workflow Definition](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#workflow-definition) + * [Call Statement](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#call-statement) + * [Sub Workflows](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#sub-workflows) + * [Scatter](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#scatter) + * [Parameter Metadata](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#parameter-metadata) + * [Metadata](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#metadata) + * [Outputs](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#outputs) +* [Namespaces](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#namespaces) +* [Scope](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#scope) +* [Optional Parameters & Type Constraints](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#optional-parameters--type-constraints) + * [Prepending a String to an Optional Parameter](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#prepending-a-string-to-an-optional-parameter) +* [Scatter / Gather](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#scatter--gather) +* [Variable Resolution](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#variable-resolution) + * [Task-Level Resolution](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#task-level-resolution) + * [Workflow-Level Resolution](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#workflow-level-resolution) +* [Computing Inputs](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#computing-inputs) + * [Task Inputs](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#task-inputs) + * [Workflow 
Inputs](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#workflow-inputs) + * [Specifying Workflow Inputs in JSON](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#specifying-workflow-inputs-in-json) +* [Type Coercion](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#type-coercion) +* [Standard Library](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#standard-library) + * [File stdout()](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#file-stdout) + * [File stderr()](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#file-stderr) + * [Array\[String\] read_lines(String|File)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arraystring-read_linesstringfile) + * File reads are limited to 128 KB. Configurable via conf file. + * [Array\[Array\[String\]\] read_tsv(String|File)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arrayarraystring-read_tsvstringfile) + * File reads are limited to 128 KB. Configurable via conf file. + * [Map\[String, String\] read_map(String|File)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#mapstring-string-read_mapstringfile) + * File reads are limited to 128 KB. Configurable via conf file. + * [Object read_object(String|File)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#object-read_objectstringfile) + * File reads are limited to 128 KB. Configurable via conf file. + * [Array\[Object\] read_objects(String|File)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arrayobject-read_objectsstringfile) + * File reads are limited to 128 KB. Configurable via conf file. + * [Int read_int(String|File)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#int-read_intstringfile) + * File reads are limited to 19 B. Configurable via conf file. + * [String read_string(String|File)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#string-read_stringstringfile) + * File reads are limited to 128 KB. 
Configurable via conf file. + * [Float read_float(String|File)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#float-read_floatstringfile) + * File reads are limited to 50 B. Configurable via conf file. + * [Boolean read_boolean(String|File)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#boolean-read_booleanstringfile) + * File reads are limited to 7 B. Configurable via conf file. + * [File write_lines(Array\[String\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#file-write_linesarraystring) + * [File write_tsv(Array\[Array\[String\]\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#file-write_tsvarrayarraystring) + * [File write_map(Map\[String, String\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#file-write_mapmapstring-string) + * [File write_object(Object)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#file-write_objectobject) + * [File write_objects(Array\[Object\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#file-write_objectsarrayobject) + * [Float size(File, \[String\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#float-sizefile-string) + * [String sub(String, String, String)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#string-substring-string-string) + * [Array\[Int\] range(Int)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arrayint-rangeint) + * [Array\[Array\[X\]\] transpose(Array\[Array\[X\]\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arrayarrayx-transposearrayarrayx) + * [Array\[Pair\[X,Y\]\] zip(Array\[X\], Array\[Y\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arraypairxy-ziparrayx-arrayy) + * [Array\[Pair\[X,Y\]\] cross(Array\[X\], Array\[Y\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arraypairxy-crossarrayx-arrayy) + * [Integer length(Array\[X\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#integer-lengtharrayx) + * [Array\[String\] 
prefix(String, Array\[X\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arraystring-prefixstring-arrayx) +* [Data Types & Serialization](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#data-types--serialization) + * [Serialization of Task Inputs](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#serialization-of-task-inputs) + * [Primitive Types](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#primitive-types) + * [Compound Types](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#compound-types) + * [Array serialization](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#array-serialization) + * [Array serialization by expansion](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#array-serialization-by-expansion) + * [Array serialization using write_lines()](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#array-serialization-using-write_lines) + * [Map serialization](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#map-serialization) + * [Map serialization using write_map()](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#map-serialization-using-write_map) + * [Object serialization](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#object-serialization) + * [Object serialization using write_object()](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#object-serialization-using-write_object) + * [Array\[Object\] serialization](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arrayobject-serialization) + * [Array\[Object\] serialization using write_objects()](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arrayobject-serialization-using-write_objects) + * [De-serialization of Task Outputs](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#de-serialization-of-task-outputs) + * [Primitive Types](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#primitive-types) + * [Compound 
Types](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#compound-types) + * [Array deserialization](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#array-deserialization) + * [Array deserialization using read_lines()](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#array-deserialization-using-read_lines) + * [Map deserialization](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#map-deserialization) + * [Map deserialization using read_map()](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#map-deserialization-using-read_map) + * [Object deserialization](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#object-deserialization) + * [Object deserialization using read_object()](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#object-deserialization-using-read_object) + * [Array\[Object\] deserialization](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arrayobject-deserialization) + * [Object deserialization using read_objects()](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#object-deserialization-using-read_objects) + + # Configuring Cromwell -Cromwell's default configuration file is located at `src/main/resources/application.conf`. +Cromwell's default configuration file is located at `core/src/main/resources/reference.conf`. 
The configuration file is in [Hocon](https://github.com/typesafehub/config/blob/master/HOCON.md#hocon-human-optimized-config-object-notation) which means the configuration file can specify configuration as JSON-like stanzas like: @@ -250,6 +466,7 @@ The configuration file is in [Hocon](https://github.com/typesafehub/config/blob/ webservice { port = 8000 interface = 0.0.0.0 + binding-timeout = 5s instance.name = "reference" } ``` @@ -259,6 +476,7 @@ Or, alternatively, as dot-separated values: ```hocon webservice.port = 8000 webservice.interface = 0.0.0.0 +webservice.binding-timeout = 5s webservice.instance.name = "reference" ``` @@ -268,12 +486,40 @@ This allows any value to be overridden on the command line: java -Dwebservice.port=8080 cromwell.jar ... ``` -It is recommended that one copies `src/main/resources/application.conf`, modify it, then link to it via: + +To customize configuration it is recommended that one copies relevant stanzas from `core/src/main/resources/reference.conf` into a new file, modify it as appropriate, then pass it to Cromwell via: + +``` +java -Dconfig.file=/path/to/yourOverrides.conf cromwell.jar ... +``` + +## I/O + +Cromwell centralizes as many of its I/O operations as possible through a unique entry point. This allows users to effectively control and throttle the number of requests and resources allocated to those operations throughout the entire system. +It is possible to configure this throttling behavior in the configuration: + +``` +system.io { + number-of-requests = 100000 + per = 100 seconds +} +``` + +This is particularly useful when running Cromwell on a JES backend for example, as Google imposes a quota on the number of GCS queries that can be made. + +### Resilience + +I/O operations can fail for a number of reason from network failures to server errors. Some of those errors are not fatal and can be retried. +Cromwell will retry I/O operations on such retryable errors, up to a number of times. 
This number (more precisely the number of attempts that will be made) can be set using the following configuration option: ``` -java -Dconfig.file=/path/to/application.conf cromwell.jar ... +system.io { + # Number of times an I/O operation should be attempted before giving up and failing it. + number-of-attempts = 5 +} ``` + ## Workflow Submission Cromwell has a configurable cap on the number of workflows running at a time. To set this value provide an integer value to the `system.max-concurrent-workflows` config value. @@ -292,25 +538,19 @@ Then, edit the configuration file `database` stanza, as follows: ``` database { - config = main.mysql - - main { - mysql { - db.url = "jdbc:mysql://localhost:3306/cromwell" - db.user = "root" - db.password = "" - db.driver = "com.mysql.jdbc.Driver" - db.connectionTimeout = 5000 # NOTE: The default 1000ms is often too short for production mysql use - driver = "slick.driver.MySQLDriver$" - } - } - - test { - ... + profile = "slick.jdbc.MySQLProfile$" + db { + driver = "com.mysql.jdbc.Driver" + url = "jdbc:mysql://host/cromwell?rewriteBatchedStatements=true" + user = "user" + password = "pass" + connectionTimeout = 5000 } } ``` +By default batch inserts will be processed in blocks of 2000. To modify this value add the field `insert-batch-size` to the `database` stanza. + ## SIGINT abort handler For backends that support aborting task invocations, Cromwell can be configured to automatically try to abort all currently running calls (and set their status to `Aborted`) when a SIGINT is sent to the Cromwell process. To turn this feature on, set the configuration option @@ -323,6 +563,14 @@ system { Or, via `-Dsystem.abort-jobs-on-terminate=true` command line option. +By default, this value is false when running `java -jar cromwell.jar server`, and true when running `java -jar cromwell.jar run `. + +# Security + + - Cromwell is NOT on its own a security appliance! + - Only YOU are responsible for your own security! 
+ - Some recommendations and suggestions on security can be found in the [SecurityRecommendations.md](SecurityRecommendations.md) document + # Backends A backend represents a way to run the user's command specified in the `task` section. Cromwell allows for backends conforming to @@ -331,6 +579,7 @@ Cromwell distribution: * Local / GridEngine / LSF / etc. - Run jobs as subprocesses or via a dispatcher. Supports launching in Docker containers. Use `bash`, `qsub`, `bsub`, etc. to run scripts. * Google JES - Launch jobs on Google Compute Engine through the Job Execution Service (JES). +* GA4GH TES - Launch jobs on servers that support the GA4GH Task Execution Schema (TES). * HtCondor - Allows to execute jobs using HTCondor. * Spark - Adds support for execution of spark jobs. @@ -430,7 +679,7 @@ When Cromwell runs a workflow, it first creates a directory `//call-`. This is the ``. For example, having a `stdout` and `stderr` file is common among both backends and they both write a shell script file to the `` as well. See the descriptions below for details about backend-specific files that are written to these directories. -An example of a workflow output directory for a three-step WDL file might look like this: +An example of a workflow output directory for a three-step workflow might look like this: ``` cromwell-executions/ @@ -561,7 +810,7 @@ cd echo $? > rc ``` -`` would be equal to `` for non-Docker jobs, or it would be under `/root//call-` if this is running in a Docker container. +`` would be equal to `` for non-Docker jobs, or it would be under `/cromwell-executions//call-` if this is running in a Docker container. 
When running without docker, the subprocess command that the local backend will launch is: @@ -585,116 +834,351 @@ docker run --rm -v : -i /bin/bash < - - + + + var parentWorkflow; + if (selectedRow) parentWorkflow = chartView.getValue(selectedRow, 0); + + var indexOfParentWorkflow = expandedParentWorkflows.indexOf(parentWorkflow); + + if (indexOfParentWorkflow != -1) { + // Remove the parent workflow from the list if it's in it + expandedParentWorkflows.splice(indexOfParentWorkflow, 1); + } else if (parentWorkflow && parentWorkflowNames.indexOf(parentWorkflow) != -1) { + // Add it if it's not + expandedParentWorkflows.push(parentWorkflow); + } + + var rowsToDisplay = dt.getFilteredRows([filter]); + var view = new google.visualization.DataView(dt); + view.setRows(rowsToDisplay); + return view; + } + + function hideAllSubWorkflows(dt) { + var view = new google.visualization.DataView(dt); + function filterFunction(cell, row, column, table) { + return table.getRowProperty(row, "ancestry").length != 0; + } + + view.hideRows(dt.getFilteredRows([{column: 0, test: filterFunction}])); + return view; + } + + + + -
diff --git a/engine/src/main/scala/cromwell/Simpletons.scala b/engine/src/main/scala/cromwell/Simpletons.scala index 95983bb32..895f76f1a 100644 --- a/engine/src/main/scala/cromwell/Simpletons.scala +++ b/engine/src/main/scala/cromwell/Simpletons.scala @@ -1,9 +1,10 @@ package cromwell import cromwell.core.simpleton.WdlValueSimpleton +import cromwell.database.sql.SqlConverters._ import cromwell.database.sql.tables.{CallCachingSimpletonEntry, JobStoreSimpletonEntry} -import wdl4s.types.{WdlBooleanType, WdlFloatType, WdlIntegerType, WdlStringType} -import wdl4s.values.{WdlPrimitive, WdlSingleFile, WdlValue} +import wdl4s.wdl.types.{WdlBooleanType, WdlFloatType, WdlIntegerType, WdlStringType} +import wdl4s.wdl.values.{WdlPrimitive, WdlSingleFile, WdlValue} import scala.util.Try @@ -12,11 +13,11 @@ import scala.util.Try */ object Simpletons { def toSimpleton(entry: CallCachingSimpletonEntry): WdlValueSimpleton = { - toSimpleton(entry.wdlType, entry.simpletonKey, entry.simpletonValue) + toSimpleton(entry.wdlType, entry.simpletonKey, entry.simpletonValue.toRawString) } def toSimpleton(entry: JobStoreSimpletonEntry): WdlValueSimpleton = { - toSimpleton(entry.wdlType, entry.simpletonKey, entry.simpletonValue) + toSimpleton(entry.wdlType, entry.simpletonKey, entry.simpletonValue.toRawString) } private def toSimpleton(wdlType: String, simpletonKey: String, simpletonValue: String): WdlValueSimpleton = { diff --git a/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala b/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala index e05df2bb6..d95ee8ed7 100644 --- a/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala +++ b/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala @@ -1,38 +1,50 @@ package cromwell.engine -import java.nio.file.{FileSystem, FileSystems} - -import com.typesafe.config.ConfigFactory +import akka.actor.ActorSystem +import cats.data.Validated.{Invalid, Valid} +import cats.instances.future._ +import cats.instances.list._ 
+import cats.syntax.traverse._ +import com.typesafe.config.{Config, ConfigFactory} import cromwell.core.WorkflowOptions -import cromwell.engine.backend.EnhancedWorkflowOptions._ -import cromwell.filesystems.gcs.{GcsFileSystem, GcsFileSystemProvider, GoogleConfiguration} -import lenthall.config.ScalaConfig._ +import cromwell.core.path.{DefaultPathBuilder, PathBuilder} +import cromwell.filesystems.gcs.auth.GoogleAuthMode +import cromwell.filesystems.gcs.{GcsPathBuilderFactory, GoogleConfiguration} import lenthall.exception.MessageAggregation +import lenthall.validation.ErrorOr.ErrorOr +import net.ceedubs.ficus.Ficus._ -import scala.concurrent.ExecutionContext +import scala.concurrent.{ExecutionContext, Future} +import scala.util.{Failure, Success, Try} object EngineFilesystems { - - private val config = ConfigFactory.load - private val googleConf: GoogleConfiguration = GoogleConfiguration(config) - private val googleAuthMode = config.getStringOption("engine.filesystems.gcs.auth") map { confMode => - googleConf.auth(confMode) match { - case scalaz.Success(mode) => mode - case scalaz.Failure(errors) => throw new RuntimeException() with MessageAggregation { - override def exceptionContext: String = s"Failed to create authentication mode for $confMode" - override def errorMessages: Traversable[String] = errors.list.toList + private val config: Config = ConfigFactory.load + + private val gcsPathBuilderFactory: Try[Option[GcsPathBuilderFactory]] = Try { + // Parse the configuration and create a GoogleConfiguration + val googleConf: GoogleConfiguration = GoogleConfiguration(config) + // Extract the specified authentication mode for engine gcs filesystem, if any + val engineAuthModeAsString: Option[String] = config.as[Option[String]]("engine.filesystems.gcs.auth") + // Validate it agasint the google configuration + val engineAuthModeValidation: Option[ErrorOr[GoogleAuthMode]] = engineAuthModeAsString map googleConf.auth + + engineAuthModeValidation map { + // If the 
authentication mode is recognized, create a GcsPathBuilderFactory for the engine + case Valid(mode) => GcsPathBuilderFactory(mode, googleConf.applicationName) + // Otherwise fail + case Invalid(errors) => throw new RuntimeException() with MessageAggregation { + override def exceptionContext: String = s"Failed to create authentication mode for $engineAuthModeAsString" + override def errorMessages: Traversable[String] = errors.toList } } } - def filesystemsForWorkflow(workflowOptions: WorkflowOptions)(implicit ec: ExecutionContext): List[FileSystem] = { - def gcsFileSystem: Option[GcsFileSystem] = { - googleAuthMode map { mode => - val storage = mode.buildStorage(workflowOptions.toGoogleAuthOptions, googleConf.applicationName) - GcsFileSystem(GcsFileSystemProvider(storage)) - } - } + private val defaultFileSystem = + Option(DefaultPathBuilder).filter(_ => config.as[Boolean]("engine.filesystems.local.enabled")) - List(gcsFileSystem, Option(FileSystems.getDefault)).flatten + def pathBuildersForWorkflow(workflowOptions: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext): Future[List[PathBuilder]] = gcsPathBuilderFactory match { + case Success(maybeBuilderFactory) => maybeBuilderFactory.toList.traverse(_.withOptions(workflowOptions)).map(_ ++ defaultFileSystem) + case Failure(failure) => Future.failed(failure) } + } diff --git a/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala b/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala index c493b41a6..6850e1681 100644 --- a/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala +++ b/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala @@ -1,20 +1,30 @@ package cromwell.engine -import java.nio.file.FileSystem - import cromwell.backend.BackendWorkflowDescriptor import cromwell.core.WorkflowOptions.WorkflowOption import cromwell.core.callcaching.CallCachingMode -import wdl4s._ +import cromwell.core.path.PathBuilder +import wdl4s.wdl._ + +case class 
EngineWorkflowDescriptor(namespace: WdlNamespaceWithWorkflow, + backendDescriptor: BackendWorkflowDescriptor, + backendAssignments: Map[WdlTaskCall, String], + failureMode: WorkflowFailureMode, + pathBuilders: List[PathBuilder], + callCachingMode: CallCachingMode, + parentWorkflow: Option[EngineWorkflowDescriptor] = None) { + + val rootWorkflow: EngineWorkflowDescriptor = parentWorkflow match { + case Some(parent) => parent.rootWorkflow + case None => this + } -final case class EngineWorkflowDescriptor(backendDescriptor: BackendWorkflowDescriptor, - workflowInputs: WorkflowCoercedInputs, - backendAssignments: Map[Call, String], - failureMode: WorkflowFailureMode, - engineFilesystems: List[FileSystem], - callCachingMode: CallCachingMode) { - def id = backendDescriptor.id - def namespace = backendDescriptor.workflowNamespace - def name = namespace.workflow.unqualifiedName + def isRootWorkflow = rootWorkflow.parentWorkflow.isEmpty + + lazy val id = backendDescriptor.id + lazy val workflow = backendDescriptor.workflow + lazy val name = workflow.unqualifiedName + lazy val knownValues = backendDescriptor.knownValues + def getWorkflowOption(key: WorkflowOption) = backendDescriptor.getWorkflowOption(key) } diff --git a/engine/src/main/scala/cromwell/engine/WdlFunctions.scala b/engine/src/main/scala/cromwell/engine/WdlFunctions.scala index 3cc8ee1ca..73f928cac 100644 --- a/engine/src/main/scala/cromwell/engine/WdlFunctions.scala +++ b/engine/src/main/scala/cromwell/engine/WdlFunctions.scala @@ -1,17 +1,13 @@ package cromwell.engine -import java.nio.file.FileSystem - -import cromwell.backend.wdl.{PureFunctions, ReadLikeFunctions} -import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.values.{WdlFile, WdlValue} +import cromwell.backend.wdl.ReadLikeFunctions +import wdl4s.wdl.expression.PureStandardLibraryFunctionsLike +import cromwell.core.path.PathBuilder +import wdl4s.wdl.values.{WdlFile, WdlValue} import scala.util.{Failure, Try} -class WdlFunctions(val 
fileSystems: List[FileSystem]) extends WdlStandardLibraryFunctions with ReadLikeFunctions with PureFunctions { - /** - * Ordered list of filesystems to be used to execute WDL functions needing IO. - */ +class WdlFunctions(val pathBuilders: List[PathBuilder]) extends PureStandardLibraryFunctionsLike with ReadLikeFunctions { private def fail(name: String) = Failure(new NotImplementedError(s"$name() not supported at the workflow level yet")) override def write_json(params: Seq[Try[WdlValue]]): Try[WdlFile] = fail("write_json") diff --git a/engine/src/main/scala/cromwell/engine/backend/BackendConfiguration.scala b/engine/src/main/scala/cromwell/engine/backend/BackendConfiguration.scala index 866e11062..1dff0efa3 100644 --- a/engine/src/main/scala/cromwell/engine/backend/BackendConfiguration.scala +++ b/engine/src/main/scala/cromwell/engine/backend/BackendConfiguration.scala @@ -1,18 +1,16 @@ package cromwell.engine.backend -import akka.actor.ActorSystem import com.typesafe.config.{Config, ConfigFactory} import cromwell.backend.{BackendConfigurationDescriptor, BackendLifecycleActorFactory} -import lenthall.config.ScalaConfig._ - +import net.ceedubs.ficus.Ficus._ import scala.collection.JavaConverters._ import scala.util.{Failure, Success, Try} case class BackendConfigurationEntry(name: String, lifecycleActorFactoryClass: String, config: Config) { - def asBackendLifecycleActorFactory: BackendLifecycleActorFactory = { + def asBackendLifecycleActorFactory: Try[BackendLifecycleActorFactory] = Try { Class.forName(lifecycleActorFactoryClass) - .getConstructor(classOf[BackendConfigurationDescriptor]) - .newInstance(asBackendConfigurationDescriptor) + .getConstructor(classOf[String], classOf[BackendConfigurationDescriptor]) + .newInstance(name, asBackendConfigurationDescriptor) .asInstanceOf[BackendLifecycleActorFactory] } @@ -30,7 +28,7 @@ object BackendConfiguration { BackendConfigurationEntry( backendName, entry.getString("actor-factory"), - entry.getConfigOr("config") + 
entry.as[Option[Config]]("config").getOrElse(ConfigFactory.empty("empty")) ) } diff --git a/engine/src/main/scala/cromwell/engine/backend/BackendSingletonCollection.scala b/engine/src/main/scala/cromwell/engine/backend/BackendSingletonCollection.scala new file mode 100644 index 000000000..ecb1a6753 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/backend/BackendSingletonCollection.scala @@ -0,0 +1,5 @@ +package cromwell.engine.backend + +import akka.actor.ActorRef + +final case class BackendSingletonCollection(backendSingletonActors: Map[String, Option[ActorRef]]) diff --git a/engine/src/main/scala/cromwell/engine/backend/CromwellBackends.scala b/engine/src/main/scala/cromwell/engine/backend/CromwellBackends.scala index b9d86ee65..5a586edad 100644 --- a/engine/src/main/scala/cromwell/engine/backend/CromwellBackends.scala +++ b/engine/src/main/scala/cromwell/engine/backend/CromwellBackends.scala @@ -1,8 +1,8 @@ package cromwell.engine.backend import cromwell.backend.BackendLifecycleActorFactory +import lenthall.util.TryUtil -import scala.language.postfixOps import scala.util.{Failure, Success, Try} /** @@ -10,7 +10,8 @@ import scala.util.{Failure, Success, Try} */ case class CromwellBackends(backendEntries: List[BackendConfigurationEntry]) { - val backendLifecycleActorFactories = backendEntries.map(e => e.name -> e.asBackendLifecycleActorFactory).toMap + // Raise the exception here if some backend factories failed to instantiate + val backendLifecycleActorFactories = TryUtil.sequenceMap(backendEntries.map(e => e.name -> e.asBackendLifecycleActorFactory).toMap).get def backendLifecycleActorFactoryByName(backendName: String): Try[BackendLifecycleActorFactory] = { backendLifecycleActorFactories.get(backendName) match { diff --git a/engine/src/main/scala/cromwell/engine/backend/EnhancedWorkflowOptions.scala b/engine/src/main/scala/cromwell/engine/backend/EnhancedWorkflowOptions.scala deleted file mode 100644 index e2043cb65..000000000 --- 
a/engine/src/main/scala/cromwell/engine/backend/EnhancedWorkflowOptions.scala +++ /dev/null @@ -1,16 +0,0 @@ -package cromwell.engine.backend - -import cromwell.core.WorkflowOptions -import cromwell.filesystems.gcs.GoogleAuthMode -import cromwell.filesystems.gcs.GoogleAuthMode.GoogleAuthOptions - -import scala.util.Try - -object EnhancedWorkflowOptions { - - implicit class GoogleAuthWorkflowOptions(val workflowOptions: WorkflowOptions) extends AnyVal { - def toGoogleAuthOptions: GoogleAuthMode.GoogleAuthOptions = new GoogleAuthOptions { - override def get(key: String): Try[String] = workflowOptions.get(key) - } - } -} diff --git a/engine/src/main/scala/cromwell/engine/engine.scala b/engine/src/main/scala/cromwell/engine/engine.scala new file mode 100644 index 000000000..5bdcd166f --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/engine.scala @@ -0,0 +1,22 @@ +package cromwell.engine + +import wdl4s.wdl._ + +import scala.util.{Failure, Success, Try} + +final case class AbortFunction(function: () => Unit) +final case class AbortRegistrationFunction(register: AbortFunction => Unit) + +final case class CallAttempt(fqn: FullyQualifiedName, attempt: Int) + +object WorkflowFailureMode { + def tryParse(mode: String): Try[WorkflowFailureMode] = { + val modes = Seq(ContinueWhilePossible, NoNewCalls) + modes find { _.toString.equalsIgnoreCase(mode) } map { Success(_) } getOrElse Failure(new Exception(s"Invalid workflow failure mode: $mode")) + } +} +sealed trait WorkflowFailureMode { + def allowNewCallsAfterFailure: Boolean +} +case object ContinueWhilePossible extends WorkflowFailureMode { override val allowNewCallsAfterFailure = true } +case object NoNewCalls extends WorkflowFailureMode { override val allowNewCallsAfterFailure = false } \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/engine/io/IoActor.scala b/engine/src/main/scala/cromwell/engine/io/IoActor.scala new file mode 100644 index 000000000..64eae2536 --- /dev/null +++ 
b/engine/src/main/scala/cromwell/engine/io/IoActor.scala @@ -0,0 +1,173 @@ +package cromwell.engine.io + +import java.net.{SocketException, SocketTimeoutException} +import javax.net.ssl.SSLException + +import akka.NotUsed +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import akka.stream._ +import akka.stream.scaladsl.{Flow, GraphDSL, Merge, Partition, Source} +import com.google.cloud.storage.StorageException +import com.typesafe.config.ConfigFactory +import cromwell.core.Dispatcher +import cromwell.core.actor.StreamActorHelper +import cromwell.core.actor.StreamIntegration.StreamContext +import cromwell.core.io.{IoAck, IoCommand, Throttle} +import cromwell.engine.io.IoActor._ +import cromwell.engine.io.gcs.GcsBatchFlow.BatchFailedException +import cromwell.engine.io.gcs.{GcsBatchCommandContext, ParallelGcsBatchFlow} +import cromwell.engine.io.nio.NioFlow +import cromwell.filesystems.gcs.batch.GcsBatchIoCommand +import cromwell.core.Dispatcher.IoDispatcher + +/** + * Actor that performs IO operations asynchronously using akka streams + * + * @param queueSize size of the queue + * @param throttle optional throttler to control the throughput of requests. 
+ * Applied to ALL incoming requests + * @param materializer actor materializer to run the stream + */ +final class IoActor(queueSize: Int, throttle: Option[Throttle])(implicit val materializer: ActorMaterializer) extends Actor with ActorLogging with StreamActorHelper[IoCommandContext[_]] { + + implicit private val system = context.system + implicit val ec = context.dispatcher + + private [io] lazy val defaultFlow = new NioFlow(parallelism = 100, context.system.scheduler).flow.withAttributes(ActorAttributes.dispatcher(Dispatcher.IoDispatcher)) + private [io] lazy val gcsBatchFlow = new ParallelGcsBatchFlow(parallelism = 10, batchSize = 100, context.system.scheduler).flow.withAttributes(ActorAttributes.dispatcher(Dispatcher.IoDispatcher)) + + protected val source = Source.queue[IoCommandContext[_]](queueSize, OverflowStrategy.dropNew) + + protected val flow = GraphDSL.create() { implicit builder => + import GraphDSL.Implicits._ + + val input = builder.add(Flow[IoCommandContext[_]]) + + // Partitions requests between gcs batch, and single nio requests + val batchPartitioner = builder.add(Partition[IoCommandContext[_]](2, { + case _: GcsBatchCommandContext[_, _] => 0 + case other @ _ => 1 + })) + + // Sub flow for batched gcs requests + val batches = batchPartitioner.out(0) collect { case batch: GcsBatchCommandContext[_, _] => batch } + + // Sub flow for single nio requests + val defaults = batchPartitioner.out(1) collect { case default: DefaultCommandContext[_] => default } + + // Merge results from both flows back together + val merger = builder.add(Merge[IoResult](2)) + + // Flow processing nio requests + val defaultFlowPorts = builder.add(defaultFlow) + + // Flow processing gcs batch requests + val batchFlowPorts = builder.add(gcsBatchFlow) + + input ~> batchPartitioner + defaults.outlet ~> defaultFlowPorts ~> merger + batches.outlet ~> batchFlowPorts ~> merger + + FlowShape[IoCommandContext[_], IoResult](input.in, merger.out) + } + + protected val throttledFlow = 
throttle map { t => + Flow[IoCommandContext[_]] + .throttle(t.elements, t.per, t.maximumBurst, ThrottleMode.Shaping) + .via(flow) + } getOrElse flow + + override protected lazy val streamSource = source.via(throttledFlow).withAttributes(ActorAttributes.dispatcher(Dispatcher.IoDispatcher)) + + override def actorReceive: Receive = { + /* GCS Batch command with context */ + case (clientContext: Any, gcsBatchCommand: GcsBatchIoCommand[_, _]) => + val replyTo = sender() + val commandContext= GcsBatchCommandContext(gcsBatchCommand, replyTo, Option(clientContext)) + sendToStream(commandContext) + + /* GCS Batch command without context */ + case gcsBatchCommand: GcsBatchIoCommand[_, _] => + val replyTo = sender() + val commandContext= GcsBatchCommandContext(gcsBatchCommand, replyTo) + sendToStream(commandContext) + + /* Default command with context */ + case (clientContext: Any, command: IoCommand[_]) => + val replyTo = sender() + val commandContext= DefaultCommandContext(command, replyTo, Option(clientContext)) + sendToStream(commandContext) + + /* Default command without context */ + case command: IoCommand[_] => + val replyTo = sender() + val commandContext= DefaultCommandContext(command, replyTo) + sendToStream(commandContext) + } +} + +trait IoCommandContext[T] extends StreamContext { + def request: IoCommand[T] + def replyTo: ActorRef + def fail(failure: Throwable): IoResult = (request.fail(failure), this) + def success(value: T): IoResult = (request.success(value), this) +} + +object IoActor { + import net.ceedubs.ficus.Ficus._ + + /** Flow that can consume an IoCommandContext and produce an IoResult */ + type IoFlow = Flow[IoCommandContext[_], IoResult, NotUsed] + + /** Result type of an IoFlow, contains the original command context and the final IoAck response. 
*/ + type IoResult = (IoAck[_], IoCommandContext[_]) + + private val ioConfig = ConfigFactory.load().getConfig("system.io") + + /** Maximum number of times a command will be attempted: First attempt + 5 retries */ + val MaxAttemptsNumber = ioConfig.getOrElse[Int]("number-of-attempts", 5) + + case class DefaultCommandContext[T](request: IoCommand[T], replyTo: ActorRef, override val clientContext: Option[Any] = None) extends IoCommandContext[T] + + /** + * ATTENTION: Transient failures are retried *forever* + * Be careful when adding error codes to this method. + * Currently only 429 (= quota exceeded are considered truly transient) + */ + def isTransient(failure: Throwable): Boolean = failure match { + case gcs: StorageException => gcs.getCode == 429 + case _ => false + } + + val AdditionalRetryableHttpCodes = List( + // HTTP 410: Gone + // From Google doc (https://cloud.google.com/storage/docs/json_api/v1/status-codes): + // "You have attempted to use a resumable upload session that is no longer available. + // If the reported status code was not successful and you still wish to upload the file, you must start a new session." + 410, + // Some 503 errors seem to yield "false" on the "isRetryable" method because they are not retried. + // The CloudStorage exception mechanism is not flawless yet (https://github.com/GoogleCloudPlatform/google-cloud-java/issues/1545) + // so that could be the cause. + // For now explicitly lists 503 as a retryable code here to work around that. + 503 + ) + + /** + * Failures that are considered retryable. 
+ * Retrying them should increase the "retry counter" + */ + def isRetryable(failure: Throwable): Boolean = failure match { + case gcs: StorageException => gcs.isRetryable || AdditionalRetryableHttpCodes.contains(gcs.getCode) || isRetryable(gcs.getCause) + case _: SSLException => true + case _: BatchFailedException => true + case _: SocketException => true + case _: SocketTimeoutException => true + case other => isTransient(other) + } + + def isFatal(failure: Throwable) = !isRetryable(failure) + + def props(queueSize: Int, throttle: Option[Throttle])(implicit materializer: ActorMaterializer) = { + Props(new IoActor(queueSize, throttle)).withDispatcher(IoDispatcher) + } +} diff --git a/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchCommandContext.scala b/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchCommandContext.scala new file mode 100644 index 000000000..097649fc1 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchCommandContext.scala @@ -0,0 +1,95 @@ +package cromwell.engine.io.gcs + +import akka.actor.ActorRef +import com.google.api.client.googleapis.batch.BatchRequest +import com.google.api.client.googleapis.batch.json.JsonBatchCallback +import com.google.api.client.googleapis.json.GoogleJsonError +import com.google.api.client.http.HttpHeaders +import com.google.api.client.util.ExponentialBackOff +import com.google.cloud.storage.StorageException +import cromwell.core.retry.{Backoff, SimpleExponentialBackoff} +import cromwell.engine.io.IoActor.IoResult +import cromwell.engine.io.gcs.GcsBatchCommandContext.BatchResponse +import cromwell.engine.io.{IoActor, IoCommandContext} +import cromwell.filesystems.gcs.batch.GcsBatchIoCommand + +import scala.concurrent.Promise +import scala.concurrent.duration._ +import scala.language.postfixOps + +object GcsBatchCommandContext { + def defaultBackoff = SimpleExponentialBackoff( + new ExponentialBackOff.Builder() + .setInitialIntervalMillis(1.second.toMillis.toInt) + .setMultiplier(4) + 
.setMaxIntervalMillis(30.seconds.toMillis.toInt) + .setRandomizationFactor(0.2D) + .setMaxElapsedTimeMillis(30.minutes.toMillis.toInt) + .build() + ) + type BatchResponse = Either[IoResult, GcsBatchCommandContext[_, _]] +} + +final case class GcsBatchCommandContext[T, U](request: GcsBatchIoCommand[T, U], + replyTo: ActorRef, + override val clientContext: Option[Any] = None, + backoff: Backoff = GcsBatchCommandContext.defaultBackoff, + currentAttempt: Int = 1, + promise: Promise[BatchResponse] = Promise[BatchResponse] + ) extends IoCommandContext[T] { + + /** + * None if no retry should be attempted, Some(timeToWaitBeforeNextAttempt) otherwise + */ + lazy val retryIn = if (currentAttempt >= IoActor.MaxAttemptsNumber) None else Option(backoff.backoffMillis milliseconds) + + /** + * Json batch call back for a batched request + */ + lazy val callback: JsonBatchCallback[U] = new JsonBatchCallback[U]() { + def onSuccess(response: U, httpHeaders: HttpHeaders) = onSuccessCallback(response, httpHeaders) + def onFailure(googleJsonError: GoogleJsonError, httpHeaders: HttpHeaders) = onFailureCallback(googleJsonError, httpHeaders) + } + + /** + * Increment backoff time and attempt count + */ + lazy val next: GcsBatchCommandContext[T, U] = { + this.copy(backoff = backoff.next, currentAttempt = currentAttempt + 1, promise = Promise[BatchResponse]) + } + + /** + * Only increment backoff. To be used for failure thas should be retried infinitely + */ + lazy val nextTransient: GcsBatchCommandContext[T, U] = { + this.copy(backoff = backoff.next, promise = Promise[BatchResponse]) + } + + /** + * Queue the request for batching + */ + def queue(batchRequest: BatchRequest) = request.operation.queue(batchRequest, callback) + + /** + * On success callback. 
Transform the request response to a stream-ready response that can complete the promise + */ + private def onSuccessCallback(response: U, httpHeaders: HttpHeaders) = { + val promiseResponse: BatchResponse = request.onSuccess(response, httpHeaders) match { + // Left means the command is complete, so just create the corresponding IoSuccess with the value + case Left(responseValue) => Left(success(responseValue)) + // Right means there is a subsequent request to be executed, clone this context with the new request and a new promise + case Right(nextCommand) => Right(this.copy(request = nextCommand, promise = Promise[BatchResponse])) + } + + promise.trySuccess(promiseResponse) + () + } + + /** + * On failure callback. Fail the promise with a StorageException + */ + private def onFailureCallback(googleJsonError: GoogleJsonError, httpHeaders: HttpHeaders) = { + promise.tryFailure(new StorageException(googleJsonError)) + () + } +} diff --git a/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchFlow.scala b/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchFlow.scala new file mode 100644 index 000000000..646a79799 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchFlow.scala @@ -0,0 +1,152 @@ +package cromwell.engine.io.gcs + +import java.io.IOException + +import akka.actor.Scheduler +import akka.stream._ +import akka.stream.scaladsl.{Flow, GraphDSL, MergePreferred, Partition} +import com.google.api.client.googleapis.batch.BatchRequest +import com.google.api.client.http.{HttpRequest, HttpRequestInitializer} +import cromwell.engine.io.IoActor +import cromwell.engine.io.IoActor.IoResult +import cromwell.engine.io.gcs.GcsBatchFlow.BatchFailedException +import cromwell.filesystems.gcs.{GcsPathBuilder, GoogleConfiguration} + +import scala.concurrent.duration._ +import scala.concurrent.{ExecutionContext, Future} +import scala.language.postfixOps +import scala.util.{Failure, Try} + +object GcsBatchFlow { + + /** + * Exception used to fail the request 
promises when the batch request itself fails. + * Is considered retryable. + */ + case class BatchFailedException(failure: Throwable) extends IOException(failure) +} + +class GcsBatchFlow(batchSize: Int, scheduler: Scheduler)(implicit ec: ExecutionContext) { + + // Does not carry any authentication, assumes all underlying requests are properly authenticated + private val httpRequestInitializer = new HttpRequestInitializer { + override def initialize(request: HttpRequest): Unit = { + request.setConnectTimeout(GoogleConfiguration.DefaultConnectionTimeout.toMillis.toInt) + request.setReadTimeout(GoogleConfiguration.DefaultReadTimeout.toMillis.toInt) + () + } + } + + private val batch: BatchRequest = new BatchRequest(GcsPathBuilder.HttpTransport, httpRequestInitializer) + + val flow = GraphDSL.create() { implicit builder => + import GraphDSL.Implicits._ + + // Source where batch commands are coming from. This is the input port of this flow + val source = builder.add(Flow[GcsBatchCommandContext[_, _]]) + + // Merge commands from source (above), and commands that need to be retried (see retries below) + val sourceMerger = builder.add(MergePreferred[GcsBatchCommandContext[_, _]](1)) + + // Process a batch and spit atomic GcsBatchResponses out for each internal request + val batchProcessor = builder.add( + Flow[GcsBatchCommandContext[_, _]] + // Group commands together in batches so they can be processed as such + .groupedWithin(batchSize, 5 seconds) + // execute the batch and outputs each sub-response individually, as a Future + .mapConcat[Future[GcsBatchResponse[_]]](executeBatch) + // Wait for each Future to complete + .mapAsyncUnordered[GcsBatchResponse[_]](batchSize) { identity } + ) + + // Partitions the responses: Terminal responses exit the flow, others go back to the sourceMerger + val responseHandler = builder.add(responseHandlerFlow) + + // Buffer commands to be retried to avoid backpressuring too rapidly + val nextRequestBuffer = 
builder.add(Flow[GcsBatchCommandContext[_, _]].buffer(batchSize, OverflowStrategy.backpressure)) + + source ~> sourceMerger ~> batchProcessor ~> responseHandler.in + sourceMerger.preferred <~ nextRequestBuffer <~ responseHandler.out1 + + FlowShape[GcsBatchCommandContext[_, _], IoResult](source.in, responseHandler.out0) + } + + /** + * Fan out shape splitting GcsBatchResponse into 2: + * First port emits terminal result that can exit the GcsBatch flow + * Second port emits request to be re-injected to be executed in a later batch + */ + private lazy val responseHandlerFlow = GraphDSL.create() { implicit builder => + import GraphDSL.Implicits._ + + val source = builder.add(Partition[GcsBatchResponse[_]](2, { + case _: GcsBatchTerminal[_] => 0 + case _ => 1 + })) + + // Terminal responses: output of this flow + val terminals = source.out(0) collect { case terminal: GcsBatchTerminal[_] => terminal.ioResult } + + // Next command context, can be a retry or another request needed by the command + val nextRequest = source.out(1).collect { + case retry: GcsBatchRetry[_] => retry.context + case nextRequest: GcsBatchNextRequest[_] => nextRequest.context + } + + new FanOutShape2[GcsBatchResponse[_], IoResult, GcsBatchCommandContext[_, _]](source.in, terminals.outlet, nextRequest.outlet) + } + + private def executeBatch(contexts: Seq[GcsBatchCommandContext[_, _]]): List[Future[GcsBatchResponse[_]]] = { + def failAllPromisesWith(failure: Throwable) = contexts foreach { context => + context.promise.tryFailure(failure) + () + } + + // Add all requests to the batch + contexts foreach { _.queue(batch) } + + // Try to execute the batch request. 
+ // If it fails with an IO Exception, fail all the underlying promises with a retyrable BatchFailedException + // Otherwise fail with the original exception + Try(batch.execute()) match { + case Failure(failure: IOException) => failAllPromisesWith(BatchFailedException(failure)) + case Failure(failure) => failAllPromisesWith(failure) + case _ => + } + + // Map all promise responses to a GcsBatchResponse to be either sent back as a response or retried in the next batch + contexts.toList map { context => + context.promise.future map { + case Left(response) => GcsBatchTerminal(response) + case Right(nextRequest) => GcsBatchNextRequest(nextRequest) + } recoverWith recoverCommand(context) + } + } + + /** + * Handles a failed future. + * If the failure is retryable, and the command hasn't reached its max attempts: + * schedule the command to be retried in a later batch after waiting for the appropriate amount of time + * Otherwise create a GcsBatchTerminal response with the IoFailure + * In both cases, returns a successful Future to avoid failing the stream or dropping elements + */ + private def recoverCommand(context: GcsBatchCommandContext[_, _]): PartialFunction[Throwable, Future[GcsBatchResponse[_]]] = { + // If the failure is retryable - recover with a GcsBatchRetry so it can be retried in the next batch + case failure if IoActor.isRetryable(failure) => + context.retryIn match { + case Some(waitTime) if IoActor.isTransient(failure) => + akka.pattern.after(waitTime, scheduler)(Future.successful(GcsBatchRetry(context.nextTransient, failure))) + case Some(waitTime) => + akka.pattern.after(waitTime, scheduler)(Future.successful(GcsBatchRetry(context.next, failure))) + case None => fail(context, failure) + } + + // Otherwise just fail the command + case failure => fail(context, failure) + } + + /** + * Fail a command context with a failure. 
+ */ + private def fail(context: GcsBatchCommandContext[_, _], failure: Throwable) = Future.successful(GcsBatchTerminal(context.fail(failure))) +} diff --git a/engine/src/main/scala/cromwell/engine/io/gcs/GcsResponse.scala b/engine/src/main/scala/cromwell/engine/io/gcs/GcsResponse.scala new file mode 100644 index 000000000..937e2b660 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/io/gcs/GcsResponse.scala @@ -0,0 +1,12 @@ +package cromwell.engine.io.gcs + +import cromwell.engine.io.IoActor._ + +/** + * ADT used only inside the batch stream + * @tparam T final type of the result of the Command + */ +private [gcs] sealed trait GcsBatchResponse[T] +private [gcs] case class GcsBatchTerminal[T](ioResult: IoResult) extends GcsBatchResponse[T] +private [gcs] case class GcsBatchRetry[T](context: GcsBatchCommandContext[T, _], failure: Throwable) extends GcsBatchResponse[T] +private [gcs] case class GcsBatchNextRequest[T](context: GcsBatchCommandContext[T, _]) extends GcsBatchResponse[T] diff --git a/engine/src/main/scala/cromwell/engine/io/gcs/ParallelGcsBatchFlow.scala b/engine/src/main/scala/cromwell/engine/io/gcs/ParallelGcsBatchFlow.scala new file mode 100644 index 000000000..633e270d8 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/io/gcs/ParallelGcsBatchFlow.scala @@ -0,0 +1,29 @@ +package cromwell.engine.io.gcs + +import akka.actor.Scheduler +import akka.stream.FlowShape +import akka.stream.scaladsl.{Balance, GraphDSL, Merge} +import cromwell.engine.io.IoActor.IoResult + +import scala.concurrent.ExecutionContext + +/** + * Balancer that distributes requests to multiple batch flows in parallel + */ +class ParallelGcsBatchFlow(parallelism: Int, batchSize: Int, scheduler: Scheduler)(implicit ec: ExecutionContext) { + + val flow = GraphDSL.create() { implicit builder => + import GraphDSL.Implicits._ + val balancer = builder.add(Balance[GcsBatchCommandContext[_, _]](parallelism, waitForAllDownstreams = false)) + val merge = 
builder.add(Merge[IoResult](parallelism)) + + for (_ <- 1 to parallelism) { + val workerFlow = new GcsBatchFlow(batchSize, scheduler).flow + // for each worker, add an edge from the balancer to the worker, then wire + // it to the merge element + balancer ~> workerFlow.async ~> merge + } + + FlowShape(balancer.in, merge.out) + } +} diff --git a/engine/src/main/scala/cromwell/engine/io/nio/NioFlow.scala b/engine/src/main/scala/cromwell/engine/io/nio/NioFlow.scala new file mode 100644 index 000000000..0ffd549e2 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/io/nio/NioFlow.scala @@ -0,0 +1,93 @@ +package cromwell.engine.io.nio + +import akka.actor.{ActorSystem, Scheduler} +import akka.stream.scaladsl.Flow +import cromwell.core.io._ +import cromwell.core.path.{DefaultPath, Path} +import cromwell.core.retry.Retry +import cromwell.engine.io.IoActor +import cromwell.engine.io.IoActor.{DefaultCommandContext, IoResult} +import cromwell.filesystems.gcs.GcsPath +import cromwell.util.TryWithResource._ + +import scala.concurrent.{ExecutionContext, Future} +import scala.io.Codec + +/** + * Flow that executes IO operations by calling java.nio.Path methods + */ +class NioFlow(parallelism: Int, scheduler: Scheduler, nbAttempts: Int = IoActor.MaxAttemptsNumber)(implicit ec: ExecutionContext, actorSystem: ActorSystem) { + private val processCommand: DefaultCommandContext[_] => Future[IoResult] = commandContext => { + val operationResult = Retry.withRetry( + () => handleSingleCommand(commandContext.request), + maxRetries = Option(nbAttempts), + backoff = IoCommand.defaultBackoff, + isTransient = IoActor.isTransient, + isFatal = IoActor.isFatal + ) + + operationResult map { (_, commandContext) } recoverWith { + case failure => Future.successful(commandContext.fail(failure)) + } + } + + private [nio] def handleSingleCommand(ioSingleCommand: IoCommand[_]) = { + ioSingleCommand match { + case copyCommand: IoCopyCommand => copy(copyCommand) map copyCommand.success + case 
writeCommand: IoWriteCommand => write(writeCommand) map writeCommand.success + case deleteCommand: IoDeleteCommand => delete(deleteCommand) map deleteCommand.success + case sizeCommand: IoSizeCommand => size(sizeCommand) map sizeCommand.success + case readAsStringCommand: IoContentAsStringCommand => readAsString(readAsStringCommand) map readAsStringCommand.success + case hashCommand: IoHashCommand => hash(hashCommand) map hashCommand.success + case touchCommand: IoTouchCommand => touch(touchCommand) map touchCommand.success + case _ => Future.failed(new NotImplementedError("Method not implemented")) + } + } + + val flow = Flow[DefaultCommandContext[_]].mapAsyncUnordered[IoResult](parallelism)(processCommand) + + private def copy(copy: IoCopyCommand) = Future { + createDirectoriesForSFSPath(copy.destination) + copy.source.copyTo(copy.destination, copy.overwrite) + () + } + + private def write(write: IoWriteCommand) = Future { + createDirectoriesForSFSPath(write.file) + write.file.write(write.content)(write.openOptions, Codec.UTF8) + () + } + + private def delete(delete: IoDeleteCommand) = Future { + delete.file.delete(delete.swallowIOExceptions) + () + } + + private def readAsString(read: IoContentAsStringCommand) = Future { + read.file.contentAsString + } + + private def size(size: IoSizeCommand) = Future { + size.file.size + } + + private def hash(hash: IoHashCommand) = { + hash.file match { + case gcsPath: GcsPath => Future { gcsPath.cloudStorage.get(gcsPath.blob).getCrc32c } + case path => Future.fromTry( + tryWithResource(() => path.newInputStream) { inputStream => + org.apache.commons.codec.digest.DigestUtils.md5Hex(inputStream) + } + ) + } + } + + private def touch(touch: IoTouchCommand) = Future { + touch.file.touch() + } + + private def createDirectoriesForSFSPath(path: Path) = path match { + case _: DefaultPath => path.parent.createPermissionedDirectories() + case _ => + } +} diff --git a/engine/src/main/scala/cromwell/engine/package.scala 
b/engine/src/main/scala/cromwell/engine/package.scala index 4ea1c964b..198645503 100644 --- a/engine/src/main/scala/cromwell/engine/package.scala +++ b/engine/src/main/scala/cromwell/engine/package.scala @@ -1,22 +1,11 @@ package cromwell -import java.time.OffsetDateTime - import cromwell.core.JobOutput -import wdl4s._ -import wdl4s.values.WdlValue - -import scala.language.implicitConversions -import scala.util.{Failure, Success, Try} +import wdl4s.wdl._ +import wdl4s.wdl.values.WdlValue package object engine { - final case class AbortFunction(function: () => Unit) - final case class AbortRegistrationFunction(register: AbortFunction => Unit) - - final case class FailureEventEntry(failure: String, timestamp: OffsetDateTime) - final case class CallAttempt(fqn: FullyQualifiedName, attempt: Int) - implicit class EnhancedFullyQualifiedName(val fqn: FullyQualifiedName) extends AnyVal { def scopeAndVariableName: (String, String) = { val array = fqn.split("\\.(?=[^\\.]+$)") @@ -30,15 +19,4 @@ package object engine { } } - object WorkflowFailureMode { - def tryParse(mode: String): Try[WorkflowFailureMode] = { - val modes = Seq(ContinueWhilePossible, NoNewCalls) - modes find { _.toString.equalsIgnoreCase(mode) } map { Success(_) } getOrElse Failure(new Exception(s"Invalid workflow failure mode: $mode")) - } - } - sealed trait WorkflowFailureMode { - def allowNewCallsAfterFailure: Boolean - } - case object ContinueWhilePossible extends WorkflowFailureMode { override val allowNewCallsAfterFailure = true } - case object NoNewCalls extends WorkflowFailureMode { override val allowNewCallsAfterFailure = false } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala b/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala index d7605a2b9..995dd9551 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala +++ 
b/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala @@ -1,22 +1,27 @@ package cromwell.engine.workflow -import java.nio.file.Path import java.util.UUID import akka.actor.FSM.{CurrentState, Transition} import akka.actor._ -import better.files._ +import akka.stream.ActorMaterializer +import cats.instances.try_._ +import cats.syntax.functor._ +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.core._ +import cromwell.core.path.Path import cromwell.core.retry.SimpleExponentialBackoff -import cromwell.core.{ExecutionStore => _, _} import cromwell.engine.workflow.SingleWorkflowRunnerActor._ import cromwell.engine.workflow.WorkflowManagerActor.RetrieveNewWorkflows -import cromwell.engine.workflow.workflowstore.WorkflowStoreActor import cromwell.engine.workflow.workflowstore.WorkflowStoreActor.SubmitWorkflow +import cromwell.engine.workflow.workflowstore.{InMemoryWorkflowStore, WorkflowStoreEngineActor, WorkflowStoreSubmitActor} +import cromwell.jobstore.EmptyJobStoreActor import cromwell.server.CromwellRootActor import cromwell.services.metadata.MetadataService.{GetSingleWorkflowMetadataAction, GetStatus, WorkflowOutputs} -import cromwell.webservice.PerRequest.RequestComplete +import cromwell.services.metadata.impl.WriteMetadataActor.{CheckPendingWrites, HasPendingWrites, NoPendingWrites} +import cromwell.subworkflowstore.EmptySubWorkflowStoreActor import cromwell.webservice.metadata.MetadataBuilderActor -import spray.http.StatusCodes +import cromwell.webservice.metadata.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse} import spray.json._ import scala.concurrent.ExecutionContext.Implicits.global @@ -24,158 +29,167 @@ import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.{Failure, Try} -object SingleWorkflowRunnerActor { - def props(source: WorkflowSourceFiles, metadataOutputFile: Option[Path]): Props = { - Props(new SingleWorkflowRunnerActor(source, metadataOutputFile)) - } - - 
sealed trait RunnerMessage - // The message to actually run the workflow is made explicit so the non-actor Main can `ask` this actor to do the - // running and collect a result. - case object RunWorkflow extends RunnerMessage - private case object IssuePollRequest extends RunnerMessage - private case object IssueReply extends RunnerMessage - - sealed trait RunnerState - case object NotStarted extends RunnerState - case object RunningWorkflow extends RunnerState - case object RequestingOutputs extends RunnerState - case object RequestingMetadata extends RunnerState - case object Done extends RunnerState - - final case class RunnerData(replyTo: Option[ActorRef] = None, - terminalState: Option[WorkflowState] = None, - id: Option[WorkflowId] = None, - failures: Seq[Throwable] = Seq.empty) { - - def addFailure(message: String): RunnerData = addFailure(new RuntimeException(message)) - - def addFailure(e: Throwable): RunnerData = this.copy(failures = e +: failures) - } - - implicit class EnhancedJsObject(val jsObject: JsObject) extends AnyVal { - def state: WorkflowState = WorkflowState.fromString(jsObject.fields("status").asInstanceOf[JsString].value) - } - - private val Tag = "SingleWorkflowRunnerActor" -} - /** * Designed explicitly for the use case of the 'run' functionality in Main. This Actor will start a workflow, - * print out the outputs when complete and then shut down the actor system. Note that multiple aspects of this - * are sub-optimal for future use cases where one might want a single workflow being run. + * print out the outputs when complete and reply with a result. 
*/ -class SingleWorkflowRunnerActor(source: WorkflowSourceFiles, metadataOutputPath: Option[Path]) - extends CromwellRootActor with LoggingFSM[RunnerState, RunnerData] { +class SingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, + metadataOutputPath: Option[Path], + gracefulShutdown: Boolean, + abortJobsOnTerminate: Boolean + )(implicit materializer: ActorMaterializer) + extends CromwellRootActor(gracefulShutdown, abortJobsOnTerminate) with LoggingFSM[RunnerState, SwraData] { + + override val serverMode = false import SingleWorkflowRunnerActor._ private val backoff = SimpleExponentialBackoff(1 second, 1 minute, 1.2) - startWith(NotStarted, RunnerData()) + override lazy val workflowStore = new InMemoryWorkflowStore() + override lazy val jobStoreActor = context.actorOf(EmptyJobStoreActor.props) + override lazy val subWorkflowStoreActor = context.actorOf(EmptySubWorkflowStoreActor.props) - private def requestMetadata: State = { - val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"MetadataRequest-Workflow-${stateData.id.get}") - metadataBuilder ! GetSingleWorkflowMetadataAction(stateData.id.get, None, None) - goto (RequestingMetadata) - } - - private def schedulePollRequest(): Unit = { - context.system.scheduler.scheduleOnce(backoff.backoffMillis.millis, self, IssuePollRequest) - } - - private def requestStatus(): Unit = { - // This requests status via the metadata service rather than instituting an FSM watch on the underlying workflow actor. - // Cromwell's eventual consistency means it isn't safe to use an FSM transition to a terminal state as the signal for - // when outputs or metadata have stabilized. - val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"StatusRequest-Workflow-${stateData.id.get}-request-${UUID.randomUUID()}") - metadataBuilder ! GetStatus(stateData.id.get) - } - - private def issueReply: State = { - self ! 
IssueReply - goto (Done) - } + startWith(NotStarted, EmptySwraData) when (NotStarted) { - case Event(RunWorkflow, data) => + case Event(RunWorkflow, EmptySwraData) => log.info(s"$Tag: Submitting workflow") workflowStoreActor ! SubmitWorkflow(source) - goto (RunningWorkflow) using data.copy(replyTo = Option(sender())) + goto(SubmittedWorkflow) using SubmittedSwraData(sender()) } - when (RunningWorkflow) { - case Event(WorkflowStoreActor.WorkflowSubmittedToStore(id), data) => + when (SubmittedWorkflow) { + case Event(WorkflowStoreSubmitActor.WorkflowSubmittedToStore(id), SubmittedSwraData(replyTo)) => log.info(s"$Tag: Workflow submitted UUID($id)") // Since we only have a single workflow, force the WorkflowManagerActor's hand in case the polling rate is long workflowManagerActor ! RetrieveNewWorkflows schedulePollRequest() - stay() using data.copy(id = Option(id)) - case Event(IssuePollRequest, data) => - data.id match { - case None => schedulePollRequest() - case _ => requestStatus() - } + goto(RunningWorkflow) using RunningSwraData(replyTo, id) + } + + when (RunningWorkflow) { + case Event(IssuePollRequest, RunningSwraData(_, id)) => + requestStatus(id) stay() - case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), data) if !jsObject.state.isTerminal => + case Event(BuiltMetadataResponse(jsObject: JsObject), RunningSwraData(_, _)) if !jsObject.state.isTerminal => schedulePollRequest() stay() - case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), data) if jsObject.state == WorkflowSucceeded => + case Event(BuiltMetadataResponse(jsObject: JsObject), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowSucceeded => + log.info(s"$Tag workflow finished with status '$WorkflowSucceeded'.") + serviceRegistryActor ! 
CheckPendingWrites + goto(WaitingForFlushedMetadata) using SucceededSwraData(replyTo, id) + case Event(BuiltMetadataResponse(jsObject: JsObject), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowFailed => + log.info(s"$Tag workflow finished with status '$WorkflowFailed'.") + serviceRegistryActor ! CheckPendingWrites + goto(WaitingForFlushedMetadata) using FailedSwraData(replyTo, id, new RuntimeException(s"Workflow $id transitioned to state $WorkflowFailed")) + case Event(BuiltMetadataResponse(jsObject: JsObject), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowAborted => + log.info(s"$Tag workflow finished with status '$WorkflowAborted'.") + serviceRegistryActor ! CheckPendingWrites + goto(WaitingForFlushedMetadata) using AbortedSwraData(replyTo, id) + } + + when (WaitingForFlushedMetadata) { + case Event(HasPendingWrites, _) => + context.system.scheduler.scheduleOnce(1 second, serviceRegistryActor, CheckPendingWrites)(context.system.dispatcher, self) + stay() + case Event(NoPendingWrites, data: SucceededSwraData) => val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), - s"CompleteRequest-Workflow-${stateData.id.get}-request-${UUID.randomUUID()}") - metadataBuilder ! WorkflowOutputs(data.id.get) - goto(RequestingOutputs) using data.copy(terminalState = Option(WorkflowSucceeded)) - case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), data) if jsObject.state == WorkflowFailed => - val updatedData = data.copy(terminalState = Option(WorkflowFailed)).addFailure(s"Workflow ${data.id.get} transitioned to state Failed") - // If there's an output path specified then request metadata, otherwise issue a reply to the original sender. - val nextState = if (metadataOutputPath.isDefined) requestMetadata else issueReply - nextState using updatedData + s"CompleteRequest-Workflow-${data.id}-request-${UUID.randomUUID()}") + metadataBuilder ! 
WorkflowOutputs(data.id) + goto(RequestingOutputs) + case Event(NoPendingWrites, data : TerminalSwraData) => + requestMetadataOrIssueReply(data) } when (RequestingOutputs) { - case Event(RequestComplete((StatusCodes.OK, outputs: JsObject)), _) => + case Event(BuiltMetadataResponse(outputs: JsObject), data: TerminalSwraData) => outputOutputs(outputs) - if (metadataOutputPath.isDefined) requestMetadata else issueReply + requestMetadataOrIssueReply(data) } when (RequestingMetadata) { - case Event(RequestComplete((StatusCodes.OK, metadata: JsObject)), _) => + case Event(BuiltMetadataResponse(metadata: JsObject), data: TerminalSwraData) => outputMetadata(metadata) - issueReply - } - - when (Done) { - case Event(IssueReply, data) => - data.terminalState foreach { state => log.info(s"$Tag workflow finished with status '$state'.") } - data.failures foreach { e => log.error(e, e.getMessage) } - - val message = data.terminalState collect { case WorkflowSucceeded => () } getOrElse Status.Failure(data.failures.head) - data.replyTo foreach { _ ! message } - stay() + issueReply(data) } onTransition { case NotStarted -> RunningWorkflow => schedulePollRequest() } - private def failAndFinish(e: Throwable): State = { - log.error(e, s"$Tag received Failure message: ${e.getMessage}") - issueReply using stateData.addFailure(e) - } - whenUnhandled { // Handle failures for all failure responses generically. 
- case Event(r: WorkflowStoreActor.WorkflowAbortFailed, data) => failAndFinish(r.reason) - case Event(Failure(e), data) => failAndFinish(e) - case Event(Status.Failure(e), data) => failAndFinish(e) - case Event(RequestComplete((_, snap)), _) => failAndFinish(new RuntimeException(s"Unexpected API completion message: $snap")) + case Event(r: WorkflowStoreEngineActor.WorkflowAbortFailed, data) => failAndFinish(r.reason, data) + case Event(Failure(e), data) => failAndFinish(e, data) + case Event(Status.Failure(e), data) => failAndFinish(e, data) + case Event(FailedMetadataResponse(e), data) => failAndFinish(e, data) case Event((CurrentState(_, _) | Transition(_, _, _)), _) => // ignore uninteresting current state and transition messages stay() - case Event(m, _) => - log.warning(s"$Tag: received unexpected message: $m") + case Event(m, d) => + log.warning(s"$Tag: received unexpected message: $m in state ${d.getClass.getSimpleName}") stay() } + private def requestMetadataOrIssueReply(newData: TerminalSwraData) = if (metadataOutputPath.isDefined) requestMetadata(newData) else issueReply(newData) + + private def requestMetadata(newData: TerminalSwraData): State = { + val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"MetadataRequest-Workflow-${newData.id}") + metadataBuilder ! GetSingleWorkflowMetadataAction(newData.id, None, None, expandSubWorkflows = true) + goto (RequestingMetadata) using newData + } + + private def schedulePollRequest(): Unit = { + // -Ywarn-value-discard should stash Cancellable to cancel + context.system.scheduler.scheduleOnce(backoff.backoffMillis.millis, self, IssuePollRequest) + () + } + + private def requestStatus(id: WorkflowId): Unit = { + // This requests status via the metadata service rather than instituting an FSM watch on the underlying workflow actor. 
+ // Cromwell's eventual consistency means it isn't safe to use an FSM transition to a terminal state as the signal for + // when outputs or metadata have stabilized. + val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"StatusRequest-Workflow-$id-request-${UUID.randomUUID()}") + metadataBuilder ! GetStatus(id) + } + + private def issueSuccessReply(replyTo: ActorRef): State = { + replyTo.tell(msg = (), sender = self) // Because replyTo ! () is the parameterless call replyTo.!() + context.stop(self) + stay() + } + + private def issueFailureReply(replyTo: ActorRef, e: Throwable): State = { + replyTo ! Status.Failure(e) + context.stop(self) + stay() + } + + private def issueReply(data: TerminalSwraData) = { + data match { + case s: SucceededSwraData => issueSuccessReply(s.replyTo) + case f: FailedSwraData => issueFailureReply(f.replyTo, f.failure) + case a: AbortedSwraData => issueSuccessReply(a.replyTo) + + } + } + + private def failAndFinish(e: Throwable, data: SwraData): State = { + log.error(e, s"$Tag received Failure message: ${e.getMessage}") + data match { + case EmptySwraData => + log.error(e, "Cannot issue response. Need a 'replyTo' address to issue the exception response") + context.stop(self) + stay() + case SubmittedSwraData(replyTo) => + issueFailureReply(replyTo, e) + case RunningSwraData(replyTo, _) => + issueFailureReply(replyTo, e) + case c: TerminalSwraData => + issueFailureReply(c.replyTo, e) + } + } + /** * Outputs the outputs to stdout, and then requests the metadata. 
*/ @@ -185,13 +199,58 @@ class SingleWorkflowRunnerActor(source: WorkflowSourceFiles, metadataOutputPath: private def outputMetadata(metadata: JsObject): Try[Unit] = { Try { - val path = File(metadataOutputPath.get) + val path = metadataOutputPath.get if (path.isDirectory) { log.error("Specified metadata path is a directory, should be a file: " + path) } else { log.info(s"$Tag writing metadata to $path") - path.createIfNotExists(asDirectory = false, createParents = true).write(metadata.prettyPrint) + path.createIfNotExists(createParents = true).write(metadata.prettyPrint) } - } + } void } } + +object SingleWorkflowRunnerActor { + def props(source: WorkflowSourceFilesCollection, + metadataOutputFile: Option[Path], + gracefulShutdown: Boolean, + abortJobsOnTerminate: Boolean)(implicit materializer: ActorMaterializer): Props = { + Props(new SingleWorkflowRunnerActor(source, metadataOutputFile, gracefulShutdown, abortJobsOnTerminate)).withDispatcher(EngineDispatcher) + } + + sealed trait RunnerMessage + // The message to actually run the workflow is made explicit so the non-actor Main can `ask` this actor to do the + // running and collect a result. 
+ case object RunWorkflow extends RunnerMessage + private case object IssuePollRequest extends RunnerMessage + + sealed trait RunnerState + case object NotStarted extends RunnerState + case object SubmittedWorkflow extends RunnerState + case object RunningWorkflow extends RunnerState + case object WaitingForFlushedMetadata extends RunnerState + case object RequestingOutputs extends RunnerState + case object RequestingMetadata extends RunnerState + + sealed trait SwraData + case object EmptySwraData extends SwraData + final case class SubmittedSwraData(replyTo: ActorRef) extends SwraData + final case class RunningSwraData(replyTo: ActorRef, id: WorkflowId) extends SwraData + + sealed trait TerminalSwraData extends SwraData { def replyTo: ActorRef; def terminalState: WorkflowState; def id: WorkflowId } + final case class SucceededSwraData(replyTo: ActorRef, + id: WorkflowId) extends TerminalSwraData { override val terminalState = WorkflowSucceeded } + + final case class FailedSwraData(replyTo: ActorRef, + id: WorkflowId, + failure: Throwable) extends TerminalSwraData { override val terminalState = WorkflowFailed } + + final case class AbortedSwraData(replyTo: ActorRef, + id: WorkflowId) extends TerminalSwraData { override val terminalState = WorkflowAborted } + + implicit class EnhancedJsObject(val jsObject: JsObject) extends AnyVal { + def state: WorkflowState = WorkflowState.withName(jsObject.fields("status").asInstanceOf[JsString].value) + } + + private val Tag = "SingleWorkflowRunnerActor" +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index 3759136f8..16e8f08f0 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -1,29 +1,28 @@ package cromwell.engine.workflow -import java.time.OffsetDateTime - import akka.actor.SupervisorStrategy.Escalate import 
akka.actor._ import com.typesafe.config.Config -import cromwell.backend.AllBackendInitializationData +import cromwell.backend._ import cromwell.core.Dispatcher.EngineDispatcher import cromwell.core.WorkflowOptions.FinalWorkflowLogDir import cromwell.core._ import cromwell.core.logging.{WorkflowLogger, WorkflowLogging} +import cromwell.core.path.{PathBuilder, PathFactory} import cromwell.engine._ +import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.WorkflowActor._ import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{MaterializeWorkflowDescriptorCommand, MaterializeWorkflowDescriptorFailureResponse, MaterializeWorkflowDescriptorSuccessResponse} import cromwell.engine.workflow.lifecycle.WorkflowFinalizationActor.{StartFinalizationCommand, WorkflowFinalizationFailedResponse, WorkflowFinalizationSucceededResponse} import cromwell.engine.workflow.lifecycle.WorkflowInitializationActor.{StartInitializationCommand, WorkflowInitializationFailedResponse, WorkflowInitializationSucceededResponse} import cromwell.engine.workflow.lifecycle._ -import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ -import cromwell.services.metadata.MetadataService._ -import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} +import cromwell.engine.workflow.lifecycle.execution.{WorkflowExecutionActor, WorkflowMetadataHelper} +import cromwell.subworkflowstore.SubWorkflowStoreActor.WorkflowComplete import cromwell.webservice.EngineStatsActor -import scala.language.postfixOps -import scala.util.Random +import scala.concurrent.Future +import scala.util.Failure object WorkflowActor { @@ -135,14 +134,36 @@ object WorkflowActor { def props(workflowId: WorkflowId, startMode: StartMode, - wdlSource: WorkflowSourceFiles, + workflowSourceFilesCollection: WorkflowSourceFilesCollection, conf: Config, + ioActor: ActorRef, 
serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, - callCacheReadActor: ActorRef): Props = { - Props(new WorkflowActor(workflowId, startMode, wdlSource, conf, serviceRegistryActor, workflowLogCopyRouter, - jobStoreActor, callCacheReadActor)).withDispatcher(EngineDispatcher) + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + dockerHashActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + serverMode: Boolean): Props = { + Props( + new WorkflowActor( + workflowId = workflowId, + startMode = startMode, + workflowSourceFilesCollection = workflowSourceFilesCollection, + conf = conf, + ioActor = ioActor, + serviceRegistryActor = serviceRegistryActor, + workflowLogCopyRouter = workflowLogCopyRouter, + jobStoreActor = jobStoreActor, + subWorkflowStoreActor = subWorkflowStoreActor, + callCacheReadActor = callCacheReadActor, + callCacheWriteActor = callCacheWriteActor, + dockerHashActor = dockerHashActor, + jobTokenDispenserActor = jobTokenDispenserActor, + backendSingletonCollection = backendSingletonCollection, + serverMode = serverMode)).withDispatcher(EngineDispatcher) } } @@ -151,59 +172,74 @@ object WorkflowActor { */ class WorkflowActor(val workflowId: WorkflowId, startMode: StartMode, - workflowSources: WorkflowSourceFiles, + workflowSourceFilesCollection: WorkflowSourceFilesCollection, conf: Config, - serviceRegistryActor: ActorRef, + ioActor: ActorRef, + override val serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, - callCacheReadActor: ActorRef) - extends LoggingFSM[WorkflowActorState, WorkflowActorData] with WorkflowLogging with PathFactory { + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + dockerHashActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + serverMode: 
Boolean) + extends LoggingFSM[WorkflowActorState, WorkflowActorData] with WorkflowLogging with WorkflowMetadataHelper { implicit val ec = context.dispatcher + override val workflowIdForLogging = workflowId + + private val restarting = startMode match { + case StartNewWorkflow => false + case RestartExistingWorkflow => true + } + + private val workflowDockerLookupActor = context.actorOf( + WorkflowDockerLookupActor.props(workflowId, dockerHashActor, startMode), s"WorkflowDockerLookupActor-$workflowId") startWith(WorkflowUnstartedState, WorkflowActorData.empty) - pushCurrentStateToMetadataService(WorkflowUnstartedState.workflowState) + pushCurrentStateToMetadataService(workflowId, WorkflowUnstartedState.workflowState) override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() { case _ => Escalate } when(WorkflowUnstartedState) { case Event(StartWorkflowCommand, _) => - val actor = context.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryActor, workflowId), + val actor = context.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryActor, workflowId, importLocalFilesystem = !serverMode), "MaterializeWorkflowDescriptorActor") - val startEvent = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.StartTime), MetadataValue(OffsetDateTime.now.toString)) - serviceRegistryActor ! PutMetadataAction(startEvent) - - actor ! MaterializeWorkflowDescriptorCommand(workflowSources, conf) + pushWorkflowStart(workflowId) + actor ! 
MaterializeWorkflowDescriptorCommand(workflowSourceFilesCollection, conf) goto(MaterializingWorkflowDescriptorState) using stateData.copy(currentLifecycleStateActor = Option(actor)) - case Event(AbortWorkflowCommand, stateData) => goto(WorkflowAbortedState) + case Event(AbortWorkflowCommand, _) => goto(WorkflowAbortedState) } when(MaterializingWorkflowDescriptorState) { case Event(MaterializeWorkflowDescriptorSuccessResponse(workflowDescriptor), data) => - val initializerActor = context.actorOf(WorkflowInitializationActor.props(workflowId, workflowDescriptor, serviceRegistryActor), + val initializerActor = context.actorOf(WorkflowInitializationActor.props(workflowId, workflowDescriptor, ioActor, serviceRegistryActor, restarting), name = s"WorkflowInitializationActor-$workflowId") initializerActor ! StartInitializationCommand goto(InitializingWorkflowState) using data.copy(currentLifecycleStateActor = Option(initializerActor), workflowDescriptor = Option(workflowDescriptor)) case Event(MaterializeWorkflowDescriptorFailureResponse(reason: Throwable), data) => goto(WorkflowFailedState) using data.copy(lastStateReached = StateCheckpoint(MaterializingWorkflowDescriptorState, Option(List(reason)))) - case Event(AbortWorkflowCommand, stateData) => + case Event(AbortWorkflowCommand, _) => // No lifecycle sub-actors exist yet, so no indirection via WorkflowAbortingState is necessary: goto(WorkflowAbortedState) } when(InitializingWorkflowState) { case Event(WorkflowInitializationSucceededResponse(initializationData), data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - val restarting = startMode match { - case StartNewWorkflow => false - case RestartExistingWorkflow => true - } - - val executionActor = context.actorOf(WorkflowExecutionActor.props(workflowId, + val executionActor = context.actorOf(WorkflowExecutionActor.props( workflowDescriptor, - serviceRegistryActor, - jobStoreActor, - callCacheReadActor, + ioActor = ioActor, + serviceRegistryActor = 
serviceRegistryActor, + jobStoreActor = jobStoreActor, + subWorkflowStoreActor = subWorkflowStoreActor, + callCacheReadActor = callCacheReadActor, + callCacheWriteActor = callCacheWriteActor, + workflowDockerLookupActor = workflowDockerLookupActor, + jobTokenDispenserActor = jobTokenDispenserActor, + backendSingletonCollection, initializationData, restarting = restarting), name = s"WorkflowExecutionActor-$workflowId") @@ -211,16 +247,16 @@ class WorkflowActor(val workflowId: WorkflowId, goto(ExecutingWorkflowState) using data.copy(currentLifecycleStateActor = Option(executionActor), initializationData = initializationData) case Event(WorkflowInitializationFailedResponse(reason), data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, ExecutionStore.empty, OutputStore.empty, Option(reason.toList)) + finalizeWorkflow(data, workflowDescriptor, Map.empty, Map.empty, Option(reason.toList)) } when(ExecutingWorkflowState) { - case Event(WorkflowExecutionSucceededResponse(executionStore, outputStore), + case Event(WorkflowExecutionSucceededResponse(jobKeys, outputs), data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, executionStore, outputStore, None) - case Event(WorkflowExecutionFailedResponse(executionStore, outputStore, failures), + finalizeWorkflow(data, workflowDescriptor, jobKeys, outputs, None) + case Event(WorkflowExecutionFailedResponse(jobKeys, failures), data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, executionStore, outputStore, Option(failures.toList)) + finalizeWorkflow(data, workflowDescriptor, jobKeys, Map.empty, Option(List(failures))) case Event(msg @ EngineStatsActor.JobCountQuery, data) => data.currentLifecycleStateActor match { case Some(a) => a forward msg @@ -238,8 +274,8 @@ class WorkflowActor(val workflowId: WorkflowId, } when(WorkflowAbortingState) { - case Event(x: 
EngineLifecycleStateCompleteResponse, data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, ExecutionStore.empty, OutputStore.empty, failures = None) + case Event(_: EngineLifecycleStateCompleteResponse, data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => + finalizeWorkflow(data, workflowDescriptor, Map.empty, Map.empty, failures = None) case _ => stay() } @@ -249,11 +285,6 @@ class WorkflowActor(val workflowId: WorkflowId, when(WorkflowSucceededState) { FSM.NullFunction } whenUnhandled { - case Event(MetadataPutFailed(action, error), _) => - // Do something useful here?? - workflowLogger.warn(s"Put failed for Metadata action $action : ${error.getMessage}") - stay - case Event(MetadataPutAcknowledgement(_), _) => stay() case Event(AbortWorkflowCommand, WorkflowActorData(Some(actor), _, _, _)) => actor ! EngineLifecycleActorAbortCommand goto(WorkflowAbortingState) @@ -266,75 +297,94 @@ class WorkflowActor(val workflowId: WorkflowId, } onTransition { - case fromState -> toState => - workflowLogger.debug(s"transitioning from {} to {}", arg1 = fromState, arg2 = toState) - // This updates the workflow status - // Only publish "External" state to metadata service - // workflowState maps a state to an "external" state (e.g all states extending WorkflowActorRunningState map to WorkflowRunning) - if (fromState.workflowState != toState.workflowState) { - pushCurrentStateToMetadataService(toState.workflowState) - } - } - - onTransition { case (oldState, terminalState: WorkflowActorTerminalState) => workflowLogger.debug(s"transition from {} to {}. Stopping self.", arg1 = oldState, arg2 = terminalState) - // Add the end time of the workflow in the MetadataService - val now = OffsetDateTime.now - val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.EndTime), MetadataValue(now)) - serviceRegistryActor ! 
PutMetadataAction(metadataEventMsg) + pushWorkflowEnd(workflowId) + subWorkflowStoreActor ! WorkflowComplete(workflowId) terminalState match { case WorkflowFailedState => val failures = nextStateData.lastStateReached.failures.getOrElse(List.empty) - val failureEvents = failures flatMap { r => throwableToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Failures}[${Random.nextInt(Int.MaxValue)}]"), r) } - serviceRegistryActor ! PutMetadataAction(failureEvents) + pushWorkflowFailures(workflowId, failures) context.parent ! WorkflowFailedResponse(workflowId, nextStateData.lastStateReached.state, failures) case _ => // The WMA is watching state transitions and needs no further info } // Copy/Delete workflow logs if (WorkflowLogger.isEnabled) { - stateData.workflowDescriptor foreach { wd => - wd.getWorkflowOption(FinalWorkflowLogDir) match { - case Some(destinationDir) => - workflowLogCopyRouter ! CopyWorkflowLogsActor.Copy(wd.id, buildPath(destinationDir, wd.engineFilesystems)) - case None if WorkflowLogger.isTemporary => workflowLogger.deleteLogFile() + /* + * The submitted workflow options have been previously validated by the CromwellApiHandler. These are + * being recreated so that in case MaterializeWorkflowDescriptor fails, the workflow logs can still + * be copied by accessing the workflow options outside of the EngineWorkflowDescriptor. 
+ */ + def bruteForceWorkflowOptions: WorkflowOptions = WorkflowOptions.fromJsonString(workflowSourceFilesCollection.workflowOptionsJson).getOrElse(WorkflowOptions.fromJsonString("{}").get) + val system = context.system + val ec = context.system.dispatcher + def bruteForcePathBuilders: Future[List[PathBuilder]] = EngineFilesystems.pathBuildersForWorkflow(bruteForceWorkflowOptions)(system, ec) + + val (workflowOptions, pathBuilders) = stateData.workflowDescriptor match { + case Some(wd) => (wd.backendDescriptor.workflowOptions, Future.successful(wd.pathBuilders)) + case None => (bruteForceWorkflowOptions, bruteForcePathBuilders) + } + + workflowOptions.get(FinalWorkflowLogDir).toOption match { + case Some(destinationDir) => + pathBuilders.map(pb => workflowLogCopyRouter ! CopyWorkflowLogsActor.Copy(workflowId, PathFactory.buildPath(destinationDir, pb)))(ec) + case None if WorkflowLogger.isTemporary => workflowLogger.deleteLogFile() match { + case Failure(f) => log.error(f, "Failed to delete workflow log") case _ => } + case _ => } } - context stop self } + onTransition { + case fromState -> toState => + workflowLogger.debug(s"transitioning from {} to {}", arg1 = fromState, arg2 = toState) + // This updates the workflow status + // Only publish "External" state to metadata service + // workflowState maps a state to an "external" state (e.g all states extending WorkflowActorRunningState map to WorkflowRunning) + if (fromState.workflowState != toState.workflowState) { + pushCurrentStateToMetadataService(workflowId, toState.workflowState) + } + } + private def finalizationSucceeded(data: WorkflowActorData) = { val finalState = data.lastStateReached match { case StateCheckpoint(WorkflowAbortingState, None) => WorkflowAbortedState - case StateCheckpoint(state, Some(failures)) => WorkflowFailedState - case StateCheckpoint(state, None) => WorkflowSucceededState + case StateCheckpoint(_, Some(_)) => WorkflowFailedState + case StateCheckpoint(_, None) => 
WorkflowSucceededState } goto(finalState) using data.copy(currentLifecycleStateActor = None) } - private[workflow] def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, outputStore: OutputStore) = { - context.actorOf(WorkflowFinalizationActor.props(workflowId, workflowDescriptor, executionStore, outputStore, stateData.initializationData), name = s"WorkflowFinalizationActor") + private[workflow] def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs) = { + val copyWorkflowOutputsActorProps = stateName match { + case InitializingWorkflowState => None + case _ => Option(CopyWorkflowOutputsActor.props(workflowIdForLogging, ioActor, workflowDescriptor, workflowOutputs, stateData.initializationData)) + } + + context.actorOf(WorkflowFinalizationActor.props( + workflowId = workflowId, + workflowDescriptor = workflowDescriptor, + ioActor = ioActor, + jobExecutionMap = jobExecutionMap, + workflowOutputs = workflowOutputs, + initializationData = stateData.initializationData, + copyWorkflowOutputsActor = copyWorkflowOutputsActorProps + ), name = s"WorkflowFinalizationActor") } /** * Run finalization actor and transition to FinalizingWorkflowState. */ private def finalizeWorkflow(data: WorkflowActorData, workflowDescriptor: EngineWorkflowDescriptor, - executionStore: ExecutionStore, outputStore: OutputStore, + jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, failures: Option[List[Throwable]]) = { - val finalizationActor = makeFinalizationActor(workflowDescriptor, executionStore, outputStore) + val finalizationActor = makeFinalizationActor(workflowDescriptor, jobExecutionMap, workflowOutputs) finalizationActor ! 
StartFinalizationCommand - goto(FinalizingWorkflowState) using data.copy(lastStateReached = StateCheckpoint(stateName, failures)) + goto(FinalizingWorkflowState) using data.copy(lastStateReached = StateCheckpoint (stateName, failures)) } - // Update the current State of the Workflow (corresponding to the FSM state) in the Metadata service - private def pushCurrentStateToMetadataService(workflowState: WorkflowState): Unit = { - val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Status), - MetadataValue(workflowState)) - serviceRegistryActor ! PutMetadataAction(metadataEventMsg) - } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowDockerLookupActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowDockerLookupActor.scala new file mode 100644 index 000000000..54c033a7a --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowDockerLookupActor.scala @@ -0,0 +1,290 @@ +package cromwell.engine.workflow + +import akka.actor.{ActorRef, LoggingFSM, Props} +import cromwell.core.{Dispatcher, WorkflowId} +import cromwell.database.sql.SqlDatabase +import cromwell.database.sql.tables.DockerHashStoreEntry +import cromwell.docker.DockerHashActor.{DockerHashFailureResponse, DockerHashSuccessResponse} +import cromwell.docker.{DockerClientHelper, DockerHashRequest, DockerHashResult, DockerImageIdentifier} +import cromwell.engine.workflow.WorkflowActor.{RestartExistingWorkflow, StartMode} +import cromwell.engine.workflow.WorkflowDockerLookupActor._ +import cromwell.services.SingletonServicesStore +import cromwell.core.Dispatcher.EngineDispatcher + +import lenthall.util.TryUtil + +import scala.concurrent.duration._ +import scala.language.postfixOps +import scala.util.{Failure, Success} + +/** + * Ensures docker hash consistency throughout a workflow. + * + * Caches successful docker hash lookups and serves them to subsequent identical requests. 
+ * Persists those hashes in the database to be resilient to server restarts. + * + * Failure modes: + * 1) Failure to load hashes from the DB upon restart. + * 2) Failure to parse hashes from the DB upon restart. + * 3) Failure to write a hash result to the DB. + * 4) Failure to look up a docker hash. + * 5) Timeout from DockerHashActor. + * + * Behavior: + * 1-2) Return a terminal lookup failure for all pending requests, transition to a permanently Failed state in which any + * future requests will immediately return lookup failure. The JobPreparation actor should fail in response to this + * lookup termination message, which in turn should fail the workflow. + * 3-5) Return lookup failure for the current request and all pending requests for the same tag. Any future requests + * for this tag will be attempted again. + */ + +class WorkflowDockerLookupActor private[workflow](workflowId: WorkflowId, val dockerHashingActor: ActorRef, startMode: StartMode, databaseInterface: SqlDatabase) + extends LoggingFSM[WorkflowDockerLookupActorState, WorkflowDockerLookupActorData] with DockerClientHelper { + + implicit val ec = context.system.dispatchers.lookup(Dispatcher.EngineDispatcher) + + // Amount of time after which the docker request should be considered lost and sent again. + override protected def backpressureTimeout: FiniteDuration = 10 seconds + // A multiplier for the amount of time to wait when we get a Backpressure response before sending the request again. + // This effectively bounds the jitter. + override protected def backpressureRandomizerFactor: Double = 0.5D + + context.become(dockerReceive orElse receive) + + startWith( + stateName = if (startMode == RestartExistingWorkflow) AwaitingFirstRequestOnRestart else Running, + stateData = WorkflowDockerLookupActorData.empty + ) + + // `AwaitingFirstRequestOnRestart` is only used in restart scenarios. This state waits until there's at least one hash + // request before trying to load the docker hash mappings. 
This is so we'll have at least one `JobPreparationActor` + // reference available to message with a terminal failure in case the reading or parsing of these mappings fails. + when(AwaitingFirstRequestOnRestart) { + case Event(request: DockerHashRequest, data) => + loadDockerHashStoreEntries() + goto(LoadingCache) using data.addHashRequest(request, sender()) + } + + // Waiting for a response from the database with the hash mapping for this workflow. + when(LoadingCache) { + case Event(DockerHashStoreLoadingSuccess(dockerHashEntries), data) => + loadCacheAndHandleHashRequests(dockerHashEntries, data) + case Event(request: DockerHashRequest, data) => + stay using data.addHashRequest(request, sender()) + } + + // This is the normal operational mode. + when(Running) { + // This tag has already been looked up and its hash is in the mappings cache. + case Event(request: DockerHashRequest, data) if data.mappings.contains(request.dockerImageID) => + sender ! DockerHashSuccessResponse(data.mappings(request.dockerImageID), request) + stay() + // A request for the hash for this tag has already been made to the hashing actor. Don't request the hash again, + // just add this sender to the list of replyTos for when the hash arrives. + case Event(request: DockerHashRequest, data) if data.hashRequests.contains(request) => + stay using data.addHashRequest(request, sender()) + // This tag has not (successfully) been looked up before, so look it up now. 
+ case Event(request: DockerHashRequest, data) => + requestDockerHash(request, data) + case Event(dockerResponse: DockerHashSuccessResponse, data) => + persistDockerHash(dockerResponse, data) + stay() + case Event(dockerResponse: DockerHashFailureResponse, data) => + handleLookupFailure(dockerResponse, data) + case Event(DockerHashStoreSuccess(response), data) => + recordMappingAndRespond(response, data) + case Event(DockerHashStoreFailure(request, e), data) => + handleStoreFailure(request, new Exception(s"Failure storing docker hash for ${request.dockerImageID.fullName}", e), data) + } + + // In state Terminal we reject all requests with the cause set in the state data. + when(Terminal) { + case Event(request: DockerHashRequest, data) => + sender() ! WorkflowDockerLookupFailure(data.failureCause.orNull, request) + stay() + } + + private def fail(reason: Throwable): State = { + self ! TransitionToFailed(reason) + stay() + } + + whenUnhandled { + case Event(DockerHashActorTimeout(request), data) => + val reason = new Exception(s"Timeout looking up docker hash") + data.hashRequests(request) foreach { _ ! WorkflowDockerLookupFailure(reason, request) } + val updatedData = data.copy(hashRequests = data.hashRequests - request) + stay() using updatedData + case Event(TransitionToFailed(cause), data) => + log.error(cause, s"Workflow Docker lookup actor for $workflowId transitioning to Failed") + val updatedData = respondToAllRequestsWithTerminalFailure(FailedException, data) + goto(Terminal) using updatedData.withFailureCause(FailedException) + } + + /** + * Load mappings from the database into the state data, reply to queued requests which have mappings, and initiate + * hash lookups for requests which don't have mappings. 
+ */ + private def loadCacheAndHandleHashRequests(hashEntries: Map[String, String], data: WorkflowDockerLookupActorData): State = { + val dockerMappingsTry = hashEntries map { + case (dockerTag, dockerHash) => DockerImageIdentifier.fromString(dockerTag) -> DockerHashResult.fromString(dockerHash) + } + + TryUtil.sequenceKeyValues(dockerMappingsTry) match { + case Success(dockerMappings) => + // Figure out which of the queued requests already have established mappings. + val (hasMappings, doesNotHaveMappings) = data.hashRequests.partition { case (request, _) => dockerMappings.contains(request.dockerImageID) } + + // The requests which have mappings receive success responses. + hasMappings foreach { case (request, replyTos) => + val result = dockerMappings(request.dockerImageID) + replyTos foreach { _ ! DockerHashSuccessResponse(result, request)} + } + + // The requests without mappings need to be looked up. + doesNotHaveMappings.keys foreach { sendDockerCommand(_) } + + // Update state data accordingly. + val newData = data.copy(hashRequests = doesNotHaveMappings, mappings = dockerMappings, failureCause = None) + goto(Running) using newData + + case Failure(e) => + fail(new Exception("Failed to parse docker tag -> hash mappings from DB", e)) + } + } + + private def requestDockerHash(request: DockerHashRequest, data: WorkflowDockerLookupActorData): State = { + sendDockerCommand(request) + val replyTo = sender() + val updatedData = data.copy(hashRequests = data.hashRequests + (request -> List(replyTo))) + stay using updatedData + } + + private def recordMappingAndRespond(response: DockerHashSuccessResponse, data: WorkflowDockerLookupActorData): State = { + // Add the new label to hash mapping to the current set of mappings. + val request = response.request + data.hashRequests(request) foreach { _ ! 
DockerHashSuccessResponse(response.dockerHash, request) } + val updatedData = data.copy(hashRequests = data.hashRequests - request, mappings = data.mappings + (request.dockerImageID -> response.dockerHash)) + stay using updatedData + } + + private def respondToAllRequests(reason: Throwable, + data: WorkflowDockerLookupActorData, + messageBuilder: (Throwable, DockerHashRequest) => WorkflowDockerLookupResponse): WorkflowDockerLookupActorData = { + data.hashRequests foreach { case (request, replyTos) => + replyTos foreach { _ ! messageBuilder(reason, request) } + } + data.clearHashRequests + } + + private def respondToAllRequestsWithTerminalFailure(reason: Throwable, data: WorkflowDockerLookupActorData): WorkflowDockerLookupActorData = { + respondToAllRequests(reason, data, WorkflowDockerTerminalFailure.apply) + } + + private def persistDockerHash(response: DockerHashSuccessResponse, data: WorkflowDockerLookupActorData): Unit = { + val dockerHashStoreEntry = DockerHashStoreEntry(workflowId.toString, response.request.dockerImageID.fullName, response.dockerHash.algorithmAndHash) + databaseInterface.addDockerHashStoreEntry(dockerHashStoreEntry) onComplete { + case Success(_) => self ! DockerHashStoreSuccess(response) + case Failure(ex) => self ! DockerHashStoreFailure(response.request, ex) + } + } + + private def handleLookupFailure(dockerResponse: DockerHashFailureResponse, data: WorkflowDockerLookupActorData): State = { + // Fail all pending requests. This logic does not blacklist the tag, which will allow lookups to be attempted + // again in the future. + val failureResponse = WorkflowDockerLookupFailure(new Exception(dockerResponse.reason), dockerResponse.request) + val request = dockerResponse.request + data.hashRequests(request) foreach { _ ! 
failureResponse } + + val updatedData = data.copy(hashRequests = data.hashRequests - request) + stay using updatedData + } + + private def handleStoreFailure(dockerHashRequest: DockerHashRequest, reason: Throwable, data: WorkflowDockerLookupActorData): State = { + data.hashRequests(dockerHashRequest) foreach { _ ! WorkflowDockerLookupFailure(reason, dockerHashRequest) } + // Remove these requesters from the collection of those awaiting hashes. + stay() using data.copy(hashRequests = data.hashRequests - dockerHashRequest) + } + + def loadDockerHashStoreEntries(): Unit = { + databaseInterface.queryDockerHashStoreEntries(workflowId.toString) onComplete { + case Success(dockerHashEntries) => + val dockerMappings = dockerHashEntries.map(entry => entry.dockerTag -> entry.dockerHash).toMap + self ! DockerHashStoreLoadingSuccess(dockerMappings) + case Failure(ex) => + fail(new RuntimeException("Failed to load docker tag -> hash mappings from DB", ex)) + } + } + + override protected def onTimeout(message: Any, to: ActorRef): Unit = { + message match { + case r: DockerHashRequest => self ! DockerHashActorTimeout(r) + } + } +} + +object WorkflowDockerLookupActor { + /* States */ + sealed trait WorkflowDockerLookupActorState + case object AwaitingFirstRequestOnRestart extends WorkflowDockerLookupActorState + case object LoadingCache extends WorkflowDockerLookupActorState + case object Running extends WorkflowDockerLookupActorState + case object Terminal extends WorkflowDockerLookupActorState + private val FailedException = + new Exception(s"The WorkflowDockerLookupActor has failed. 
Subsequent docker tags for this workflow will not be resolved.") + + /* Internal ADTs */ + final case class DockerRequestContext(dockerHashRequest: DockerHashRequest, replyTo: ActorRef) + sealed trait DockerHashStoreResponse + final case class DockerHashStoreSuccess(successResponse: DockerHashSuccessResponse) extends DockerHashStoreResponse + final case class DockerHashStoreFailure(dockerHashRequest: DockerHashRequest, reason: Throwable) extends DockerHashStoreResponse + final case class DockerHashStoreLoadingSuccess(dockerMappings: Map[String, String]) + final case class DockerHashActorTimeout(request: DockerHashRequest) + + /* Messages */ + sealed trait WorkflowDockerLookupActorMessage + private final case class TransitionToFailed(cause: Throwable) extends WorkflowDockerLookupActorMessage + + /* Responses */ + sealed trait WorkflowDockerLookupResponse + final case class WorkflowDockerLookupFailure(reason: Throwable, request: DockerHashRequest) extends WorkflowDockerLookupResponse + final case class WorkflowDockerTerminalFailure(reason: Throwable, request: DockerHashRequest) extends WorkflowDockerLookupResponse + + def props(workflowId: WorkflowId, dockerHashingActor: ActorRef, startMode: StartMode, databaseInterface: SqlDatabase = SingletonServicesStore.databaseInterface) = { + Props(new WorkflowDockerLookupActor(workflowId, dockerHashingActor, startMode, databaseInterface)).withDispatcher(EngineDispatcher) + } + + object WorkflowDockerLookupActorData { + def empty = WorkflowDockerLookupActorData(hashRequests = Map.empty, mappings = Map.empty, failureCause = None) + } + + final case class WorkflowDockerLookupActorData(hashRequests: Map[DockerHashRequest, List[ActorRef]], + mappings: Map[DockerImageIdentifier, DockerHashResult], + failureCause: Option[Throwable]) { + /** + * Add the specified request and replyTo to this state data. + * + * @param request The request to be added. 
+ * @param replyTo The actor to be informed of the hash or the failure to look up the hash. + * @return State data with the added request and replyTo. + */ + def addHashRequest(request: DockerHashRequest, replyTo: ActorRef): WorkflowDockerLookupActorData = { + // Prepend this `ActorRef` to the list of `ActorRef`s awaiting the hash for this request, or to Nil if this is the first. + val alreadyAwaiting = hashRequests.getOrElse(request, Nil) + this.copy(hashRequests = hashRequests + (request -> (replyTo :: alreadyAwaiting))) + } + + /** + * Empty the collection of hash requests. + * @return State data with all awaiting hash requests removed. + */ + def clearHashRequests: WorkflowDockerLookupActorData = this.copy(hashRequests = Map.empty) + + /** + * Add this failure cause to the state data. + * @param cause The failure cause. + * @return Updated state data. + */ + def withFailureCause(cause: Throwable): WorkflowDockerLookupActorData = this.copy(failureCause = Option(cause)) + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala index e7879c803..ffff98559 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala @@ -1,25 +1,25 @@ package cromwell.engine.workflow -import java.util.UUID - import akka.actor.FSM.{CurrentState, SubscribeTransitionCallBack, Transition} import akka.actor._ import akka.event.Logging +import cats.data.NonEmptyList import com.typesafe.config.{Config, ConfigFactory} +import cromwell.backend.async.KnownJobFailureException import cromwell.core.Dispatcher.EngineDispatcher import cromwell.core.{WorkflowAborted, WorkflowId} +import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.WorkflowActor._ import cromwell.engine.workflow.WorkflowManagerActor._ -import 
cromwell.engine.workflow.workflowstore.{WorkflowStoreActor, WorkflowStoreState} +import cromwell.engine.workflow.lifecycle.execution.WorkflowMetadataHelper +import cromwell.engine.workflow.workflowstore.{WorkflowStoreActor, WorkflowStoreEngineActor, WorkflowStoreState} import cromwell.jobstore.JobStoreActor.{JobStoreWriteFailure, JobStoreWriteSuccess, RegisterWorkflowCompleted} -import cromwell.services.metadata.MetadataService._ import cromwell.webservice.EngineStatsActor -import lenthall.config.ScalaConfig.EnhancedScalaConfig +import lenthall.exception.ThrowableAggregation +import net.ceedubs.ficus.Ficus._ +import org.apache.commons.lang3.exception.ExceptionUtils import scala.concurrent.duration._ -import scala.concurrent.{Await, Promise} -import scala.language.postfixOps -import scalaz.NonEmptyList object WorkflowManagerActor { val DefaultMaxWorkflowsToRun = 5000 @@ -33,6 +33,7 @@ object WorkflowManagerActor { /** * Commands */ + case object PreventNewWorkflowsFromStarting extends WorkflowManagerActorMessage sealed trait WorkflowManagerActorCommand extends WorkflowManagerActorMessage case object RetrieveNewWorkflows extends WorkflowManagerActorCommand final case class AbortWorkflowCommand(id: WorkflowId, replyTo: ActorRef) extends WorkflowManagerActorCommand @@ -41,13 +42,21 @@ object WorkflowManagerActor { case object EngineStatsCommand extends WorkflowManagerActorCommand def props(workflowStore: ActorRef, + ioActor: ActorRef, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, - callCacheReadActor: ActorRef): Props = { - Props(new WorkflowManagerActor( - workflowStore, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor) - ).withDispatcher(EngineDispatcher) + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + dockerHashActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + serverMode: 
Boolean): Props = { + val params = WorkflowManagerActorParams(ConfigFactory.load, workflowStore, ioActor, serviceRegistryActor, + workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, callCacheWriteActor, + dockerHashActor, jobTokenDispenserActor, backendSingletonCollection, serverMode) + Props(new WorkflowManagerActor(params)).withDispatcher(EngineDispatcher) } /** @@ -55,6 +64,7 @@ object WorkflowManagerActor { */ sealed trait WorkflowManagerState case object Running extends WorkflowManagerState + case object RunningAndNotStartingNewWorkflows extends WorkflowManagerState case object Aborting extends WorkflowManagerState case object Done extends WorkflowManagerState @@ -66,7 +76,7 @@ object WorkflowManagerActor { def withAddition(entries: NonEmptyList[WorkflowIdToActorRef]): WorkflowManagerData = { val entryTuples = entries map { e => e.workflowId -> e.workflowActor } - this.copy(workflows = workflows ++ entryTuples.list.toList) + this.copy(workflows = workflows ++ entryTuples.toList) } def without(id: WorkflowId): WorkflowManagerData = this.copy(workflows = workflows - id) @@ -78,55 +88,44 @@ object WorkflowManagerActor { } } -class WorkflowManagerActor(config: Config, - val workflowStore: ActorRef, - val serviceRegistryActor: ActorRef, - val workflowLogCopyRouter: ActorRef, - val jobStoreActor: ActorRef, - val callCacheReadActor: ActorRef) - extends LoggingFSM[WorkflowManagerState, WorkflowManagerData] { - - def this(workflowStore: ActorRef, - serviceRegistryActor: ActorRef, - workflowLogCopyRouter: ActorRef, - jobStoreActor: ActorRef, - callCacheReadActor: ActorRef) = this( - ConfigFactory.load, workflowStore, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor) - - private val maxWorkflowsRunning = config.getConfig("system").getIntOr("max-concurrent-workflows", default=DefaultMaxWorkflowsToRun) - private val maxWorkflowsToLaunch = config.getConfig("system").getIntOr("max-workflow-launch-count", 
default=DefaultMaxWorkflowsToLaunch) - private val newWorkflowPollRate = config.getConfig("system").getIntOr("new-workflow-poll-rate", default=DefaultNewWorkflowPollRate).seconds +case class WorkflowManagerActorParams(config: Config, + workflowStore: ActorRef, + ioActor: ActorRef, + serviceRegistryActor: ActorRef, + workflowLogCopyRouter: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + dockerHashActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + serverMode: Boolean) + +class WorkflowManagerActor(params: WorkflowManagerActorParams) + extends LoggingFSM[WorkflowManagerState, WorkflowManagerData] with WorkflowMetadataHelper { + + private val config = params.config + override val serviceRegistryActor = params.serviceRegistryActor + + private val maxWorkflowsRunning = config.getConfig("system").as[Option[Int]]("max-concurrent-workflows").getOrElse(DefaultMaxWorkflowsToRun) + private val maxWorkflowsToLaunch = config.getConfig("system").as[Option[Int]]("max-workflow-launch-count").getOrElse(DefaultMaxWorkflowsToLaunch) + private val newWorkflowPollRate = config.getConfig("system").as[Option[Int]]("new-workflow-poll-rate").getOrElse(DefaultNewWorkflowPollRate).seconds private val logger = Logging(context.system, this) private val tag = self.path.name - private val donePromise = Promise[Unit]() - private var abortingWorkflowToReplyTo = Map.empty[WorkflowId, ActorRef] + private var nextPollCancellable: Option[Cancellable] = None - override def preStart() { - addShutdownHook() + override def preStart(): Unit = { // Starts the workflow polling cycle self ! RetrieveNewWorkflows } - private def addShutdownHook(): Unit = { - // Only abort jobs on SIGINT if the config explicitly sets backend.abortJobsOnTerminate = true. 
- val abortJobsOnTerminate = - config.getConfig("system").getBooleanOr("abort-jobs-on-terminate", default = false) - - if (abortJobsOnTerminate) { - sys.addShutdownHook { - logger.info(s"$tag: Received shutdown signal. Aborting all running workflows...") - self ! AbortAllWorkflowsCommand - Await.ready(donePromise.future, Duration.Inf) - } - } - } - startWith(Running, WorkflowManagerData(workflows = Map.empty)) - when (Running) { + val runningAndNotStartingNewWorkflowsStateFunction: StateFunction = { /* Commands from clients */ @@ -136,16 +135,14 @@ class WorkflowManagerActor(config: Config, Determine the number of available workflow slots and request the smaller of that number of maxWorkflowsToLaunch. */ val maxNewWorkflows = maxWorkflowsToLaunch min (maxWorkflowsRunning - stateData.workflows.size) - workflowStore ! WorkflowStoreActor.FetchRunnableWorkflows(maxNewWorkflows) + params.workflowStore ! WorkflowStoreActor.FetchRunnableWorkflows(maxNewWorkflows) stay() - case Event(WorkflowStoreActor.NoNewWorkflowsToStart, stateData) => + case Event(WorkflowStoreEngineActor.NoNewWorkflowsToStart, _) => log.debug("WorkflowStore provided no new workflows to start") - scheduleNextNewWorkflowPoll() stay() - case Event(WorkflowStoreActor.NewWorkflowsToStart(newWorkflows), stateData) => + case Event(WorkflowStoreEngineActor.NewWorkflowsToStart(newWorkflows), stateData) => val newSubmissions = newWorkflows map submitWorkflow - log.info("Retrieved {} workflows from the WorkflowStoreActor", newSubmissions.size) - scheduleNextNewWorkflowPoll() + log.info("Retrieved {} workflows from the WorkflowStoreActor", newSubmissions.toList.size) stay() using stateData.withAddition(newSubmissions) case Event(SubscribeToWorkflowCommand(id), data) => data.workflows.get(id) foreach {_ ! 
SubscribeTransitionCallBack(sender())} @@ -161,7 +158,8 @@ class WorkflowManagerActor(config: Config, case None => // All cool, if we got this far the workflow ID was found in the workflow store so this workflow must have never // made it to the workflow manager. - replyTo ! WorkflowStoreActor.WorkflowAborted(id) + pushCurrentStateToMetadataService(id, WorkflowAborted) + replyTo ! WorkflowStoreEngineActor.WorkflowAborted(id) stay() } case Event(AbortAllWorkflowsCommand, data) if data.workflows.isEmpty => @@ -173,8 +171,8 @@ class WorkflowManagerActor(config: Config, /* Responses from services */ - case Event(WorkflowFailedResponse(workflowId, inState, reasons), data) => - log.error(s"$tag Workflow $workflowId failed (during $inState): ${reasons.mkString("\n")}") + case Event(WorkflowFailedResponse(workflowId, inState, reasons), _) => + log.error(s"$tag Workflow $workflowId failed (during $inState): ${expandFailureReasons(reasons)}") stay() /* Watched transitions @@ -183,18 +181,26 @@ class WorkflowManagerActor(config: Config, log.info(s"$tag ${workflowActor.path.name} is in a terminal state: $toState") // This silently fails if idFromActor is None, but data.without call right below will as well data.idFromActor(workflowActor) foreach { workflowId => - jobStoreActor ! RegisterWorkflowCompleted(workflowId) + params.jobStoreActor ! RegisterWorkflowCompleted(workflowId) if (toState.workflowState == WorkflowAborted) { val replyTo = abortingWorkflowToReplyTo(workflowId) - replyTo ! WorkflowStoreActor.WorkflowAborted(workflowId) + replyTo ! WorkflowStoreEngineActor.WorkflowAborted(workflowId) abortingWorkflowToReplyTo -= workflowId - } else { - workflowStore ! 
WorkflowStoreActor.RemoveWorkflow(workflowId) } } stay using data.without(workflowActor) } + val scheduleNextNewWorkflowPollStateFunction: StateFunction = { + case event @ Event(WorkflowStoreEngineActor.NoNewWorkflowsToStart | _: WorkflowStoreEngineActor.NewWorkflowsToStart, _) => + scheduleNextNewWorkflowPoll() + runningAndNotStartingNewWorkflowsStateFunction(event) + } + + when (Running) (scheduleNextNewWorkflowPollStateFunction.orElse(runningAndNotStartingNewWorkflowsStateFunction)) + + when (RunningAndNotStartingNewWorkflows) (runningAndNotStartingNewWorkflowsStateFunction) + when (Aborting) { case Event(Transition(workflowActor, _, toState: WorkflowActorState), data) if toState.terminal => // Remove this terminal actor from the workflowStore and log a progress message. @@ -215,10 +221,10 @@ class WorkflowManagerActor(config: Config, when (Done) { FSM.NullFunction } whenUnhandled { - case Event(MetadataPutFailed(action, error), _) => - log.warning(s"$tag Put failed for Metadata action $action : ${error.getMessage}") - stay() - case Event(MetadataPutAcknowledgement(_), _) => stay() + case Event(PreventNewWorkflowsFromStarting, _) => + nextPollCancellable foreach { _.cancel() } + sender() ! akka.Done + goto(RunningAndNotStartingNewWorkflows) // Uninteresting transition and current state notifications. case Event((Transition(_, _, _) | CurrentState(_, _)), _) => stay() case Event(JobStoreWriteSuccess(_), _) => stay() // Snoozefest @@ -231,15 +237,15 @@ class WorkflowManagerActor(config: Config, context.actorOf(EngineStatsActor.props(data.workflows.values.toList, sndr), s"EngineStatsActor-${sndr.hashCode()}") stay() // Anything else certainly IS interesting: - case Event(unhandled, data) => + case Event(unhandled, _) => log.warning(s"$tag Unhandled message: $unhandled") stay() } onTransition { case _ -> Done => - logger.info(s"$tag All workflows finished. 
Stopping self.") - donePromise.trySuccess(()) + logger.info(s"$tag All workflows finished") + context stop self case fromState -> toState => logger.debug(s"$tag transitioning from $fromState to $toState") } @@ -259,8 +265,10 @@ class WorkflowManagerActor(config: Config, StartNewWorkflow } - val wfProps = WorkflowActor.props(workflowId, startMode, workflow.sources, config, serviceRegistryActor, - workflowLogCopyRouter, jobStoreActor, callCacheReadActor) + val wfProps = WorkflowActor.props(workflowId, startMode, workflow.sources, config, params.ioActor, params.serviceRegistryActor, + params.workflowLogCopyRouter, params.jobStoreActor, params.subWorkflowStoreActor, params.callCacheReadActor, params.callCacheWriteActor, + params.dockerHashActor, params.jobTokenDispenserActor, + params.backendSingletonCollection, params.serverMode) val wfActor = context.actorOf(wfProps, name = s"WorkflowActor-$workflowId") wfActor ! SubscribeTransitionCallBack(self) @@ -269,8 +277,19 @@ class WorkflowManagerActor(config: Config, WorkflowIdToActorRef(workflowId, wfActor) } - private def scheduleNextNewWorkflowPoll(): Unit = { - context.system.scheduler.scheduleOnce(newWorkflowPollRate, self, RetrieveNewWorkflows)(context.dispatcher) + private def scheduleNextNewWorkflowPoll() = { + nextPollCancellable = Option(context.system.scheduler.scheduleOnce(newWorkflowPollRate, self, RetrieveNewWorkflows)(context.dispatcher)) } -} + private def expandFailureReasons(reasons: Seq[Throwable]): String = { + + reasons map { + case reason: ThrowableAggregation => expandFailureReasons(reason.throwables.toSeq) + case reason: KnownJobFailureException => + val stderrMessage = reason.stderrPath map { path => s"\nCheck the content of stderr for potential additional information: ${path.pathAsString}" } getOrElse "" + reason.getMessage + stderrMessage + case reason => + reason.getMessage + "\n" + ExceptionUtils.getStackTrace(reason) + } mkString "\n" + } +} diff --git 
a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala index ff3df1b20..e98da8983 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala @@ -1,14 +1,16 @@ package cromwell.engine.workflow.lifecycle import java.io.IOException -import java.nio.file.Path import akka.actor.SupervisorStrategy.Restart import akka.actor.{Actor, ActorLogging, ActorRef, OneForOneStrategy, Props} -import better.files._ import cromwell.core.Dispatcher.IoDispatcher import cromwell.core._ +import cromwell.core.io._ import cromwell.core.logging.WorkflowLogger +import cromwell.core.path.Path +import cromwell.engine.workflow.lifecycle.execution.WorkflowMetadataHelper +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder import cromwell.services.metadata.MetadataService.PutMetadataAction import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} @@ -20,41 +22,77 @@ object CopyWorkflowLogsActor { case _: IOException => Restart } - def props(serviceRegistryActor: ActorRef) = Props(new CopyWorkflowLogsActor(serviceRegistryActor)).withDispatcher(IoDispatcher) + def props(serviceRegistryActor: ActorRef, ioActor: ActorRef) = Props(new CopyWorkflowLogsActor(serviceRegistryActor, ioActor)).withDispatcher(IoDispatcher) } // This could potentially be turned into a more generic "Copy/Move something from A to B" // Which could be used for other copying work (outputs, call logs..) 
-class CopyWorkflowLogsActor(serviceRegistryActor: ActorRef) - extends Actor - with ActorLogging - with PathFactory { +class CopyWorkflowLogsActor(override val serviceRegistryActor: ActorRef, override val ioActor: ActorRef) extends Actor + with ActorLogging with GcsBatchCommandBuilder with IoClientHelper with WorkflowMetadataHelper with MonitoringCompanionHelper { - def copyAndClean(src: Path, dest: Path): Unit = { - File(dest).parent.createDirectories() + implicit val ec = context.dispatcher + + def copyLog(src: Path, dest: Path, workflowId: WorkflowId) = { + dest.parent.createPermissionedDirectories() + // Send the workflowId as context along with the copy so we can update metadata when the response comes back + sendIoCommandWithContext(copyCommand(src, dest, overwrite = true), workflowId) + // In order to keep "copy and then delete" operations atomic as far as monitoring is concerned, removeWork will only be called + // when the delete is complete (successfully or not), or when the copy completes if WorkflowLogger.isTemporary is false + addWork() + } - File(src).copyTo(dest, overwrite = true) - if (WorkflowLogger.isTemporary) File(src).delete() + def deleteLog(src: Path) = if (WorkflowLogger.isTemporary) { + sendIoCommand(deleteCommand(src)) + } else removeWork() + + def updateLogsPathInMetadata(workflowId: WorkflowId, path: Path) = { + val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.WorkflowLog), MetadataValue(path.pathAsString)) + serviceRegistryActor ! 
PutMetadataAction(metadataEventMsg) } - override def receive = { + def copyLogsReceive: Receive = { case CopyWorkflowLogsActor.Copy(workflowId, destinationDir) => val workflowLogger = new WorkflowLogger(self.path.name, workflowId, Option(log)) workflowLogger.workflowLogPath foreach { src => - if (File(src).exists) { - val destPath = destinationDir.resolve(src.getFileName) + if (src.exists) { + val destPath = destinationDir.resolve(src.name) workflowLogger.info(s"Copying workflow logs from $src to $destPath") - copyAndClean(src, destPath) - - val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.WorkflowLog), MetadataValue(destPath)) - serviceRegistryActor ! PutMetadataAction(metadataEventMsg) + copyLog(src, destPath, workflowId) } } + + case (workflowId: WorkflowId, IoSuccess(copy: IoCopyCommand, _)) => + updateLogsPathInMetadata(workflowId, copy.destination) + deleteLog(copy.source) + + case (workflowId: WorkflowId, IoFailure(copy: IoCopyCommand, failure)) => + pushWorkflowFailures(workflowId, List(new IOException("Could not copy workflow logs", failure))) + log.error(failure, s"Failed to copy workflow logs from ${copy.source.pathAsString} to ${copy.destination.pathAsString}") + deleteLog(copy.source) + + case IoSuccess(_: IoDeleteCommand, _) => removeWork() + + case IoFailure(delete: IoDeleteCommand, failure) => + removeWork() + log.error(failure, s"Failed to delete workflow logs from ${delete.file.pathAsString}") + + case other => log.warning(s"CopyWorkflowLogsActor received an unexpected message: $other") } + + override def receive = monitoringReceive orElse ioReceive orElse copyLogsReceive override def preRestart(t: Throwable, message: Option[Any]) = { message foreach self.forward } + + override protected def onTimeout(message: Any, to: ActorRef): Unit = message match { + case copy: IoCopyCommand => + log.error(s"Failed to copy workflow logs from ${copy.source.pathAsString} to ${copy.destination.pathAsString}: Timeout") + 
deleteLog(copy.source) + case delete: IoDeleteCommand => + log.error(s"Failed to delete workflow logs from ${delete.file.pathAsString}: Timeout") + case _ => + } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala index a5028e5af..627053d0f 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala @@ -1,64 +1,91 @@ package cromwell.engine.workflow.lifecycle -import java.nio.file.Path - -import akka.actor.Props -import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationResponse, FinalizationSuccess} +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import akka.event.LoggingReceive +import cromwell.backend.BackendLifecycleActor.BackendWorkflowLifecycleActorResponse +import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationFailed, FinalizationResponse, FinalizationSuccess, Finalize} import cromwell.backend.{AllBackendInitializationData, BackendConfigurationDescriptor, BackendInitializationData, BackendLifecycleActorFactory} -import cromwell.core._ import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.WorkflowOptions._ +import cromwell.core._ +import cromwell.core.io.AsyncIo +import cromwell.core.path.{Path, PathCopier, PathFactory} import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.backend.{BackendConfiguration, CromwellBackends} -import wdl4s.ReportableSymbol -import wdl4s.values.WdlSingleFile +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder +import wdl4s.wdl.values.{WdlArray, WdlMap, WdlSingleFile, WdlValue} import scala.concurrent.{ExecutionContext, Future} +import scala.util.{Failure, Success} object CopyWorkflowOutputsActor { - def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, 
outputStore: OutputStore, + def props(workflowId: WorkflowId, ioActor: ActorRef, workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData) = Props( - new CopyWorkflowOutputsActor(workflowId, workflowDescriptor, outputStore, initializationData) + new CopyWorkflowOutputsActor(workflowId, ioActor, workflowDescriptor, workflowOutputs, initializationData) ).withDispatcher(IoDispatcher) } -class CopyWorkflowOutputsActor(workflowId: WorkflowId, val workflowDescriptor: EngineWorkflowDescriptor, outputStore: OutputStore, +class CopyWorkflowOutputsActor(workflowId: WorkflowId, override val ioActor: ActorRef, val workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData) - extends EngineWorkflowFinalizationActor with PathFactory { + extends Actor with ActorLogging with PathFactory with AsyncIo with GcsBatchCommandBuilder { - private def copyWorkflowOutputs(workflowOutputsFilePath: String): Unit = { - val workflowOutputsPath = buildPath(workflowOutputsFilePath, workflowDescriptor.engineFilesystems) + implicit val ec = context.dispatcher + override val pathBuilders = workflowDescriptor.pathBuilders - val reportableOutputs = workflowDescriptor.backendDescriptor.workflowNamespace.workflow.outputs + override def receive = ioReceive orElse LoggingReceive { + case Finalize => performActionThenRespond(afterAll()(context.dispatcher), FinalizationFailed)(context.dispatcher) + } + + private def performActionThenRespond(operation: => Future[BackendWorkflowLifecycleActorResponse], + onFailure: (Throwable) => BackendWorkflowLifecycleActorResponse) + (implicit ec: ExecutionContext) = { + val respondTo: ActorRef = sender + operation onComplete { + case Success(r) => respondTo ! r + case Failure(t) => respondTo ! 
onFailure(t) + } + } - val outputFilePaths = getOutputFilePaths(reportableOutputs) + private def copyWorkflowOutputs(workflowOutputsFilePath: String): Future[Seq[Unit]] = { + val workflowOutputsPath = buildPath(workflowOutputsFilePath) - outputFilePaths foreach { - case (workflowRootPath, srcPath) => - // WARNING: PathCopier does not do atomic copies. The files may be partially written. - PathCopier.copy(workflowRootPath, srcPath, workflowOutputsPath) + val outputFilePaths = getOutputFilePaths(workflowOutputsPath) + + val copies = outputFilePaths map { + case (srcPath, dstPath) => + dstPath.createDirectories() + copyAsync(srcPath, dstPath) } + + Future.sequence(copies) } - private def getOutputFilePaths(reportableOutputs: Seq[ReportableSymbol]): Seq[(Path, Path)] = { - for { - reportableOutput <- reportableOutputs + private def findFiles(values: Seq[WdlValue]): Seq[WdlSingleFile] = { + values flatMap { + case file: WdlSingleFile => Seq(file) + case array: WdlArray => findFiles(array.value) + case map: WdlMap => findFiles(map.value.values.toSeq) + case _ => Seq.empty + } + } + + private def getOutputFilePaths(workflowOutputsPath: Path): List[(Path, Path)] = { + val rootAndFiles = for { // NOTE: Without .toSeq, outputs in arrays only yield the last output - (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keys.toSeq).toSeq + backend <- workflowDescriptor.backendAssignments.values.toSeq config <- BackendConfiguration.backendConfigurationDescriptor(backend).toOption.toSeq rootPath <- getBackendRootPath(backend, config).toSeq - call <- calls - // NOTE: Without .toSeq, outputs in arrays only yield the last output - (outputCallKey, outputEntries) <- outputStore.store.toSeq - // Only get paths for the original scatter call, not the indexed entries - if outputCallKey.call == call && outputCallKey.index.isEmpty - outputEntry <- outputEntries - if reportableOutput.fullyQualifiedName == s"${call.fullyQualifiedName}.${outputEntry.name}" - 
wdlValue <- outputEntry.wdlValue.toSeq - collected = wdlValue collectAsSeq { case f: WdlSingleFile => f } - wdlFile <- collected - wdlPath = rootPath.getFileSystem.getPath(wdlFile.value) - } yield (rootPath, wdlPath) + outputFiles = findFiles(workflowOutputs.values.map(_.wdlValue).toSeq).map(_.value) + } yield (rootPath, outputFiles) + + val outputFileDestinations = rootAndFiles flatMap { + case (workflowRoot, outputs) => + outputs map { output => + val outputPath = PathFactory.buildPath(output, pathBuilders) + outputPath -> PathCopier.getDestinationFilePath(workflowRoot, outputPath, workflowOutputsPath) + } + } + outputFileDestinations.distinct.toList } private def getBackendRootPath(backend: String, config: BackendConfigurationDescriptor): Option[Path] = { @@ -74,8 +101,13 @@ class CopyWorkflowOutputsActor(workflowId: WorkflowId, val workflowDescriptor: E backendFactory.getExecutionRootPath(workflowDescriptor.backendDescriptor, config.backendConfig, initializationData) } - final override def afterAll()(implicit ec: ExecutionContext): Future[FinalizationResponse] = Future { - workflowDescriptor.getWorkflowOption(FinalWorkflowOutputsDir) foreach copyWorkflowOutputs - FinalizationSuccess + /** + * Happens after everything else runs + */ + final def afterAll()(implicit ec: ExecutionContext): Future[FinalizationResponse] = { + workflowDescriptor.getWorkflowOption(FinalWorkflowOutputsDir) match { + case Some(outputs) => copyWorkflowOutputs(outputs) map { _ => FinalizationSuccess } + case None => Future.successful(FinalizationSuccess) + } } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/EngineWorkflowFinalizationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/EngineWorkflowFinalizationActor.scala deleted file mode 100644 index 401396cae..000000000 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/EngineWorkflowFinalizationActor.scala +++ /dev/null @@ -1,30 +0,0 @@ -package cromwell.engine.workflow.lifecycle - 
-import akka.actor.{Actor, ActorLogging, ActorRef} -import akka.event.LoggingReceive -import cromwell.backend.BackendLifecycleActor.BackendWorkflowLifecycleActorResponse -import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationFailed, FinalizationResponse, Finalize} - -import scala.concurrent.{ExecutionContext, Future} -import scala.util.{Failure, Success} - -trait EngineWorkflowFinalizationActor extends Actor with ActorLogging { - def receive: Receive = LoggingReceive { - case Finalize => performActionThenRespond(afterAll()(context.dispatcher), FinalizationFailed)(context.dispatcher) - } - - protected def performActionThenRespond(operation: => Future[BackendWorkflowLifecycleActorResponse], - onFailure: (Throwable) => BackendWorkflowLifecycleActorResponse) - (implicit ec: ExecutionContext) = { - val respondTo: ActorRef = sender - operation onComplete { - case Success(r) => respondTo ! r - case Failure(t) => respondTo ! onFailure(t) - } - } - - /** - * Happens after everything else runs - */ - def afterAll()(implicit ec: ExecutionContext): Future[FinalizationResponse] -} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala index 833001588..207f24063 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala @@ -1,31 +1,41 @@ package cromwell.engine.workflow.lifecycle -import java.nio.file.FileSystem - -import akka.actor.{ActorRef, FSM, LoggingFSM, Props} +import akka.actor.{ActorRef, FSM, LoggingFSM, Props, Status} +import akka.pattern.pipe +import cats.data.NonEmptyList +import cats.data.Validated._ +import cats.instances.list._ +import cats.instances.vector._ +import cats.syntax.cartesian._ +import cats.syntax.traverse._ +import 
cats.syntax.validated._ import com.typesafe.config.Config import com.typesafe.scalalogging.LazyLogging import cromwell.backend.BackendWorkflowDescriptor -import cromwell.core._ import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.core._ import cromwell.core.WorkflowOptions.{ReadFromCache, WorkflowOption, WriteToCache} import cromwell.core.callcaching._ +import cromwell.core.labels.{Label, Labels} import cromwell.core.logging.WorkflowLogging +import cromwell.core.path.BetterFileMethods.OpenOptions +import cromwell.core.path.{DefaultPathBuilder, Path, PathBuilder} import cromwell.engine._ import cromwell.engine.backend.CromwellBackends -import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{MaterializeWorkflowDescriptorActorData, MaterializeWorkflowDescriptorActorState} +import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.MaterializeWorkflowDescriptorActorState import cromwell.services.metadata.MetadataService._ -import cromwell.services.metadata.{MetadataValue, MetadataKey, MetadataEvent} -import lenthall.config.ScalaConfig.EnhancedScalaConfig -import spray.json.{JsObject, _} -import wdl4s._ -import wdl4s.expression.NoFunctions -import wdl4s.values.{WdlString, WdlValue} - +import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} +import lenthall.exception.MessageAggregation +import lenthall.validation.ErrorOr._ +import net.ceedubs.ficus.Ficus._ +import spray.json._ +import wdl4s.wdl._ +import wdl4s.wdl.expression.NoFunctions +import wdl4s.wdl.values.{WdlSingleFile, WdlString, WdlValue} + +import scala.concurrent.Future import scala.language.postfixOps import scala.util.{Failure, Success, Try} -import scalaz.Scalaz._ -import scalaz.Validation.FlatMap._ object MaterializeWorkflowDescriptorActor { @@ -37,15 +47,15 @@ object MaterializeWorkflowDescriptorActor { // exception if not initialized yet. 
def cromwellBackends = CromwellBackends.instance.get - def props(serviceRegistryActor: ActorRef, workflowId: WorkflowId, cromwellBackends: => CromwellBackends = cromwellBackends): Props = { - Props(new MaterializeWorkflowDescriptorActor(serviceRegistryActor, workflowId, cromwellBackends)).withDispatcher(EngineDispatcher) + def props(serviceRegistryActor: ActorRef, workflowId: WorkflowId, cromwellBackends: => CromwellBackends = cromwellBackends, importLocalFilesystem: Boolean): Props = { + Props(new MaterializeWorkflowDescriptorActor(serviceRegistryActor, workflowId, cromwellBackends, importLocalFilesystem)).withDispatcher(EngineDispatcher) } /* Commands */ sealed trait MaterializeWorkflowDescriptorActorMessage - case class MaterializeWorkflowDescriptorCommand(workflowSourceFiles: WorkflowSourceFiles, + case class MaterializeWorkflowDescriptorCommand(workflowSourceFiles: WorkflowSourceFilesCollection, conf: Config) extends MaterializeWorkflowDescriptorActorMessage case object MaterializeWorkflowDescriptorAbortCommand @@ -64,73 +74,92 @@ object MaterializeWorkflowDescriptorActor { override val terminal = true } case object ReadyToMaterializeState extends MaterializeWorkflowDescriptorActorState + case object MaterializingState extends MaterializeWorkflowDescriptorActorState case object MaterializationSuccessfulState extends MaterializeWorkflowDescriptorActorTerminalState case object MaterializationFailedState extends MaterializeWorkflowDescriptorActorTerminalState case object MaterializationAbortedState extends MaterializeWorkflowDescriptorActorTerminalState - /* - Data - */ - case class MaterializeWorkflowDescriptorActorData() - private val DefaultWorkflowFailureMode = NoNewCalls.toString private[lifecycle] def validateCallCachingMode(workflowOptions: WorkflowOptions, conf: Config): ErrorOr[CallCachingMode] = { def readOptionalOption(option: WorkflowOption): ErrorOr[Boolean] = { workflowOptions.getBoolean(option.name) match { - case Success(x) => x.successNel - case 
Failure(_: OptionNotFoundException) => true.successNel - case Failure(t) => t.getMessage.failureNel + case Success(x) => x.validNel + case Failure(_: OptionNotFoundException) => true.validNel + case Failure(t) => t.getMessage.invalidNel } } - val enabled = conf.getBooleanOption("call-caching.enabled").getOrElse(false) + val enabled = conf.as[Option[Boolean]]("call-caching.enabled").getOrElse(false) + val invalidateBadCacheResults = conf.as[Option[Boolean]]("call-caching.invalidate-bad-cache-results").getOrElse(true) + val callCachingOptions = CallCachingOptions(invalidateBadCacheResults) if (enabled) { val readFromCache = readOptionalOption(ReadFromCache) val writeToCache = readOptionalOption(WriteToCache) - (readFromCache |@| writeToCache) { + (readFromCache |@| writeToCache) map { case (false, false) => CallCachingOff - case (true, false) => CallCachingActivity(ReadCache) - case (false, true) => CallCachingActivity(WriteCache) - case (true, true) => CallCachingActivity(ReadAndWriteCache) + case (true, false) => CallCachingActivity(ReadCache, callCachingOptions) + case (false, true) => CallCachingActivity(WriteCache, callCachingOptions) + case (true, true) => CallCachingActivity(ReadAndWriteCache, callCachingOptions) } } else { - CallCachingOff.successNel + CallCachingOff.validNel } } } -class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val workflowId: WorkflowId, cromwellBackends: => CromwellBackends) extends LoggingFSM[MaterializeWorkflowDescriptorActorState, MaterializeWorkflowDescriptorActorData] with LazyLogging with WorkflowLogging { +class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, + val workflowIdForLogging: WorkflowId, + cromwellBackends: => CromwellBackends, + importLocalFilesystem: Boolean) extends LoggingFSM[MaterializeWorkflowDescriptorActorState, Unit] with LazyLogging with WorkflowLogging { import MaterializeWorkflowDescriptorActor._ val tag = self.path.name val iOExecutionContext = 
context.system.dispatchers.lookup("akka.dispatchers.io-dispatcher") - - startWith(ReadyToMaterializeState, MaterializeWorkflowDescriptorActorData()) + implicit val ec = context.dispatcher + + startWith(ReadyToMaterializeState, ()) when(ReadyToMaterializeState) { case Event(MaterializeWorkflowDescriptorCommand(workflowSourceFiles, conf), _) => - buildWorkflowDescriptor(workflowId, workflowSourceFiles, conf) match { - case scalaz.Success(descriptor) => - sender() ! MaterializeWorkflowDescriptorSuccessResponse(descriptor) - goto(MaterializationSuccessfulState) - case scalaz.Failure(error) => - sender() ! MaterializeWorkflowDescriptorFailureResponse( - new IllegalArgumentException with ExceptionWithErrors { - val message = s"Workflow input processing failed." - val errors = error - }) + val replyTo = sender() + + workflowOptionsAndPathBuilders(workflowSourceFiles) match { + case Valid((workflowOptions, pathBuilders)) => + val futureDescriptor = pathBuilders map { + buildWorkflowDescriptor(workflowIdForLogging, workflowSourceFiles, conf, workflowOptions, _) + } + + // Pipe the response to self, but make it look like it comes from the sender of the command + // This way we can access it through sender() in the next state and don't have to store the value + // of replyTo in the data + pipe(futureDescriptor).to(self, replyTo) + goto(MaterializingState) + case Invalid(error) => + workflowInitializationFailed(error, replyTo) goto(MaterializationFailedState) } case Event(MaterializeWorkflowDescriptorAbortCommand, _) => goto(MaterializationAbortedState) } + when(MaterializingState) { + case Event(Valid(descriptor: EngineWorkflowDescriptor), _) => + sender() ! 
MaterializeWorkflowDescriptorSuccessResponse(descriptor) + goto(MaterializationSuccessfulState) + case Event(Invalid(error: NonEmptyList[String]@unchecked), _) => + workflowInitializationFailed(error, sender()) + goto(MaterializationFailedState) + case Event(Status.Failure(failure), _) => + workflowInitializationFailed(NonEmptyList.of(failure.getMessage), sender()) + goto(MaterializationFailedState) + } + // Let these fall through to the whenUnhandled handler: when(MaterializationSuccessfulState) { FSM.NullFunction } when(MaterializationFailedState) { FSM.NullFunction } @@ -152,90 +181,126 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor stay } - private def buildWorkflowDescriptor(id: WorkflowId, - sourceFiles: WorkflowSourceFiles, - conf: Config): ErrorOr[EngineWorkflowDescriptor] = { - val namespaceValidation = validateNamespace(sourceFiles.wdlSource) + private def workflowInitializationFailed(errors: NonEmptyList[String], replyTo: ActorRef) = { + sender() ! 
MaterializeWorkflowDescriptorFailureResponse( + new IllegalArgumentException with MessageAggregation { + val exceptionContext = "Workflow input processing failed" + val errorMessages = errors.toList + }) + } + + private def workflowOptionsAndPathBuilders(sourceFiles: WorkflowSourceFilesCollection): ErrorOr[(WorkflowOptions, Future[List[PathBuilder]])] = { val workflowOptionsValidation = validateWorkflowOptions(sourceFiles.workflowOptionsJson) - (namespaceValidation |@| workflowOptionsValidation) { - (_, _) - } flatMap { case (namespace, workflowOptions) => + workflowOptionsValidation map { workflowOptions => + val pathBuilders = EngineFilesystems.pathBuildersForWorkflow(workflowOptions)(context.system, context.dispatcher) + (workflowOptions, pathBuilders) + } + } + + private def buildWorkflowDescriptor(id: WorkflowId, + sourceFiles: WorkflowSourceFilesCollection, + conf: Config, + workflowOptions: WorkflowOptions, + pathBuilders: List[PathBuilder]): ErrorOr[EngineWorkflowDescriptor] = { + val namespaceValidation = validateNamespace(sourceFiles) + val labelsValidation = validateLabels(sourceFiles.labelsJson) + + (namespaceValidation |@| labelsValidation).tupled flatMap { case (namespace, labels) => pushWfNameMetadataService(namespace.workflow.unqualifiedName) - val engineFileSystems = EngineFilesystems.filesystemsForWorkflow(workflowOptions)(iOExecutionContext) - buildWorkflowDescriptor(id, sourceFiles, namespace, workflowOptions, conf, engineFileSystems) + publishLabelsToMetadata(id, namespace.workflow.unqualifiedName, labels) + buildWorkflowDescriptor(id, sourceFiles, namespace, workflowOptions, labels, conf, pathBuilders) } } private def pushWfNameMetadataService(name: String): Unit = { // Workflow name: - val nameEvent = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Name), MetadataValue(name)) + val nameEvent = MetadataEvent(MetadataKey(workflowIdForLogging, None, WorkflowMetadataKeys.Name), MetadataValue(name)) serviceRegistryActor ! 
PutMetadataAction(nameEvent) } + private def publishLabelsToMetadata(rootWorkflowId: WorkflowId, unqualifiedName: String, labels: Labels): Unit = { + val defaultLabel = "cromwell-workflow-id" -> s"cromwell-$rootWorkflowId" + val customLabels = labels.asMap + labelsToMetadata(customLabels + defaultLabel, rootWorkflowId) + } + + protected def labelsToMetadata(labels: Map[String, String], workflowId: WorkflowId): Unit = { + labels foreach { case (k, v) => + serviceRegistryActor ! PutMetadataAction(MetadataEvent(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Labels}:$k"), MetadataValue(v))) + } + } + private def buildWorkflowDescriptor(id: WorkflowId, - sourceFiles: WorkflowSourceFiles, - namespace: NamespaceWithWorkflow, + sourceFiles: WorkflowSourceFilesCollection, + namespace: WdlNamespaceWithWorkflow, workflowOptions: WorkflowOptions, + labels: Labels, conf: Config, - engineFilesystems: List[FileSystem]): ErrorOr[EngineWorkflowDescriptor] = { - val defaultBackendName = conf.getStringOption("backend.default") + pathBuilders: List[PathBuilder]): ErrorOr[EngineWorkflowDescriptor] = { + val defaultBackendName = conf.as[Option[String]]("backend.default") val rawInputsValidation = validateRawInputs(sourceFiles.inputsJson) + val failureModeValidation = validateWorkflowFailureMode(workflowOptions, conf) - val backendAssignmentsValidation = validateBackendAssignments(namespace.workflow.calls, workflowOptions, defaultBackendName) + val backendAssignmentsValidation = validateBackendAssignments(namespace.taskCalls, workflowOptions, defaultBackendName) val callCachingModeValidation = validateCallCachingMode(workflowOptions, conf) - (rawInputsValidation |@| failureModeValidation |@| backendAssignmentsValidation |@| callCachingModeValidation ) { - (_, _, _, _) - } flatMap { case (rawInputs, failureMode, backendAssignments, callCachingMode) => - buildWorkflowDescriptor(id, namespace, rawInputs, backendAssignments, workflowOptions, failureMode, engineFilesystems, 
callCachingMode) + + (rawInputsValidation |@| failureModeValidation |@| backendAssignmentsValidation |@| callCachingModeValidation).tupled flatMap { + case (rawInputs, failureMode, backendAssignments, callCachingMode) => + buildWorkflowDescriptor(id, namespace, rawInputs, backendAssignments, workflowOptions, labels, failureMode, pathBuilders, callCachingMode) } } private def buildWorkflowDescriptor(id: WorkflowId, - namespace: NamespaceWithWorkflow, + namespace: WdlNamespaceWithWorkflow, rawInputs: Map[String, JsValue], - backendAssignments: Map[Call, String], + backendAssignments: Map[WdlTaskCall, String], workflowOptions: WorkflowOptions, + labels: Labels, failureMode: WorkflowFailureMode, - engineFileSystems: List[FileSystem], + pathBuilders: List[PathBuilder], callCachingMode: CallCachingMode): ErrorOr[EngineWorkflowDescriptor] = { def checkTypes(inputs: Map[FullyQualifiedName, WdlValue]): ErrorOr[Map[FullyQualifiedName, WdlValue]] = { - val allDeclarations = namespace.workflow.scopedDeclarations ++ namespace.workflow.calls.flatMap(_.scopedDeclarations) - inputs.map({ case (k, v) => + val allDeclarations = namespace.workflow.declarations ++ namespace.workflow.calls.flatMap(_.declarations) + val list: List[ErrorOr[(FullyQualifiedName, WdlValue)]] = inputs.map({ case (k, v) => allDeclarations.find(_.fullyQualifiedName == k) match { case Some(decl) if decl.wdlType.coerceRawValue(v).isFailure => - s"Invalid right-side type of '$k'. Expecting ${decl.wdlType.toWdlString}, got ${v.wdlType.toWdlString}".failureNel - case _ => (k, v).successNel[String] + s"Invalid right-side type of '$k'. 
Expecting ${decl.wdlType.toWdlString}, got ${v.wdlType.toWdlString}".invalidNel + case _ => (k, v).validNel[String] } - }).toList.sequence[ErrorOr, (FullyQualifiedName, WdlValue)].map(_.toMap) + }).toList + + val validatedInputs: ErrorOr[List[(FullyQualifiedName, WdlValue)]] = list.sequence[ErrorOr, (FullyQualifiedName, WdlValue)] + validatedInputs.map(_.toMap) } for { coercedInputs <- validateCoercedInputs(rawInputs, namespace) - _ = pushWfInputsToMetadataService(coercedInputs) - declarations <- validateDeclarations(namespace, workflowOptions, coercedInputs, engineFileSystems) - declarationsAndInputs <- checkTypes(declarations ++ coercedInputs) - backendDescriptor = BackendWorkflowDescriptor(id, namespace, declarationsAndInputs, workflowOptions) - } yield EngineWorkflowDescriptor(backendDescriptor, coercedInputs, backendAssignments, failureMode, engineFileSystems, callCachingMode) + coercedValidatedFileInputs <- validateWdlFiles(coercedInputs) + _ = pushWfInputsToMetadataService(coercedValidatedFileInputs) + evaluatedWorkflowsDeclarations <- validateDeclarations(namespace, workflowOptions, coercedValidatedFileInputs, pathBuilders) + declarationsAndInputs <- checkTypes(evaluatedWorkflowsDeclarations ++ coercedValidatedFileInputs) + backendDescriptor = BackendWorkflowDescriptor(id, namespace.workflow, declarationsAndInputs, workflowOptions, labels) + } yield EngineWorkflowDescriptor(namespace, backendDescriptor, backendAssignments, failureMode, pathBuilders, callCachingMode) } private def pushWfInputsToMetadataService(workflowInputs: WorkflowCoercedInputs): Unit = { // Inputs val inputEvents = workflowInputs match { case empty if empty.isEmpty => - List(MetadataEvent.empty(MetadataKey(workflowId, None,WorkflowMetadataKeys.Inputs))) + List(MetadataEvent.empty(MetadataKey(workflowIdForLogging, None,WorkflowMetadataKeys.Inputs))) case inputs => inputs flatMap { case (inputName, wdlValue) => - wdlValueToMetadataEvents(MetadataKey(workflowId, None, 
s"${WorkflowMetadataKeys.Inputs}:$inputName"), wdlValue) + wdlValueToMetadataEvents(MetadataKey(workflowIdForLogging, None, s"${WorkflowMetadataKeys.Inputs}:$inputName"), wdlValue) } } serviceRegistryActor ! PutMetadataAction(inputEvents) } - private def validateBackendAssignments(calls: Seq[Call], workflowOptions: WorkflowOptions, defaultBackendName: Option[String]): ErrorOr[Map[Call, String]] = { + private def validateBackendAssignments(calls: Set[WdlTaskCall], workflowOptions: WorkflowOptions, defaultBackendName: Option[String]): ErrorOr[Map[WdlTaskCall, String]] = { val callToBackendMap = Try { calls map { call => val backendPriorities = Seq( @@ -256,15 +321,15 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor case Success(backendMap) => val backendMapAsString = backendMap.map({case (k, v) => s"${k.fullyQualifiedName} -> $v"}).mkString(", ") workflowLogger.info(s"Call-to-Backend assignments: $backendMapAsString") - backendMap.successNel - case Failure(t) => t.getMessage.failureNel + backendMap.validNel + case Failure(t) => t.getMessage.invalidNel } } /** * Map a call to a backend name depending on the runtime attribute key */ - private def assignBackendUsingRuntimeAttrs(call: Call): Option[String] = { + private def assignBackendUsingRuntimeAttrs(call: WdlTaskCall): Option[String] = { val runtimeAttributesMap = call.task.runtimeAttributes.attrs runtimeAttributesMap.get(RuntimeBackendKey) map { wdlExpr => evaluateBackendNameExpression(call.fullyQualifiedName, wdlExpr) } } @@ -279,58 +344,162 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor } } - private def validateDeclarations(namespace: NamespaceWithWorkflow, + private def validateDeclarations(namespace: WdlNamespaceWithWorkflow, options: WorkflowOptions, coercedInputs: WorkflowCoercedInputs, - engineFileSystems: List[FileSystem]): ErrorOr[WorkflowCoercedInputs] = { - namespace.staticWorkflowDeclarationsRecursive(coercedInputs, new 
WdlFunctions(engineFileSystems)) match { - case Success(d) => d.successNel - case Failure(e) => s"Workflow has invalid declarations: ${e.getMessage}".failureNel + pathBuilders: List[PathBuilder]): ErrorOr[WorkflowCoercedInputs] = { + namespace.staticDeclarationsRecursive(coercedInputs, new WdlFunctions(pathBuilders)) match { + case Success(d) => d.validNel + case Failure(e) => s"Workflow has invalid declarations: ${e.getMessage}".invalidNel + } + } + + private def validateImportsDirectory(zipContents: Array[Byte]): ErrorOr[Path] = { + + def makeZipFile(contents: Array[Byte]): Try[Path] = Try { + DefaultPathBuilder.createTempFile("", ".zip").writeByteArray(contents)(OpenOptions.default) + } + + def unZipFile(f: Path) = Try { + val unzippedFile = f.unzip() + val unzippedFileContents = unzippedFile.list.toSeq.head + if (unzippedFileContents.isDirectory) unzippedFileContents else unzippedFile + } + + val importsFile = for { + zipFile <- makeZipFile(zipContents) + unzipped <- unZipFile(zipFile) + _ <- Try(zipFile.delete(swallowIOExceptions = true)) + } yield unzipped + + importsFile match { + case Success(unzippedDirectory: Path) => unzippedDirectory.validNel + case Failure(t) => t.getMessage.invalidNel } } - private def validateNamespace(source: WdlSource): ErrorOr[NamespaceWithWorkflow] = { - try { - NamespaceWithWorkflow.load(source).successNel - } catch { - case e: Exception => s"Unable to load namespace from workflow: ${e.getMessage}".failureNel + private def validateNamespaceWithImports(w: WorkflowSourceFilesWithDependenciesZip): ErrorOr[WdlNamespaceWithWorkflow] = { + def getMetadatae(importsDir: Path, prefix: String = ""): List[(String, Path)] = { + importsDir.children.toList flatMap { + case f: Path if f.isDirectory => getMetadatae(f, prefix + f.name + "/") + case f: Path if f.name.endsWith(".wdl") => List((prefix + f.name, f)) + case _ => List.empty + } } + + def writeMetadatae(importsDir: Path) = { + val wfImportEvents = getMetadatae(importsDir) map { case 
(name: String, f: Path) => + val contents = f.lines.mkString(System.lineSeparator()) + MetadataEvent(MetadataKey(workflowIdForLogging, None, WorkflowMetadataKeys.SubmissionSection, WorkflowMetadataKeys.SubmissionSection_Imports, name), MetadataValue(contents)) + } + serviceRegistryActor ! PutMetadataAction(wfImportEvents) + } + + def importsAsNamespace(importsDir: Path): ErrorOr[WdlNamespaceWithWorkflow] = { + writeMetadatae(importsDir) + val importsDirFile = better.files.File(importsDir.pathAsString) // For wdl4s better file compatibility + val importResolvers: Seq[ImportResolver] = if (importLocalFilesystem) { + List(WdlNamespace.directoryResolver(importsDirFile), WdlNamespace.fileResolver) + } else { + List(WdlNamespace.directoryResolver(importsDirFile)) + } + val results = WdlNamespaceWithWorkflow.load(w.workflowSource, importResolvers) + importsDir.delete(swallowIOExceptions = true) + results match { + case Success(ns) => validateWorkflowNameLengths(ns) + case Failure(f) => f.getMessage.invalidNel + } + } + + validateImportsDirectory(w.importsZip) flatMap importsAsNamespace } - private def validateRawInputs(json: WdlJson): ErrorOr[Map[String, JsValue]] = { + private def validateWorkflowNameLengths(namespace: WdlNamespaceWithWorkflow): ErrorOr[WdlNamespaceWithWorkflow] = { + def allWorkflowNames(n: WdlNamespace): Seq[String] = n.workflows.map(_.unqualifiedName) ++ n.namespaces.flatMap(allWorkflowNames) + val tooLong = allWorkflowNames(namespace).filter(_.length >= 100) + if (tooLong.nonEmpty) { + ("Workflow names must be shorter than 100 characters: " + tooLong.mkString(" ")).invalidNel + } else { + namespace.validNel + } + } + + private def validateNamespace(source: WorkflowSourceFilesCollection): ErrorOr[WdlNamespaceWithWorkflow] = source match { + case w: WorkflowSourceFilesWithDependenciesZip => validateNamespaceWithImports(w) + case w: WorkflowSourceFilesWithoutImports => + val importResolvers: Seq[ImportResolver] = if (importLocalFilesystem) { + 
List(WdlNamespace.fileResolver) + } else { + List.empty + } + WdlNamespaceWithWorkflow.load(w.workflowSource, importResolvers) match { + case Failure(e) => s"Unable to load namespace from workflow: ${e.getMessage}".invalidNel + case Success(namespace) => validateWorkflowNameLengths(namespace) + } + } + + private def validateRawInputs(json: WorkflowJson): ErrorOr[Map[String, JsValue]] = { + Try(json.parseJson) match { + case Success(JsObject(inputs)) => inputs.validNel + case Failure(reason: Throwable) => s"Workflow contains invalid inputs JSON: ${reason.getMessage}".invalidNel + case _ => s"Workflow inputs JSON cannot be parsed to JsObject: $json".invalidNel + } + } + + private def validateLabels(json: WorkflowJson): ErrorOr[Labels] = { + + def toLabels(inputs: Map[String, JsValue]): ErrorOr[Labels] = { + val vectorOfValidatedLabel: Vector[ErrorOr[Label]] = inputs.toVector map { + case (key, JsString(s)) => Label.validateLabel(key, s) + case (key, other) => s"Invalid label $key: $other : Labels must be strings. ${Label.LabelExpectationsMessage}".invalidNel + } + + vectorOfValidatedLabel.sequence[ErrorOr, Label] map { validatedVectorofLabel => Labels(validatedVectorofLabel) } + } + Try(json.parseJson) match { - case Success(JsObject(inputs)) => inputs.successNel - case Failure(reason: Throwable) => s"Workflow contains invalid inputs JSON: ${reason.getMessage}".failureNel - case _ => s"Workflow inputs JSON cannot be parsed to JsObject: $json".failureNel + case Success(JsObject(inputs)) => toLabels(inputs) + case Failure(reason: Throwable) => s"Workflow contains invalid labels JSON: ${reason.getMessage}".invalidNel + case _ => """Invalid workflow labels JSON. 
Expected a JsObject of "labelKey": "labelValue" values.""".invalidNel } } private def validateCoercedInputs(rawInputs: Map[String, JsValue], - namespace: NamespaceWithWorkflow): ErrorOr[WorkflowCoercedInputs] = { + namespace: WdlNamespaceWithWorkflow): ErrorOr[WorkflowCoercedInputs] = { namespace.coerceRawInputs(rawInputs) match { - case Success(r) => r.successNel - case Failure(e: ExceptionWithErrors) => scalaz.Failure(e.errors) - case Failure(e) => e.getMessage.failureNel + case Success(r) => r.validNel + case Failure(e: MessageAggregation) if e.errorMessages.nonEmpty => Invalid(NonEmptyList.fromListUnsafe(e.errorMessages.toList)) + case Failure(e) => e.getMessage.invalidNel + } + } + + private def validateWdlFiles(workflowInputs: WorkflowCoercedInputs): ErrorOr[WorkflowCoercedInputs] = { + val failedFiles = workflowInputs.collect { + case (fullyQualifiedName , WdlSingleFile(value)) if value.startsWith("\"gs://") => s"""Invalid value for File input '$fullyQualifiedName': $value starts with a '\"' """ + } + NonEmptyList.fromList(failedFiles.toList) match { + case Some(errors) => Invalid(errors) + case None => workflowInputs.validNel } } - private def validateWorkflowOptions(workflowOptions: WdlJson): ErrorOr[WorkflowOptions] = { + private def validateWorkflowOptions(workflowOptions: WorkflowJson): ErrorOr[WorkflowOptions] = { WorkflowOptions.fromJsonString(workflowOptions) match { - case Success(opts) => opts.successNel - case Failure(e) => s"Workflow contains invalid options JSON: ${e.getMessage}".failureNel + case Success(opts) => opts.validNel + case Failure(e) => s"Workflow contains invalid options JSON: ${e.getMessage}".invalidNel } } private def validateWorkflowFailureMode(workflowOptions: WorkflowOptions, conf: Config): ErrorOr[WorkflowFailureMode] = { val modeString: Try[String] = workflowOptions.get(WorkflowOptions.WorkflowFailureMode) match { case Success(x) => Success(x) - case Failure(_: OptionNotFoundException) => 
Success(conf.getStringOption("workflow-options.workflow-failure-mode") getOrElse DefaultWorkflowFailureMode) + case Failure(_: OptionNotFoundException) => Success(conf.as[Option[String]]("workflow-options.workflow-failure-mode") getOrElse DefaultWorkflowFailureMode) case Failure(t) => Failure(t) } modeString flatMap WorkflowFailureMode.tryParse match { - case Success(mode) => mode.successNel - case Failure(t) => t.getMessage.failureNel + case Success(mode) => mode.validNel + case Failure(t) => t.getMessage.invalidNel } } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala index 7c8d3748c..79f147a2b 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala @@ -1,16 +1,17 @@ package cromwell.engine.workflow.lifecycle -import akka.actor.{FSM, Props} -import cromwell.backend.AllBackendInitializationData +import akka.actor.SupervisorStrategy.Stop +import akka.actor.{ActorRef, FSM, OneForOneStrategy, Props} import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationFailed, FinalizationSuccess, Finalize} +import cromwell.backend._ import cromwell.core.Dispatcher.EngineDispatcher -import cromwell.core.{ExecutionStore, OutputStore, WorkflowId} +import cromwell.core.{CallOutputs, WorkflowId} import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.backend.CromwellBackends import cromwell.engine.workflow.lifecycle.WorkflowFinalizationActor._ import cromwell.engine.workflow.lifecycle.WorkflowLifecycleActor._ +import wdl4s.wdl.WdlTaskCall -import scala.language.postfixOps import scala.util.{Failure, Success, Try} object WorkflowFinalizationActor { @@ -38,14 +39,19 @@ object WorkflowFinalizationActor { case object WorkflowFinalizationSucceededResponse extends 
WorkflowLifecycleSuccessResponse final case class WorkflowFinalizationFailedResponse(reasons: Seq[Throwable]) extends WorkflowLifecycleFailureResponse - def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, - outputStore: OutputStore, initializationData: AllBackendInitializationData): Props = { - Props(new WorkflowFinalizationActor(workflowId, workflowDescriptor, executionStore, outputStore, initializationData)).withDispatcher(EngineDispatcher) + def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, ioActor: ActorRef, jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData, copyWorkflowOutputsActor: Option[Props]): Props = { + Props(new WorkflowFinalizationActor(workflowId, workflowDescriptor, ioActor, jobExecutionMap, workflowOutputs, initializationData, copyWorkflowOutputsActor)).withDispatcher(EngineDispatcher) } } -case class WorkflowFinalizationActor(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, - executionStore: ExecutionStore, outputStore: OutputStore, initializationData: AllBackendInitializationData) +case class WorkflowFinalizationActor(workflowIdForLogging: WorkflowId, + workflowDescriptor: EngineWorkflowDescriptor, + ioActor: ActorRef, + jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, + initializationData: AllBackendInitializationData, + copyWorkflowOutputsActorProps: Option[Props]) extends WorkflowLifecycleActor[WorkflowFinalizationActorState] { val tag = self.path.name @@ -57,29 +63,33 @@ case class WorkflowFinalizationActor(workflowId: WorkflowId, workflowDescriptor: override def successResponse(data: WorkflowLifecycleActorData) = WorkflowFinalizationSucceededResponse override def failureResponse(reasons: Seq[Throwable]) = WorkflowFinalizationFailedResponse(reasons) + // If an engine or backend finalization actor (children of this actor) dies, send ourselves the failure 
and stop the child actor + override def supervisorStrategy = OneForOneStrategy() { + case failure => + self.tell(FinalizationFailed(failure), sender()) + Stop + } + startWith(FinalizationPendingState, WorkflowLifecycleActorData.empty) when(FinalizationPendingState) { case Event(StartFinalizationCommand, _) => val backendFinalizationActors = Try { for { - (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keys.toSeq) + (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keySet) props <- CromwellBackends.backendLifecycleFactoryActorByName(backend).map( - _.workflowFinalizationActorProps(workflowDescriptor.backendDescriptor, calls, executionStore, outputStore, initializationData.get(backend)) + _.workflowFinalizationActorProps(workflowDescriptor.backendDescriptor, ioActor, calls, filterJobExecutionsForBackend(calls), workflowOutputs, initializationData.get(backend)) ).get actor = context.actorOf(props, backend) } yield actor } - val engineFinalizationActor = Try { - context.actorOf(CopyWorkflowOutputsActor.props(workflowId, workflowDescriptor, outputStore, initializationData), - "CopyWorkflowOutputsActor") - } + val engineFinalizationActor = Try { copyWorkflowOutputsActorProps.map(context.actorOf(_, "CopyWorkflowOutputsActor")).toList } val allActors = for { backendFinalizationActorsFromTry <- backendFinalizationActors engineFinalizationActorFromTry <- engineFinalizationActor - } yield backendFinalizationActorsFromTry.toList.+:(engineFinalizationActorFromTry) + } yield backendFinalizationActorsFromTry.toList ++ engineFinalizationActorFromTry allActors match { case Failure(ex) => @@ -96,6 +106,15 @@ case class WorkflowFinalizationActor(workflowId: WorkflowId, workflowDescriptor: goto(WorkflowFinalizationFailedState) } } + + // Only send to each backend the jobs that it executed + private def filterJobExecutionsForBackend(calls: Set[WdlTaskCall]): JobExecutionMap = { + jobExecutionMap map { + case (wd, 
executedKeys) => wd -> (executedKeys filter { jobKey => calls.contains(jobKey.call) }) + } filter { + case (_, keys) => keys.nonEmpty + } + } when(FinalizationInProgressState) { case Event(FinalizationSuccess, stateData) => checkForDoneAndTransition(stateData.withSuccess(sender)) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala index fc18939f6..b8a0ea503 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala @@ -11,7 +11,6 @@ import cromwell.engine.backend.CromwellBackends import cromwell.engine.workflow.lifecycle.WorkflowInitializationActor._ import cromwell.engine.workflow.lifecycle.WorkflowLifecycleActor._ -import scala.language.postfixOps import scala.util.{Failure, Success, Try} object WorkflowInitializationActor { @@ -44,16 +43,20 @@ object WorkflowInitializationActor { def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, - serviceRegistryActor: ActorRef): Props = { - Props(new WorkflowInitializationActor(workflowId, workflowDescriptor, serviceRegistryActor)).withDispatcher(EngineDispatcher) + ioActor: ActorRef, + serviceRegistryActor: ActorRef, + restarting: Boolean): Props = { + Props(new WorkflowInitializationActor(workflowId, workflowDescriptor, ioActor, serviceRegistryActor, restarting)).withDispatcher(EngineDispatcher) } case class BackendActorAndBackend(actor: ActorRef, backend: String) } -case class WorkflowInitializationActor(workflowId: WorkflowId, +case class WorkflowInitializationActor(workflowIdForLogging: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, - serviceRegistryActor: ActorRef) + ioActor: ActorRef, + serviceRegistryActor: ActorRef, + restarting: Boolean) extends 
AbortableWorkflowLifecycleActor[WorkflowInitializationActorState] { startWith(InitializationPendingState, WorkflowLifecycleActorData.empty) @@ -79,9 +82,9 @@ case class WorkflowInitializationActor(workflowId: WorkflowId, case Event(StartInitializationCommand, _) => val backendInitializationActors = Try { for { - (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keys.toSeq) + (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keySet) props <- CromwellBackends.backendLifecycleFactoryActorByName(backend).map(factory => - factory.workflowInitializationActorProps(workflowDescriptor.backendDescriptor, calls, serviceRegistryActor) + factory.workflowInitializationActorProps(workflowDescriptor.backendDescriptor, ioActor, calls, serviceRegistryActor, restarting) ).get actor = context.actorOf(props, backend) } yield BackendActorAndBackend(actor, backend) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowLifecycleActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowLifecycleActor.scala index 2990a6d2a..1cc4f302b 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowLifecycleActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowLifecycleActor.scala @@ -69,7 +69,7 @@ trait WorkflowLifecycleActor[S <: WorkflowLifecycleActorState] extends LoggingFS def successResponse(data: WorkflowLifecycleActorData): WorkflowLifecycleSuccessResponse def failureResponse(reasons: Seq[Throwable]): WorkflowLifecycleFailureResponse - override def supervisorStrategy = AllForOneStrategy() { + override def supervisorStrategy: SupervisorStrategy = AllForOneStrategy() { case ex: ActorInitializationException => context.parent ! 
failureResponse(Seq(ex)) context.stop(self) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala new file mode 100644 index 000000000..28132a2aa --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala @@ -0,0 +1,141 @@ +package cromwell.engine.workflow.lifecycle.execution + +import java.time.OffsetDateTime + +import akka.actor.ActorRef +import cromwell.backend.BackendJobDescriptorKey +import cromwell.core.ExecutionStatus._ +import cromwell.core._ +import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata._ +import wdl4s.wdl._ +import wdl4s.wdl.values.WdlValue + +import scala.util.Random + +trait CallMetadataHelper { + + def workflowIdForCallMetadata: WorkflowId + def serviceRegistryActor: ActorRef + + def pushNewCallMetadata(callKey: CallKey, backendName: Option[String]) = { + val startEvents = List( + Option(MetadataEvent(metadataKeyForCall(callKey, CallMetadataKeys.Start), MetadataValue(OffsetDateTime.now))), + backendName map { name => MetadataEvent(metadataKeyForCall(callKey, CallMetadataKeys.Backend), MetadataValue(name)) } + ).flatten + + serviceRegistryActor ! PutMetadataAction(startEvents) + } + + def pushQueuedCallMetadata(diffs: List[WorkflowExecutionDiff]) = { + val startingEvents = for { + diff <- diffs + (jobKey, executionState) <- diff.executionStoreChanges if jobKey.isInstanceOf[BackendJobDescriptorKey] && executionState == ExecutionStatus.QueuedInCromwell + } yield MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.QueuedInCromwell)) + serviceRegistryActor ! 
PutMetadataAction(startingEvents) + } + + def pushStartingCallMetadata(callKey: CallKey) = { + val statusChange = MetadataEvent(metadataKeyForCall(callKey, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.Starting)) + serviceRegistryActor ! PutMetadataAction(statusChange) + } + + def pushRunningCallMetadata(key: CallKey, evaluatedInputs: EvaluatedTaskInputs) = { + val inputEvents = evaluatedInputs match { + case empty if empty.isEmpty => + List(MetadataEvent.empty(metadataKeyForCall(key, s"${CallMetadataKeys.Inputs}"))) + case inputs => + inputs flatMap { + case (inputName, inputValue) => + wdlValueToMetadataEvents(metadataKeyForCall(key, s"${CallMetadataKeys.Inputs}:${inputName.unqualifiedName}"), inputValue) + } + } + + val runningEvent = List(MetadataEvent(metadataKeyForCall(key, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.Running))) + + serviceRegistryActor ! PutMetadataAction(runningEvent ++ inputEvents) + } + + def pushWorkflowOutputMetadata(outputs: Map[LocallyQualifiedName, WdlValue]) = { + val events = outputs match { + case empty if empty.isEmpty => List(MetadataEvent.empty(MetadataKey(workflowIdForCallMetadata, None, WorkflowMetadataKeys.Outputs))) + case _ => outputs flatMap { + case (outputName, outputValue) => + wdlValueToMetadataEvents(MetadataKey(workflowIdForCallMetadata, None, s"${WorkflowMetadataKeys.Outputs}:$outputName"), outputValue) + } + } + + serviceRegistryActor ! 
PutMetadataAction(events) + } + + def pushSuccessfulCallMetadata(jobKey: JobKey, returnCode: Option[Int], outputs: CallOutputs) = { + val completionEvents = completedCallMetadataEvents(jobKey, ExecutionStatus.Done, returnCode) + + val outputEvents = outputs match { + case empty if empty.isEmpty => + List(MetadataEvent.empty(metadataKeyForCall(jobKey, s"${CallMetadataKeys.Outputs}"))) + case _ => + outputs flatMap { case (lqn, outputValue) => wdlValueToMetadataEvents(metadataKeyForCall(jobKey, s"${CallMetadataKeys.Outputs}:$lqn"), outputValue.wdlValue) } + } + + serviceRegistryActor ! PutMetadataAction(completionEvents ++ outputEvents) + } + + def pushFailedCallMetadata(jobKey: JobKey, returnCode: Option[Int], failure: Throwable, retryableFailure: Boolean) = { + val failedState = if (retryableFailure) ExecutionStatus.RetryableFailure else ExecutionStatus.Failed + val completionEvents = completedCallMetadataEvents(jobKey, failedState, returnCode) + val retryableFailureEvent = MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.RetryableFailure), MetadataValue(retryableFailure)) + val failureEvents = throwableToMetadataEvents(metadataKeyForCall(jobKey, s"${CallMetadataKeys.Failures}"), failure).+:(retryableFailureEvent) + + serviceRegistryActor ! PutMetadataAction(completionEvents ++ failureEvents) + } + + def pushAbortedCallMetadata(jobKey: JobKey) = { + val completionEvents = completedCallMetadataEvents(jobKey, ExecutionStatus.Aborted, None) + + serviceRegistryActor ! PutMetadataAction(completionEvents) + } + + def pushExecutionEventsToMetadataService(jobKey: JobKey, eventList: Seq[ExecutionEvent]) = { + def metadataEvent(k: String, value: Any) = { + val metadataValue = MetadataValue(value) + val metadataKey = metadataKeyForCall(jobKey, k) + MetadataEvent(metadataKey, metadataValue) + } + + eventList.headOption foreach { firstEvent => + // The final event is only used as the book-end for the final pairing so the name is never actually used... 
+ val offset = firstEvent.offsetDateTime.getOffset + val now = OffsetDateTime.now.withOffsetSameInstant(offset) + val lastEvent = ExecutionEvent("!!Bring Back the Monarchy!!", now) + val tailedEventList = eventList :+ lastEvent + val events = tailedEventList.sliding(2) flatMap { + case Seq(eventCurrent, eventNext) => + val eventKey = s"executionEvents[$randomNumberString]" + List( + metadataEvent(s"$eventKey:description", eventCurrent.name), + metadataEvent(s"$eventKey:startTime", eventCurrent.offsetDateTime), + metadataEvent(s"$eventKey:endTime", eventNext.offsetDateTime) + ) + } + + serviceRegistryActor ! PutMetadataAction(events.toIterable) + } + } + + private def completedCallMetadataEvents(jobKey: JobKey, executionStatus: ExecutionStatus, returnCode: Option[Int]) = { + val returnCodeEvent = returnCode map { rc => + List(MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.ReturnCode), MetadataValue(rc))) + } + + List( + MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.ExecutionStatus), MetadataValue(executionStatus)), + MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.End), MetadataValue(OffsetDateTime.now)) + ) ++ returnCodeEvent.getOrElse(List.empty) + } + + def metadataKeyForCall(jobKey: JobKey, myKey: String) = MetadataKey(workflowIdForCallMetadata, Option(MetadataJobKey(jobKey.scope.fullyQualifiedName, jobKey.index, jobKey.attempt)), myKey) + + private def randomNumberString: String = Random.nextInt.toString.stripPrefix("-") + +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala index 26aa93891..05cc4a115 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala @@ -1,9 +1,7 @@ package 
cromwell.engine.workflow.lifecycle.execution -import java.time.OffsetDateTime - -import akka.actor.{ActorRef, ActorRefFactory, LoggingFSM, Props} -import akka.routing.RoundRobinPool +import akka.actor.SupervisorStrategy.{Escalate, Stop} +import akka.actor.{ActorInitializationException, ActorRef, LoggingFSM, OneForOneStrategy, Props} import cromwell.backend.BackendCacheHitCopyingActor.CopyOutputsCommand import cromwell.backend.BackendJobExecutionActor._ import cromwell.backend.{BackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey, BackendLifecycleActorFactory} @@ -13,18 +11,26 @@ import cromwell.core._ import cromwell.core.callcaching._ import cromwell.core.logging.WorkflowLogging import cromwell.core.simpleton.WdlValueSimpleton +import cromwell.database.sql.tables.CallCachingEntry import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor.{BackendJobPreparationFailed, BackendJobPreparationSucceeded} -import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CacheMiss, CallCacheHashes, HashError} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCache.CallCacheHashBundle +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadingJobActor.NextHit +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheWriteActor._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CallCacheHashes, _} import cromwell.engine.workflow.lifecycle.execution.callcaching.FetchCachedResultsActor.{CachedOutputLookupFailed, CachedOutputLookupSucceeded} import cromwell.engine.workflow.lifecycle.execution.callcaching._ +import cromwell.engine.workflow.lifecycle.execution.preparation.CallPreparation.{BackendJobPreparationSucceeded, CallPreparationFailed} +import 
cromwell.engine.workflow.lifecycle.execution.preparation.{CallPreparation, JobPreparationActor} +import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor.{JobExecutionTokenDenied, JobExecutionTokenDispensed, JobExecutionTokenRequest, JobExecutionTokenReturn} import cromwell.jobstore.JobStoreActor._ -import cromwell.jobstore.{Pending => _, _} +import cromwell.jobstore._ import cromwell.services.SingletonServicesStore -import cromwell.services.metadata.MetadataService.PutMetadataAction -import cromwell.services.metadata.{MetadataEvent, MetadataJobKey, MetadataKey, MetadataValue} -import wdl4s.TaskOutput +import cromwell.services.metadata.CallMetadataKeys.CallCachingKeys +import cromwell.services.metadata.{CallMetadataKeys, MetadataJobKey, MetadataKey} +import wdl4s.wdl.TaskOutput +import scala.concurrent.ExecutionContext import scala.util.{Failure, Success, Try} class EngineJobExecutionActor(replyTo: ActorRef, @@ -33,28 +39,58 @@ class EngineJobExecutionActor(replyTo: ActorRef, factory: BackendLifecycleActorFactory, initializationData: Option[BackendInitializationData], restarting: Boolean, - serviceRegistryActor: ActorRef, + val serviceRegistryActor: ActorRef, + ioActor: ActorRef, jobStoreActor: ActorRef, callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + workflowDockerLookupActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonActor: Option[ActorRef], backendName: String, - callCachingMode: CallCachingMode) extends LoggingFSM[EngineJobExecutionActorState, EJEAData] with WorkflowLogging { + callCachingMode: CallCachingMode) extends LoggingFSM[EngineJobExecutionActorState, EJEAData] with WorkflowLogging with CallMetadataHelper { + + override val workflowIdForLogging = executionData.workflowDescriptor.id + override val workflowIdForCallMetadata = executionData.workflowDescriptor.id - override val workflowId = executionData.workflowDescriptor.id + override val supervisorStrategy = OneForOneStrategy() { + // If an actor fails 
to initialize, send the exception to self before stopping it so we can fail the job properly + case e: ActorInitializationException => + self ! e + Stop + case t => + super.supervisorStrategy.decider.applyOrElse(t, (_: Any) => Escalate) + } - val jobTag = s"${workflowId.shortString}:${jobDescriptorKey.call.fullyQualifiedName}:${jobDescriptorKey.index.fromIndex}:${jobDescriptorKey.attempt}" + val jobTag = s"${workflowIdForLogging.shortString}:${jobDescriptorKey.call.fullyQualifiedName}:${jobDescriptorKey.index.fromIndex}:${jobDescriptorKey.attempt}" val tag = s"EJEA_$jobTag" // There's no need to check for a cache hit again if we got preempted, or if there's no result copying actor defined // NB: this can also change (e.g. if we have a HashError we just force this to CallCachingOff) - private var effectiveCallCachingMode = if (factory.cacheHitCopyingActorProps.isEmpty || jobDescriptorKey.attempt > 1) callCachingMode.withoutRead else callCachingMode + private[execution] var effectiveCallCachingMode = { + if (factory.fileHashingActorProps.isEmpty) CallCachingOff + else if (factory.cacheHitCopyingActorProps.isEmpty || jobDescriptorKey.attempt > 1) { + callCachingMode.withoutRead + } else callCachingMode + } // For tests: private[execution] def checkEffectiveCallCachingMode = effectiveCallCachingMode - private val effectiveCallCachingKey = "Effective call caching mode" + private[execution] var executionToken: Option[JobExecutionToken] = None - log.debug(s"$tag: $effectiveCallCachingKey: $effectiveCallCachingMode") - writeCallCachingModeToMetadata() + private val effectiveCallCachingKey = CallCachingKeys.EffectiveModeKey + private val callCachingReadResultMetadataKey = CallCachingKeys.ReadResultMetadataKey + private val callCachingHitResultMetadataKey = CallCachingKeys.HitResultMetadataKey + private val callCachingAllowReuseMetadataKey = CallCachingKeys.AllowReuseMetadataKey + private val callCachingHitFailures = CallCachingKeys.HitFailuresKey + private val 
callCachingHashes = CallCachingKeys.HashesKey + + implicit val ec: ExecutionContext = context.dispatcher + + override def preStart() = { + log.debug(s"$tag: $effectiveCallCachingKey: $effectiveCallCachingMode") + } startWith(Pending, NoData) private var eventList: Seq[ExecutionEvent] = Seq(ExecutionEvent(stateName.toString)) @@ -62,69 +98,105 @@ class EngineJobExecutionActor(replyTo: ActorRef, // When Pending, the FSM always has NoData when(Pending) { case Event(Execute, NoData) => + requestExecutionToken() + goto(RequestingExecutionToken) + } + + when(RequestingExecutionToken) { + case Event(JobExecutionTokenDispensed(jobExecutionToken), NoData) => + executionToken = Option(jobExecutionToken) + replyTo ! JobStarting(jobDescriptorKey) if (restarting) { - val jobStoreKey = jobDescriptorKey.toJobStoreKey(workflowId) + val jobStoreKey = jobDescriptorKey.toJobStoreKey(workflowIdForLogging) jobStoreActor ! QueryJobCompletion(jobStoreKey, jobDescriptorKey.call.task.outputs) goto(CheckingJobStore) } else { prepareJob() } + case Event(JobExecutionTokenDenied(positionInQueue), NoData) => + log.debug("Token denied so cannot start yet. Currently position {} in the queue", positionInQueue) + stay() } // When CheckingJobStore, the FSM always has NoData when(CheckingJobStore) { case Event(JobNotComplete, NoData) => - prepareJob() + checkCacheEntryExistence() case Event(JobComplete(jobResult), NoData) => val response = jobResult match { - case JobResultSuccess(returnCode, jobOutputs) => SucceededResponse(jobDescriptorKey, returnCode, jobOutputs, None, Seq.empty) - case JobResultFailure(returnCode, reason, false) => FailedNonRetryableResponse(jobDescriptorKey, reason, returnCode) - case JobResultFailure(returnCode, reason, true) => FailedRetryableResponse(jobDescriptorKey, reason, returnCode) + // Always puts `None` for `dockerImageUsed` for a successfully completed job on restart. 
This shouldn't be a + // problem since `saveJobCompletionToJobStore` will already have sent this to metadata. + case JobResultSuccess(returnCode, jobOutputs) => JobSucceededResponse(jobDescriptorKey, returnCode, jobOutputs, None, Seq.empty, None) + case JobResultFailure(returnCode, reason, false) => JobFailedNonRetryableResponse(jobDescriptorKey, reason, returnCode) + case JobResultFailure(returnCode, reason, true) => JobFailedRetryableResponse(jobDescriptorKey, reason, returnCode) } respondAndStop(response) case Event(f: JobStoreReadFailure, NoData) => + writeCallCachingModeToMetadata() log.error(f.reason, "{}: Error reading from JobStore", tag) // Escalate throw new RuntimeException(f.reason) } + // When we're restarting but the job store says the job is not complete. + // This is to cover for the case where Cromwell was stopped after writing Cache Info to the DB but before + // writing to the JobStore. In that case, we don't want to cache to ourselves (turn cache read off), nor do we want to + // try and write the cache info again, which would fail (turn cache write off). + // This means call caching should be disabled. + // Note that we check that there is not a Cache entry for *this* current job. It's still technically possible + // to call cache to another job that finished while this one was running (before the restart). + when(CheckingCacheEntryExistence) { + // There was already a cache entry for this job + case Event(HasCallCacheEntry(_), NoData) => + // Disable call caching + effectiveCallCachingMode = CallCachingOff + prepareJob() + // No cache entry for this job - keep going + case Event(NoCallCacheEntry(_), NoData) => + prepareJob() + case Event(CacheResultLookupFailure(reason), NoData) => + log.error(reason, "{}: Failure checking for cache entry existence: {}. 
Attempting to resume job anyway.", jobTag, reason.getMessage) + prepareJob() + } + // When PreparingJob, the FSM always has NoData when(PreparingJob) { case Event(BackendJobPreparationSucceeded(jobDescriptor, bjeaProps), NoData) => val updatedData = ResponsePendingData(jobDescriptor, bjeaProps) effectiveCallCachingMode match { - case activity: CallCachingActivity if activity.readFromCache => - initializeJobHashing(jobDescriptor, activity) - goto(CheckingCallCache) using updatedData - case activity: CallCachingActivity => - initializeJobHashing(jobDescriptor, activity) - runJob(updatedData) + case activity: CallCachingActivity if activity.readFromCache => handleReadFromCacheOn(jobDescriptor, activity, updatedData) + case activity: CallCachingActivity => handleReadFromCacheOff(jobDescriptor, activity, updatedData) case CallCachingOff => runJob(updatedData) } - case Event(response: BackendJobPreparationFailed, NoData) => - forwardAndStop(response) + case Event(CallPreparationFailed(jobKey: BackendJobDescriptorKey, throwable), NoData) => + respondAndStop(JobFailedNonRetryableResponse(jobKey, throwable, None)) } - private val callCachingReadResultMetadataKey = "Call caching read result" when(CheckingCallCache) { case Event(CacheMiss, data: ResponsePendingData) => - writeToMetadata(Map(callCachingReadResultMetadataKey -> "Cache Miss")) + writeToMetadata(Map( + callCachingHitResultMetadataKey -> false, + callCachingReadResultMetadataKey -> "Cache Miss")) log.debug("Cache miss for job {}", jobTag) runJob(data) - case Event(hit @ CacheHit(cacheResultId), data: ResponsePendingData) => - fetchCachedResults(data, jobDescriptorKey.call.task.outputs, hit) + case Event(hashes: CallCacheHashes, data: ResponsePendingData) => + addHashesAndStay(data, hashes) + case Event(hit: CacheHit, data: ResponsePendingData) => + fetchCachedResults(jobDescriptorKey.call.task.outputs, hit.cacheResultId, data.withCacheHit(hit)) case Event(HashError(t), data: ResponsePendingData) => 
writeToMetadata(Map(callCachingReadResultMetadataKey -> s"Hashing Error: ${t.getMessage}")) - disableCallCaching(t) + disableCallCaching(Option(t)) runJob(data) } when(FetchingCachedOutputsFromDatabase) { case Event(CachedOutputLookupSucceeded(wdlValueSimpletons, jobDetritus, returnCode, cacheResultId, cacheHitDetails), data: ResponsePendingData) => - writeToMetadata(Map(callCachingReadResultMetadataKey -> s"Cache Hit: $cacheHitDetails")) + writeToMetadata(Map( + callCachingHitResultMetadataKey -> true, + callCachingReadResultMetadataKey -> s"Cache Hit: $cacheHitDetails")) log.debug("Cache hit for {}! Fetching cached result {}", jobTag, cacheResultId) - makeBackendCopyCacheHit(cacheResultId, wdlValueSimpletons, jobDetritus, returnCode, data) - case Event(CachedOutputLookupFailed(metaInfoId, error), data: ResponsePendingData) => + makeBackendCopyCacheHit(wdlValueSimpletons, jobDetritus, returnCode, data, cacheResultId) using data.withCacheDetails(cacheHitDetails) + case Event(CachedOutputLookupFailed(_, error), data: ResponsePendingData) => log.warning("Can't make a copy of the cached job outputs for {} due to {}. 
Running job.", jobTag, error) runJob(data) case Event(hashes: CallCacheHashes, data: ResponsePendingData) => @@ -137,17 +209,18 @@ class EngineJobExecutionActor(replyTo: ActorRef, when(BackendIsCopyingCachedOutputs) { // Backend copying response: - case Event(response: SucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)))) => + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)), _, _)) => saveCacheResults(hashes, data.withSuccessResponse(response)) - case Event(response: SucceededResponse, data @ ResponsePendingData(_, _, None)) if effectiveCallCachingMode.writeToCache => + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, None, _, _)) if effectiveCallCachingMode.writeToCache => // Wait for the CallCacheHashes stay using data.withSuccessResponse(response) - case Event(response: SucceededResponse, data: ResponsePendingData) => // bad hashes or cache write off + case Event(response: JobSucceededResponse, data: ResponsePendingData) => // bad hashes or cache write off saveJobCompletionToJobStore(data.withSuccessResponse(response)) - case Event(response: BackendJobExecutionResponse, data: ResponsePendingData) => - // This matches all response types other than `SucceededResponse`. 
- log.error("{}: Failed copying cache results, falling back to running job.", jobDescriptorKey) - runJob(data) + case Event(response: BackendJobExecutionResponse, data @ ResponsePendingData(_, _, _, _, Some(cacheHit))) => + response match { + case f: BackendJobFailedResponse => invalidateCacheHitAndTransition(cacheHit, data, f.throwable) + case _ => runJob(data) + } // Hashes arrive: case Event(hashes: CallCacheHashes, data: SucceededResponseData) => @@ -165,25 +238,54 @@ class EngineJobExecutionActor(replyTo: ActorRef, stay using data.copy(hashes = Option(Failure(t))) } + when(InvalidatingCacheEntry) { + case Event(response: CallCacheInvalidatedResponse, data: ResponsePendingData) => + handleCacheInvalidatedResponse(response, data) + + // Hashes arrive: + case Event(hashes: CallCacheHashes, data: ResponsePendingData) => + addHashesAndStay(data, hashes) + + // Hash error occurs: + case Event(HashError(t), data: ResponsePendingData) => + disableCacheWrite(t) + // Can't write hashes for this job, but continue to wait for the copy response. 
+ stay using data.copy(hashes = Option(Failure(t))) + } + when(RunningJob) { case Event(hashes: CallCacheHashes, data: SucceededResponseData) => saveCacheResults(hashes, data) case Event(hashes: CallCacheHashes, data: ResponsePendingData) => addHashesAndStay(data, hashes) + case Event(CacheMiss, _) => + stay() + case Event(_: CacheHit, _) => + stay() case Event(HashError(t), data: SucceededResponseData) => - disableCallCaching(t) + disableCallCaching(Option(t)) saveJobCompletionToJobStore(data.copy(hashes = Option(Failure(t)))) case Event(HashError(t), data: ResponsePendingData) => - disableCallCaching(t) + disableCallCaching(Option(t)) stay using data.copy(hashes = Option(Failure(t))) - case Event(response: SucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)))) if effectiveCallCachingMode.writeToCache => + // Success + // All hashes already retrieved - save to the cache + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)), _, _)) if effectiveCallCachingMode.writeToCache => eventList ++= response.executionEvents saveCacheResults(hashes, data.withSuccessResponse(response)) - case Event(response: SucceededResponse, data @ ResponsePendingData(_, _, None)) if effectiveCallCachingMode.writeToCache => + // Some hashes are still missing - waiting for them + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, None, _, _)) if effectiveCallCachingMode.writeToCache => + eventList ++= response.executionEvents log.debug(s"Got job result for {}, awaiting hashes", jobTag) stay using data.withSuccessResponse(response) + // writeToCache is OFF - complete the job + case Event(response: JobSucceededResponse, data: ResponsePendingData) => + eventList ++= response.executionEvents + saveJobCompletionToJobStore(data.withSuccessResponse(response)) + + // Non-success: case Event(response: BackendJobExecutionResponse, data: ResponsePendingData) => 
saveJobCompletionToJobStore(data.withResponse(response)) } @@ -201,8 +303,8 @@ class EngineJobExecutionActor(replyTo: ActorRef, when(UpdatingJobStore) { case Event(JobStoreWriteSuccess(_), data: ResponseData) => forwardAndStop(data.response) - case Event(JobStoreWriteFailure(t), data: ResponseData) => - respondAndStop(FailedNonRetryableResponse(jobDescriptorKey, new Exception(s"JobStore write failure: ${t.getMessage}", t), None)) + case Event(JobStoreWriteFailure(t), _: ResponseData) => + respondAndStop(JobFailedNonRetryableResponse(jobDescriptorKey, new Exception(s"JobStore write failure: ${t.getMessage}", t), None)) } onTransition { @@ -212,33 +314,86 @@ class EngineJobExecutionActor(replyTo: ActorRef, } whenUnhandled { + case Event(e: ActorInitializationException, _) => + respondAndStop(JobFailedNonRetryableResponse(jobDescriptorKey, e, None)) case Event(msg, _) => log.error("Bad message from {} to EngineJobExecutionActor in state {}(with data {}): {}", sender, stateName, stateData, msg) stay } + private def publishHashesToMetadata(maybeHashes: Option[Try[CallCacheHashes]]) = maybeHashes match { + case Some(Success(hashes)) => + val hashMap = hashes.hashes.collect({ + case HashResult(HashKey(useInCallCaching, keyComponents), HashValue(value)) if useInCallCaching => + (callCachingHashes + MetadataKey.KeySeparator + keyComponents.mkString(MetadataKey.KeySeparator.toString)) -> value + }).toMap + writeToMetadata(hashMap) + case _ => + } + + private def handleReadFromCacheOn(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity, updatedData: ResponsePendingData) = { + jobDescriptor.maybeCallCachingEligible match { + // If the job is eligible, initialize job hashing and go to CheckingCallCache state + case eligible: CallCachingEligible => + initializeJobHashing(jobDescriptor, activity, eligible) match { + case Success(ejha) => goto(CheckingCallCache) using updatedData.withEJHA(ejha) + case Failure(failure) => 
respondAndStop(JobFailedNonRetryableResponse(jobDescriptorKey, failure, None)) + } + case _ => + // If the job is ineligible, turn call caching off + writeToMetadata(Map(callCachingReadResultMetadataKey -> s"Cache Miss")) + disableCallCaching() + runJob(updatedData) + } + } + + private def handleReadFromCacheOff(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity, updatedData: ResponsePendingData) = { + jobDescriptor.maybeCallCachingEligible match { + // If the job is eligible, initialize job hashing so it can be written to the cache + case eligible: CallCachingEligible => initializeJobHashing(jobDescriptor, activity, eligible) match { + case Failure(failure) => log.error(failure, "Failed to initialize job hashing. The job will not be written to the cache") + case _ => + } + // Don't even initialize hashing to write to the cache if the job is ineligible + case _ => disableCallCaching() + } + // If read from cache is off, always run the job + runJob(updatedData) + } + + private def requestExecutionToken(): Unit = { + jobTokenDispenserActor ! JobExecutionTokenRequest(factory.jobExecutionTokenType) + } + + private def returnExecutionToken(): Unit = { + executionToken foreach { jobTokenDispenserActor ! JobExecutionTokenReturn(_) } + } + private def forwardAndStop(response: Any): State = { replyTo forward response - tellEventMetadata() + returnExecutionToken() + pushExecutionEventsToMetadataService(jobDescriptorKey, eventList) context stop self stay() } private def respondAndStop(response: Any): State = { replyTo ! 
response - tellEventMetadata() + returnExecutionToken() + pushExecutionEventsToMetadataService(jobDescriptorKey, eventList) context stop self stay() } - private def disableCallCaching(reason: Throwable) = { - log.error(reason, "{}: Hash error, disabling call caching for this job.", jobTag) + private def disableCallCaching(reason: Option[Throwable] = None) = { + reason foreach { log.error(_, "{}: Hash error, disabling call caching for this job.", jobTag) } effectiveCallCachingMode = CallCachingOff writeCallCachingModeToMetadata() + writeToMetadata(Map(callCachingHitResultMetadataKey -> false)) } private def disableCacheWrite(reason: Throwable) = { - log.error("{}: Disabling cache writing for this job.", jobTag) + log.error(reason, "{}: Disabling cache writing for this job.", jobTag) if (effectiveCallCachingMode.writeToCache) { effectiveCallCachingMode = effectiveCallCachingMode.withoutWrite writeCallCachingModeToMetadata() @@ -247,152 +402,215 @@ class EngineJobExecutionActor(replyTo: ActorRef, def writeCallCachingModeToMetadata(): Unit = { writeToMetadata(Map(effectiveCallCachingKey -> effectiveCallCachingMode.toString)) + writeToMetadata(Map(callCachingAllowReuseMetadataKey -> effectiveCallCachingMode.writeToCache)) } def createJobPreparationActor(jobPrepProps: Props, name: String): ActorRef = context.actorOf(jobPrepProps, name) def prepareJob() = { + writeCallCachingModeToMetadata() val jobPreparationActorName = s"BackendPreparationActor_for_$jobTag" - val jobPrepProps = JobPreparationActor.props(executionData, jobDescriptorKey, factory, initializationData, serviceRegistryActor) + val jobPrepProps = JobPreparationActor.props(executionData, jobDescriptorKey, factory, workflowDockerLookupActor = workflowDockerLookupActor, + initializationData, serviceRegistryActor = serviceRegistryActor, ioActor = ioActor, backendSingletonActor = backendSingletonActor) val jobPreparationActor = createJobPreparationActor(jobPrepProps, jobPreparationActorName) - jobPreparationActor ! 
JobPreparationActor.Start + jobPreparationActor ! CallPreparation.Start goto(PreparingJob) } - def initializeJobHashing(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity): Unit = { - val props = EngineJobHashingActor.props( - self, - jobDescriptor, - initializationData, - // Use context.system instead of context as the factory. Otherwise when we die, so will the child actors. - factoryFileHashingRouter(backendName, factory, context.system), - callCacheReadActor, - factory.runtimeAttributeDefinitions(initializationData), backendName, activity) - context.actorOf(props, s"ejha_for_$jobDescriptor") + def initializeJobHashing(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity, callCachingEligible: CallCachingEligible): Try[ActorRef] = { + val maybeFileHashingActorProps = factory.fileHashingActorProps map { + _.apply(jobDescriptor, initializationData, serviceRegistryActor, ioActor) + } + + maybeFileHashingActorProps match { + case Some(fileHashingActorProps) => + val props = EngineJobHashingActor.props( + self, + serviceRegistryActor, + jobDescriptor, + initializationData, + fileHashingActorProps, + CallCacheReadingJobActor.props(callCacheReadActor), + factory.runtimeAttributeDefinitions(initializationData), + backendName, + activity, + callCachingEligible + ) + val ejha = context.actorOf(props, s"ejha_for_$jobDescriptor") + + Success(ejha) + case None => Failure(new IllegalStateException("Tried to initialize job hashing without a file hashing actor !")) + } } - def makeFetchCachedResultsActor(cacheHit: CacheHit, taskOutputs: Seq[TaskOutput]): Unit = context.actorOf(FetchCachedResultsActor.props(cacheHit, self, new CallCache(SingletonServicesStore.databaseInterface))) - def fetchCachedResults(data: ResponsePendingData, taskOutputs: Seq[TaskOutput], cacheHit: CacheHit) = { - makeFetchCachedResultsActor(cacheHit, taskOutputs) - goto(FetchingCachedOutputsFromDatabase) + def makeFetchCachedResultsActor(callCachingEntryId: CallCachingEntryId, 
taskOutputs: Seq[TaskOutput]): Unit = { + context.actorOf(FetchCachedResultsActor.props(callCachingEntryId, self, new CallCache(SingletonServicesStore.databaseInterface))) + () } - def makeBackendCopyCacheHit(cacheHit: CacheHit, wdlValueSimpletons: Seq[WdlValueSimpleton], jobDetritusFiles: Map[String,String], returnCode: Option[Int], data: ResponsePendingData) = { + private def fetchCachedResults(taskOutputs: Seq[TaskOutput], callCachingEntryId: CallCachingEntryId, data: ResponsePendingData) = { + makeFetchCachedResultsActor(callCachingEntryId, taskOutputs) + goto(FetchingCachedOutputsFromDatabase) using data + } + + private def makeBackendCopyCacheHit(wdlValueSimpletons: Seq[WdlValueSimpleton], jobDetritusFiles: Map[String,String], returnCode: Option[Int], data: ResponsePendingData, cacheResultId: CallCachingEntryId) = { factory.cacheHitCopyingActorProps match { case Some(propsMaker) => - val backendCacheHitCopyingActorProps = propsMaker(data.jobDescriptor, initializationData, serviceRegistryActor) - val cacheHitCopyActor = context.actorOf(backendCacheHitCopyingActorProps, buildCacheHitCopyingActorName(data.jobDescriptor)) + val backendCacheHitCopyingActorProps = propsMaker(data.jobDescriptor, initializationData, serviceRegistryActor, ioActor) + val cacheHitCopyActor = context.actorOf(backendCacheHitCopyingActorProps, buildCacheHitCopyingActorName(data.jobDescriptor, cacheResultId)) cacheHitCopyActor ! CopyOutputsCommand(wdlValueSimpletons, jobDetritusFiles, returnCode) - replyTo ! JobRunning(data.jobDescriptor, None) - goto(BackendIsCopyingCachedOutputs) using data + replyTo ! JobRunning(data.jobDescriptor.key, data.jobDescriptor.inputDeclarations, None) + goto(BackendIsCopyingCachedOutputs) case None => // This should be impossible with the FSM, but luckily, we CAN recover if some foolish future programmer makes this happen: val errorMessage = "Call caching copying should never have even been attempted with no copy actor props! 
(Programmer error!)" log.error(errorMessage) - self ! FailedNonRetryableResponse(data.jobDescriptor.key, new RuntimeException(errorMessage), None) + self ! JobFailedNonRetryableResponse(data.jobDescriptor.key, new RuntimeException(errorMessage), None) goto(BackendIsCopyingCachedOutputs) } } - def runJob(data: ResponsePendingData) = { + private def runJob(data: ResponsePendingData) = { val backendJobExecutionActor = context.actorOf(data.bjeaProps, buildJobExecutionActorName(data.jobDescriptor)) val message = if (restarting) RecoverJobCommand else ExecuteJobCommand backendJobExecutionActor ! message - replyTo ! JobRunning(data.jobDescriptor, Option(backendJobExecutionActor)) + replyTo ! JobRunning(data.jobDescriptor.key, data.jobDescriptor.inputDeclarations, Option(backendJobExecutionActor)) goto(RunningJob) using data } + private def handleCacheInvalidatedResponse(response: CallCacheInvalidatedResponse, data: ResponsePendingData) = { + def updateMetadataForInvalidatedEntry(entry: CallCachingEntry) = { + import cromwell.core.ExecutionIndex._ + import cromwell.services.metadata.MetadataService.implicits.MetadataAutoPutter + + val workflowId = WorkflowId.fromString(entry.workflowExecutionUuid) + // If the entry doesn't have an attempt, it means that this cache entry was added before this change + // and we don't know which attempt yielded this cache entry + // In that case make a best effort and update the first attempt + val key = Option((entry.callFullyQualifiedName, entry.jobIndex.toIndex, entry.jobAttempt.getOrElse(1))) + serviceRegistryActor.putMetadataWithRawKey(workflowId, key, Map(callCachingAllowReuseMetadataKey -> false)) + } + + response match { + case CallCacheInvalidatedFailure(failure) => log.error(failure, "Failed to invalidate cache entry for job: {}", jobDescriptorKey) + case CallCacheInvalidatedSuccess(Some(entry)) => updateMetadataForInvalidatedEntry(entry) + case _ => + } + + data.ejha match { + case Some(ejha) => + log.info("Trying to use another 
cache hit for job: {}", jobDescriptorKey) + ejha ! NextHit + goto(CheckingCallCache) + case _ => + log.info("Could not find another cache hit, falling back to running job: {}", jobDescriptorKey) + runJob(data) + } + } + private def buildJobExecutionActorName(jobDescriptor: BackendJobDescriptor) = { - s"$workflowId-BackendJobExecutionActor-$jobTag" + s"$workflowIdForLogging-BackendJobExecutionActor-$jobTag" + } + + private def buildCacheHitCopyingActorName(jobDescriptor: BackendJobDescriptor, cacheResultId: CallCachingEntryId) = { + s"$workflowIdForLogging-BackendCacheHitCopyingActor-$jobTag-${cacheResultId.id}" } - private def buildCacheHitCopyingActorName(jobDescriptor: BackendJobDescriptor) = { - s"$workflowId-BackendCacheHitCopyingActor-$jobTag" + private def publishHitFailure(cache: EJEACacheHit, failure: Throwable) = { + import MetadataKey._ + import cromwell.services.metadata.MetadataService._ + + cache.details foreach { details => + val metadataKey = MetadataKey( + workflowIdForLogging, + Option(MetadataJobKey(jobDescriptorKey.call.fullyQualifiedName, jobDescriptorKey.index, jobDescriptorKey.attempt)), + s"$callCachingHitFailures[${cache.hitNumber}]:${details.escapeMeta}" + ) + + serviceRegistryActor ! PutMetadataAction(throwableToMetadataEvents(metadataKey, failure)) + } + } + + private def invalidateCacheHitAndTransition(ejeaCacheHit: EJEACacheHit, data: ResponsePendingData, reason: Throwable) = { + publishHitFailure(ejeaCacheHit, reason) + + val invalidationRequired = effectiveCallCachingMode match { + case CallCachingOff => throw new RuntimeException("Should not be calling invalidateCacheHit if call caching is off!") // Very unexpected. Fail out of this bad-state EJEA. 
+ case activity: CallCachingActivity => activity.options.invalidateBadCacheResults + } + if (invalidationRequired) { + log.error(reason, "Failed copying cache results for job {}, invalidating cache entry.", jobDescriptorKey) + invalidateCacheHit(ejeaCacheHit.hit.cacheResultId) + goto(InvalidatingCacheEntry) + } else { + handleCacheInvalidatedResponse(CallCacheInvalidationUnnecessary, data) + } } - protected def createSaveCacheResultsActor(hashes: CallCacheHashes, success: SucceededResponse): Unit = { + protected def invalidateCacheHit(cacheId: CallCachingEntryId): Unit = { val callCache = new CallCache(SingletonServicesStore.databaseInterface) - context.actorOf(CallCacheWriteActor.props(callCache, workflowId, hashes, success), s"CallCacheWriteActor-$tag") + context.actorOf(CallCacheInvalidateActor.props(callCache, cacheId), s"CallCacheInvalidateActor${cacheId.id}-$tag") + () + } + + private def checkCacheEntryExistence() = { + callCacheReadActor ! CallCacheEntryForCall(workflowIdForLogging, jobDescriptorKey) + goto(CheckingCacheEntryExistence) } private def saveCacheResults(hashes: CallCacheHashes, data: SucceededResponseData) = { - createSaveCacheResultsActor(hashes, data.successResponse) + callCacheWriteActor ! 
SaveCallCacheHashes(CallCacheHashBundle(workflowIdForLogging, hashes, data.response)) val updatedData = data.copy(hashes = Option(Success(hashes))) goto(UpdatingCallCache) using updatedData } private def saveJobCompletionToJobStore(updatedData: ResponseData) = { updatedData.response match { - case SucceededResponse(jobKey: BackendJobDescriptorKey, returnCode: Option[Int], jobOutputs: JobOutputs, _, _) => saveSuccessfulJobResults(jobKey, returnCode, jobOutputs) - case AbortedResponse(jobKey: BackendJobDescriptorKey) => + case JobSucceededResponse(jobKey: BackendJobDescriptorKey, returnCode: Option[Int], jobOutputs: CallOutputs, _, _, _) => + publishHashesToMetadata(updatedData.hashes) + saveSuccessfulJobResults(jobKey, returnCode, jobOutputs) + case AbortedResponse(_: BackendJobDescriptorKey) => log.debug("{}: Won't save aborted job response to JobStore", jobTag) forwardAndStop(updatedData.response) - case FailedNonRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) => saveUnsuccessfulJobResults(jobKey, returnCode, throwable, retryable = false) - case FailedRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) => saveUnsuccessfulJobResults(jobKey, returnCode, throwable, retryable = true) + case JobFailedNonRetryableResponse(jobKey, throwable: Throwable, returnCode: Option[Int]) => + publishHashesToMetadata(updatedData.hashes) + writeToMetadata(Map(callCachingAllowReuseMetadataKey -> false)) + saveUnsuccessfulJobResults(jobKey, returnCode, throwable, retryable = false) + case JobFailedRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) => + writeToMetadata(Map(callCachingAllowReuseMetadataKey -> false)) + saveUnsuccessfulJobResults(jobKey, returnCode, throwable, retryable = true) } + updatedData.dockerImageUsed foreach { image => writeToMetadata(Map("dockerImageUsed" -> image)) } goto(UpdatingJobStore) using updatedData } - private 
def saveSuccessfulJobResults(jobKey: JobKey, returnCode: Option[Int], outputs: JobOutputs) = { - val jobStoreKey = jobKey.toJobStoreKey(workflowId) + private def saveSuccessfulJobResults(jobKey: JobKey, returnCode: Option[Int], outputs: CallOutputs) = { + val jobStoreKey = jobKey.toJobStoreKey(workflowIdForLogging) val jobStoreResult = JobResultSuccess(returnCode, outputs) jobStoreActor ! RegisterJobCompleted(jobStoreKey, jobStoreResult) } private def saveUnsuccessfulJobResults(jobKey: JobKey, returnCode: Option[Int], reason: Throwable, retryable: Boolean) = { - val jobStoreKey = jobKey.toJobStoreKey(workflowId) + val jobStoreKey = jobKey.toJobStoreKey(workflowIdForLogging) val jobStoreResult = JobResultFailure(returnCode, reason, retryable) jobStoreActor ! RegisterJobCompleted(jobStoreKey, jobStoreResult) } - private def writeToMetadata(keyValues: Map[String, String]) = { + private def writeToMetadata(keyValues: Map[String, Any]) = { import cromwell.services.metadata.MetadataService.implicits.MetadataAutoPutter - serviceRegistryActor.putMetadata(workflowId, Option(jobDescriptorKey), keyValues) + serviceRegistryActor.putMetadata(workflowIdForLogging, Option(jobDescriptorKey), keyValues) } private def addHashesAndStay(data: ResponsePendingData, hashes: CallCacheHashes): State = { val updatedData = data.copy(hashes = Option(Success(hashes))) stay using updatedData } - - /** - * Fire and forget events to the metadata service - */ - private def tellEventMetadata(): Unit = { - eventList.headOption foreach { firstEvent => - // The final event is only used as the book-end for the final pairing so the name is never actually used... 
- val offset = firstEvent.offsetDateTime.getOffset - val now = OffsetDateTime.now.withOffsetSameInstant(offset) - val lastEvent = ExecutionEvent("!!Bring Back the Monarchy!!", now) - val tailedEventList = eventList :+ lastEvent - val events = tailedEventList.sliding(2).zipWithIndex flatMap { - case (Seq(eventCurrent, eventNext), index) => - val eventKey = s"executionEvents[$index]" - List( - metadataEvent(s"$eventKey:description", eventCurrent.name), - metadataEvent(s"$eventKey:startTime", eventCurrent.offsetDateTime), - metadataEvent(s"$eventKey:endTime", eventNext.offsetDateTime) - ) - } - - serviceRegistryActor ! PutMetadataAction(events.toIterable) - } - } - - private def metadataEvent(key: String, value: Any) = { - val metadataValue = MetadataValue(value) - MetadataEvent(metadataKey(key), metadataValue) - } - - private lazy val metadataJobKey = { - MetadataJobKey(jobDescriptorKey.call.fullyQualifiedName, jobDescriptorKey.index, jobDescriptorKey.attempt) - } - private def metadataKey(key: String) = MetadataKey(workflowId, Option(metadataJobKey), key) } object EngineJobExecutionActor { /** States */ sealed trait EngineJobExecutionActorState case object Pending extends EngineJobExecutionActorState + case object RequestingExecutionToken extends EngineJobExecutionActorState case object CheckingJobStore extends EngineJobExecutionActorState case object CheckingCallCache extends EngineJobExecutionActorState case object FetchingCachedOutputsFromDatabase extends EngineJobExecutionActorState @@ -400,13 +618,15 @@ object EngineJobExecutionActor { case object PreparingJob extends EngineJobExecutionActorState case object RunningJob extends EngineJobExecutionActorState case object UpdatingCallCache extends EngineJobExecutionActorState + case object CheckingCacheEntryExistence extends EngineJobExecutionActorState case object UpdatingJobStore extends EngineJobExecutionActorState + case object InvalidatingCacheEntry extends EngineJobExecutionActorState /** Commands */ sealed 
trait EngineJobExecutionActorCommand case object Execute extends EngineJobExecutionActorCommand - final case class JobRunning(jobDescriptor: BackendJobDescriptor, backendJobExecutionActor: Option[ActorRef]) + val CacheMetadataKeyPrefix = CallMetadataKeys.CallCaching + MetadataKey.KeySeparator def props(replyTo: ActorRef, jobDescriptorKey: BackendJobDescriptorKey, @@ -415,8 +635,13 @@ object EngineJobExecutionActor { initializationData: Option[BackendInitializationData], restarting: Boolean, serviceRegistryActor: ActorRef, + ioActor: ActorRef, jobStoreActor: ActorRef, callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + workflowDockerLookupActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonActor: Option[ActorRef], backendName: String, callCachingMode: CallCachingMode) = { Props(new EngineJobExecutionActor( @@ -427,12 +652,19 @@ object EngineJobExecutionActor { initializationData = initializationData, restarting = restarting, serviceRegistryActor = serviceRegistryActor, + ioActor = ioActor, jobStoreActor = jobStoreActor, callCacheReadActor = callCacheReadActor, + callCacheWriteActor = callCacheWriteActor, + workflowDockerLookupActor = workflowDockerLookupActor, + jobTokenDispenserActor = jobTokenDispenserActor, + backendSingletonActor = backendSingletonActor, backendName = backendName: String, callCachingMode = callCachingMode)).withDispatcher(EngineDispatcher) } + case class EJEACacheHit(hit: CacheHit, hitNumber: Int, details: Option[String]) + private[execution] sealed trait EJEAData { override def toString = getClass.getSimpleName } @@ -441,84 +673,45 @@ object EngineJobExecutionActor { private[execution] case class ResponsePendingData(jobDescriptor: BackendJobDescriptor, bjeaProps: Props, - hashes: Option[Try[CallCacheHashes]] = None) extends EJEAData { + hashes: Option[Try[CallCacheHashes]] = None, + ejha: Option[ActorRef] = None, + ejeaCacheHit: Option[EJEACacheHit] = None) extends EJEAData { + + def withEJHA(ejha: ActorRef): 
EJEAData = this.copy(ejha = Option(ejha)) - def withSuccessResponse(success: SucceededResponse) = SucceededResponseData(success, hashes) + + def withSuccessResponse(success: JobSucceededResponse) = SucceededResponseData(success, hashes) def withResponse(response: BackendJobExecutionResponse) = response match { - case success: SucceededResponse => SucceededResponseData(success, hashes) + case success: JobSucceededResponse => SucceededResponseData(success, hashes) case failure => NotSucceededResponseData(failure, hashes) } + + def withCacheHit(cacheHit: CacheHit) = { + val newEjeaCacheHit = ejeaCacheHit map { currentCacheHit => + currentCacheHit.copy(hit = cacheHit, hitNumber = currentCacheHit.hitNumber + 1) + } getOrElse EJEACacheHit(cacheHit, 0, None) + + this.copy(ejeaCacheHit = Option(newEjeaCacheHit)) + } + + def withCacheDetails(details: String) = this.copy(ejeaCacheHit = ejeaCacheHit.map(_.copy(details = Option(details)))) } private[execution] trait ResponseData extends EJEAData { def response: BackendJobExecutionResponse def hashes: Option[Try[CallCacheHashes]] + def dockerImageUsed: Option[String] } - private[execution] case class SucceededResponseData(successResponse: SucceededResponse, + private[execution] case class SucceededResponseData(successResponse: JobSucceededResponse, hashes: Option[Try[CallCacheHashes]] = None) extends ResponseData { override def response = successResponse + override def dockerImageUsed = successResponse.dockerImageUsed } private[execution] case class NotSucceededResponseData(response: BackendJobExecutionResponse, - hashes: Option[Try[CallCacheHashes]] = None) extends ResponseData - - /** - * Deliberately a singleton (well, a singleton router), so we can globally rate limit hash lookups per backend. - * - * More refinement may appear via #1377. - */ - private var factoryFileHashingRouters = Map[BackendLifecycleActorFactory, ActorRef]() - - /** - * Returns a RoundRobinPool of actors based on the backend factory. 
- * - * @param backendName Name of the backend. - * @param backendLifecycleActorFactory A backend factory. - * @param actorRefFactory An actor factory. - * @return a RoundRobinPool of actors based on backend factory. - */ - private def factoryFileHashingRouter(backendName: String, - backendLifecycleActorFactory: BackendLifecycleActorFactory, - actorRefFactory: ActorRefFactory): ActorRef = { - synchronized { - val (originalOrUpdated, result) = getOrElseUpdated( - factoryFileHashingRouters, backendLifecycleActorFactory, { - val numberOfInstances = backendLifecycleActorFactory.fileHashingActorCount - val props = backendLifecycleActorFactory.fileHashingActorProps - actorRefFactory.actorOf(RoundRobinPool(numberOfInstances).props(props), s"FileHashingActor-$backendName") - } - ) - factoryFileHashingRouters = originalOrUpdated - result - } - } - - /** - * Immutable version of mutable.Map.getOrElseUpdate based on: - * http://stackoverflow.com/questions/4385976/idiomatic-get-or-else-update-for-immutable-map#answer-5840119 - * - * If given key is already in this map, returns associated value in the copy of the Map. - * - * Otherwise, computes value from given expression `op`, stores with key - * in map and returns that value in a copy of the Map. - * - * @param map the immutable map - * @param key the key to test - * @param op the computation yielding the value to associate with `key`, if - * `key` is previously unbound. - * @tparam K type of the key - * @tparam V type of the value - * @return the value associated with key (either previously or as a result - * of executing the method). 
- */ - def getOrElseUpdated[K, V](map: Map[K, V], key: K, op: => V): (Map[K, V], V) = { - map.get(key) match { - case Some(value) => (map, value) - case None => - val value = op - (map.updated(key, value), value) - } + hashes: Option[Try[CallCacheHashes]] = None) extends ResponseData { + override def dockerImageUsed = None } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStore.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStore.scala new file mode 100644 index 000000000..906612963 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStore.scala @@ -0,0 +1,154 @@ +package cromwell.engine.workflow.lifecycle.execution + +import cromwell.backend.BackendJobDescriptorKey +import cromwell.core.ExecutionStatus._ +import cromwell.core.{CallKey, JobKey} +import cromwell.engine.workflow.lifecycle.execution.ExecutionStore.{FqnIndex, RunnableScopes} +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{apply => _, _} +import wdl4s.wdl._ + + +object ExecutionStore { + case class RunnableScopes(scopes: List[JobKey], truncated: Boolean) + + private type FqnIndex = (String, Option[Int]) + + def empty = ExecutionStore(Map.empty[JobKey, ExecutionStatus], hasNewRunnables = false) + + def apply(workflow: WdlWorkflow, workflowCoercedInputs: WorkflowCoercedInputs) = { + // Only add direct children to the store, the rest is dynamically created when necessary + val keys = workflow.children map { + case call: WdlTaskCall => Option(BackendJobDescriptorKey(call, None, 1)) + case call: WdlWorkflowCall => Option(SubWorkflowKey(call, None, 1)) + case scatter: Scatter => Option(ScatterKey(scatter)) + case conditional: If => Option(ConditionalKey(conditional, None)) + case declaration: Declaration => Option(DeclarationKey(declaration, None, workflowCoercedInputs)) + case _ => None + } + + new ExecutionStore(keys.flatten.map(_ -> NotStarted).toMap, 
keys.nonEmpty) + } + + val MaxJobsToStartPerTick = 1000 +} + +final case class ExecutionStore(private val statusStore: Map[JobKey, ExecutionStatus], hasNewRunnables: Boolean) { + + // View of the statusStore more suited for lookup based on status + lazy val store: Map[ExecutionStatus, List[JobKey]] = statusStore.groupBy(_._2).mapValues(_.keys.toList) + // Takes only keys that are done, and creates a map such that they're indexed by fqn and index + // This allows for quicker lookup (by hash) instead of traversing the whole list and yields + // significant improvements at large scale (run ExecutionStoreBenchmark) + lazy val (doneKeys, terminalKeys) = { + def toMapEntry(key: JobKey) = (key.scope.fullyQualifiedName, key.index) -> key + + store.foldLeft((Map.empty[FqnIndex, JobKey], Map.empty[FqnIndex, JobKey]))({ + case ((done, terminal), (status, keys)) => + lazy val newMapEntries = keys map toMapEntry + val newDone = if (status.isDoneOrBypassed) done ++ newMapEntries else done + val newTerminal = if (status.isTerminal) terminal ++ newMapEntries else terminal + + newDone -> newTerminal + }) + } + + private def keysWithStatus(status: ExecutionStatus) = store.getOrElse(status, List.empty) + + def isBypassedConditional(jobKey: JobKey, conditional: If): Boolean = { + keysWithStatus(Bypassed).exists { + case key: ConditionalKey => + key.scope.fullyQualifiedName.equals(conditional.fullyQualifiedName) && + key.index.equals(jobKey.index) + case _ => false + } + } + + def hasActiveJob: Boolean = { + def upstreamFailed(scope: Scope): Boolean = scope match { + case node: WdlGraphNode => node.upstreamAncestry exists hasFailedScope + } + + keysWithStatus(QueuedInCromwell).nonEmpty || + keysWithStatus(Starting).nonEmpty || + keysWithStatus(Running).nonEmpty || + keysWithStatus(NotStarted).exists(jobKey => !upstreamFailed(jobKey.scope)) + } + + def jobStatus(jobKey: JobKey): Option[ExecutionStatus] = statusStore.get(jobKey) + + def startedJobs: List[BackendJobDescriptorKey] = { + 
store.filterNot({ case (s, _) => s == NotStarted}).values.toList.flatten collect { + case k: BackendJobDescriptorKey => k + } + } + + private def hasFailedScope(s: WdlGraphNode): Boolean = keysWithStatus(Failed).exists(_.scope == s) + + def hasFailedJob: Boolean = keysWithStatus(Failed).nonEmpty + + override def toString = store.map { case (j, s) => s"$j -> $s" } mkString System.lineSeparator() + + def add(values: Map[JobKey, ExecutionStatus]) = { + this.copy(statusStore = statusStore ++ values, hasNewRunnables = hasNewRunnables || values.values.exists(_.isTerminalOrRetryable)) + } + + /** + * Returns the list of jobs ready to be run, along with a Boolean indicating whether or not the list has been truncated. + * The size of the list will be MaxJobsToStartPerTick at most. If more jobs where found runnable, the boolean will be true, otherwise false. + */ + def runnableScopes: RunnableScopes = { + val readyToStart = keysWithStatus(NotStarted).toStream filter arePrerequisitesDone + // Compute the first ExecutionStore.MaxJobsToStartPerTick + 1 runnable scopes + val scopesToStartPlusOne = readyToStart.take(ExecutionStore.MaxJobsToStartPerTick + 1).toList + // Only take the first ExecutionStore.MaxJobsToStartPerTick from the above list. + // Use the fact that we took one more to determine whether or not we truncated the result. 
+ RunnableScopes(scopesToStartPlusOne.take(ExecutionStore.MaxJobsToStartPerTick), scopesToStartPlusOne.size > ExecutionStore.MaxJobsToStartPerTick) + } + + def findCompletedShardsForOutput(key: CollectorKey): List[JobKey] = doneKeys.values.toList collect { + case k @ (_: CallKey | _:DynamicDeclarationKey) if k.scope == key.scope && k.isShard => k + } + + private def emulateShardEntries(key: CollectorKey): Set[FqnIndex] = { + (0 until key.scatterWidth).toSet map { i: Int => key.scope match { + case c: WdlCall => c.fullyQualifiedName -> Option(i) + case d: Declaration => d.fullyQualifiedName -> Option(i) + case _ => throw new RuntimeException("Don't collect that.") + }} + } + + private def arePrerequisitesDone(key: JobKey): Boolean = { + lazy val upstreamAreDone = key.scope.upstream forall { + case n @ (_: WdlCall | _: Scatter | _: Declaration) => upstreamIsDone(key, n) + case _ => true + } + + val shardEntriesForCollectorAreTerminal: Boolean = key match { + case collector: CollectorKey => emulateShardEntries(collector).diff(terminalKeys.keys.toSet).isEmpty + case _ => true + } + + shardEntriesForCollectorAreTerminal && upstreamAreDone + } + + private def upstreamIsDone(entry: JobKey, prerequisiteScope: Scope): Boolean = { + prerequisiteScope.closestCommonAncestor(entry.scope) match { + /* + * If this entry refers to a Scope which has a common ancestor with prerequisiteScope + * and that common ancestor is a Scatter block, then find the shard with the same index + * as 'entry'. In other words, if you're in the same scatter block as your pre-requisite + * scope, then depend on the shard (with same index). + * + * NOTE: this algorithm was designed for ONE-LEVEL of scattering and probably does not + * work as-is for nested scatter blocks + */ + case Some(_: Scatter) => doneKeys.contains(prerequisiteScope.fullyQualifiedName -> entry.index) + + /* + * Otherwise, simply refer to the collector entry. 
This means that 'entry' depends + * on every shard of the pre-requisite scope to finish. + */ + case _ => doneKeys.contains(prerequisiteScope.fullyQualifiedName -> None) + } + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/JobPreparationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/JobPreparationActor.scala deleted file mode 100644 index cc7f76f92..000000000 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/JobPreparationActor.scala +++ /dev/null @@ -1,119 +0,0 @@ -package cromwell.engine.workflow.lifecycle.execution - -import akka.actor.{Actor, ActorRef, Props} -import cromwell.backend._ -import cromwell.core.logging.WorkflowLogging -import cromwell.core.{ExecutionStore, JobKey, OutputStore} -import cromwell.engine.EngineWorkflowDescriptor -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor._ -import wdl4s._ -import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.util.TryUtil -import wdl4s.values.WdlValue - -import scala.util.{Failure, Success, Try} - -final case class JobPreparationActor(executionData: WorkflowExecutionActorData, - jobKey: BackendJobDescriptorKey, - factory: BackendLifecycleActorFactory, - initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef) - extends Actor with WdlLookup with WorkflowLogging { - - override lazy val workflowDescriptor: EngineWorkflowDescriptor = executionData.workflowDescriptor - override lazy val workflowId = workflowDescriptor.id - override lazy val executionStore: ExecutionStore = executionData.executionStore - override lazy val outputStore: OutputStore = executionData.outputStore - override lazy val expressionLanguageFunctions = factory.expressionLanguageFunctions( - workflowDescriptor.backendDescriptor, jobKey, initializationData) - - override def receive = { - case Start => - val response = resolveAndEvaluateInputs(jobKey, expressionLanguageFunctions) map { 
prepareJobExecutionActor } - context.parent ! (response recover { case f => BackendJobPreparationFailed(jobKey, f) }).get - context stop self - - case unhandled => workflowLogger.warn(self.path.name + " received an unhandled message: " + unhandled) - } - - // Split inputs map (= evaluated workflow declarations + coerced json inputs) into [init\.*].last - private lazy val splitInputs = workflowDescriptor.backendDescriptor.inputs map { case (fqn, v) => splitFqn(fqn) -> v } - - def resolveAndEvaluateInputs(jobKey: BackendJobDescriptorKey, - wdlFunctions: WdlStandardLibraryFunctions): Try[Map[LocallyQualifiedName, WdlValue]] = { - import RuntimeAttributeDefinition.buildMapBasedLookup - Try { - val call = jobKey.call - lazy val callInputsFromFile = unqualifiedInputsFromInputFile(call) - lazy val workflowScopedLookup = hierarchicalLookup(jobKey.call, jobKey.index) _ - - // Try to resolve, evaluate and coerce declarations in order - val inputEvaluationAttempt = call.task.declarations.foldLeft(Map.empty[LocallyQualifiedName, Try[WdlValue]])((inputs, declaration) => { - val name = declaration.name - - // Try to resolve the declaration, and upon success evaluate the expression - // If the declaration is resolved but can't be evaluated this will throw an evaluation exception - // If it can't be resolved it's ignored and won't appear in the final input map - val evaluated: Option[Try[WdlValue]] = declaration.expression match { - // Static expression in the declaration - case Some(expr) => Option(expr.evaluate(buildMapBasedLookup(inputs), wdlFunctions)) - // Expression found in the input mappings - case None if call.inputMappings.contains(name) => Option(call.inputMappings(name).evaluate(workflowScopedLookup, wdlFunctions)) - // Expression found in the input file - case None if callInputsFromFile.contains(name) => Option(Success(callInputsFromFile(name))) - // Expression can't be found - case _ => None - } - - // Leave out unresolved declarations - evaluated match { - case 
Some(value) => - val coercedValue = value flatMap declaration.wdlType.coerceRawValue - inputs + ((name, coercedValue)) - case None => inputs - } - }) - - TryUtil.sequenceMap(inputEvaluationAttempt, s"Input evaluation for Call ${call.fullyQualifiedName} failed") - }.flatten - } - - // Unqualified call inputs for a specific call, from the input json - private def unqualifiedInputsFromInputFile(call: Call): Map[LocallyQualifiedName, WdlValue] = splitInputs collect { - case((root, inputName), v) if root == call.fullyQualifiedName => inputName -> v - } - - private def prepareJobExecutionActor(inputEvaluation: Map[LocallyQualifiedName, WdlValue]): JobPreparationActorResponse = { - import RuntimeAttributeDefinition.{addDefaultsToAttributes, evaluateRuntimeAttributes} - val curriedAddDefaultsToAttributes = addDefaultsToAttributes(factory.runtimeAttributeDefinitions(initializationData), workflowDescriptor.backendDescriptor.workflowOptions) _ - - (for { - unevaluatedRuntimeAttributes <- Try(jobKey.call.task.runtimeAttributes) - evaluatedRuntimeAttributes <- evaluateRuntimeAttributes(unevaluatedRuntimeAttributes, expressionLanguageFunctions, inputEvaluation) - attributesWithDefault = curriedAddDefaultsToAttributes(evaluatedRuntimeAttributes) - jobDescriptor = BackendJobDescriptor(workflowDescriptor.backendDescriptor, jobKey, attributesWithDefault, inputEvaluation) - } yield BackendJobPreparationSucceeded(jobDescriptor, factory.jobExecutionActorProps(jobDescriptor, initializationData, serviceRegistryActor))) match { - case Success(s) => s - case Failure(f) => BackendJobPreparationFailed(jobKey, f) - } - } -} - -object JobPreparationActor { - sealed trait JobPreparationActorCommands - case object Start extends JobPreparationActorCommands - - sealed trait JobPreparationActorResponse - case class BackendJobPreparationSucceeded(jobDescriptor: BackendJobDescriptor, bjeaProps: Props) extends JobPreparationActorResponse - case class BackendJobPreparationFailed(jobKey: JobKey, 
throwable: Throwable) extends JobPreparationActorResponse - - def props(executionData: WorkflowExecutionActorData, - jobKey: BackendJobDescriptorKey, - factory: BackendLifecycleActorFactory, - initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef) = { - // Note that JobPreparationActor doesn't run on the engine dispatcher as it mostly executes backend-side code - // (WDL expression evaluation using Backend's expressionLanguageFunctions) - Props(new JobPreparationActor(executionData, jobKey, factory, initializationData, serviceRegistryActor)) - } -} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/OutputStore.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/OutputStore.scala new file mode 100644 index 000000000..08bd9b821 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/OutputStore.scala @@ -0,0 +1,102 @@ +package cromwell.engine.workflow.lifecycle.execution + +import cromwell.core.ExecutionIndex._ +import cromwell.core._ +import cromwell.engine.workflow.lifecycle.execution.OutputStore.{OutputCallKey, OutputEntry} +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.CollectorKey +import lenthall.util.TryUtil +import wdl4s.wdl.types.{WdlArrayType, WdlType} +import wdl4s.wdl.values.{WdlArray, WdlCallOutputsObject, WdlValue} +import wdl4s.wdl._ + +import scala.language.postfixOps +import scala.util.{Failure, Success, Try} + +object OutputStore { + case class OutputEntry(name: String, wdlType: WdlType, wdlValue: Option[WdlValue]) + case class OutputCallKey(call: Scope with WdlGraphNode, index: ExecutionIndex) + def empty = OutputStore(Map.empty) +} + +case class OutputStore(store: Map[OutputCallKey, List[OutputEntry]]) { + + override def toString = store.map { case (k, l) => s"$k -> ${l.mkString(" ")}" } mkString System.lineSeparator + + def add(values: Map[OutputCallKey, List[OutputEntry]]) = this.copy(store = store ++ 
values) + + def fetchNodeOutputEntries(node: WdlGraphNode, index: ExecutionIndex): Try[WdlValue] = { + def outputEntriesToMap(outputs: List[OutputEntry]): Map[String, Try[WdlValue]] = { + outputs map { output => + output.wdlValue match { + case Some(wdlValue) => output.name -> Success(wdlValue) + case None => output.name -> Failure(new RuntimeException(s"Could not retrieve output ${output.name} value")) + } + } toMap + } + + def callOutputs(call: WdlCall, outputs: List[OutputEntry]) = { + TryUtil.sequenceMap(outputEntriesToMap(outputs), s"Output fetching for call ${node.unqualifiedName}") map { outputsMap => + WdlCallOutputsObject(call, outputsMap) + } + } + + def declarationOutputs(declaration: Declaration, outputs: List[OutputEntry]) = { + outputs match { + case OutputEntry(_, _, Some(value)) :: Nil => Success(value) + case _ => Failure(new RuntimeException(s"Could not find value for declaration ${declaration.fullyQualifiedName}")) + } + } + + store.get(OutputCallKey(node, index)) match { + case Some(outputs) => + node match { + case call: WdlCall => callOutputs(call, outputs) + case declaration: Declaration => declarationOutputs(declaration, outputs) + case other => Failure(new RuntimeException(s"Only Calls and Declarations are allowed in the OutputStore, found ${other.getClass.getSimpleName}")) + } + case None => Failure(new RuntimeException(s"Could not find scope ${node.unqualifiedName}")) + } + } + + def collectCall(call: WdlCall, scatter: Scatter, sortedShards: Seq[JobKey]) = Try { + val shardsOutputs = sortedShards map { e => + fetchNodeOutputEntries(call, e.index) map { + case callOutputs: WdlCallOutputsObject => callOutputs.outputs + case _ => throw new RuntimeException("Call outputs should be a WdlCallOutputsObject") + } getOrElse(throw new RuntimeException(s"Could not retrieve output for shard ${e.scope} #${e.index}")) + } + + call.outputs map { taskOutput => + val wdlValues = shardsOutputs.map( + _.getOrElse(taskOutput.unqualifiedName, throw new 
RuntimeException(s"Could not retrieve output ${taskOutput.unqualifiedName}"))) + val arrayType = taskOutput.relativeWdlType(scatter).asInstanceOf[WdlArrayType] + val arrayOfValues = WdlArray(arrayType, wdlValues) + taskOutput.unqualifiedName -> JobOutput(arrayOfValues) + } toMap + } + + def collectDeclaration(declaration: Declaration, scatter: Scatter, sortedShards: Seq[JobKey]) = Try { + val shardsOutputs = sortedShards map { e => + fetchNodeOutputEntries(declaration, e.index) getOrElse { + throw new RuntimeException(s"Could not retrieve output for shard ${e.scope} #${e.index}") + } + } + val arrayType = declaration.relativeWdlType(scatter).asInstanceOf[WdlArrayType] + Map(declaration.unqualifiedName -> JobOutput(WdlArray(arrayType, shardsOutputs))) + } + + /** + * Try to generate output for a collector call, by collecting outputs for all of its shards. + * It's fail-fast on shard output retrieval + */ + def generateCollectorOutput(collector: CollectorKey, + shards: Iterable[JobKey]): Try[CallOutputs] = { + lazy val sortedShards = shards.toSeq sortBy { _.index.fromIndex } + + collector.scope match { + case call: WdlCall => collectCall(call, collector.scatter, sortedShards) + case declaration: Declaration => collectDeclaration(declaration, collector.scatter, sortedShards) + case other => Failure(new RuntimeException(s"Cannot retrieve outputs for ${other.fullyQualifiedName}")) + } + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala new file mode 100644 index 000000000..1acd49094 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala @@ -0,0 +1,283 @@ +package cromwell.engine.workflow.lifecycle.execution + +import akka.actor.SupervisorStrategy.Escalate +import akka.actor.{ActorRef, FSM, LoggingFSM, OneForOneStrategy, Props, 
SupervisorStrategy} +import cromwell.backend.{AllBackendInitializationData, BackendLifecycleActorFactory, BackendWorkflowDescriptor} +import cromwell.core._ +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.core.logging.JobLogging +import cromwell.engine.EngineWorkflowDescriptor +import cromwell.engine.backend.{BackendConfiguration, BackendSingletonCollection} +import cromwell.engine.workflow.lifecycle.execution.preparation.CallPreparation.{CallPreparationFailed, Start} +import cromwell.engine.workflow.lifecycle.execution.SubWorkflowExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.preparation.SubWorkflowPreparationActor +import cromwell.engine.workflow.lifecycle.execution.preparation.SubWorkflowPreparationActor.SubWorkflowPreparationSucceeded +import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata._ +import cromwell.subworkflowstore.SubWorkflowStoreActor._ +import wdl4s.wdl.EvaluatedTaskInputs + +class SubWorkflowExecutionActor(key: SubWorkflowKey, + data: WorkflowExecutionActorData, + factories: Map[String, BackendLifecycleActorFactory], + ioActor: ActorRef, + override val serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + workflowDockerLookupActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + initializationData: AllBackendInitializationData, + restarting: Boolean) extends LoggingFSM[SubWorkflowExecutionActorState, SubWorkflowExecutionActorData] with JobLogging with WorkflowMetadataHelper with CallMetadataHelper { + + override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() { case _ => Escalate } + + private val parentWorkflow = data.workflowDescriptor + override val workflowId = parentWorkflow.id + override val 
workflowIdForCallMetadata = parentWorkflow.id + override def jobTag: String = key.tag + + startWith(SubWorkflowPendingState, SubWorkflowExecutionActorData.empty) + + private var eventList: Seq[ExecutionEvent] = Seq(ExecutionEvent(stateName.toString)) + + when(SubWorkflowPendingState) { + case Event(Execute, _) => + if (restarting) { + subWorkflowStoreActor ! QuerySubWorkflow(parentWorkflow.id, key) + goto(SubWorkflowCheckingStoreState) + } else { + prepareSubWorkflow(createSubWorkflowId()) + } + } + + when(SubWorkflowCheckingStoreState) { + case Event(SubWorkflowFound(entry), _) => + prepareSubWorkflow(WorkflowId.fromString(entry.subWorkflowExecutionUuid)) + case Event(_: SubWorkflowNotFound, _) => + prepareSubWorkflow(createSubWorkflowId()) + case Event(SubWorkflowStoreFailure(command, reason), _) => + jobLogger.error(reason, s"SubWorkflowStore failure for command $command, starting sub workflow with fresh ID.") + prepareSubWorkflow(createSubWorkflowId()) + } + + when(SubWorkflowPreparingState) { + case Event(SubWorkflowPreparationSucceeded(subWorkflowEngineDescriptor, inputs), _) => + startSubWorkflow(subWorkflowEngineDescriptor, inputs) + case Event(failure: CallPreparationFailed, _) => + context.parent ! SubWorkflowFailedResponse(key, Map.empty, failure.throwable) + context stop self + stay() + } + + when(SubWorkflowRunningState) { + case Event(WorkflowExecutionSucceededResponse(executedJobKeys, outputs), _) => + context.parent ! SubWorkflowSucceededResponse(key, executedJobKeys, outputs) + goto(SubWorkflowSucceededState) + case Event(WorkflowExecutionFailedResponse(executedJobKeys, reason), _) => + context.parent ! SubWorkflowFailedResponse(key, executedJobKeys, reason) + goto(SubWorkflowFailedState) + case Event(WorkflowExecutionAbortedResponse(executedJobKeys), _) => + context.parent ! 
SubWorkflowAbortedResponse(key, executedJobKeys) + goto(SubWorkflowAbortedState) + } + + when(SubWorkflowSucceededState) { FSM.NullFunction } + when(SubWorkflowFailedState) { FSM.NullFunction } + when(SubWorkflowAbortedState) { FSM.NullFunction } + + whenUnhandled { + case Event(SubWorkflowStoreRegisterSuccess(_), _) => + // Nothing to do here + stay() + case Event(SubWorkflowStoreFailure(command, reason), _) => + jobLogger.error(reason, s"SubWorkflowStore failure for command $command") + stay() + } + + onTransition { + case (_, toState) => + stateData.subWorkflowId foreach { id => pushCurrentStateToMetadataService(id, toState.workflowState) } + } + + onTransition { + case (_, _: SubWorkflowTerminalState) => + stateData.subWorkflowId match { + case Some(id) => + pushWorkflowEnd(id) + pushExecutionEventsToMetadataService(key, eventList) + case None => jobLogger.error("Sub workflow completed without a Sub Workflow UUID.") + } + context stop self + } + + onTransition { + case _ -> toState => eventList :+= ExecutionEvent(toState.toString) + } + + private def startSubWorkflow(subWorkflowEngineDescriptor: EngineWorkflowDescriptor, inputs: EvaluatedTaskInputs) = { + val subWorkflowActor = createSubWorkflowActor(subWorkflowEngineDescriptor) + + subWorkflowActor ! WorkflowExecutionActor.ExecuteWorkflowCommand + context.parent ! JobRunning(key, inputs, Option(subWorkflowActor)) + pushWorkflowRunningMetadata(subWorkflowEngineDescriptor.backendDescriptor, inputs) + + goto(SubWorkflowRunningState) + } + + private def prepareSubWorkflow(subWorkflowId: WorkflowId) = { + createSubWorkflowPreparationActor(subWorkflowId) ! Start + context.parent ! 
JobStarting(key) + pushCurrentStateToMetadataService(subWorkflowId, WorkflowRunning) + pushWorkflowStart(subWorkflowId) + goto(SubWorkflowPreparingState) using SubWorkflowExecutionActorData(Option(subWorkflowId)) + } + + def createSubWorkflowPreparationActor(subWorkflowId: WorkflowId) = { + context.actorOf( + SubWorkflowPreparationActor.props(data, key, subWorkflowId), + s"$subWorkflowId-SubWorkflowPreparationActor-${key.tag}" + ) + } + + def createSubWorkflowActor(subWorkflowEngineDescriptor: EngineWorkflowDescriptor) = { + context.actorOf( + WorkflowExecutionActor.props( + subWorkflowEngineDescriptor, + ioActor = ioActor, + serviceRegistryActor = serviceRegistryActor, + jobStoreActor = jobStoreActor, + subWorkflowStoreActor = subWorkflowStoreActor, + callCacheReadActor = callCacheReadActor, + callCacheWriteActor = callCacheWriteActor, + workflowDockerLookupActor = workflowDockerLookupActor, + jobTokenDispenserActor = jobTokenDispenserActor, + backendSingletonCollection, + initializationData, + restarting + ), + s"${subWorkflowEngineDescriptor.id}-SubWorkflowActor-${key.tag}" + ) + } + + private def pushWorkflowRunningMetadata(subWorkflowDescriptor: BackendWorkflowDescriptor, workflowInputs: EvaluatedTaskInputs) = { + val subWorkflowId = subWorkflowDescriptor.id + val parentWorkflowMetadataKey = MetadataKey(parentWorkflow.id, Option(MetadataJobKey(key.scope.fullyQualifiedName, key.index, key.attempt)), CallMetadataKeys.SubWorkflowId) + + val events = List( + MetadataEvent(parentWorkflowMetadataKey, MetadataValue(subWorkflowId)), + MetadataEvent(MetadataKey(subWorkflowId, None, WorkflowMetadataKeys.Name), MetadataValue(key.scope.callable.unqualifiedName)), + MetadataEvent(MetadataKey(subWorkflowId, None, WorkflowMetadataKeys.ParentWorkflowId), MetadataValue(parentWorkflow.id)) + ) + + val inputEvents = workflowInputs match { + case empty if empty.isEmpty => + List(MetadataEvent.empty(MetadataKey(subWorkflowId, None,WorkflowMetadataKeys.Inputs))) + case inputs => + 
inputs flatMap { case (inputName, wdlValue) => + wdlValueToMetadataEvents(MetadataKey(subWorkflowId, None, s"${WorkflowMetadataKeys.Inputs}:${inputName.unqualifiedName}"), wdlValue) + } + } + + val workflowRootEvents = buildWorkflowRootMetadataEvents(subWorkflowDescriptor) + + serviceRegistryActor ! PutMetadataAction(events ++ inputEvents ++ workflowRootEvents) + } + + private def buildWorkflowRootMetadataEvents(subWorkflowDescriptor: BackendWorkflowDescriptor) = { + val subWorkflowId = subWorkflowDescriptor.id + + factories flatMap { + case (backendName, factory) => + BackendConfiguration.backendConfigurationDescriptor(backendName).toOption map { config => + backendName -> factory.getWorkflowExecutionRootPath(subWorkflowDescriptor, config.backendConfig, initializationData.get(backendName)) + } + } map { + case (backend, wfRoot) => + MetadataEvent(MetadataKey(subWorkflowId, None, s"${WorkflowMetadataKeys.WorkflowRoot}[$backend]"), MetadataValue(wfRoot.toAbsolutePath)) + } + } + + private def createSubWorkflowId() = { + val subWorkflowId = WorkflowId.randomId() + // Register ID to the sub workflow store + subWorkflowStoreActor ! 
RegisterSubWorkflow(parentWorkflow.rootWorkflow.id, parentWorkflow.id, key, subWorkflowId) + subWorkflowId + } +} + +object SubWorkflowExecutionActor { + sealed trait SubWorkflowExecutionActorState { + def workflowState: WorkflowState + } + sealed trait SubWorkflowTerminalState extends SubWorkflowExecutionActorState + + case object SubWorkflowPendingState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowCheckingStoreState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowPreparingState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowRunningState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowAbortingState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowAborting + } + + case object SubWorkflowSucceededState extends SubWorkflowTerminalState { + override val workflowState = WorkflowSucceeded + } + case object SubWorkflowAbortedState extends SubWorkflowTerminalState { + override val workflowState = WorkflowAborted + } + case object SubWorkflowFailedState extends SubWorkflowTerminalState { + override val workflowState = WorkflowFailed + } + + object SubWorkflowExecutionActorData { + def empty = SubWorkflowExecutionActorData(None) + } + case class SubWorkflowExecutionActorData(subWorkflowId: Option[WorkflowId]) + + sealed trait EngineWorkflowExecutionActorCommand + case object Execute + + def props(key: SubWorkflowKey, + data: WorkflowExecutionActorData, + factories: Map[String, BackendLifecycleActorFactory], + ioActor: ActorRef, + serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + workflowDockerLookupActor: ActorRef, + jobTokenDispenserActor: ActorRef, + 
backendSingletonCollection: BackendSingletonCollection, + initializationData: AllBackendInitializationData, + restarting: Boolean) = { + Props(new SubWorkflowExecutionActor( + key, + data, + factories, + ioActor = ioActor, + serviceRegistryActor = serviceRegistryActor, + jobStoreActor = jobStoreActor, + subWorkflowStoreActor = subWorkflowStoreActor, + callCacheReadActor = callCacheReadActor, + callCacheWriteActor = callCacheWriteActor, + workflowDockerLookupActor = workflowDockerLookupActor, + jobTokenDispenserActor = jobTokenDispenserActor, + backendSingletonCollection, + initializationData, + restarting) + ).withDispatcher(EngineDispatcher) + } +} \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WdlLookup.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WdlLookup.scala deleted file mode 100644 index 8b2af57ea..000000000 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WdlLookup.scala +++ /dev/null @@ -1,106 +0,0 @@ -package cromwell.engine.workflow.lifecycle.execution - -import cromwell.core.{ExecutionIndex, ExecutionStore, OutputStore} -import cromwell.engine.EngineWorkflowDescriptor -import ExecutionIndex._ -import wdl4s._ -import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.values.{WdlArray, WdlCallOutputsObject, WdlValue} - -import scala.language.postfixOps -import scala.util.{Failure, Success, Try} - -trait WdlLookup { - - def workflowDescriptor: EngineWorkflowDescriptor - def executionStore: ExecutionStore - def outputStore: OutputStore - def expressionLanguageFunctions: WdlStandardLibraryFunctions - - private lazy val splitInputs = workflowDescriptor.backendDescriptor.inputs map { - case (fqn, v) => splitFqn(fqn) -> v - } - - // Unqualified workflow level inputs - private lazy val unqualifiedWorkflowInputs: Map[LocallyQualifiedName, WdlValue] = splitInputs collect { - case((root, inputName), v) if root == 
workflowDescriptor.namespace.workflow.unqualifiedName => inputName -> v - } - - /** - * Lookup an identifier by - * first looking at the completed calls map - * and if not found traversing up the scope hierarchy from the scope from which the lookup originated. - */ - def hierarchicalLookup(scope: Scope, index: ExecutionIndex)(identifier: String): WdlValue = { - // First lookup calls - lookupCall(scope, index, identifier) recoverWith { - // Lookup in the same scope (currently no scope support this but say we have scatter declarations, or multiple scatter variables, or nested workflows..) - case _: VariableNotFoundException | _: WdlExpressionException => scopedLookup(scope, index, identifier) - } recover { - // Lookup parent if present - case _: VariableNotFoundException | _: WdlExpressionException => scope.parent match { - case Some(parent) => hierarchicalLookup(parent, index)(identifier) - case None => throw new VariableNotFoundException(s"Can't find $identifier") - } - } get - } - - private def scopedLookup(scope: Scope, index: ExecutionIndex, identifier: String): Try[WdlValue] = { - def scopedLookupFunction = scope match { - case scatter: Scatter if index.isDefined => lookupScatter(scatter, index.get) _ - case workflow: Workflow => lookupWorkflowDeclaration _ - case _ => (_: String) => Failure(new VariableNotFoundException(s"Can't find $identifier in scope $scope")) - } - - scopedLookupFunction(identifier) - } - - // In this case, the scopedLookup function is effectively equivalent to looking into unqualifiedWorkflowInputs for the value - // because the resolution / evaluation / coercion has already happened in the MaterializeWorkflowDescriptorActor - private def lookupWorkflowDeclaration(identifier: String) = { - unqualifiedWorkflowInputs.get(identifier) match { - case Some(value) => Success(value) - case None => Failure(new WdlExpressionException(s"Could not resolve variable $identifier as a workflow input")) - } - } - - private def lookupScatter(scatter: 
Scatter, index: Int)(identifier: String): Try[WdlValue] = { - if (identifier == scatter.item) { - // Scatters are not indexed yet (they can't be nested) - val scatterLookup = hierarchicalLookup(scatter, None) _ - scatter.collection.evaluate(scatterLookup, expressionLanguageFunctions) map { - case collection: WdlArray if collection.value.isDefinedAt(index) => collection.value(index) - case collection: WdlArray => throw new RuntimeException(s"Index $index out of bound in $collection for scatter ${scatter.fullyQualifiedName}") - case other => throw new RuntimeException(s"Scatter ${scatter.fullyQualifiedName} collection is not an array: $other") - } recover { - case e => throw new RuntimeException(s"Failed to evaluate collection for scatter ${scatter.fullyQualifiedName}", e) - } - } else { - Failure(new VariableNotFoundException(identifier)) - } - } - - private def lookupCall(scope: Scope, scopeIndex: ExecutionIndex, identifier: String): Try[WdlCallOutputsObject] = { - val calls = executionStore.store.keys.view map { _.scope } collect { case c: Call => c } - - calls find { _.unqualifiedName == identifier } match { - case Some(matchedCall) => - /** - * After matching the Call, this determines if the `key` depends on a single shard - * of a scatter'd job or if it depends on the whole thing. Right now, the heuristic - * is "If we're both in a scatter block together, then I depend on a shard. 
If not, - * I depend on the collected value" - * - * TODO: nested-scatter - this will likely not be sufficient for nested scatters - */ - val index: ExecutionIndex = matchedCall.closestCommonAncestor(scope) flatMap { - case s: Scatter => scopeIndex - case _ => None - } - - outputStore.fetchCallOutputEntries(matchedCall, index) - case None => Failure(new WdlExpressionException(s"Could not find a call with identifier '$identifier'")) - } - } - -} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala index 750a215b5..7148acdb3 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala @@ -1,407 +1,220 @@ package cromwell.engine.workflow.lifecycle.execution -import java.time.OffsetDateTime - -import akka.actor.SupervisorStrategy.{Escalate, Stop} -import akka.actor._ -import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, FailedRetryableResponse, FailedNonRetryableResponse, SucceededResponse} +import akka.actor.{Scope => _, _} +import cats.data.NonEmptyList +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, JobFailedNonRetryableResponse, JobFailedRetryableResponse, JobSucceededResponse} import cromwell.backend.BackendLifecycleActor.AbortJobCommand -import cromwell.backend.{AllBackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey} -import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.backend.{AllBackendInitializationData, BackendJobDescriptorKey, JobExecutionMap} +import cromwell.core.Dispatcher._ import cromwell.core.ExecutionIndex._ import cromwell.core.ExecutionStatus._ -import cromwell.core.ExecutionStore.ExecutionStoreEntry -import 
cromwell.core.OutputStore.OutputEntry -import cromwell.core.WorkflowOptions.WorkflowFailureMode import cromwell.core._ import cromwell.core.logging.WorkflowLogging -import cromwell.engine.backend.CromwellBackends +import cromwell.engine.backend.{BackendSingletonCollection, CromwellBackends} +import cromwell.engine.workflow.lifecycle.execution.ExecutionStore.RunnableScopes +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ import cromwell.engine.workflow.lifecycle.{EngineLifecycleActorAbortCommand, EngineLifecycleActorAbortedResponse} -import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.JobRunning -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor.BackendJobPreparationFailed -import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.WorkflowExecutionActorState import cromwell.engine.{ContinueWhilePossible, EngineWorkflowDescriptor} -import cromwell.services.metadata.MetadataService._ -import cromwell.services.metadata._ +import cromwell.util.StopAndLogSupervisor import cromwell.webservice.EngineStatsActor import lenthall.exception.ThrowableAggregation -import wdl4s.types.WdlArrayType -import wdl4s.util.TryUtil -import wdl4s.values.{WdlArray, WdlValue} -import wdl4s.{Scope, _} - -import scala.annotation.tailrec +import lenthall.util.TryUtil +import org.apache.commons.lang3.StringUtils +import wdl4s.wdl.WdlExpression.ScopedLookupFunction +import wdl4s.wdl._ +import wdl4s.wdl.expression.WdlFunctions +import wdl4s.wdl.values.WdlArray.WdlArrayLike +import wdl4s.wdl.values.{WdlBoolean, WdlOptionalValue, WdlString, WdlValue} + +import scala.concurrent.duration._ import scala.language.postfixOps -import scala.util.{Failure, Random, Success, Try} -import scalaz.NonEmptyList -import scalaz.Scalaz._ - -object WorkflowExecutionActor { - - /** - * States - */ - sealed trait WorkflowExecutionActorState { def terminal = false } - sealed trait WorkflowExecutionActorTerminalState extends 
WorkflowExecutionActorState { override val terminal = true } - - case object WorkflowExecutionPendingState extends WorkflowExecutionActorState - case object WorkflowExecutionInProgressState extends WorkflowExecutionActorState - case object WorkflowExecutionAbortingState extends WorkflowExecutionActorState - case object WorkflowExecutionSuccessfulState extends WorkflowExecutionActorTerminalState - case object WorkflowExecutionFailedState extends WorkflowExecutionActorTerminalState - case object WorkflowExecutionAbortedState extends WorkflowExecutionActorTerminalState - - /** - * Commands - */ - sealed trait WorkflowExecutionActorCommand - case object ExecuteWorkflowCommand extends WorkflowExecutionActorCommand - case object RestartExecutingWorkflowCommand extends WorkflowExecutionActorCommand - - /** - * Responses - */ - sealed trait WorkflowExecutionActorResponse { - def executionStore: ExecutionStore - - def outputStore: OutputStore - } - - case class WorkflowExecutionSucceededResponse(executionStore: ExecutionStore, outputStore: OutputStore) - extends WorkflowExecutionActorResponse { - override def toString = "WorkflowExecutionSucceededResponse" - } - - case class WorkflowExecutionAbortedResponse(executionStore: ExecutionStore, outputStore: OutputStore) - extends WorkflowExecutionActorResponse with EngineLifecycleActorAbortedResponse { - override def toString = "WorkflowExecutionAbortedResponse" - } - - final case class WorkflowExecutionFailedResponse(executionStore: ExecutionStore, outputStore: OutputStore, - reasons: Seq[Throwable]) extends WorkflowExecutionActorResponse { - override def toString = "WorkflowExecutionFailedResponse" - } - - /** - * Internal control flow messages - */ - private case class JobInitializationFailed(jobKey: JobKey, throwable: Throwable) - private case class ScatterCollectionFailedResponse(collectorKey: CollectorKey, throwable: Throwable) - private case class ScatterCollectionSucceededResponse(collectorKey: CollectorKey, outputs: 
JobOutputs) - - /** - * Internal ADTs - */ - case class ScatterKey(scope: Scatter) extends JobKey { - override val index = None // When scatters are nested, this might become Some(_) - override val attempt = 1 - override val tag = scope.unqualifiedName - - /** - * Creates a sub-ExecutionStore with Starting entries for each of the scoped children. - * - * @param count Number of ways to scatter the children. - * @return ExecutionStore of scattered children. - */ - def populate(count: Int): Map[JobKey, ExecutionStatus.Value] = { - val keys = this.scope.children flatMap { explode(_, count) } - keys map { _ -> ExecutionStatus.NotStarted } toMap - } - - private def explode(scope: Scope, count: Int): Seq[JobKey] = { - scope match { - case call: Call => - val shards = (0 until count) map { i => BackendJobDescriptorKey(call, Option(i), 1) } - shards :+ CollectorKey(call) - case scatter: Scatter => - throw new UnsupportedOperationException("Nested Scatters are not supported (yet).") - case e => - throw new UnsupportedOperationException(s"Scope ${e.getClass.getName} is not supported.") - } - } - } - - // Represents a scatter collection for a call in the execution store - case class CollectorKey(scope: Call) extends JobKey { - override val index = None - override val attempt = 1 - override val tag = s"Collector-${scope.unqualifiedName}" - } - - case class WorkflowExecutionException[T <: Throwable](exceptions: NonEmptyList[T]) extends ThrowableAggregation { - override val throwables = exceptions.list.toList - override val exceptionContext = s"WorkflowExecutionActor" - } - - def props(workflowId: WorkflowId, - workflowDescriptor: EngineWorkflowDescriptor, - serviceRegistryActor: ActorRef, - jobStoreActor: ActorRef, - callCacheReadActor: ActorRef, - initializationData: AllBackendInitializationData, - restarting: Boolean): Props = { - Props(WorkflowExecutionActor(workflowId, workflowDescriptor, serviceRegistryActor, jobStoreActor, - callCacheReadActor, initializationData, 
restarting)).withDispatcher(EngineDispatcher) - } - - private implicit class EnhancedExecutionStore(val executionStore: ExecutionStore) extends AnyVal { - // Convert the store to a `List` before `collect`ing to sidestep expensive and pointless hashing of `Scope`s when - // assembling the result. - def runnableScopes = executionStore.store.toList collect { case entry if isRunnable(entry) => entry._1 } - - private def isRunnable(entry: ExecutionStoreEntry) = { - entry match { - case (key, ExecutionStatus.NotStarted) => arePrerequisitesDone(key) - case _ => false - } - } - - def findShardEntries(key: CollectorKey): List[ExecutionStoreEntry] = executionStore.store.toList collect { - case (k: BackendJobDescriptorKey, v) if k.scope == key.scope && k.isShard => (k, v) - } - - private def arePrerequisitesDone(key: JobKey): Boolean = { - val upstream = key.scope.prerequisiteScopes.toList.map(s => upstreamEntries(key, s)) - val downstream = key match { - case collector: CollectorKey => findShardEntries(collector) - case _ => Nil - } - - val dependencies = upstream.flatten ++ downstream - val dependenciesResolved = dependencies forall { case (_, s) => s == ExecutionStatus.Done } - - /** - * We need to make sure that all prerequisiteScopes have been resolved to some entry before going forward. - * If a scope cannot be resolved it may be because it is in a scatter that has not been populated yet, - * therefore there is no entry in the executionStore for this scope. 
- * If that's the case this prerequisiteScope has not been run yet, hence the (upstream forall {_.nonEmpty}) - */ - (upstream forall { _.nonEmpty }) && dependenciesResolved - } - - private def upstreamEntries(entry: JobKey, prerequisiteScope: Scope): Seq[ExecutionStoreEntry] = { - prerequisiteScope.closestCommonAncestor(entry.scope) match { - /** - * If this entry refers to a Scope which has a common ancestor with prerequisiteScope - * and that common ancestor is a Scatter block, then find the shard with the same index - * as 'entry'. In other words, if you're in the same scatter block as your pre-requisite - * scope, then depend on the shard (with same index). - * - * NOTE: this algorithm was designed for ONE-LEVEL of scattering and probably does not - * work as-is for nested scatter blocks - */ - case Some(ancestor: Scatter) => - executionStore.store filter { - case (k, _) => k.scope == prerequisiteScope && k.index == entry.index - } toSeq - - /** - * Otherwise, simply refer to the entry the collector entry. This means that 'entry' depends - * on every shard of the pre-requisite scope to finish. - */ - case _ => - executionStore.store filter { - case (k, _) => k.scope == prerequisiteScope && k.index.isEmpty - } toSeq - } - } - } - - private implicit class EnhancedOutputStore(val outputStore: OutputStore) extends AnyVal { - /** - * Try to generate output for a collector call, by collecting outputs for all of its shards. 
- * It's fail-fast on shard output retrieval - */ - def generateCollectorOutput(collector: CollectorKey, - shards: Iterable[BackendJobDescriptorKey]): Try[JobOutputs] = Try { - val shardsOutputs = shards.toSeq sortBy { _.index.fromIndex } map { e => - outputStore.fetchCallOutputEntries(e.scope, e.index) map { - _.outputs - } getOrElse(throw new RuntimeException(s"Could not retrieve output for shard ${e.scope} #${e.index}")) - } - collector.scope.task.outputs map { taskOutput => - val wdlValues = shardsOutputs.map( - _.getOrElse(taskOutput.name, throw new RuntimeException(s"Could not retrieve output ${taskOutput.name}"))) - val arrayOfValues = new WdlArray(WdlArrayType(taskOutput.wdlType), wdlValues) - taskOutput.name -> JobOutput(arrayOfValues) - } toMap - } - } - -} - -final case class WorkflowExecutionActor(workflowId: WorkflowId, - workflowDescriptor: EngineWorkflowDescriptor, - serviceRegistryActor: ActorRef, - jobStoreActor: ActorRef, - callCacheReadActor: ActorRef, - initializationData: AllBackendInitializationData, - restarting: Boolean) - extends LoggingFSM[WorkflowExecutionActorState, WorkflowExecutionActorData] with WorkflowLogging { - - import WorkflowExecutionActor._ - import lenthall.config.ScalaConfig._ - - override def supervisorStrategy = AllForOneStrategy() { - case ex: ActorInitializationException => - context.parent ! 
WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(ex)) - context.stop(self) - Stop - case t => super.supervisorStrategy.decider.applyOrElse(t, (_: Any) => Escalate) - } - - val tag = s"WorkflowExecutionActor [UUID(${workflowId.shortString})]" - private lazy val DefaultMaxRetriesFallbackValue = 10 - +import scala.util.{Failure, Success, Try} + +case class WorkflowExecutionActor(workflowDescriptor: EngineWorkflowDescriptor, + ioActor: ActorRef, + serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + workflowDockerLookupActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + initializationData: AllBackendInitializationData, + restarting: Boolean) + extends LoggingFSM[WorkflowExecutionActorState, WorkflowExecutionActorData] with WorkflowLogging with CallMetadataHelper with StopAndLogSupervisor { + implicit val ec = context.dispatcher + + override val workflowIdForLogging = workflowDescriptor.id + override val workflowIdForCallMetadata = workflowDescriptor.id - val MaxRetries = ConfigFactory.load().getIntOption("system.max-retries") match { - case Some(value) => value - case None => - workflowLogger.warn(s"Failed to load the max-retries value from the configuration. 
Defaulting back to a value of '$DefaultMaxRetriesFallbackValue'.") - DefaultMaxRetriesFallbackValue - } + private val tag = s"WorkflowExecutionActor [UUID(${workflowDescriptor.id.shortString})]" + + private var checkRunnableCancellable: Option[Cancellable] = None - private val factories = TryUtil.sequenceMap(workflowDescriptor.backendAssignments.values.toSet[String] map { backendName => + private val backendFactories = TryUtil.sequenceMap(workflowDescriptor.backendAssignments.values.toSet[String] map { backendName => backendName -> CromwellBackends.backendLifecycleFactoryActorByName(backendName) } toMap) recover { case e => throw new RuntimeException("Could not instantiate backend factories", e) } get - // Initialize the StateData with ExecutionStore (all calls as NotStarted) and SymbolStore startWith( WorkflowExecutionPendingState, WorkflowExecutionActorData( workflowDescriptor, - executionStore = buildInitialExecutionStore(), + executionStore = ExecutionStore(workflowDescriptor.backendDescriptor.workflow, workflowDescriptor.knownValues), backendJobExecutionActors = Map.empty, + engineCallExecutionActors = Map.empty, + subWorkflowExecutionActors = Map.empty, + downstreamExecutionMap = Map.empty, outputStore = OutputStore.empty ) ) - private def buildInitialExecutionStore(): ExecutionStore = { - val workflow = workflowDescriptor.backendDescriptor.workflowNamespace.workflow - // Only add direct children to the store, the rest is dynamically created when necessary - val keys = workflow.children map { - case call: Call => BackendJobDescriptorKey(call, None, 1) - case scatter: Scatter => ScatterKey(scatter) - } - - ExecutionStore(keys.map(_ -> NotStarted).toMap) - } - - private def handleNonRetryableFailure(stateData: WorkflowExecutionActorData, failedJobKey: JobKey, reason: Throwable) = { - val mergedStateData = stateData.mergeExecutionDiff(WorkflowExecutionDiff(Map(failedJobKey -> ExecutionStatus.Failed))) - .removeBackendJobExecutionActor(failedJobKey) - - if 
(workflowDescriptor.getWorkflowOption(WorkflowFailureMode).contains(ContinueWhilePossible.toString)) { - mergedStateData.workflowCompletionStatus match { - case Some(completionStatus) if completionStatus == Failed => - context.parent ! WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(reason)) - goto(WorkflowExecutionFailedState) using mergedStateData - case _ => - stay() using startRunnableScopes(mergedStateData) - } - } else { - context.parent ! WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(reason)) - goto(WorkflowExecutionFailedState) using mergedStateData - } - } - when(WorkflowExecutionPendingState) { - case Event(ExecuteWorkflowCommand, stateData) => - val data = startRunnableScopes(stateData) - goto(WorkflowExecutionInProgressState) using data + case Event(ExecuteWorkflowCommand, _) => + scheduleStartRunnableCalls() + goto(WorkflowExecutionInProgressState) } when(WorkflowExecutionInProgressState) { - case Event(JobRunning(jobDescriptor, backendJobExecutionActor), stateData) => - pushRunningJobMetadata(jobDescriptor) + case Event(CheckRunnable, data) => handleCheckRunnable(data) + + case Event(JobStarting(jobKey), stateData) => + pushStartingCallMetadata(jobKey) stay() using stateData - .addBackendJobExecutionActor(jobDescriptor.key, backendJobExecutionActor) - .mergeExecutionDiff(WorkflowExecutionDiff(Map(jobDescriptor.key -> ExecutionStatus.Running))) - case Event(BackendJobPreparationFailed(jobKey, throwable), stateData) => - pushFailedJobMetadata(jobKey, None, throwable, retryableFailure = false) - context.parent ! 
WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(throwable)) - goto(WorkflowExecutionFailedState) using stateData.mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Failed))) - case Event(SucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => - pushSuccessfulJobMetadata(jobKey, returnCode, callOutputs) - handleJobSuccessful(jobKey, callOutputs, stateData) - case Event(FailedNonRetryableResponse(jobKey, reason, returnCode), stateData) => - pushFailedJobMetadata(jobKey, returnCode, reason, retryableFailure = false) - handleNonRetryableFailure(stateData, jobKey, reason) - case Event(FailedRetryableResponse(jobKey, reason, returnCode), stateData) => - workflowLogger.warn(s"Job ${jobKey.tag} failed with a retryable failure: ${reason.getMessage}") - pushFailedJobMetadata(jobKey, None, reason, retryableFailure = true) - handleRetryableFailure(jobKey, reason, returnCode) - case Event(JobInitializationFailed(jobKey, reason), stateData) => - pushFailedJobMetadata(jobKey, None, reason, retryableFailure = false) - handleNonRetryableFailure(stateData, jobKey, reason) + .mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Starting))) + case Event(JobRunning(key, inputs, callExecutionActor), stateData) => + pushRunningCallMetadata(key, inputs) + stay() using stateData + .addCallExecutionActor(key, callExecutionActor) + .mergeExecutionDiff(WorkflowExecutionDiff(Map(key -> ExecutionStatus.Running))) + + //Success + // Job + case Event(r: JobSucceededResponse, stateData) => + pushSuccessfulCallMetadata(r.jobKey, r.returnCode, r.jobOutputs) + handleCallSuccessful(r.jobKey, r.jobOutputs, stateData, Map.empty) + // Sub Workflow + case Event(SubWorkflowSucceededResponse(jobKey, descendantJobKeys, callOutputs), stateData) => + pushSuccessfulCallMetadata(jobKey, None, callOutputs) + handleCallSuccessful(jobKey, callOutputs, stateData, descendantJobKeys) + // Scatter case 
Event(ScatterCollectionSucceededResponse(jobKey, callOutputs), stateData) => - handleJobSuccessful(jobKey, callOutputs, stateData) + handleCallSuccessful(jobKey, callOutputs, stateData, Map.empty) + // Declaration + case Event(DeclarationEvaluationSucceededResponse(jobKey, callOutputs), stateData) => + handleDeclarationEvaluationSuccessful(jobKey, callOutputs, stateData) + // Conditional + case Event(BypassedCallResults(callOutputs), stateData) => + handleCallBypassed(callOutputs, stateData) + case Event(BypassedDeclaration(declKey), stateData) => + handleDeclarationEvaluationSuccessful(declKey, WdlOptionalValue.none(declKey.scope.wdlType), stateData) + + // Failure + // Initialization + case Event(JobInitializationFailed(jobKey, reason), stateData) => + pushFailedCallMetadata(jobKey, None, reason, retryableFailure = false) + handleNonRetryableFailure(stateData, jobKey, reason, Map.empty) + // Job Non Retryable + case Event(JobFailedNonRetryableResponse(jobKey, reason, returnCode), stateData) => + pushFailedCallMetadata(jobKey, returnCode, reason, retryableFailure = false) + handleNonRetryableFailure(stateData, jobKey, reason, Map.empty) + // Aborted? But we're outside of the AbortingState!?? Could happen if + // - The job was aborted by something external to Cromwell + // - The job lasted too long (eg JES 6 day timeout) + // - We've reconnected to an aborting job (some sort of shutdown race condition?) 
+ // Treat it like any other non-retryable failure: + case Event(AbortedResponse(jobKey), stateData) => + val cause = new Exception("The job was aborted from outside Cromwell") + pushFailedCallMetadata(jobKey, None, cause, retryableFailure = false) + handleNonRetryableFailure(stateData, jobKey, cause, Map.empty) + // Job Retryable + case Event(JobFailedRetryableResponse(jobKey, reason, returnCode), _) => + pushFailedCallMetadata(jobKey, None, reason, retryableFailure = true) + handleRetryableFailure(jobKey, reason, returnCode) + // Sub Workflow - sub workflow failures are always non retryable + case Event(SubWorkflowFailedResponse(jobKey, descendantJobKeys, reason), stateData) => + pushFailedCallMetadata(jobKey, None, reason, retryableFailure = false) + handleNonRetryableFailure(stateData, jobKey, reason, descendantJobKeys) + case Event(DeclarationEvaluationFailedResponse(jobKey, reason), stateData) => + handleDeclarationEvaluationFailure(jobKey, reason, stateData) } + when(WorkflowExecutionAbortingState) { + case Event(AbortedResponse(jobKey), stateData) => + pushAbortedCallMetadata(jobKey) + handleCallAborted(stateData, jobKey, Map.empty) + case Event(SubWorkflowAbortedResponse(jobKey, executedKeys), stateData) => + pushAbortedCallMetadata(jobKey) + handleCallAborted(stateData, jobKey, executedKeys) + case Event(SubWorkflowSucceededResponse(subKey, executedKeys, _), stateData) => + pushAbortedCallMetadata(subKey) + handleCallAborted(stateData, subKey, executedKeys) + case Event(r: JobSucceededResponse, stateData) => + pushAbortedCallMetadata(r.jobKey) + handleCallAborted(stateData, r.jobKey, Map.empty) + } + when(WorkflowExecutionSuccessfulState) { FSM.NullFunction } when(WorkflowExecutionFailedState) { - alreadyFailedMopUp + FSM.NullFunction } when(WorkflowExecutionAbortedState) { - alreadyFailedMopUp + FSM.NullFunction } - /** - * Mop up function to handle a set of incoming results if this workflow has already failed: - */ - private def alreadyFailedMopUp: 
StateFunction = { - case Event(JobInitializationFailed(jobKey, reason), stateData) => - pushFailedJobMetadata(jobKey, None, reason, retryableFailure = false) - stay - case Event(FailedNonRetryableResponse(jobKey, reason, returnCode), stateData) => - pushFailedJobMetadata(jobKey, returnCode, reason, retryableFailure = false) - stay - case Event(FailedRetryableResponse(jobKey, reason, returnCode), stateData) => - pushFailedJobMetadata(jobKey, returnCode, reason, retryableFailure = true) - stay - case Event(SucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => - pushSuccessfulJobMetadata(jobKey, returnCode, callOutputs) - stay + private def scheduleStartRunnableCalls() = { + checkRunnableCancellable = Option(context.system.scheduler.scheduleOnce(SweepInterval, self, CheckRunnable)) + } + + override def postStop() = { + checkRunnableCancellable foreach { _.cancel() } + super.postStop() } - when(WorkflowExecutionAbortingState) { - case Event(AbortedResponse(jobKey), stateData) => - workflowLogger.info(s"$tag job aborted: ${jobKey.tag}") - val newStateData = stateData.removeBackendJobExecutionActor(jobKey) - if (newStateData.backendJobExecutionActors.isEmpty) { - workflowLogger.info(s"$tag all jobs aborted") - goto(WorkflowExecutionAbortedState) - } else { - stay() using newStateData + def handleTerminated(actorRef: ActorRef) = { + // Both of these Should Never Happen (tm), assuming the state data is set correctly on EJEA creation. + // If they do, it's a big programmer error and the workflow execution fails. 
+ val jobKey = stateData.engineCallExecutionActors.getOrElse(actorRef, throw new RuntimeException("Programmer Error: An EJEA has terminated but was not assigned a jobKey")) + val jobStatus = stateData.executionStore.jobStatus(jobKey).getOrElse(throw new RuntimeException("Programmer Error: An EJEA representing a jobKey which this workflow is not running has sent up a terminated message.")) + + if (!jobStatus.isTerminalOrRetryable) { + val terminationException = getFailureCause(actorRef) match { + case Some(e) => new RuntimeException("Unexpected failure (or early exit) in EJEA.", e) + case None => new RuntimeException(s"Unexpected failure (or early exit) in EJEA $actorRef (root cause not captured).") } + self ! JobFailedNonRetryableResponse(jobKey, terminationException, None) + } + + stay } whenUnhandled { - case Event(MetadataPutFailed(action, error), _) => - // Do something useful here?? - workflowLogger.warn(s"$tag Put failed for Metadata action $action : ${error.getMessage}") - stay - case Event(MetadataPutAcknowledgement(_), _) => stay() + case Event(CheckRunnable, _) => stay() + case Event(Terminated(actorRef), stateData) => handleTerminated(actorRef) using stateData.removeEngineJobExecutionActor(actorRef) case Event(EngineLifecycleActorAbortCommand, stateData) => - if (stateData.backendJobExecutionActors.nonEmpty) { - log.info(s"$tag: Abort received. Aborting ${stateData.backendJobExecutionActors.size} EJEAs") - stateData.backendJobExecutionActors.values foreach {_ ! AbortJobCommand} + if (stateData.hasRunningActors) { + log.info(s"$tag: Abort received. " + + s"Aborting ${stateData.backendJobExecutionActors.size} Job Execution Actors" + + s" and ${stateData.subWorkflowExecutionActors.size} Sub Workflow Execution Actors" + ) + stateData.backendJobExecutionActors.values foreach { _ ! AbortJobCommand } + stateData.subWorkflowExecutionActors.values foreach { _ ! 
EngineLifecycleActorAbortCommand } goto(WorkflowExecutionAbortingState) } else { goto(WorkflowExecutionAbortedState) } case Event(EngineStatsActor.JobCountQuery, data) => sender ! EngineStatsActor.JobCount(data.backendJobExecutionActors.size) + data.subWorkflowExecutionActors.values foreach { _ forward EngineStatsActor.JobCountQuery } stay() case unhandledMessage => workflowLogger.warn(s"$tag received an unhandled message: ${unhandledMessage.event} in state: $stateName") - stay + stay() } onTransition { @@ -413,164 +226,237 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, } onTransition { - case _ -> WorkflowExecutionSuccessfulState => - pushWorkflowOutputMetadata(nextStateData) - context.parent ! WorkflowExecutionSucceededResponse(nextStateData.executionStore, nextStateData.outputStore) case _ -> WorkflowExecutionAbortedState => - context.parent ! WorkflowExecutionAbortedResponse(nextStateData.executionStore, nextStateData.outputStore) - } - - private def handleRetryableFailure(jobKey: BackendJobDescriptorKey, reason: Throwable, returnCode: Option[Int]) = { - // We start with index 1 for #attempts, hence invariant breaks only if jobKey.attempt > MaxRetries - if (jobKey.attempt <= MaxRetries) { - val newJobKey = jobKey.copy(attempt = jobKey.attempt + 1) - workflowLogger.info(s"Retrying job execution for ${newJobKey.tag}") - /** Currently, we update the status of the old key to Preempted, and add a new entry (with the #attempts incremented by 1) - * to the execution store with status as NotStarted. This allows startRunnableCalls to re-execute this job */ - val executionDiff = WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Preempted, newJobKey -> ExecutionStatus.NotStarted)) - val newData = stateData.mergeExecutionDiff(executionDiff) - stay() using startRunnableScopes(newData) + context.parent ! 
WorkflowExecutionAbortedResponse(nextStateData.jobExecutionMap) + } + + private def handleNonRetryableFailure(stateData: WorkflowExecutionActorData, failedJobKey: JobKey, reason: Throwable, jobExecutionMap: JobExecutionMap) = { + val newData = stateData + .removeCallExecutionActor(failedJobKey) + .addExecutions(jobExecutionMap) + + handleExecutionFailure(failedJobKey, newData, reason, jobExecutionMap) + } + + private def handleDeclarationEvaluationFailure(declarationKey: DeclarationKey, reason: Throwable, stateData: WorkflowExecutionActorData) = { + handleExecutionFailure(declarationKey, stateData, reason, Map.empty) + } + + private def handleExecutionFailure(failedJobKey: JobKey, data: WorkflowExecutionActorData, reason: Throwable, jobExecutionMap: JobExecutionMap) = { + val newData = data.executionFailed(failedJobKey) + + if (workflowDescriptor.failureMode == ContinueWhilePossible) { + newData.workflowCompletionStatus match { + case Some(completionStatus) if completionStatus == Failed => + context.parent ! WorkflowExecutionFailedResponse(newData.jobExecutionMap, reason) + goto(WorkflowExecutionFailedState) using newData + case _ => + stay() using newData + } } else { - workflowLogger.warn(s"Exhausted maximum number of retries for job ${jobKey.tag}. Failing.") - goto(WorkflowExecutionFailedState) using stateData.mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Failed))) + context.parent ! 
WorkflowExecutionFailedResponse(newData.jobExecutionMap, reason) + goto(WorkflowExecutionFailedState) using newData } } - - private def handleJobSuccessful(jobKey: JobKey, outputs: JobOutputs, data: WorkflowExecutionActorData) = { - workflowLogger.debug(s"Job ${jobKey.tag} succeeded!") - val newData = data.jobExecutionSuccess(jobKey, outputs) - - newData.workflowCompletionStatus match { - case Some(ExecutionStatus.Done) => - workflowLogger.info(newData.outputsJson()) - goto(WorkflowExecutionSuccessfulState) using newData - case Some(sts) => - context.parent ! WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(new Exception("One or more jobs failed in fail-slow mode"))) - goto(WorkflowExecutionFailedState) using newData - case _ => - stay() using startRunnableScopes(newData) - } + + private def handleWorkflowSuccessful(data: WorkflowExecutionActorData) = { + import WorkflowExecutionActor.EnhancedWorkflowOutputs + import cromwell.util.JsonFormatting.WdlValueJsonFormatter._ + import spray.json._ + + case class ResponseAndFinalState(response: WorkflowExecutionActorResponse, + finalState: WorkflowExecutionActorTerminalState) + + val responseAndState = workflowDescriptor.workflow.evaluateOutputs( + workflowDescriptor.knownValues, + data.expressionLanguageFunctions, + data.outputStore.fetchNodeOutputEntries + ) map { workflowOutputs => + // For logging and metadata + val workflowScopeOutputs = workflowOutputs map { + case (output, value) => output.locallyQualifiedName(workflowDescriptor.workflow) -> value + } + workflowLogger.info( + s"""Workflow ${workflowDescriptor.workflow.unqualifiedName} complete. 
Final Outputs: + |${workflowScopeOutputs.stripLarge.toJson.prettyPrint}""".stripMargin + ) + pushWorkflowOutputMetadata(workflowScopeOutputs) + + // For cromwell internal storage of outputs + val unqualifiedWorkflowOutputs = workflowOutputs map { + // JobOutput is poorly named here - a WorkflowOutput type would be better + case (output, value) => output.unqualifiedName -> JobOutput(value) + } + ResponseAndFinalState( + WorkflowExecutionSucceededResponse(data.jobExecutionMap, unqualifiedWorkflowOutputs), + WorkflowExecutionSuccessfulState) + } recover { + case ex => + ResponseAndFinalState(WorkflowExecutionFailedResponse(data.jobExecutionMap, ex), WorkflowExecutionFailedState) + } get + + context.parent ! responseAndState.response + goto(responseAndState.finalState) using data } - private def pushWorkflowOutputMetadata(data: WorkflowExecutionActorData) = { - val reportableOutputs = workflowDescriptor.backendDescriptor.workflowNamespace.workflow.outputs - val keyValues = data.outputStore.store filterKeys { - _.index.isEmpty - } flatMap { - case (key, value) => - value collect { - case entry if isReportableOutput(key.call, entry, reportableOutputs) => - s"${key.call.fullyQualifiedName}.${entry.name}" -> entry.wdlValue - } - } collect { - case (key, Some(wdlValue)) => (key, wdlValue) - } - - val events = keyValues match { - case empty if empty.isEmpty => List(MetadataEvent.empty(MetadataKey(workflowId, None, WorkflowMetadataKeys.Outputs))) - case _ => keyValues flatMap { - case (outputName, outputValue) => - wdlValueToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Outputs}:$outputName"), outputValue) - } - } - - serviceRegistryActor ! 
PutMetadataAction(events) + private def handleRetryableFailure(jobKey: BackendJobDescriptorKey, reason: Throwable, returnCode: Option[Int]) = { + val newJobKey = jobKey.copy(attempt = jobKey.attempt + 1) + workflowLogger.info(s"Retrying job execution for ${newJobKey.tag}") + /* Currently, we update the status of the old key to RetryableFailure, and add a new entry (with the #attempts incremented by 1) + * to the execution store with status as NotStarted. This allows startRunnableCalls to re-execute this job */ + val executionDiff = WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.RetryableFailure, newJobKey -> ExecutionStatus.NotStarted)) + val newData = stateData.mergeExecutionDiff(executionDiff).removeCallExecutionActor(jobKey) + stay() using newData } - private def isReportableOutput(scope: Scope, entry: OutputEntry, - reportableOutputs: Seq[ReportableSymbol]): Boolean = { - reportableOutputs exists { reportableOutput => - reportableOutput.fullyQualifiedName == s"${scope.fullyQualifiedName}.${entry.name}" - } + private def handleCallSuccessful(jobKey: JobKey, outputs: CallOutputs, data: WorkflowExecutionActorData, jobExecutionMap: JobExecutionMap) = { + stay() using data.callExecutionSuccess(jobKey, outputs).addExecutions(jobExecutionMap) } - - private def pushSuccessfulJobMetadata(jobKey: JobKey, returnCode: Option[Int], outputs: JobOutputs) = { - val completionEvents = completedJobMetadataEvents(jobKey, ExecutionStatus.Done, returnCode) - - val outputEvents = outputs match { - case empty if empty.isEmpty => - List(MetadataEvent.empty(metadataKey(jobKey, s"${CallMetadataKeys.Outputs}"))) - case _ => - outputs flatMap { case (lqn, value) => wdlValueToMetadataEvents(metadataKey(jobKey, s"${CallMetadataKeys.Outputs}:$lqn"), value.wdlValue) } - } - - serviceRegistryActor ! 
PutMetadataAction(completionEvents ++ outputEvents) + + private def handleDeclarationEvaluationSuccessful(key: DeclarationKey, value: WdlValue, data: WorkflowExecutionActorData) = { + stay() using data.declarationEvaluationSuccess(key, value) } - private def pushFailedJobMetadata(jobKey: JobKey, returnCode: Option[Int], failure: Throwable, retryableFailure: Boolean) = { - val failedState = if (retryableFailure) ExecutionStatus.Preempted else ExecutionStatus.Failed - val completionEvents = completedJobMetadataEvents(jobKey, failedState, returnCode) - val retryableFailureEvent = MetadataEvent(metadataKey(jobKey, CallMetadataKeys.RetryableFailure), MetadataValue(retryableFailure)) - val failureEvents = throwableToMetadataEvents(metadataKey(jobKey, s"${CallMetadataKeys.Failures}[$randomNumberString]"), failure).+:(retryableFailureEvent) + private def handleCallBypassed(callOutputs: Map[CallKey, CallOutputs], data: WorkflowExecutionActorData) = { + def foldFunc(d: WorkflowExecutionActorData, output: (CallKey, CallOutputs)) = d.callExecutionSuccess(output._1, output._2) - serviceRegistryActor ! PutMetadataAction(completionEvents ++ failureEvents) + val updatedData = callOutputs.foldLeft(data)(foldFunc) + stay() using updatedData } - private def randomNumberString: String = Random.nextInt.toString.stripPrefix("-") - - private def completedJobMetadataEvents(jobKey: JobKey, executionStatus: ExecutionStatus, returnCode: Option[Int]) = { - val returnCodeEvent = returnCode map { rc => - List(MetadataEvent(metadataKey(jobKey, CallMetadataKeys.ReturnCode), MetadataValue(rc))) + private def handleCheckRunnable(data: WorkflowExecutionActorData) = { + data.workflowCompletionStatus match { + case Some(ExecutionStatus.Done) => + handleWorkflowSuccessful(data) + case Some(_) => + context.parent ! 
WorkflowExecutionFailedResponse(data.jobExecutionMap, new Exception("One or more jobs failed in ContinueWhilePossible mode")) + goto(WorkflowExecutionFailedState) using data + case _ => + scheduleStartRunnableCalls() + if (data.hasNewRunnables) stay() using startRunnableScopes(data) else stay() + } + } + + private def handleCallAborted(data: WorkflowExecutionActorData, jobKey: JobKey, jobExecutionMap: JobExecutionMap) = { + workflowLogger.info(s"$tag job aborted: ${jobKey.tag}") + val newStateData = data + .mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Aborted))) + .removeCallExecutionActor(jobKey) + .addExecutions(jobExecutionMap) + if (!newStateData.hasRunningActors) { + workflowLogger.info(s"$tag all jobs aborted") + goto(WorkflowExecutionAbortedState) + } else { + stay() using newStateData } - - List( - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.ExecutionStatus), MetadataValue(executionStatus)), - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.End), MetadataValue(OffsetDateTime.now)) - ) ++ returnCodeEvent.getOrElse(List.empty) } /** * Attempt to start all runnable jobs and return updated state data. This will create a new copy - * of the state data including new pending persists. + * of the state data. 
*/ - @tailrec private def startRunnableScopes(data: WorkflowExecutionActorData): WorkflowExecutionActorData = { - val runnableScopes = data.executionStore.runnableScopes - val runnableCalls = runnableScopes.view collect { case k if k.scope.isInstanceOf[Call] => k } sortBy { k => + val RunnableScopes(runnableScopes, truncated) = data.executionStore.runnableScopes + val runnableCalls = runnableScopes.view collect { case k if k.scope.isInstanceOf[WdlCall] => k } sortBy { k => (k.scope.fullyQualifiedName, k.index.getOrElse(-1)) } map { _.tag } + if (runnableCalls.nonEmpty) workflowLogger.info("Starting calls: " + runnableCalls.mkString(", ")) // Each process returns a Try[WorkflowExecutionDiff], which, upon success, contains potential changes to be made to the execution store. - val executionDiffs = runnableScopes map { - case k: BackendJobDescriptorKey => processRunnableJob(k, data) - case k: ScatterKey => processRunnableScatter(k, data) - case k: CollectorKey => processRunnableCollector(k, data) - case k => - val exception = new UnsupportedOperationException(s"Unknown entry in execution store: ${k.tag}") - self ! 
JobInitializationFailed(k, exception) - Failure(exception) + val diffs = runnableScopes map { scope => + scope -> Try(scope match { + case k: CallKey if isInBypassedScope(k, data) => processBypassedScope(k, data) + case k: DeclarationKey if isInBypassedScope(k, data) => processBypassedScope(k, data) + case k: BackendJobDescriptorKey => processRunnableJob(k, data) + case k: ScatterKey => processRunnableScatter(k, data, isInBypassedScope(k, data)) + case k: ConditionalKey => processRunnableConditional(k, data) + case k: CollectorKey => processRunnableCollector(k, data, isInBypassedScope(k, data)) + case k: SubWorkflowKey => processRunnableSubWorkflow(k, data) + case k: StaticDeclarationKey => processRunnableStaticDeclaration(k) + case k: DynamicDeclarationKey => processRunnableDynamicDeclaration(k, data) + case k => Failure(new UnsupportedOperationException(s"Unknown entry in execution store: ${k.tag}")) + }).flatten + } map { + case (_, Success(value)) => Success(value) + case (scope, Failure(throwable)) => + self ! JobInitializationFailed(scope, throwable) + Failure(throwable) + } collect { + /* + NOTE: This is filtering out all errors and only returning the successes, but only after the map above sent a + message that something is wrong. + + We used to throw an aggregation exception of all the collected errors, but _nothing_ in cromwell is actually + expecting that. Thus the workflows were being left in a Running state, jobs were left dispatched, etc. + + Meanwhile this actor and its children died or restarted, and one couldn't even attempt to abort the other jobs. + + Now, in the previous map, we send a message to ourselves about _every_ failure. But this method does not attempt + to further process the errors. The failures enqueue in the actor mailbox, and are handled by this actor's receive. + + At the moment, there is an issue in how this actor handles failure messages. 
That issue is tracked in: + https://github.com/broadinstitute/cromwell/issues/2029 + + Separately, we may also want to institute better supervision of actors, in general. But just throwing an exception + here doesn't actually force the correct handling. + + See also: + https://github.com/broadinstitute/cromwell/issues/1414 + https://github.com/broadinstitute/cromwell/issues/1874 + */ + case Success(value) => value } - TryUtil.sequence(executionDiffs) match { - case Success(diffs) if diffs.exists(_.containsNewEntry) => startRunnableScopes(data.mergeExecutionDiffs(diffs)) - case Success(diffs) => data.mergeExecutionDiffs(diffs) - case Failure(e) => data + // Update the metadata for the jobs we just sent to EJEAs (they'll start off queued up waiting for tokens): + pushQueuedCallMetadata(diffs) + val newData = data.mergeExecutionDiffs(diffs) + if (truncated || diffs.exists(_.containsNewEntry)) newData else newData.resetCheckRunnable + } + + private def isInBypassedScope(jobKey: JobKey, data: WorkflowExecutionActorData) = { + val result = jobKey.scope.ancestry.exists { + case i: If => data.executionStore.isBypassedConditional(jobKey, i) + case _ => false } + result } - private def pushNewJobMetadata(jobKey: BackendJobDescriptorKey, backendName: String) = { - val startEvents = List( - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.Start), MetadataValue(OffsetDateTime.now)), - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.Backend), MetadataValue(backendName)) - ) + def processBypassedScope(jobKey: JobKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { + self ! bypassedScopeResults(jobKey) + Success(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Running))) + } - serviceRegistryActor ! 
PutMetadataAction(startEvents) + def bypassedScopeResults(jobKey: JobKey): BypassedScopeResults = jobKey match { + case callKey: CallKey => BypassedCallResults( + Map(callKey -> (callKey.scope.outputs map { callOutput => callOutput.unqualifiedName -> JobOutput(WdlOptionalValue.none(callOutput.wdlType)) } toMap))) + case declKey: DeclarationKey => BypassedDeclaration(declKey) + case _ => throw new RuntimeException("Only calls and declarations might generate results when Bypassed") } - private def pushRunningJobMetadata(jobDescriptor: BackendJobDescriptor) = { - val inputEvents = jobDescriptor.inputs match { - case empty if empty.isEmpty => - List(MetadataEvent.empty(metadataKey(jobDescriptor.key, s"${CallMetadataKeys.Inputs}"))) - case inputs => - inputs flatMap { - case (inputName, inputValue) => - wdlValueToMetadataEvents(metadataKey(jobDescriptor.key, s"${CallMetadataKeys.Inputs}:$inputName"), inputValue) - } - } + def processRunnableStaticDeclaration(declaration: StaticDeclarationKey) = { + self ! DeclarationEvaluationSucceededResponse(declaration, declaration.value) + Success(WorkflowExecutionDiff(Map(declaration -> ExecutionStatus.Running))) + } + + def processRunnableDynamicDeclaration(declaration: DynamicDeclarationKey, data: WorkflowExecutionActorData) = { + val scatterMap = declaration.index flatMap { i => + // Will need update for nested scatters + declaration.scope.ancestry collectFirst { case s: Scatter => Map(s -> i) } + } getOrElse Map.empty[Scatter, Int] - val runningEvent = List(MetadataEvent(metadataKey(jobDescriptor.key, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.Running))) + val lookup = declaration.scope.lookupFunction( + workflowDescriptor.knownValues, + data.expressionLanguageFunctions, + data.outputStore.fetchNodeOutputEntries, + scatterMap + ) + + declaration.evaluate(lookup, data.expressionLanguageFunctions) match { + case Success(result) => self ! 
DeclarationEvaluationSucceededResponse(declaration, result) + case Failure(ex) => self ! DeclarationEvaluationFailedResponse(declaration, ex) + } - serviceRegistryActor ! PutMetadataAction(runningEvent ++ inputEvents) + Success(WorkflowExecutionDiff(Map(declaration -> ExecutionStatus.Running))) } private def processRunnableJob(jobKey: BackendJobDescriptorKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { @@ -581,39 +467,348 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, workflowLogger.error(exception, s"$tag $message") throw exception case Some(backendName) => - factories.get(backendName) match { + backendFactories.get(backendName) match { case Some(factory) => val ejeaName = s"${workflowDescriptor.id}-EngineJobExecutionActor-${jobKey.tag}" + val backendSingleton = backendSingletonCollection.backendSingletonActors(backendName) val ejeaProps = EngineJobExecutionActor.props( - self, jobKey, data, factory, initializationData.get(backendName), restarting, serviceRegistryActor, - jobStoreActor, callCacheReadActor, backendName, workflowDescriptor.callCachingMode) + self, jobKey, data, factory, initializationData.get(backendName), restarting, + serviceRegistryActor = serviceRegistryActor, + ioActor = ioActor, + jobStoreActor = jobStoreActor, + callCacheReadActor = callCacheReadActor, + callCacheWriteActor = callCacheWriteActor, + workflowDockerLookupActor = workflowDockerLookupActor, + jobTokenDispenserActor = jobTokenDispenserActor, + backendSingleton, backendName, workflowDescriptor.callCachingMode) val ejeaRef = context.actorOf(ejeaProps, ejeaName) - pushNewJobMetadata(jobKey, backendName) + context watch ejeaRef + pushNewCallMetadata(jobKey, Option(backendName)) ejeaRef ! 
EngineJobExecutionActor.Execute - Success(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Starting))) + Success(WorkflowExecutionDiff( + executionStoreChanges = Map(jobKey -> ExecutionStatus.QueuedInCromwell), + engineJobExecutionActorAdditions = Map(ejeaRef -> jobKey))) case None => - throw WorkflowExecutionException(new Exception(s"Could not get BackendLifecycleActor for backend $backendName").wrapNel) + throw WorkflowExecutionException(NonEmptyList.of(new Exception(s"Could not get BackendLifecycleActor for backend $backendName"))) } } } + + private def processRunnableSubWorkflow(key: SubWorkflowKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { + val sweaRef = context.actorOf( + SubWorkflowExecutionActor.props(key, data, backendFactories, + ioActor = ioActor, + serviceRegistryActor = serviceRegistryActor, + jobStoreActor = jobStoreActor, + subWorkflowStoreActor = subWorkflowStoreActor, + callCacheReadActor = callCacheReadActor, + callCacheWriteActor = callCacheWriteActor, + workflowDockerLookupActor = workflowDockerLookupActor, + jobTokenDispenserActor = jobTokenDispenserActor, + backendSingletonCollection, initializationData, restarting), s"SubWorkflowExecutionActor-${key.tag}" + ) - private def processRunnableScatter(scatterKey: ScatterKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { - val lookup = data.hierarchicalLookup(scatterKey.scope, None) _ + context watch sweaRef + pushNewCallMetadata(key, None) + sweaRef ! 
SubWorkflowExecutionActor.Execute + + Success(WorkflowExecutionDiff(executionStoreChanges = Map(key -> ExecutionStatus.QueuedInCromwell), + engineJobExecutionActorAdditions = Map(sweaRef -> key))) + } + + private def processRunnableConditional(conditionalKey: ConditionalKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { + val scatterMap = conditionalKey.index flatMap { i => + // Will need update for nested scatters + conditionalKey.scope.ancestry collectFirst { case s: Scatter => Map(s -> i) } + } getOrElse Map.empty[Scatter, Int] + + val lookup = conditionalKey.scope.lookupFunction( + workflowDescriptor.knownValues, + data.expressionLanguageFunctions, + data.outputStore.fetchNodeOutputEntries, + scatterMap + ) - scatterKey.scope.collection.evaluate(lookup, data.expressionLanguageFunctions) map { - case a: WdlArray => WorkflowExecutionDiff(scatterKey.populate(a.value.size) + (scatterKey -> ExecutionStatus.Done)) - case v: WdlValue => throw new RuntimeException("Scatter collection must evaluate to an array") + conditionalKey.scope.condition.evaluate(lookup, data.expressionLanguageFunctions) map { + case b: WdlBoolean => + val conditionalStatus = if (b.value) ExecutionStatus.Done else ExecutionStatus.Bypassed + val result = WorkflowExecutionDiff(conditionalKey.populate(workflowDescriptor.knownValues) + (conditionalKey -> conditionalStatus)) + result + case v: WdlValue => throw new RuntimeException( + s"'if' condition must evaluate to a boolean but instead got ${v.wdlType.toWdlString}") } } - private def processRunnableCollector(collector: CollectorKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { - val shards = data.executionStore.findShardEntries(collector) collect { case (k: BackendJobDescriptorKey, v) if v == ExecutionStatus.Done => k } + private def processRunnableScatter(scatterKey: ScatterKey, data: WorkflowExecutionActorData, bypassed: Boolean): Try[WorkflowExecutionDiff] = { + val lookup = 
scatterKey.scope.lookupFunction( + workflowDescriptor.knownValues, + data.expressionLanguageFunctions, + data.outputStore.fetchNodeOutputEntries + ) + + if (bypassed) { + Success(WorkflowExecutionDiff(scatterKey.populate(0, Map.empty) + (scatterKey -> ExecutionStatus.Bypassed))) + } else { + scatterKey.scope.collection.evaluate(lookup, data.expressionLanguageFunctions) map { + case WdlArrayLike(a) => + WorkflowExecutionDiff(scatterKey.populate(a.value.size, workflowDescriptor.knownValues) + (scatterKey -> ExecutionStatus.Done)) + case v: WdlValue => throw new RuntimeException( + s"Scatter collection must evaluate to an array but instead got ${v.wdlType.toWdlString}") + } + } + } + + private def processRunnableCollector(collector: CollectorKey, data: WorkflowExecutionActorData, isInBypassed: Boolean): Try[WorkflowExecutionDiff] = { + + val shards = data.executionStore.findCompletedShardsForOutput(collector) + data.outputStore.generateCollectorOutput(collector, shards) match { - case Failure(e) => Failure(new RuntimeException(s"Failed to collect output shards for call ${collector.tag}")) - case Success(outputs) => self ! ScatterCollectionSucceededResponse(collector, outputs) + case Failure(e) => Failure(new RuntimeException(s"Failed to collect output shards for call ${collector.tag}", e)) + case Success(outputs) => + val adjustedOutputs: CallOutputs = if (isInBypassed) { + outputs map { output => (output._1, JobOutput(WdlOptionalValue.none(output._2.wdlValue.wdlType) )) } + } else outputs + self ! 
ScatterCollectionSucceededResponse(collector, adjustedOutputs) Success(WorkflowExecutionDiff(Map(collector -> ExecutionStatus.Starting))) } } +} + +object WorkflowExecutionActor { + + val SweepInterval = 1 second + + /** + * States + */ + sealed trait WorkflowExecutionActorState { + def terminal = false + } + + sealed trait WorkflowExecutionActorTerminalState extends WorkflowExecutionActorState { + override val terminal = true + } + + case object WorkflowExecutionPendingState extends WorkflowExecutionActorState + + case object WorkflowExecutionInProgressState extends WorkflowExecutionActorState + + case object WorkflowExecutionAbortingState extends WorkflowExecutionActorState + + case object WorkflowExecutionSuccessfulState extends WorkflowExecutionActorTerminalState + + case object WorkflowExecutionFailedState extends WorkflowExecutionActorTerminalState + + case object WorkflowExecutionAbortedState extends WorkflowExecutionActorTerminalState + + /** + * Commands + */ + sealed trait WorkflowExecutionActorCommand + + case object ExecuteWorkflowCommand extends WorkflowExecutionActorCommand + + /** + * Responses + */ + sealed trait WorkflowExecutionActorResponse { + def jobExecutionMap: JobExecutionMap + } + + case class WorkflowExecutionSucceededResponse(jobExecutionMap: JobExecutionMap, outputs: CallOutputs) + extends WorkflowExecutionActorResponse { + override def toString = "WorkflowExecutionSucceededResponse" + } + + case class WorkflowExecutionAbortedResponse(jobExecutionMap: JobExecutionMap) + extends WorkflowExecutionActorResponse with EngineLifecycleActorAbortedResponse { + override def toString = "WorkflowExecutionAbortedResponse" + } + + final case class WorkflowExecutionFailedResponse(jobExecutionMap: JobExecutionMap, reason: Throwable) extends WorkflowExecutionActorResponse { + override def toString = "WorkflowExecutionFailedResponse" + } - private def metadataKey(jobKey: JobKey, myKey: String) = MetadataKey(workflowDescriptor.id, 
Option(MetadataJobKey(jobKey.scope.fullyQualifiedName, jobKey.index, jobKey.attempt)), myKey) + /** + * Internal control flow messages + */ + private case class JobInitializationFailed(jobKey: JobKey, throwable: Throwable) + + private case class ScatterCollectionFailedResponse(collectorKey: CollectorKey, throwable: Throwable) + + private case class ScatterCollectionSucceededResponse(collectorKey: CollectorKey, outputs: CallOutputs) + + private case class DeclarationEvaluationSucceededResponse(declarationKey: DeclarationKey, value: WdlValue) + + private case object CheckRunnable + + private[execution] sealed trait BypassedScopeResults + + private case class BypassedCallResults(callOutputs: Map[CallKey, CallOutputs]) extends BypassedScopeResults + private case class BypassedDeclaration(declaration: DeclarationKey) extends BypassedScopeResults + + private case class DeclarationEvaluationFailedResponse(declarationKey: DeclarationKey, reason: Throwable) + + case class SubWorkflowSucceededResponse(key: SubWorkflowKey, jobExecutionMap: JobExecutionMap, outputs: CallOutputs) + + case class SubWorkflowFailedResponse(key: SubWorkflowKey, jobExecutionMap: JobExecutionMap, reason: Throwable) + + case class SubWorkflowAbortedResponse(key: SubWorkflowKey, jobExecutionMap: JobExecutionMap) + + /** + * Internal ADTs + */ + case class ScatterKey(scatter: Scatter) extends JobKey { + override val scope = scatter + override val index = None + // When scatters are nested, this might become Some(_) + override val attempt = 1 + override val tag = scope.unqualifiedName + + /** + * Creates a sub-ExecutionStore with Starting entries for each of the scoped children. + * + * @param count Number of ways to scatter the children. + * @return ExecutionStore of scattered children. 
+ */ + def populate(count: Int, workflowCoercedInputs: WorkflowCoercedInputs): Map[JobKey, ExecutionStatus.Value] = { + val keys = this.scope.children flatMap { + explode(_, count, workflowCoercedInputs) + } + keys map { + _ -> ExecutionStatus.NotStarted + } toMap + } + + private def explode(scope: Scope, count: Int, workflowCoercedInputs: WorkflowCoercedInputs): Seq[JobKey] = { + def makeCollectors(scope: Scope): Seq[CollectorKey] = scope match { + case call: WdlCall => List(CollectorKey(call, scatter, count)) + case decl: Declaration => List(CollectorKey(decl, scatter, count)) + case i: If => i.children.flatMap(makeCollectors) + } + + (scope match { + case call: WdlTaskCall => (0 until count) map { i => BackendJobDescriptorKey(call, Option(i), 1) } + case call: WdlWorkflowCall => (0 until count) map { i => SubWorkflowKey(call, Option(i), 1) } + case declaration: Declaration => (0 until count) map { i => DeclarationKey(declaration, Option(i), workflowCoercedInputs) } + case conditional: If => (0 until count) map { i => ConditionalKey(conditional, Option(i)) } + case _: Scatter => + throw new UnsupportedOperationException("Nested Scatters are not supported (yet) ... 
but you might try a sub workflow to achieve the same effect!") + case e => + throw new UnsupportedOperationException(s"Scope ${e.getClass.getName} is not supported.") + }) ++ makeCollectors(scope) + } + } + + // Represents a scatter collection for a call in the execution store + case class CollectorKey(scope: Scope with WdlGraphNode, scatter: Scatter, scatterWidth: Int) extends JobKey { + override val index = None + override val attempt = 1 + override val tag = s"Collector-${scope.unqualifiedName}" + } + + case class SubWorkflowKey(scope: WdlWorkflowCall, index: ExecutionIndex, attempt: Int) extends CallKey { + override val tag = s"SubWorkflow-${scope.unqualifiedName}:${index.fromIndex}:$attempt" + } + + case class ConditionalKey(scope: If, index: ExecutionIndex) extends JobKey { + + override val tag = scope.unqualifiedName + override val attempt = 1 + + /** + * Creates a sub-ExecutionStore with entries for each of the scoped children. + * + * @return ExecutionStore of scattered children. + */ + def populate(workflowCoercedInputs: WorkflowCoercedInputs): Map[JobKey, ExecutionStatus.Value] = { + scope.children map { + keyify(_, workflowCoercedInputs) -> ExecutionStatus.NotStarted + } toMap + } + + /** + * Make a JobKey for all of the contained scopes. + */ + private def keyify(scope: Scope, workflowCoercedInputs: WorkflowCoercedInputs): JobKey = { + scope match { + case call: WdlTaskCall => BackendJobDescriptorKey(call, index, 1) + case call: WdlWorkflowCall => SubWorkflowKey(call, index, 1) + case declaration: Declaration => DeclarationKey(declaration, index, workflowCoercedInputs) + case i: If => ConditionalKey(i, index) + case scatter: Scatter if index.isEmpty => ScatterKey(scatter) + case _: Scatter => + throw new UnsupportedOperationException("Nested Scatters are not supported (yet) ... 
but you might try a sub workflow to achieve the same effect!") + case e => + throw new UnsupportedOperationException(s"Scope ${e.getClass.getName} is not supported in an If block.") + } + } + } + + object DeclarationKey { + def apply(declaration: Declaration, index: ExecutionIndex, inputs: WorkflowCoercedInputs): DeclarationKey = { + inputs.find(_._1 == declaration.fullyQualifiedName) match { + case Some((_, value)) => StaticDeclarationKey(declaration, index, value) + case None => declaration.expression map { expression => + DynamicDeclarationKey(declaration, index, expression) + } getOrElse { + throw new RuntimeException(s"Found a declaration ${declaration.fullyQualifiedName} without expression and without input value. This should have been a validation error.") + } + } + } + } + + sealed trait DeclarationKey extends JobKey { + override val scope: Declaration + override val attempt = 1 + override val tag = s"Declaration-${scope.unqualifiedName}:${index.fromIndex}:$attempt" + } + + case class StaticDeclarationKey(scope: Declaration, index: ExecutionIndex, value: WdlValue) extends DeclarationKey + + case class DynamicDeclarationKey(scope: Declaration, index: ExecutionIndex, requiredExpression: WdlExpression) extends DeclarationKey { + def evaluate(lookup: ScopedLookupFunction, wdlFunctions: WdlFunctions[WdlValue]) = { + requiredExpression.evaluate(lookup, wdlFunctions) flatMap scope.wdlType.coerceRawValue + } + } + + case class WorkflowExecutionException[T <: Throwable](exceptions: NonEmptyList[T]) extends ThrowableAggregation { + override val throwables = exceptions.toList + override val exceptionContext = s"WorkflowExecutionActor" + } + + def props(workflowDescriptor: EngineWorkflowDescriptor, + ioActor: ActorRef, + serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + workflowDockerLookupActor: ActorRef, + jobTokenDispenserActor: ActorRef, + 
backendSingletonCollection: BackendSingletonCollection, + initializationData: AllBackendInitializationData, + restarting: Boolean): Props = { + Props(WorkflowExecutionActor(workflowDescriptor, + ioActor = ioActor, + serviceRegistryActor = serviceRegistryActor, + jobStoreActor = jobStoreActor, + subWorkflowStoreActor = subWorkflowStoreActor, + callCacheReadActor = callCacheReadActor, + callCacheWriteActor = callCacheWriteActor, + workflowDockerLookupActor = workflowDockerLookupActor, + jobTokenDispenserActor = jobTokenDispenserActor, + backendSingletonCollection, initializationData, restarting)).withDispatcher(EngineDispatcher) + } + + implicit class EnhancedWorkflowOutputs(val outputs: Map[LocallyQualifiedName, WdlValue]) extends AnyVal { + def maxStringLength = 1000 + + def stripLarge = outputs map { case (k, v) => + val wdlString = v.toWdlString + + if (wdlString.length > maxStringLength) (k, WdlString(StringUtils.abbreviate(wdlString, maxStringLength))) + else (k, v) + } + } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala index 985d9c812..7c870517e 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala @@ -1,97 +1,136 @@ package cromwell.engine.workflow.lifecycle.execution import akka.actor.ActorRef +import cromwell.backend._ import cromwell.core.ExecutionStatus._ -import cromwell.core.OutputStore.{OutputCallKey, OutputEntry} import cromwell.core._ +import cromwell.engine.workflow.lifecycle.execution.OutputStore.{OutputCallKey, OutputEntry} +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{DeclarationKey, SubWorkflowKey} import cromwell.engine.{EngineWorkflowDescriptor, WdlFunctions} -import 
cromwell.util.JsonFormatting.WdlValueJsonFormatter -import wdl4s.Scope - -import scala.language.postfixOps +import wdl4s.wdl.values.WdlValue object WorkflowExecutionDiff { def empty = WorkflowExecutionDiff(Map.empty) } /** Data differential between current execution data, and updates performed in a method that needs to be merged. */ -final case class WorkflowExecutionDiff(executionStore: Map[JobKey, ExecutionStatus]) { - def containsNewEntry = executionStore.exists(_._2 == NotStarted) +final case class WorkflowExecutionDiff(executionStoreChanges: Map[JobKey, ExecutionStatus], + engineJobExecutionActorAdditions: Map[ActorRef, JobKey] = Map.empty) { + def containsNewEntry = executionStoreChanges.exists(esc => esc._2 == NotStarted) +} + +object WorkflowExecutionActorData { + def empty(workflowDescriptor: EngineWorkflowDescriptor) = { + new WorkflowExecutionActorData( + workflowDescriptor, + ExecutionStore.empty, + Map.empty, + Map.empty, + Map.empty, + Map.empty, + OutputStore.empty + ) + } } case class WorkflowExecutionActorData(workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, backendJobExecutionActors: Map[JobKey, ActorRef], - outputStore: OutputStore) extends WdlLookup { + engineCallExecutionActors: Map[ActorRef, JobKey], + subWorkflowExecutionActors: Map[SubWorkflowKey, ActorRef], + downstreamExecutionMap: JobExecutionMap, + outputStore: OutputStore) { + + val expressionLanguageFunctions = new WdlFunctions(workflowDescriptor.pathBuilders) + + def callExecutionSuccess(jobKey: JobKey, outputs: CallOutputs) = { + val (newJobExecutionActors, newSubWorkflowExecutionActors) = jobKey match { + case jobKey: BackendJobDescriptorKey => (backendJobExecutionActors - jobKey, subWorkflowExecutionActors) + case swKey: SubWorkflowKey => (backendJobExecutionActors, subWorkflowExecutionActors - swKey) + case _ => (backendJobExecutionActors, subWorkflowExecutionActors) + } + + this.copy( + executionStore = executionStore.add(Map(jobKey -> Done)), + 
backendJobExecutionActors = newJobExecutionActors, + subWorkflowExecutionActors = newSubWorkflowExecutionActors, + outputStore = outputStore.add(updateSymbolStoreEntry(jobKey, outputs)) + ) + } - override val expressionLanguageFunctions = new WdlFunctions(workflowDescriptor.engineFilesystems) + def declarationEvaluationSuccess(declarationKey: DeclarationKey, value: WdlValue) = { + val outputStoreKey = OutputCallKey(declarationKey.scope, declarationKey.index) + val outputStoreValue = OutputEntry(declarationKey.scope.unqualifiedName, value.wdlType, Option(value)) + this.copy( + executionStore = executionStore.add(Map(declarationKey -> Done)), + outputStore = outputStore.add(Map(outputStoreKey -> List(outputStoreValue))) + ) + } - def jobExecutionSuccess(jobKey: JobKey, outputs: JobOutputs) = this.copy( - executionStore = executionStore.add(Map(jobKey -> Done)), - backendJobExecutionActors = backendJobExecutionActors - jobKey, - outputStore = outputStore.add(updateSymbolStoreEntry(jobKey, outputs)) - ) + def executionFailed(jobKey: JobKey) = mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Failed))) /** Add the outputs for the specified `JobKey` to the symbol cache. */ - private def updateSymbolStoreEntry(jobKey: JobKey, outputs: JobOutputs) = { + private def updateSymbolStoreEntry(jobKey: JobKey, outputs: CallOutputs) = { val newOutputEntries = outputs map { case (name, value) => OutputEntry(name, value.wdlValue.wdlType, Option(value.wdlValue)) } - Map(OutputCallKey(jobKey.scope, jobKey.index) -> newOutputEntries) + Map(OutputCallKey(jobKey.scope, jobKey.index) -> newOutputEntries.toList) } - /** Checks if the workflow is completed by scanning through the executionStore. + /** Checks if the workflow is completed. * If complete, this will return Some(finalStatus). 
Otherwise, returns None */ def workflowCompletionStatus: Option[ExecutionStatus] = { - // `List`ify the `prerequisiteScopes` to avoid expensive hashing of `Scope`s when assembling the result. - def upstream(scope: Scope): List[Scope] = scope.prerequisiteScopes.toList ++ scope.prerequisiteScopes.toList.flatMap(upstream) - def upstreamFailed(scope: Scope) = upstream(scope) filter { s => - executionStore.store.map({ case (a, b) => a.scope -> b }).get(s).contains(Failed) - } - // activeJobs is the subset of the executionStore that are either running or will run in the future. - val activeJobs = executionStore.store.toList filter { - case (jobKey, jobStatus) => (jobStatus == NotStarted && upstreamFailed(jobKey.scope).isEmpty) || jobStatus == Starting || jobStatus == Running - } - - activeJobs match { - case jobs if jobs.isEmpty && hasFailedJob => Option(Failed) - case jobs if jobs.isEmpty && !hasFailedJob => Option(Done) + (executionStore.hasActiveJob, executionStore.hasFailedJob) match { + case (false, true) => Option(Failed) + case (false, false) => Option(Done) case _ => None } } - def hasFailedJob: Boolean = { - executionStore.store.values.exists(_ == ExecutionStatus.Failed) + def removeEngineJobExecutionActor(actorRef: ActorRef) = { + this.copy(engineCallExecutionActors = engineCallExecutionActors - actorRef) } - def addBackendJobExecutionActor(jobKey: JobKey, actor: Option[ActorRef]): WorkflowExecutionActorData = actor match { - case Some(actorRef) => this.copy(backendJobExecutionActors = backendJobExecutionActors + (jobKey -> actorRef)) + def addCallExecutionActor(jobKey: JobKey, actor: Option[ActorRef]): WorkflowExecutionActorData = actor match { + case Some(actorRef) => + jobKey match { + case jobKey: BackendJobDescriptorKey => this.copy(backendJobExecutionActors = backendJobExecutionActors + (jobKey -> actorRef)) + case swKey: SubWorkflowKey => this.copy(subWorkflowExecutionActors = subWorkflowExecutionActors + (swKey -> actorRef)) + case _ => this + } case 
None => this } - def removeBackendJobExecutionActor(jobKey: JobKey): WorkflowExecutionActorData = { - this.copy(backendJobExecutionActors = backendJobExecutionActors - jobKey) - } - - def outputsJson(): String = { - // Printing the final outputs, temporarily here until SingleWorkflowManagerActor is made in-sync with the shadow mode - import WdlValueJsonFormatter._ - import spray.json._ - val workflowOutputs = outputStore.store collect { - case (key, outputs) if key.index.isEmpty => outputs map { output => - s"${key.call.fullyQualifiedName}.${output.name}" -> (output.wdlValue map { _.valueString } getOrElse "N/A") - } + def removeCallExecutionActor(jobKey: JobKey): WorkflowExecutionActorData = { + jobKey match { + case jobKey: BackendJobDescriptorKey => this.copy(backendJobExecutionActors = backendJobExecutionActors - jobKey) + case swKey: SubWorkflowKey => this.copy(subWorkflowExecutionActors = subWorkflowExecutionActors - swKey) + case _ => this } + } - "Workflow complete. Final Outputs: \n" + workflowOutputs.flatten.toMap.toJson.prettyPrint + def addExecutions(jobExecutionMap: JobExecutionMap): WorkflowExecutionActorData = { + this.copy(downstreamExecutionMap = downstreamExecutionMap ++ jobExecutionMap) } def mergeExecutionDiff(diff: WorkflowExecutionDiff): WorkflowExecutionActorData = { - this.copy(executionStore = executionStore.add(diff.executionStore)) + this.copy( + executionStore = executionStore.add(diff.executionStoreChanges), + engineCallExecutionActors = engineCallExecutionActors ++ diff.engineJobExecutionActorAdditions + ) } def mergeExecutionDiffs(diffs: Traversable[WorkflowExecutionDiff]): WorkflowExecutionActorData = { diffs.foldLeft(this)((newData, diff) => newData.mergeExecutionDiff(diff)) } - + + def resetCheckRunnable = this.copy(executionStore = executionStore.copy(hasNewRunnables = false)) + + def hasNewRunnables = executionStore.hasNewRunnables + + def jobExecutionMap: JobExecutionMap = { + downstreamExecutionMap updated 
(workflowDescriptor.backendDescriptor, executionStore.startedJobs) + } + + def hasRunningActors = backendJobExecutionActors.nonEmpty || subWorkflowExecutionActors.nonEmpty } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala new file mode 100644 index 000000000..563c53006 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala @@ -0,0 +1,35 @@ +package cromwell.engine.workflow.lifecycle.execution + +import java.time.OffsetDateTime + +import akka.actor.ActorRef +import cromwell.core.{WorkflowId, WorkflowMetadataKeys, WorkflowState} +import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} + +trait WorkflowMetadataHelper { + + def serviceRegistryActor: ActorRef + + def pushWorkflowStart(workflowId: WorkflowId) = { + val startEvent = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.StartTime), MetadataValue(OffsetDateTime.now.toString)) + serviceRegistryActor ! PutMetadataAction(startEvent) + } + + def pushWorkflowEnd(workflowId: WorkflowId) = { + val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.EndTime), MetadataValue(OffsetDateTime.now.toString)) + serviceRegistryActor ! PutMetadataAction(metadataEventMsg) + } + + def pushWorkflowFailures(workflowId: WorkflowId, failures: List[Throwable]) = { + val failureEvents = failures flatMap { r => throwableToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Failures}"), r) } + serviceRegistryActor ! 
PutMetadataAction(failureEvents) + } + + def pushCurrentStateToMetadataService(workflowId: WorkflowId, workflowState: WorkflowState): Unit = { + val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Status), + MetadataValue(workflowState)) + serviceRegistryActor ! PutMetadataAction(metadataEventMsg) + } + +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala index 99e0fbd44..0afff7212 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala @@ -1,82 +1,146 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching -import cromwell.backend.BackendJobExecutionActor.SucceededResponse -import cromwell.core.ExecutionIndex.IndexEnhancedIndex -import cromwell.core.WorkflowId +import cats.data.NonEmptyList +import cromwell.backend.BackendJobExecutionActor.{JobFailedNonRetryableResponse, JobSucceededResponse} +import cromwell.core.ExecutionIndex.{ExecutionIndex, IndexEnhancedIndex} import cromwell.core.callcaching.HashResult +import cromwell.core.path.Path import cromwell.core.simpleton.WdlValueSimpleton +import cromwell.core.{CallOutputs, FullyQualifiedName, JobOutput, LocallyQualifiedName, WorkflowId} +import cromwell.database.sql.SqlConverters._ import cromwell.database.sql._ import cromwell.database.sql.joins.CallCachingJoin -import cromwell.database.sql.tables.{CallCachingDetritusEntry, CallCachingEntry, CallCachingHashEntry, CallCachingSimpletonEntry} +import cromwell.database.sql.tables._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCache.CallCacheHashBundle +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor.AggregatedCallHashes import 
cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CallCacheHashes import scala.concurrent.{ExecutionContext, Future} -import scala.language.postfixOps -import scalaz.Scalaz._ -import scalaz._ - -final case class MetaInfoId(id: Int) +final case class CallCachingEntryId(id: Int) /** * Given a database-layer CallCacheStore, this accessor can access the database with engine-friendly data types. */ class CallCache(database: CallCachingSqlDatabase) { - def addToCache(workflowId: WorkflowId, callCacheHashes: CallCacheHashes, response: SucceededResponse)(implicit ec: ExecutionContext): Future[Unit] = { - val metaInfo = CallCachingEntry( - workflowExecutionUuid = workflowId.toString, - callFullyQualifiedName = response.jobKey.call.fullyQualifiedName, - jobIndex = response.jobKey.index.fromIndex, - returnCode = response.returnCode, - allowResultReuse = true) - val hashes = callCacheHashes.hashes - import cromwell.core.simpleton.WdlValueSimpleton._ - val result = response.jobOutputs.mapValues(_.wdlValue).simplify - val jobDetritus = response.jobDetritusFiles.getOrElse(Map.empty) - - addToCache(metaInfo, hashes, result, jobDetritus) + def addToCache(bundles: Seq[CallCacheHashBundle], batchSize: Int)(implicit ec: ExecutionContext): Future[Unit] = { + val joins = bundles map { b => + val metaInfo = CallCachingEntry( + workflowExecutionUuid = b.workflowId.toString, + callFullyQualifiedName = b.fullyQualifiedName, + jobIndex = b.jobIndex.fromIndex, + jobAttempt = b.jobAttempt, + returnCode = b.returnCode, + allowResultReuse = b.allowResultReuse) + import cromwell.core.simpleton.WdlValueSimpleton._ + val result = b.callOutputs.mapValues(_.wdlValue).simplify + val jobDetritus = b.jobDetritusFiles.getOrElse(Map.empty) + buildCallCachingJoin(metaInfo, b.callCacheHashes, result, jobDetritus) + } + + database.addCallCaching(joins, batchSize) } - private def addToCache(metaInfo: CallCachingEntry, hashes: Set[HashResult], - result: Iterable[WdlValueSimpleton], 
jobDetritus: Map[String, String]) - (implicit ec: ExecutionContext): Future[Unit] = { + private def buildCallCachingJoin(callCachingEntry: CallCachingEntry, callCacheHashes: CallCacheHashes, + result: Iterable[WdlValueSimpleton], jobDetritus: Map[String, Path]): CallCachingJoin = { val hashesToInsert: Iterable[CallCachingHashEntry] = { - hashes map { hash => CallCachingHashEntry(hash.hashKey.key, hash.hashValue.value) } + callCacheHashes.hashes map { hash => CallCachingHashEntry(hash.hashKey.key, hash.hashValue.value) } + } + + val aggregatedHashesToInsert: Option[CallCachingAggregationEntry] = { + Option(CallCachingAggregationEntry( + baseAggregation = callCacheHashes.aggregatedInitialHash, + inputFilesAggregation = callCacheHashes.fileHashes.map(_.aggregatedHash) + )) } val resultToInsert: Iterable[CallCachingSimpletonEntry] = { result map { case WdlValueSimpleton(simpletonKey, wdlPrimitive) => - CallCachingSimpletonEntry(simpletonKey, wdlPrimitive.valueString, wdlPrimitive.wdlType.toWdlString) + CallCachingSimpletonEntry(simpletonKey, wdlPrimitive.valueString.toClobOption, wdlPrimitive.wdlType.toWdlString) } } val jobDetritusToInsert: Iterable[CallCachingDetritusEntry] = { jobDetritus map { - case (fileName, filePath) => CallCachingDetritusEntry(fileName, filePath) + case (fileName, filePath) => CallCachingDetritusEntry(fileName, filePath.pathAsString.toClobOption) } } - val callCachingJoin = - CallCachingJoin(metaInfo, hashesToInsert.toSeq, resultToInsert.toSeq, jobDetritusToInsert.toSeq) - - database.addCallCaching(callCachingJoin) + CallCachingJoin(callCachingEntry, hashesToInsert.toSeq, aggregatedHashesToInsert, resultToInsert.toSeq, jobDetritusToInsert.toSeq) } - def fetchMetaInfoIdsMatchingHashes(callCacheHashes: CallCacheHashes)(implicit ec: ExecutionContext): Future[Set[MetaInfoId]] = { - metaInfoIdsMatchingHashes(callCacheHashes.hashes.toList.toNel.get) + def hasBaseAggregatedHashMatch(baseAggregatedHash: String)(implicit ec: ExecutionContext): 
Future[Boolean] = { + database.hasMatchingCallCachingEntriesForBaseAggregation(baseAggregatedHash) } - private def metaInfoIdsMatchingHashes(hashKeyValuePairs: NonEmptyList[HashResult]) - (implicit ec: ExecutionContext): Future[Set[MetaInfoId]] = { - val result = database.queryCallCachingEntryIds(hashKeyValuePairs map { + def hasKeyValuePairHashMatch(hashes: NonEmptyList[HashResult])(implicit ec: ExecutionContext): Future[Boolean] = { + val hashKeyValuePairs = hashes map { case HashResult(hashKey, hashValue) => (hashKey.key, hashValue.value) - }) + } + database.hasMatchingCallCachingEntriesForHashKeyValues(hashKeyValuePairs) + } + + def callCachingHitForAggregatedHashes(aggregatedCallHashes: AggregatedCallHashes, hitNumber: Int) + (implicit ec: ExecutionContext): Future[Option[CallCachingEntryId]] = { + database.findCacheHitForAggregation( + baseAggregationHash = aggregatedCallHashes.baseAggregatedHash, + inputFilesAggregationHash = aggregatedCallHashes.inputFilesAggregatedHash, + hitNumber).map(_ map CallCachingEntryId.apply) + } + + def fetchCachedResult(callCachingEntryId: CallCachingEntryId)(implicit ec: ExecutionContext): Future[Option[CallCachingJoin]] = { + database.queryResultsForCacheId(callCachingEntryId.id) + } - result.map(_.toSet.map(MetaInfoId)) + def cacheEntryExistsForCall(workflowUuid: String, callFqn: String, index: Int)(implicit ec: ExecutionContext): Future[Boolean] = { + database.cacheEntryExistsForCall(workflowUuid, callFqn, index) } - def fetchCachedResult(metaInfoId: MetaInfoId)(implicit ec: ExecutionContext): Future[Option[CallCachingJoin]] = { - database.queryCallCaching(metaInfoId.id) + def invalidate(callCachingEntryId: CallCachingEntryId)(implicit ec: ExecutionContext) = { + database.invalidateCall(callCachingEntryId.id) + } +} + +object CallCache { + object CallCacheHashBundle { + def apply(workflowId: WorkflowId, callCacheHashes: CallCacheHashes, jobSucceededResponse: JobSucceededResponse) = { + new CallCacheHashBundle( + workflowId = 
workflowId, + callCacheHashes = callCacheHashes, + fullyQualifiedName = jobSucceededResponse.jobKey.call.fullyQualifiedName, + jobIndex = jobSucceededResponse.jobKey.index, + jobAttempt = Option(jobSucceededResponse.jobKey.attempt), + returnCode = jobSucceededResponse.returnCode, + allowResultReuse = true, + callOutputs = jobSucceededResponse.jobOutputs, + jobDetritusFiles = jobSucceededResponse.jobDetritusFiles + ) + } + + def apply(workflowId: WorkflowId, callCacheHashes: CallCacheHashes, jobFailedNonRetryableResponse: JobFailedNonRetryableResponse) = { + new CallCacheHashBundle( + workflowId = workflowId, + callCacheHashes = callCacheHashes, + fullyQualifiedName = jobFailedNonRetryableResponse.jobKey.scope.fullyQualifiedName, + jobIndex = jobFailedNonRetryableResponse.jobKey.index, + jobAttempt = Option(jobFailedNonRetryableResponse.jobKey.attempt), + returnCode = None, + allowResultReuse = false, + callOutputs = Map.empty[LocallyQualifiedName, JobOutput], + jobDetritusFiles = None + ) + } } + case class CallCacheHashBundle private ( + workflowId: WorkflowId, + callCacheHashes: CallCacheHashes, + fullyQualifiedName: FullyQualifiedName, + jobIndex: ExecutionIndex, + jobAttempt: Option[Int], + returnCode: Option[Int], + allowResultReuse: Boolean, + callOutputs: CallOutputs, + jobDetritusFiles: Option[Map[String, Path]] + ) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActor.scala new file mode 100644 index 000000000..b4d661255 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActor.scala @@ -0,0 +1,306 @@ +package cromwell.engine.workflow.lifecycle.execution.callcaching + +import akka.actor.{ActorRef, LoggingFSM, Props} +import cats.data.NonEmptyList +import cats.instances.list._ +import cats.syntax.foldable._ +import 
cromwell.core.WorkflowId +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActor.{CallCacheDiffActorData, _} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffQueryParameter.CallCacheDiffQueryCall +import cromwell.services.metadata.CallMetadataKeys.CallCachingKeys +import cromwell.services.metadata.MetadataService.{GetMetadataQueryAction, MetadataLookupResponse, MetadataServiceKeyLookupFailed} +import cromwell.services.metadata._ +import cromwell.webservice.metadata.MetadataComponent._ +import cromwell.webservice.metadata._ +import cromwell.core.Dispatcher.EngineDispatcher +import spray.json.JsObject + +import scala.language.postfixOps +import scala.util.{Failure, Success, Try} + +object CallCacheDiffActor { + private val PlaceholderMissingHashValue = MetadataPrimitive(MetadataValue("Error: there is a hash entry for this key but the value is null !")) + + final case class CachedCallNotFoundException(message: String) extends Exception { + override def getMessage = message + } + + // Exceptions when calls exist but have no hashes in their metadata, indicating they were run pre-28 + private val HashesForCallAAndBNotFoundException = new Exception("callA and callB have not finished yet, or were run on a previous version of Cromwell on which this endpoint was not supported.") + private val HashesForCallANotFoundException = new Exception("callA has not finished yet, or was run on a previous version of Cromwell on which this endpoint was not supported.") + private val HashesForCallBNotFoundException = new Exception("callB has not finished yet, or was run on a previous version of Cromwell on which this endpoint was not supported.") + + sealed trait CallCacheDiffActorState + case object Idle extends CallCacheDiffActorState + case object WaitingForMetadata extends CallCacheDiffActorState + + sealed trait CallCacheDiffActorData + case object CallCacheDiffNoData extends CallCacheDiffActorData + case class 
CallCacheDiffWithRequest(queryA: MetadataQuery, + queryB: MetadataQuery, + responseA: Option[MetadataLookupResponse], + responseB: Option[MetadataLookupResponse], + replyTo: ActorRef + ) extends CallCacheDiffActorData + + sealed abstract class CallCacheDiffActorResponse + case class BuiltCallCacheDiffResponse(response: JsObject) extends CallCacheDiffActorResponse + case class FailedCallCacheDiffResponse(reason: Throwable) extends CallCacheDiffActorResponse + + + def props(serviceRegistryActor: ActorRef) = Props(new CallCacheDiffActor(serviceRegistryActor)).withDispatcher(EngineDispatcher) +} + +class CallCacheDiffActor(serviceRegistryActor: ActorRef) extends LoggingFSM[CallCacheDiffActorState, CallCacheDiffActorData] { + startWith(Idle, CallCacheDiffNoData) + + when(Idle) { + case Event(CallCacheDiffQueryParameter(callA, callB), CallCacheDiffNoData) => + val queryA = makeMetadataQuery(callA) + val queryB = makeMetadataQuery(callB) + serviceRegistryActor ! GetMetadataQueryAction(queryA) + serviceRegistryActor ! 
GetMetadataQueryAction(queryB) + goto(WaitingForMetadata) using CallCacheDiffWithRequest(queryA, queryB, None, None, sender()) + } + + when(WaitingForMetadata) { + // First Response + // Response A + case Event(response: MetadataLookupResponse, data @ CallCacheDiffWithRequest(queryA, _, None, None, _)) if queryA == response.query => + stay() using data.copy(responseA = Option(response)) + // Response B + case Event(response: MetadataLookupResponse, data @ CallCacheDiffWithRequest(_, queryB, None, None, _)) if queryB == response.query => + stay() using data.copy(responseB = Option(response)) + // Second Response + // Response A + case Event(response: MetadataLookupResponse, CallCacheDiffWithRequest(queryA, queryB, None, Some(responseB), replyTo)) if queryA == response.query => + buildDiffAndRespond(queryA, queryB, response, responseB, replyTo) + // Response B + case Event(response: MetadataLookupResponse, CallCacheDiffWithRequest(queryA, queryB, Some(responseA), None, replyTo)) if queryB == response.query => + buildDiffAndRespond(queryA, queryB, responseA, response, replyTo) + case Event(MetadataServiceKeyLookupFailed(_, failure), data: CallCacheDiffWithRequest) => + data.replyTo ! FailedCallCacheDiffResponse(failure) + context stop self + stay() + } + + /** + * Builds a response and sends it back as Json. + * The response is structured in the following way + * { + * "callA": { + * -- information about call A -- + * }, + * "callB": { + * -- information about call B -- + * }, + * "hashDifferential": [ + * { + * "hash key": { + * "callA": -- hash value for call A, or null --, + * "callB": -- hash value for call B, or null -- + * } + * }, + * ... 
+ * ] + * } + */ + private def buildDiffAndRespond(queryA: MetadataQuery, + queryB: MetadataQuery, + responseA: MetadataLookupResponse, + responseB: MetadataLookupResponse, + replyTo: ActorRef) = { + + lazy val buildResponse = { + diffHashes(responseA.eventList, responseB.eventList) match { + case Success(diff) => + val diffObject = MetadataObject(Map( + "callA" -> makeCallInfo(queryA, responseA.eventList), + "callB" -> makeCallInfo(queryB, responseB.eventList), + "hashDifferential" -> diff + )) + + BuiltCallCacheDiffResponse(metadataComponentJsonWriter.write(diffObject).asJsObject) + case Failure(f) => FailedCallCacheDiffResponse(f) + } + } + + val response = checkCallsExistence(queryA, queryB, responseA, responseB) match { + case Some(msg) => FailedCallCacheDiffResponse(CachedCallNotFoundException(msg)) + case None => buildResponse + } + + replyTo ! response + + context stop self + stay() + } + + /** + * Returns an error message if one or both of the calls are not found, or None if it does + */ + private def checkCallsExistence(queryA: MetadataQuery, + queryB: MetadataQuery, + responseA: MetadataLookupResponse, + responseB: MetadataLookupResponse): Option[String] = { + import cromwell.core.ExecutionIndex._ + + def makeTag(query: MetadataQuery) = { + s"${query.workflowId}:${query.jobKey.get.callFqn}:${query.jobKey.get.index.fromIndex}" + } + + def makeNotFoundMessage(queries: NonEmptyList[MetadataQuery]) = { + val plural = if (queries.tail.nonEmpty) "s" else "" + s"Cannot find call$plural ${queries.map(makeTag).toList.mkString(", ")}" + } + + (responseA.eventList, responseB.eventList) match { + case (a, b) if a.isEmpty && b.isEmpty => Option(makeNotFoundMessage(NonEmptyList.of(queryA, queryB))) + case (a, _) if a.isEmpty => Option(makeNotFoundMessage(NonEmptyList.of(queryA))) + case (_, b) if b.isEmpty => Option(makeNotFoundMessage(NonEmptyList.of(queryB))) + case _ => None + } + } + + /** + * Generates the "info" section of callA or callB + */ + private def 
makeCallInfo(query: MetadataQuery, eventList: Seq[MetadataEvent]): MetadataComponent = { + val callKey = MetadataObject(Map( + "workflowId" -> MetadataPrimitive(MetadataValue(query.workflowId.toString)), + "callFqn" -> MetadataPrimitive(MetadataValue(query.jobKey.get.callFqn)), + "jobIndex" -> MetadataPrimitive(MetadataValue(query.jobKey.get.index.getOrElse(-1))) + )) + + val allowResultReuse = attributeToComponent(eventList, { _ == CallCachingKeys.AllowReuseMetadataKey }, { _ => "allowResultReuse" }) + val executionStatus = attributeToComponent(eventList, { _ == CallMetadataKeys.ExecutionStatus }) + + List(callKey, allowResultReuse, executionStatus) combineAll + } + + /** + * Collects events from the list for which the keys verify the keyFilter predicate + * and apply keyModifier to the event's key + */ + private def collectEvents(events: Seq[MetadataEvent], + keyFilter: (String => Boolean), + keyModifier: (String => String)) = events collect { + case event @ MetadataEvent(metadataKey @ MetadataKey(_, _, key), _, _) if keyFilter(key) => + event.copy(key = metadataKey.copy(key = keyModifier(key))) + } + + /** + * Given a list of events, a keyFilter and a keyModifier, returns the associated MetadataComponent. + * Ensures that events are properly aggregated together (CRDTs and latest timestamp rule) + */ + private def attributeToComponent(events: Seq[MetadataEvent], keyFilter: (String => Boolean), keyModifier: (String => String) = identity[String]) = { + MetadataComponent(collectEvents(events, keyFilter, keyModifier)) + } + + /** + * Makes a diff object out of a key and a pair of values. + * Values are Option[Option[MetadataValue]] for the following reason: + * + * The outer option represents whether or not this key had a corresponding hash metadata entry for the given call + * If the above is true, the inner value is the metadata value for this entry, which is nullable, hence an Option. 
+ * The first outer option will determine whether the resulting json value will be null (no hash entry for this key), + * or the actual value. + * If the metadata value (inner option) happens to be None, it's an error, as we don't expect to publish null hash values. + * In that case we replace it with the placeholderMissingHashValue. + */ + private def makeHashDiffObject(key: String, valueA: Option[Option[MetadataValue]], valueB: Option[Option[MetadataValue]]) = { + def makeFinalValue(value: Option[Option[MetadataValue]]) = value match { + case Some(Some(metadataValue)) => MetadataPrimitive(metadataValue) + case Some(None) => PlaceholderMissingHashValue + case None => MetadataNull + } + + MetadataObject( + "hashKey" -> MetadataPrimitive(MetadataValue(key.trim, MetadataString)), + "callA" -> makeFinalValue(valueA), + "callB" -> makeFinalValue(valueB) + ) + } + + /** + * Creates the hash differential between 2 list of events + */ + private def diffHashes(eventsA: Seq[MetadataEvent], eventsB: Seq[MetadataEvent]): Try[MetadataComponent] = { + val hashesKey = CallCachingKeys.HashesKey + MetadataKey.KeySeparator + // Collect hashes events and map their key to only keep the meaningful part of the key + // Then map the result to get a Map of hashKey -> Option[MetadataValue]. This will allow for fast lookup when + // comparing the 2 hash sets. 
+ // Note that it's an Option[MetadataValue] because metadata values can be null, although for this particular + // case we don't expect it to be (we should never publish a hash metadata event with a null value) + // If that happens we will place a placeholder value in place of the hash to signify of the unexpected absence of it + def collectHashes(events: Seq[MetadataEvent]) = { + collectEvents(events, { _.startsWith(hashesKey) }, { _.stripPrefix(hashesKey) }) map { + case MetadataEvent(MetadataKey(_, _, keyA), valueA, _) => keyA -> valueA + } toMap + } + + val hashesA: Map[String, Option[MetadataValue]] = collectHashes(eventsA) + val hashesB: Map[String, Option[MetadataValue]] = collectHashes(eventsB) + + (hashesA.isEmpty, hashesB.isEmpty) match { + case (true, true) => Failure(HashesForCallAAndBNotFoundException) + case (true, false) => Failure(HashesForCallANotFoundException) + case (false, true) => Failure(HashesForCallBNotFoundException) + case (false, false) => Success(diffHashEvents(hashesA, hashesB)) + } + + } + + private def diffHashEvents(hashesA: Map[String, Option[MetadataValue]], hashesB: Map[String, Option[MetadataValue]]) = { + val hashesUniqueToB: Map[String, Option[MetadataValue]] = hashesB.filterNot({ case (k, _) => hashesA.keySet.contains(k) }) + + val hashDiff: List[MetadataComponent] = { + // Start with all hashes in A + hashesA + // Try to find the corresponding pair in B. 
+ // We end up with a + // List[(Option[String, Option[MetadataValue], Option[String, Option[MetadataValue])] + // ^ ^ ^ ^ + // hashKey hashValue hashKey hashValue + // for for for for + // A A B B + // |____________________________________| |___________________________________| + // hashPair for A hashPair for B + // + // HashPairs are Some or None depending on whether or not they have a metadata entry for the corresponding hashKey + // At this stage we only have Some(hashPair) for A, and either Some(hashPair) or None for B depending on if we found it in hashesB + .map({ + hashPairA => Option(hashPairA) -> hashesB.find(_._1 == hashPairA._1) + }) + // Add the missing hashes that are in B but not in A. The left hashPair is therefore None + .++(hashesUniqueToB.map(None -> Option(_))) + .collect({ + // Both have a value but they're different. We can assume the keys are the same (if we did our job right until here) + case (Some((keyA, valueA)), Some((_, valueB))) if valueA != valueB => + makeHashDiffObject(keyA, Option(valueA), Option(valueB)) + // Key is in A but not in B + case (Some((keyA, valueA)), None) => + makeHashDiffObject(keyA, Option(valueA), None) + // Key is in B but not in A + case (None, Some((keyB, valueB))) => + makeHashDiffObject(keyB, None, Option(valueB)) + }) + .toList + } + + MetadataList(hashDiff) + } + + /** + * Create a Metadata query from a CallCacheDiffQueryCall + */ + private def makeMetadataQuery(call: CallCacheDiffQueryCall) = MetadataQuery( + WorkflowId.fromString(call.workflowId), + // jobAttempt None will return keys for all attempts + Option(MetadataQueryJobKey(call.callFqn, call.jobIndex, None)), + None, + Option(NonEmptyList.of("callCaching", "executionStatus")), + None, + expandSubWorkflows = false + ) +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffQueryParameter.scala 
b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffQueryParameter.scala new file mode 100644 index 000000000..3c2058720 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffQueryParameter.scala @@ -0,0 +1,52 @@ +package cromwell.engine.workflow.lifecycle.execution.callcaching + +import cats.data.{NonEmptyList, Validated} +import cats.implicits._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffQueryParameter.CallCacheDiffQueryCall + +import scala.util.{Failure, Success, Try} + +object CallCacheDiffQueryParameter { + case class CallCacheDiffQueryCall(workflowId: String, callFqn: String, jobIndex: Option[Int]) + + private def missingWorkflowError(attribute: String) = s"missing $attribute query parameter".invalidNel + + def fromParameters(parameters: Seq[(String, String)]): Validated[NonEmptyList[String], CallCacheDiffQueryParameter] = { + def extractIndex(parameter: String): Validated[NonEmptyList[String], Option[Int]] = { + parameters.find(_._1 == parameter) match { + case Some((_, value)) => Try(value.toInt) match { + case Success(index) => Option(index).validNel + case Failure(f) => f.getMessage.invalidNel + } + case None => None.validNel + } + } + + def extractAttribute(parameter: String): Validated[NonEmptyList[String], String] = { + parameters.find(_._1 == parameter) match { + case Some((_, value)) => value.validNel + case None => missingWorkflowError(parameter) + } + } + + val workflowAValidation = extractAttribute("workflowA") + val workflowBValidation = extractAttribute("workflowB") + + val callAValidation = extractAttribute("callA") + val callBValidation = extractAttribute("callB") + + val indexAValidation = extractIndex("indexA") + val indexBValidation = extractIndex("indexB") + + (workflowAValidation |@| callAValidation |@| indexAValidation |@| + workflowBValidation |@| callBValidation |@| indexBValidation) map { + case 
((workflowA, callA, indexA, workflowB, callB, indexB)) => + CallCacheDiffQueryParameter( + CallCacheDiffQueryCall(workflowA, callA, indexA), + CallCacheDiffQueryCall(workflowB, callB, indexB) + ) + } + } +} + +case class CallCacheDiffQueryParameter(callA: CallCacheDiffQueryCall, callB: CallCacheDiffQueryCall) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheHashingJobActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheHashingJobActor.scala new file mode 100644 index 000000000..05591c555 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheHashingJobActor.scala @@ -0,0 +1,294 @@ +package cromwell.engine.workflow.lifecycle.execution.callcaching + +import java.security.MessageDigest +import javax.xml.bind.DatatypeConverter + +import akka.actor.{ActorRef, LoggingFSM, Props, Terminated} +import cats.data.NonEmptyList +import cromwell.backend.standard.callcaching.StandardFileHashingActor.{FileHashResponse, SingleFileHashRequest} +import cromwell.backend.validation.RuntimeAttributesKeys +import cromwell.backend.{BackendInitializationData, BackendJobDescriptor, RuntimeAttributeDefinition} +import cromwell.core.callcaching._ +import cromwell.core.simpleton.WdlValueSimpleton +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheHashingJobActor.CallCacheHashingJobActorData._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheHashingJobActor._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CacheMiss +import cromwell.core.Dispatcher.EngineDispatcher + +import wdl4s.wdl.values.WdlFile + +/** + * Actor responsible for calculating individual as well as aggregated hashes for a job. + * First calculate the initial hashes (individual and aggregated), and send them to its parent + * as well as the callCacheReadingJobActor if one was provided. 
+ * From there, either wait for a NextBatchOfFileHashesRequest message from the callCacheReadingJobActor to trigger the next + * batch of file hash requests, or send it itself if there is no callCacheReadingJobActor. + * When we get all the file hashes for one batch, send those hashes to the CCRead actor and wait for the next NextBatchOfFileHashesRequest. + * If it was the last batch and all the files have been hashed, send all the hashes along with the aggregated file hash. + * If at any point the callCacheReadingJobActor dies (either because it decided this was a CacheMiss or it found a successful + * CacheHit), either keep hashing the files if writeToCache is true, or die if it's not. + * Both this actor and the CCRJA are children of the EJHA. + * The CCRJA reports its findings (cache hit / miss) directly to the EJHA that forwards them to the EJEA. + * This actor does not need nor care about cache hit/miss. + */ +class CallCacheHashingJobActor(jobDescriptor: BackendJobDescriptor, + callCacheReadingJobActor: Option[ActorRef], + initializationData: Option[BackendInitializationData], + runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition], + backendName: String, + fileHashingActorProps: Props, + writeToCache: Boolean, + callCachingEligible: CallCachingEligible + ) extends LoggingFSM[CallCacheHashingJobActorState, CallCacheHashingJobActorData] { + + val fileHashingActor = makeFileHashingActor() + + // Watch the read actor, as it will die when it's done (cache miss or successful cache hit) + // When that happens we want to either stop if writeToCache is false, or keep going + callCacheReadingJobActor map context.watch + + initializeCCHJA() + + override def preStart(): Unit = { + if (callCacheReadingJobActor.isEmpty && !writeToCache) { + log.error("Programmer error ! There is no reason to have a hashing actor if both read and write to cache are off") + context.parent ! 
CacheMiss + context stop self + } + super.preStart() + } + + when(WaitingForHashFileRequest) { + case Event(NextBatchOfFileHashesRequest, data) => + data.fileHashRequestsRemaining.headOption match { + case Some(batch) if batch.nonEmpty => + batch foreach { fileHashingActor ! _ } + goto(HashingFiles) + case _ => + sendToCCReadActor(NoFileHashesResult, data) + stopAndStay(Option(NoFileHashesResult)) + } + case Event(Terminated(_), data) if writeToCache => + self ! NextBatchOfFileHashesRequest + stay() using data.copy(callCacheReadingJobActor = None) + } + + when(HashingFiles) { + case Event(FileHashResponse(result), data) => + addFileHash(result, data) match { + case (newData, Some(partialHashes: PartialFileHashingResult)) => + sendToCCReadActor(partialHashes, data) + // If there is no CCReader, send a message to itself to trigger the next batch + if (newData.callCacheReadingJobActor.isEmpty) self ! NextBatchOfFileHashesRequest + goto(WaitingForHashFileRequest) using newData + case (newData, Some(finalResult: FinalFileHashingResult)) => + sendToCCReadActor(finalResult, newData) + stopAndStay(Option(finalResult)) + case (newData, None) => + stay() using newData + } + case Event(Terminated(_), data) if writeToCache => + stay() using data.copy(callCacheReadingJobActor = None) + } + + whenUnhandled { + case Event(Terminated(_), _) => + stopAndStay(None) + case Event(error: HashingFailedMessage, data) => + log.error(error.reason, s"Failed to hash ${error.file}") + sendToCCReadActor(error, data) + context.parent ! error + stopAndStay(None) + } + + // Is its own function so it can be overriden in the test + private [callcaching] def addFileHash(hashResult: HashResult, data: CallCacheHashingJobActorData) = { + data.withFileHash(hashResult) + } + + private def stopAndStay(fileHashResult: Option[FinalFileHashingResult]) = { + fileHashResult foreach { context.parent ! 
_ } + context stop fileHashingActor + context stop self + stay() + } + + private def sendToCCReadActor(message: Any, data: CallCacheHashingJobActorData) = { + data.callCacheReadingJobActor foreach { _ ! message } + } + + private def initializeCCHJA() = { + import cromwell.core.simpleton.WdlValueSimpleton._ + + val unqualifiedInputs = jobDescriptor.inputDeclarations map { case (declaration, value) => declaration.unqualifiedName -> value } + + val inputSimpletons = unqualifiedInputs.simplify + val (fileInputSimpletons, nonFileInputSimpletons) = inputSimpletons partition { + case WdlValueSimpleton(_, _: WdlFile) => true + case _ => false + } + + val initialHashes = calculateInitialHashes(nonFileInputSimpletons, fileInputSimpletons) + + val fileHashRequests = fileInputSimpletons collect { + case WdlValueSimpleton(name, x: WdlFile) => SingleFileHashRequest(jobDescriptor.key, HashKey(true, "input", s"File $name"), x, initializationData) + } + + val hashingJobActorData = CallCacheHashingJobActorData(fileHashRequests.toList, callCacheReadingJobActor) + startWith(WaitingForHashFileRequest, hashingJobActorData) + + val initialHashingResult = InitialHashingResult(initialHashes, calculateHashAggregation(initialHashes, MessageDigest.getInstance("MD5"))) + + sendToCCReadActor(initialHashingResult, hashingJobActorData) + context.parent ! initialHashingResult + + // If there is no CCRead actor, we need to send ourselves the next NextBatchOfFileHashesRequest + if (hashingJobActorData.callCacheReadingJobActor.isEmpty) self ! 
NextBatchOfFileHashesRequest + } + + private def calculateInitialHashes(nonFileInputs: Iterable[WdlValueSimpleton], fileInputs: Iterable[WdlValueSimpleton]): Set[HashResult] = { + + val commandTemplateHash = HashResult(HashKey("command template"), jobDescriptor.call.task.commandTemplateString.md5HashValue) + val backendNameHash = HashResult(HashKey("backend name"), backendName.md5HashValue) + val inputCountHash = HashResult(HashKey("input count"), (nonFileInputs.size + fileInputs.size).toString.md5HashValue) + val outputCountHash = HashResult(HashKey("output count"), jobDescriptor.call.task.outputs.size.toString.md5HashValue) + + val runtimeAttributeHashes = runtimeAttributeDefinitions map { definition => jobDescriptor.runtimeAttributes.get(definition.name) match { + case Some(_) if definition.name == RuntimeAttributesKeys.DockerKey && callCachingEligible.dockerHash.isDefined => + HashResult(HashKey(definition.usedInCallCaching, "runtime attribute", definition.name), callCachingEligible.dockerHash.get.md5HashValue) + case Some(wdlValue) => HashResult(HashKey(definition.usedInCallCaching, "runtime attribute", definition.name), wdlValue.valueString.md5HashValue) + case None => HashResult(HashKey(definition.usedInCallCaching, "runtime attribute", definition.name), UnspecifiedRuntimeAttributeHashValue) + }} + + val inputHashResults = nonFileInputs map { + case WdlValueSimpleton(name, value) => HashResult(HashKey("input", s"${value.wdlType.toWdlString} $name"), value.toWdlString.md5HashValue) + } + + val outputExpressionHashResults = jobDescriptor.call.task.outputs map { output => + HashResult(HashKey("output expression", s"${output.wdlType.toWdlString} ${output.unqualifiedName}"), output.requiredExpression.valueString.md5HashValue) + } + + // Build these all together for the final set of initial hashes: + Set(commandTemplateHash, backendNameHash, inputCountHash, outputCountHash) ++ runtimeAttributeHashes ++ inputHashResults ++ outputExpressionHashResults + } + + 
private [callcaching] def makeFileHashingActor() = { + val fileHashingActorName = s"FileHashingActor_for_${jobDescriptor.key.tag}" + context.actorOf(fileHashingActorProps, fileHashingActorName) + } +} + +object CallCacheHashingJobActor { + + def props(jobDescriptor: BackendJobDescriptor, + callCacheReadingJobActor: Option[ActorRef], + initializationData: Option[BackendInitializationData], + runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition], + backendName: String, + fileHashingActorProps: Props, + writeToCache: Boolean, + callCachingEligible: CallCachingEligible + ) = Props(new CallCacheHashingJobActor( + jobDescriptor, + callCacheReadingJobActor, + initializationData, + runtimeAttributeDefinitions, + backendName, + fileHashingActorProps, + writeToCache, + callCachingEligible + )).withDispatcher(EngineDispatcher) + + sealed trait CallCacheHashingJobActorState + case object WaitingForHashFileRequest extends CallCacheHashingJobActorState + case object HashingFiles extends CallCacheHashingJobActorState + + /** + * Aggregates hash results together in a predictable and reproducible manner. + * Purposefully takes an Iterable, because it will be sorted appropriately in this method + * to ensure deterministic result, so the type of collection doesn't matter. + * Only aggregates hash values for which checkForHitOrMiss is true. + * If several hash keys are identical, the result of this method is undefined. + */ + private def calculateHashAggregation(hashes: Iterable[HashResult], messageDigest: MessageDigest) = { + val sortedHashes = hashes.toList + .filter(_.hashKey.checkForHitOrMiss) + .sortBy(_.hashKey.key) + .map({ case HashResult(hashKey, HashValue(hashValue)) => hashKey.key + hashValue }) + .map(_.getBytes) + sortedHashes foreach messageDigest.update + DatatypeConverter.printHexBinary(messageDigest.digest()) + } + + object CallCacheHashingJobActorData { + // Slick will eventually build a prepared statement with that many parameters. 
Don't set this too high or it will stackoverflow. + val BatchSize = 100 + + def apply(fileHashRequestsRemaining: List[SingleFileHashRequest], callCacheReadingJobActor: Option[ActorRef]) = { + new CallCacheHashingJobActorData(fileHashRequestsRemaining.grouped(BatchSize).toList, List.empty, callCacheReadingJobActor) + } + } + + final case class CallCacheHashingJobActorData( + fileHashRequestsRemaining: List[List[SingleFileHashRequest]], + fileHashResults: List[HashResult], + callCacheReadingJobActor: Option[ActorRef] + ) { + private val md5Digest = MessageDigest.getInstance("MD5") + + /** + * Returns the updated state data along with an optional message to be sent back to CCRJA and parent. + */ + def withFileHash(hashResult: HashResult): (CallCacheHashingJobActorData, Option[CCHJAFileHashResponse]) = { + // Add the hash result to the list of known hash results + val newFileHashResults = hashResult +: fileHashResults + + // Returns a pair of the updated fileHashRequestsRemaining and optionally either a PartialFileHashingResult or a CompleteFileHashingResult + val (updatedRequestsList, responseMessage) = fileHashRequestsRemaining match { + case Nil => (List.empty, Option(NoFileHashesResult)) + case lastBatch :: Nil => + val updatedBatch = lastBatch.filterNot(_.hashKey == hashResult.hashKey) + // If we're processing the last batch, and it's now empty, then we're done + // In that case compute the aggregated hash and send that + if (updatedBatch.isEmpty) (List.empty, Option(CompleteFileHashingResult(newFileHashResults.toSet, calculateHashAggregation(newFileHashResults, md5Digest)))) + // Otherwise just return the updated batch and no message + else (List(updatedBatch), None) + case currentBatch :: otherBatches => + val updatedBatch = currentBatch.filterNot(_.hashKey == hashResult.hashKey) + // If the current batch is empty, we got a partial result, take the first BatchSize of the list + if (updatedBatch.isEmpty) { + // hashResult + fileHashResults.take(BatchSize - 1) -> 
BatchSize elements + val partialHashes = NonEmptyList.of[HashResult](hashResult, fileHashResults.take(BatchSize - 1): _*) + (otherBatches, Option(PartialFileHashingResult(partialHashes))) + } + // Otherwise just return the updated request list and no message + else (updatedBatch :: otherBatches, None) + } + + (this.copy(fileHashRequestsRemaining = updatedRequestsList, fileHashResults = newFileHashResults), responseMessage) + } + } + + object UnspecifiedRuntimeAttributeHashValue extends HashValue("N/A") + + sealed trait CCHJARequest + case object NextBatchOfFileHashesRequest extends CCHJARequest + + sealed trait CCHJAResponse + case class InitialHashingResult(initialHashes: Set[HashResult], aggregatedBaseHash: String) extends CCHJAResponse + + // File Hashing responses + sealed trait CCHJAFileHashResponse extends CCHJAResponse + case class PartialFileHashingResult(initialHashes: NonEmptyList[HashResult]) extends CCHJAFileHashResponse + + sealed trait FinalFileHashingResult extends CCHJAFileHashResponse + case class CompleteFileHashingResult(fileHashes: Set[HashResult], aggregatedFileHash: String) extends FinalFileHashingResult + case object NoFileHashesResult extends FinalFileHashingResult + + implicit class StringMd5er(val unhashedString: String) extends AnyVal { + def md5HashValue: HashValue = { + val hashBytes = java.security.MessageDigest.getInstance("MD5").digest(unhashedString.getBytes) + HashValue(javax.xml.bind.DatatypeConverter.printHexBinary(hashBytes)) + } + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheInvalidateActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheInvalidateActor.scala new file mode 100644 index 000000000..ebea293ea --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheInvalidateActor.scala @@ -0,0 +1,39 @@ +package cromwell.engine.workflow.lifecycle.execution.callcaching + 
+import akka.actor.{Actor, ActorLogging, Props} +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.database.sql.tables.CallCachingEntry + +import scala.concurrent.ExecutionContext +import scala.util.{Failure, Success} + +class CallCacheInvalidateActor(callCache: CallCache, cacheId: CallCachingEntryId) extends Actor with ActorLogging { + + implicit val ec: ExecutionContext = context.dispatcher + + def receiver = context.parent + + callCache.invalidate(cacheId) onComplete { + case Success(maybeEntry) => + receiver ! CallCacheInvalidatedSuccess(maybeEntry) + context.stop(self) + case Failure(t) => + receiver ! CallCacheInvalidatedFailure(t) + context.stop(self) + } + + override def receive: Receive = { + case any => log.error("Unexpected message to InvalidateCallCacheActor: " + any) + } +} + +object CallCacheInvalidateActor { + def props(callCache: CallCache, cacheId: CallCachingEntryId) = { + Props(new CallCacheInvalidateActor(callCache: CallCache, cacheId: CallCachingEntryId)).withDispatcher(EngineDispatcher) + } +} + +sealed trait CallCacheInvalidatedResponse +case class CallCacheInvalidatedSuccess(maybeEntry: Option[CallCachingEntry]) extends CallCacheInvalidatedResponse +case object CallCacheInvalidationUnnecessary extends CallCacheInvalidatedResponse +case class CallCacheInvalidatedFailure(t: Throwable) extends CallCacheInvalidatedResponse \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheReadActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheReadActor.scala index 2080ae8e8..7ee8eb693 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheReadActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheReadActor.scala @@ -1,10 +1,13 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching -import akka.actor.{Actor, 
ActorLogging, ActorRef, Props} +import akka.actor.{Actor, ActorLogging, ActorRef, Props, Status} import akka.pattern.pipe +import cats.data.NonEmptyList +import cromwell.backend.BackendJobDescriptorKey +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.core.WorkflowId import cromwell.core.callcaching.HashResult import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor._ -import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CallCacheHashes import scala.concurrent.ExecutionContext @@ -15,29 +18,54 @@ import scala.concurrent.ExecutionContext */ class CallCacheReadActor(cache: CallCache) extends Actor with ActorLogging { - implicit val ec: ExecutionContext = context.dispatcher + private implicit val ec: ExecutionContext = context.dispatcher - var requestQueue: List[RequestTuple] = List.empty - var currentRequester: Option[ActorRef] = None + private var requestQueue: List[RequestTuple] = List.empty + private var currentRequester: Option[ActorRef] = None override def receive: Receive = { - case CacheLookupRequest(callCacheHashes) => - receiveNewRequest(callCacheHashes) - case r: CallCacheReadActorResponse => - currentRequester foreach { _ ! r } + case request: CallCacheReadActorRequest => receiveNewRequest(request) + case response: CallCacheReadActorResponse => + currentRequester foreach { _ ! response } + cycleRequestQueue() + case Status.Failure(f) => + currentRequester foreach { _ ! 
CacheResultLookupFailure(new Exception(s"Call Cache query failure: ${f.getMessage}")) } cycleRequestQueue() case other => log.error("Unexpected message type to CallCacheReadActor: " + other.getClass.getSimpleName) } - private def runRequest(callCacheHashes: CallCacheHashes) = { - val response = cache.fetchMetaInfoIdsMatchingHashes(callCacheHashes) map { - CacheResultMatchesForHashes(callCacheHashes.hashes, _) - } recover { + private def runRequest(request: CallCacheReadActorRequest): Unit = { + val response = request match { + case HasMatchingInitialHashLookup(initialHash) => + cache.hasBaseAggregatedHashMatch(initialHash) map { + case true => HasMatchingEntries + case false => NoMatchingEntries + } + case HasMatchingInputFilesHashLookup(fileHashes) => + cache.hasKeyValuePairHashMatch(fileHashes) map { + case true => HasMatchingEntries + case false => NoMatchingEntries + } + case CacheLookupRequest(aggregatedCallHashes, cacheHitNumber) => + cache.callCachingHitForAggregatedHashes(aggregatedCallHashes, cacheHitNumber) map { + case Some(nextHit) => CacheLookupNextHit(nextHit) + case None => CacheLookupNoHit + } + case call @ CallCacheEntryForCall(workflowId, jobKey) => + import cromwell.core.ExecutionIndex._ + cache.cacheEntryExistsForCall(workflowId.toString, jobKey.call.fullyQualifiedName, jobKey.index.fromIndex) map { + case true => HasCallCacheEntry(call) + case false => NoCallCacheEntry(call) + } + } + + val recovered = response recover { case t => CacheResultLookupFailure(t) } - response.pipeTo(self) + recovered.pipeTo(self) + () } private def cycleRequestQueue() = requestQueue match { @@ -49,22 +77,44 @@ class CallCacheReadActor(cache: CallCache) extends Actor with ActorLogging { currentRequester = None } - private def receiveNewRequest(callCacheHashes: CallCacheHashes) = currentRequester match { - case Some(x) => requestQueue :+= RequestTuple(sender, callCacheHashes) + private def receiveNewRequest(request: CallCacheReadActorRequest): Unit = currentRequester 
match { + case Some(_) => requestQueue :+= RequestTuple(sender, request) case None => currentRequester = Option(sender) - runRequest(callCacheHashes) + runRequest(request) } } object CallCacheReadActor { - def props(callCache: CallCache): Props = Props(new CallCacheReadActor(callCache)) + def props(callCache: CallCache): Props = Props(new CallCacheReadActor(callCache)).withDispatcher(EngineDispatcher) - private[CallCacheReadActor] case class RequestTuple(requester: ActorRef, hashes: CallCacheHashes) + private[CallCacheReadActor] case class RequestTuple(requester: ActorRef, request: CallCacheReadActorRequest) - case class CacheLookupRequest(callCacheHashes: CallCacheHashes) + object AggregatedCallHashes { + def apply(baseAggregatedHash: String, inputFilesAggregatedHash: String) = { + new AggregatedCallHashes(baseAggregatedHash, Option(inputFilesAggregatedHash)) + } + } + case class AggregatedCallHashes(baseAggregatedHash: String, inputFilesAggregatedHash: Option[String]) + + sealed trait CallCacheReadActorRequest + final case class CacheLookupRequest(aggregatedCallHashes: AggregatedCallHashes, cacheHitNumber: Int) extends CallCacheReadActorRequest + final case class HasMatchingInitialHashLookup(aggregatedTaskHash: String) extends CallCacheReadActorRequest + final case class HasMatchingInputFilesHashLookup(fileHashes: NonEmptyList[HashResult]) extends CallCacheReadActorRequest + final case class CallCacheEntryForCall(workflowId: WorkflowId, jobKey: BackendJobDescriptorKey) extends CallCacheReadActorRequest sealed trait CallCacheReadActorResponse - case class CacheResultMatchesForHashes(hashResults: Set[HashResult], cacheResultIds: Set[MetaInfoId]) extends CallCacheReadActorResponse + // Responses on whether or not there is at least one matching entry (can for initial matches of file matches) + case object HasMatchingEntries extends CallCacheReadActorResponse + case object NoMatchingEntries extends CallCacheReadActorResponse + + // Responses when asking for the next 
cache hit + final case class CacheLookupNextHit(hit: CallCachingEntryId) extends CallCacheReadActorResponse + case object CacheLookupNoHit extends CallCacheReadActorResponse + + final case class HasCallCacheEntry(call: CallCacheEntryForCall) extends CallCacheReadActorResponse + final case class NoCallCacheEntry(call: CallCacheEntryForCall) extends CallCacheReadActorResponse + + // Failure Response case class CacheResultLookupFailure(reason: Throwable) extends CallCacheReadActorResponse } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheReadingJobActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheReadingJobActor.scala new file mode 100644 index 000000000..85f607952 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheReadingJobActor.scala @@ -0,0 +1,102 @@ +package cromwell.engine.workflow.lifecycle.execution.callcaching + +import akka.actor.{ActorRef, LoggingFSM, Props} +import cromwell.core.callcaching.HashingFailedMessage +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheHashingJobActor.{CompleteFileHashingResult, InitialHashingResult, NextBatchOfFileHashesRequest, NoFileHashesResult, PartialFileHashingResult} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadingJobActor._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CacheMiss, HashError} +import cromwell.core.Dispatcher.EngineDispatcher + +/** + * Receives hashes from the CallCacheHashingJobActor and makes requests to the database to determine whether or not there might be a hit + * for this job. + * + * First receives the initial hashes, and asks the database if there is at least one entry with the same aggregated initial hash. + * If not, it's a CacheMiss. 
+ * If yes, ask the CallCacheHashingJobActor for the next batch of file hashes. + * Every time a new batch of hashes is received, check against the database if at least one entry matches all those hashes. + * Keep asking for new batches until either one returns no matching entry, in which case it's a CacheMiss, or until it receives + * the last batch along with the aggregated file hash. + * In the latter case, asks the database for the first entry matching both the initial and aggregated file hash (if any). + * Sends the response to its parent. + * In case of a CacheHit, stays alive in case using the hit fails and it needs to fetch the next one. Otherwise just dies. + */ +class CallCacheReadingJobActor(callCacheReadActor: ActorRef) extends LoggingFSM[CallCacheReadingJobActorState, CCRJAData] { + + startWith(WaitingForInitialHash, CCRJANoData) + + when(WaitingForInitialHash) { + case Event(InitialHashingResult(_, aggregatedBaseHash), CCRJANoData) => + callCacheReadActor ! HasMatchingInitialHashLookup(aggregatedBaseHash) + goto(WaitingForHashCheck) using CCRJAWithData(sender(), aggregatedBaseHash, None, 1) + } + + when(WaitingForHashCheck) { + case Event(HasMatchingEntries, CCRJAWithData(hashingActor, _, _, _)) => + hashingActor ! NextBatchOfFileHashesRequest + goto(WaitingForFileHashes) + case Event(NoMatchingEntries, _) => + cacheMiss + } + + when(WaitingForFileHashes) { + case Event(PartialFileHashingResult(hashes), _) => + callCacheReadActor ! HasMatchingInputFilesHashLookup(hashes) + goto(WaitingForHashCheck) + case Event(CompleteFileHashingResult(_, aggregatedFileHash), data: CCRJAWithData) => + callCacheReadActor ! CacheLookupRequest(AggregatedCallHashes(data.initialHash, aggregatedFileHash), data.currentHitNumber) + goto(WaitingForCacheHitOrMiss) using data.withFileHash(aggregatedFileHash) + case Event(NoFileHashesResult, data: CCRJAWithData) => + callCacheReadActor ! 
CacheLookupRequest(AggregatedCallHashes(data.initialHash, None), data.currentHitNumber) + goto(WaitingForCacheHitOrMiss) + } + + when(WaitingForCacheHitOrMiss) { + case Event(CacheLookupNextHit(hit), data: CCRJAWithData) => + context.parent ! CacheHit(hit) + stay() using data.increment + case Event(CacheLookupNoHit, _) => + cacheMiss + case Event(NextHit, CCRJAWithData(_, aggregatedInitialHash, aggregatedFileHash, currentHitNumber)) => + callCacheReadActor ! CacheLookupRequest(AggregatedCallHashes(aggregatedInitialHash, aggregatedFileHash), currentHitNumber) + stay() + } + + whenUnhandled { + case Event(_: HashingFailedMessage, _) => + // No need to send to the parent since it also receives file hash updates + cacheMiss + case Event(CacheResultLookupFailure(failure), _) => + context.parent ! HashError(failure) + cacheMiss + } + + def cacheMiss = { + context.parent ! CacheMiss + context stop self + stay() + } +} + +object CallCacheReadingJobActor { + + def props(callCacheReadActor: ActorRef) = { + Props(new CallCacheReadingJobActor(callCacheReadActor)).withDispatcher(EngineDispatcher) + } + + sealed trait CallCacheReadingJobActorState + case object WaitingForInitialHash extends CallCacheReadingJobActorState + case object WaitingForHashCheck extends CallCacheReadingJobActorState + case object WaitingForFileHashes extends CallCacheReadingJobActorState + case object WaitingForCacheHitOrMiss extends CallCacheReadingJobActorState + + sealed trait CCRJAData + case object CCRJANoData extends CCRJAData + case class CCRJAWithData(hashingActor: ActorRef, initialHash: String, fileHash: Option[String], currentHitNumber: Int) extends CCRJAData { + def increment = this.copy(currentHitNumber = currentHitNumber + 1) + def withFileHash(aggregatedFileHash: String) = this.copy(fileHash = Option(aggregatedFileHash)) + } + + case object NextHit +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala 
b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala index f0e9c0186..8e7aad74e 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala @@ -1,38 +1,73 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching -import akka.actor.{Actor, ActorLogging, Props} -import cromwell.backend.BackendJobExecutionActor -import cromwell.backend.BackendJobExecutionActor.SucceededResponse -import cromwell.core.WorkflowId -import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CallCacheHashes +import akka.actor.{ActorRef, LoggingFSM, Props} +import cats.instances.list._ +import cats.instances.tuple._ +import cats.syntax.foldable._ +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.core.actor.BatchingDbWriter._ +import cromwell.core.actor.{BatchingDbWriter, BatchingDbWriterActor} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCache.CallCacheHashBundle +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheWriteActor.{SaveCallCacheHashes, _} import scala.concurrent.ExecutionContext -import scala.util.{Failure, Success} +import scala.concurrent.duration._ +import scala.language.postfixOps -case class CallCacheWriteActor(callCache: CallCache, workflowId: WorkflowId, callCacheHashes: CallCacheHashes, succeededResponse: BackendJobExecutionActor.SucceededResponse) extends Actor with ActorLogging { +case class CallCacheWriteActor(callCache: CallCache) extends LoggingFSM[BatchingDbWriterState, BatchingDbWriter.BatchingDbWriterData] with BatchingDbWriterActor { implicit val ec: ExecutionContext = context.dispatcher + + override val dbFlushRate = CallCacheWriteActor.dbFlushRate - def receiver = context.parent + startWith(WaitingToWrite, NoData) - 
callCache.addToCache(workflowId, callCacheHashes, succeededResponse) onComplete { - case Success(_) => - receiver ! CallCacheWriteSuccess - context.stop(self) - case Failure(t) => - receiver ! CallCacheWriteFailure(t) - context.stop(self) + when(WaitingToWrite) { + case Event(command: SaveCallCacheHashes, curData) => + curData.addData(CommandAndReplyTo(command, sender)) match { + case newData: HasData[_] if newData.length >= dbBatchSize => goto(WritingToDb) using newData + case newData => stay() using newData + } + case Event(ScheduledFlushToDb, _) => + log.debug("Initiating periodic call cache flush to DB") + goto(WritingToDb) } - override def receive = { - case any => log.error("Unexpected message to CallCacheWriteActor: " + any) + when(WritingToDb) { + case Event(ScheduledFlushToDb, _) => stay + case Event(command: SaveCallCacheHashes, curData) => stay using curData.addData(CommandAndReplyTo(command, sender)) + case Event(FlushBatchToDb, NoData) => + log.debug("Attempted call cache hash set flush to DB but had nothing to write") + goto(WaitingToWrite) + case Event(FlushBatchToDb, HasData(data)) => + log.debug("Flushing {} call cache hashes sets to the DB", data.length) + + // Collect all the bundles of hashes that should be written and all the senders which should be informed of + // success or failure. + val (bundles, replyTos) = data.foldMap { case CommandAndReplyTo(s: SaveCallCacheHashes, r: ActorRef) => (List(s.bundle), List(r)) } + if (bundles.nonEmpty) { + val futureMessage = callCache.addToCache(bundles, dbBatchSize) map { _ => CallCacheWriteSuccess } recover { case t => CallCacheWriteFailure(t) } + futureMessage map { message => + replyTos foreach { _ ! message } + self ! 
DbWriteComplete + } + } + stay using NoData + case Event(DbWriteComplete, _) => + log.debug("Flush of cache data complete") + goto(WaitingToWrite) } } object CallCacheWriteActor { - def props(callCache: CallCache, workflowId: WorkflowId, callCacheHashes: CallCacheHashes, succeededResponse: SucceededResponse): Props = - Props(CallCacheWriteActor(callCache, workflowId, callCacheHashes, succeededResponse)) + def props(callCache: CallCache): Props = Props(CallCacheWriteActor(callCache)).withDispatcher(EngineDispatcher) + + case class SaveCallCacheHashes(bundle: CallCacheHashBundle) + + val dbBatchSize = 100 + val dbFlushRate = 3 seconds } -case object CallCacheWriteSuccess -case class CallCacheWriteFailure(t: Throwable) +sealed trait CallCacheWriteResponse +case object CallCacheWriteSuccess extends CallCacheWriteResponse +case class CallCacheWriteFailure(t: Throwable) extends CallCacheWriteResponse diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala index 0f03a5840..94933ba73 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala @@ -1,263 +1,144 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching -import akka.actor.{ActorLogging, ActorRef, LoggingFSM, Props} -import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest +import akka.actor.{Actor, ActorLogging, ActorRef, Props} import cromwell.backend.{BackendInitializationData, BackendJobDescriptor, RuntimeAttributeDefinition} +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.core.WorkflowId import cromwell.core.callcaching._ -import cromwell.core.simpleton.WdlValueSimpleton -import 
cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor.{CacheLookupRequest, CacheResultLookupFailure, CacheResultMatchesForHashes} +import cromwell.core.logging.JobLogging +import cromwell.engine.workflow.lifecycle.execution.CallMetadataHelper +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheHashingJobActor.{CompleteFileHashingResult, FinalFileHashingResult, InitialHashingResult, NoFileHashesResult} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadingJobActor.NextHit import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor._ -import wdl4s.values.WdlFile +import cromwell.services.metadata.CallMetadataKeys /** + * Coordinates the CallCacheHashingJobActor and the CallCacheReadingJobActor. * Over time will emit up to two messages back to its parent: * * (if read enabled): Either a CacheHit(id) or CacheMiss message * * (if write enabled): A CallCacheHashes(hashes) message */ -case class EngineJobHashingActor(receiver: ActorRef, - jobDescriptor: BackendJobDescriptor, - initializationData: Option[BackendInitializationData], - fileHashingActor: ActorRef, - callCacheReadActor: ActorRef, - runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition], - backendName: String, - activity: CallCachingActivity) extends LoggingFSM[EJHAState, EJHAData] with ActorLogging { - - initializeEJHA() - - when(DeterminingHitOrMiss) { - case Event(hashResultMessage: SuccessfulHashResultMessage, _) => - // This is in DeterminingHitOrMiss, so use the new hash results to search for cache results. - // Also update the state data with these new hash results. 
- val newHashResults = hashResultMessage.hashes - findCacheResults(newHashResults) - updateStateDataWithNewHashResultsAndTransition(newHashResults) - case Event(newCacheResults: CacheResultMatchesForHashes, _) => - checkWhetherHitOrMissIsKnownThenTransition(stateData.intersectCacheResults(newCacheResults)) - } - - when(GeneratingAllHashes) { - case Event(hashResultMessage: SuccessfulHashResultMessage, _) => checkWhetherAllHashesAreKnownAndTransition(stateData.withNewKnownHashes(hashResultMessage.hashes)) - case Event(CacheResultMatchesForHashes(_, _), _) => stay // Don't care; we already know the hit/miss status. Ignore this message +class EngineJobHashingActor(receiver: ActorRef, + override val serviceRegistryActor: ActorRef, + jobDescriptor: BackendJobDescriptor, + initializationData: Option[BackendInitializationData], + fileHashingActorProps: Props, + callCacheReadingJobActorProps: Props, + runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition], + backendName: String, + activity: CallCachingActivity, + callCachingEligible: CallCachingEligible) extends Actor with ActorLogging with JobLogging with CallMetadataHelper { + + override val jobTag = jobDescriptor.key.tag + override val workflowId = jobDescriptor.workflowDescriptor.id + override val workflowIdForCallMetadata: WorkflowId = workflowId + + private [callcaching] var initialHash: Option[InitialHashingResult] = None + + private [callcaching] val callCacheReadingJobActor = if (activity.readFromCache) { + Option(context.actorOf(callCacheReadingJobActorProps)) + } else None + + override def preStart(): Unit = { + context.actorOf(CallCacheHashingJobActor.props( + jobDescriptor, + callCacheReadingJobActor, + initializationData, + runtimeAttributeDefinitions, + backendName, + fileHashingActorProps, + activity.writeToCache, + callCachingEligible + )) + super.preStart() } - whenUnhandled { - case Event(CacheResultLookupFailure(reason), _) => - receiver ! 
HashError(new Exception(s"Failure looking up call cache results: ${reason.getMessage}")) - context.stop(self) - stay - case Event(HashingFailedMessage(hashKey, reason), _) => - receiver ! HashError(new Exception(s"Unable to generate ${hashKey.key} hash. Caused by ${reason.getMessage}", reason)) - context.stop(self) - stay - case Event(other, _) => - log.error(s"Bad message in $stateName with $stateData: $other") - stay + override def receive = { + case initialHashResult: InitialHashingResult => initialHash = Option(initialHashResult) + case finalFileHashResult: FinalFileHashingResult => sendHashes(finalFileHashResult) + case CacheMiss => receiver ! CacheMiss + case hit: CacheHit => receiver ! hit + case NextHit => + callCacheReadingJobActor match { + case Some(readActor) => + readActor ! NextHit + case None => + failAndStop(new IllegalStateException("Requested cache hit but there is no cache read actor")) + } + case hashingFailed: HashingFailedMessage => + failAndStop(hashingFailed.reason) + case unexpected => + jobLogger.error(s"Received unexpected event $unexpected") } - onTransition { - case fromState -> toState => - log.debug("Transitioning from {}({}) to {}({})", fromState, stateData, toState, nextStateData) + private def publishHashFailure(failure: Throwable) = { + import cromwell.services.metadata.MetadataService._ + val failureAsEvents = throwableToMetadataEvents(metadataKeyForCall(jobDescriptor.key, CallMetadataKeys.CallCachingKeys.HashFailuresKey), failure) + serviceRegistryActor ! 
PutMetadataAction(failureAsEvents) } - private def initializeEJHA() = { - - import cromwell.core.simpleton.WdlValueSimpleton._ - - val inputSimpletons = jobDescriptor.inputs.simplify - val (fileInputSimpletons, nonFileInputSimpletons) = inputSimpletons partition { - case WdlValueSimpleton(_, f: WdlFile) => true - case _ => false - } - - val initialHashes = calculateInitialHashes(nonFileInputSimpletons, fileInputSimpletons) - - val fileContentHashesNeeded = fileInputSimpletons collect { - case WdlValueSimpleton(name, x: WdlFile) => SingleFileHashRequest(jobDescriptor.key, HashKey(s"input: File $name"), x, initializationData) - } - - val hashesNeeded: Set[HashKey] = initialHashes.map(_.hashKey) ++ fileContentHashesNeeded.map(_.hashKey) - - val initialState = if (activity.readFromCache) DeterminingHitOrMiss else GeneratingAllHashes - val initialData = EJHAData(hashesNeeded, activity) - - startWith(initialState, initialData) - - // Submit the set of initial hashes for checking against the DB: - self ! EJHAInitialHashingResults(initialHashes) - // Find the hashes for all input files: - fileContentHashesNeeded.foreach(fileHashingActor ! _) + private def failAndStop(reason: Throwable) = { + publishHashFailure(reason) + receiver ! 
HashError(reason) + context stop self } - private def calculateInitialHashes(nonFileInputs: Iterable[WdlValueSimpleton], fileInputs: Iterable[WdlValueSimpleton]): Set[HashResult] = { - - val commandTemplateHash = HashResult(HashKey("command template"), jobDescriptor.call.task.commandTemplateString.md5HashValue) - val backendNameHash = HashResult(HashKey("backend name"), backendName.md5HashValue) - val inputCountHash = HashResult(HashKey("input count"), (nonFileInputs.size + fileInputs.size).toString.md5HashValue) - val outputCountHash = HashResult(HashKey("output count"), jobDescriptor.call.task.outputs.size.toString.md5HashValue) - - val runtimeAttributeHashes = runtimeAttributeDefinitions map { definition => jobDescriptor.runtimeAttributes.get(definition.name) match { - case Some(wdlValue) => HashResult(HashKey("runtime attribute: " + definition.name, definition.usedInCallCaching), wdlValue.valueString.md5HashValue) - case None => HashResult(HashKey("runtime attribute: " + definition.name, definition.usedInCallCaching), UnspecifiedRuntimeAttributeHashValue) - }} - - val inputHashResults = nonFileInputs map { - case WdlValueSimpleton(name, value) => HashResult(HashKey(s"input: ${value.wdlType.toWdlString} $name"), value.toWdlString.md5HashValue) - } - - val outputExpressionHashResults = jobDescriptor.call.task.outputs map { output => - HashResult(HashKey(s"output expression: ${output.wdlType.toWdlString} ${output.name}"), output.requiredExpression.valueString.md5HashValue) + private def sendHashes(finalFileHashingResult: FinalFileHashingResult) = { + val fileHashes = finalFileHashingResult match { + case CompleteFileHashingResult(fileHashResults, aggregatedFileHash) => + Option(FileHashes(fileHashResults, aggregatedFileHash)) + case NoFileHashesResult => None } - // Build these all together for the final set of initial hashes: - Set(commandTemplateHash, backendNameHash, inputCountHash, outputCountHash) ++ runtimeAttributeHashes ++ inputHashResults ++ 
outputExpressionHashResults - } - - private def checkWhetherHitOrMissIsKnownThenTransition(newData: EJHAData) = { - if (newData.isDefinitelyCacheHitOrMiss) { - respondWithHitOrMissThenTransition(newData) - } - else { - stay() using newData // Stay in DeterminingHitOrMiss + initialHash match { + case Some(initData) => + receiver ! CallCacheHashes(initData.initialHashes, initData.aggregatedBaseHash, fileHashes) + case None => + failAndStop(new IllegalStateException("Received file hashes without initial hash.")) } } +} - private def respondWithHitOrMissThenTransition(newData: EJHAData) = { - val hitOrMissResponse: EJHAResponse = newData.cacheHit map CacheHit getOrElse CacheMiss - - receiver ! hitOrMissResponse - if (!activity.writeToCache) { - context.stop(self) - stay - } else { - checkWhetherAllHashesAreKnownAndTransition(newData) - } - } +object EngineJobHashingActor { + sealed trait EJHAState + case object Running extends EJHAState + case object WaitingForHashes extends EJHAState + case object WaitingForJobSuccess extends EJHAState + case object Done extends EJHAState - private def checkWhetherAllHashesAreKnownAndTransition(newData: EJHAData) = { - if (newData.allHashesKnown) { - receiver ! CallCacheHashes(newData.hashesKnown) - context.stop(self) - } - goto(GeneratingAllHashes) using newData + object EJHAData { + def empty = EJHAData(None) } - /** - * Needs to convert a hash result into the set of CachedResults which are consistent with it - */ - private def findCacheResults(hashResults: Set[HashResult]) = { - val filtered = hashResults.filter(_.hashKey.checkForHitOrMiss) + case class EJHAData(initialHash: Option[InitialHashingResult]) - if (filtered.nonEmpty) { - val hashes = CallCacheHashes(filtered) - val subsets = hashes.hashes.grouped(100) - subsets foreach { subset => - callCacheReadActor ! 
CacheLookupRequest(CallCacheHashes(subset)) - } - } else () - } - - def updateStateDataWithNewHashResultsAndTransition(hashResults: Set[HashResult]) = { - if (activity.writeToCache) { - val newData = stateData.withNewKnownHashes(hashResults) - if (newData.isDefinitelyCacheHitOrMiss) { - log.info("New hash results, hit or miss already known (none are cache-checked. Checking if we're done...)") - checkWhetherAllHashesAreKnownAndTransition(newData) - } else { - stay using newData - } - } else { - stay using stateData - } + sealed trait EJHAResponse + case object CacheMiss extends EJHAResponse + case class CacheHit(cacheResultId: CallCachingEntryId) extends EJHAResponse + case class HashError(reason: Throwable) extends EJHAResponse + case class FileHashes(hashes: Set[HashResult], aggregatedHash: String) + case class CallCacheHashes(initialHashes: Set[HashResult], aggregatedInitialHash: String, fileHashes: Option[FileHashes]) extends EJHAResponse { + val hashes = initialHashes ++ fileHashes.map(_.hashes).getOrElse(Set.empty) } -} - -object EngineJobHashingActor { def props(receiver: ActorRef, + serviceRegistryActor: ActorRef, jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], - fileHashingActor: ActorRef, - callCacheReadActor: ActorRef, + fileHashingActorProps: Props, + callCacheReadingJobActorProps: Props, runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition], backendName: String, - activity: CallCachingActivity): Props = Props(new EngineJobHashingActor( - receiver = receiver, - jobDescriptor = jobDescriptor, - initializationData = initializationData, - fileHashingActor = fileHashingActor, - callCacheReadActor = callCacheReadActor, - runtimeAttributeDefinitions = runtimeAttributeDefinitions, - backendName = backendName, - activity = activity)) - - private[callcaching] case class EJHAInitialHashingResults(hashes: Set[HashResult]) extends SuccessfulHashResultMessage - private[callcaching] case object 
CheckWhetherAllHashesAreKnown - - sealed trait EJHAState - case object DeterminingHitOrMiss extends EJHAState - case object GeneratingAllHashes extends EJHAState - - sealed trait EJHAResponse - case class CacheHit(cacheResultId: MetaInfoId) extends EJHAResponse - case object CacheMiss extends EJHAResponse - case class HashError(t: Throwable) extends EJHAResponse { - override def toString = s"HashError(${t.getMessage})" - } - case class CallCacheHashes(hashes: Set[HashResult]) extends EJHAResponse - object UnspecifiedRuntimeAttributeHashValue extends HashValue("N/A") - - implicit class StringMd5er(unhashedString: String) { - def md5HashValue: HashValue = { - val hashBytes = java.security.MessageDigest.getInstance("MD5").digest(unhashedString.getBytes) - HashValue(javax.xml.bind.DatatypeConverter.printHexBinary(hashBytes)) - } - } -} - -/** - * Transient data for the EJHA. - * - * @param possibleCacheResults The set of cache results which have matched all currently tried hashes - * @param remainingCacheChecks The set of hash keys which have not yet had their cache results fetched - * @param hashesKnown The set of all hashes calculated so far (including initial hashes) - * @param remainingHashesNeeded The set of hashes which are still needed for writing to the database - */ -private[callcaching] case class EJHAData(possibleCacheResults: Option[Set[MetaInfoId]], - remainingCacheChecks: Set[HashKey], - hashesKnown: Set[HashResult], - remainingHashesNeeded: Set[HashKey]) { - // Manipulators - def intersectCacheResults(newCacheResults: CacheResultMatchesForHashes) = { - val newIds = newCacheResults.cacheResultIds - val intersectedIds = possibleCacheResults match { - case None => newIds - case Some(currentCacheResults) => currentCacheResults.intersect(newIds) - } - this.copy( - possibleCacheResults = Option(intersectedIds), - remainingCacheChecks = remainingCacheChecks.diff(newCacheResults.hashResults.map(_.hashKey)) - ) - } - def withNewKnownHashes(hashResults: 
Set[HashResult]) = { - this.copy( - hashesKnown = hashesKnown ++ hashResults, - remainingHashesNeeded = remainingHashesNeeded.diff(hashResults.map(_.hashKey))) - } - - // Queries - def allHashesKnown = remainingHashesNeeded.isEmpty - def allCacheResultsIntersected = remainingCacheChecks.isEmpty - def cacheHit = if (allCacheResultsIntersected) possibleCacheResults flatMap { _.headOption } else None - def isDefinitelyCacheHit = cacheHit.isDefined - def isDefinitelyCacheMiss = possibleCacheResults.exists(_.isEmpty) - def isDefinitelyCacheHitOrMiss = isDefinitelyCacheHit || isDefinitelyCacheMiss -} - -private[callcaching] object EJHAData { - def apply(hashesNeeded: Set[HashKey], activity: CallCachingActivity): EJHAData = EJHAData( - None, - if (activity.readFromCache) hashesNeeded.filter(_.checkForHitOrMiss) else Set.empty, - Set.empty, - if (activity.writeToCache) hashesNeeded else Set.empty) + activity: CallCachingActivity, + callCachingEligible: CallCachingEligible): Props = Props(new EngineJobHashingActor( + receiver = receiver, + serviceRegistryActor = serviceRegistryActor, + jobDescriptor = jobDescriptor, + initializationData = initializationData, + fileHashingActorProps = fileHashingActorProps, + callCacheReadingJobActorProps = callCacheReadingJobActorProps, + runtimeAttributeDefinitions = runtimeAttributeDefinitions, + backendName = backendName, + activity = activity, + callCachingEligible = callCachingEligible)).withDispatcher(EngineDispatcher) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/FetchCachedResultsActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/FetchCachedResultsActor.scala index c8f6be4a2..194561b8c 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/FetchCachedResultsActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/FetchCachedResultsActor.scala @@ -2,36 +2,36 @@ package 
cromwell.engine.workflow.lifecycle.execution.callcaching import akka.actor.{Actor, ActorLogging, ActorRef, Props} import cromwell.Simpletons._ +import cromwell.core.Dispatcher.EngineDispatcher import cromwell.core.simpleton.WdlValueSimpleton -import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CacheHit +import cromwell.database.sql.SqlConverters._ import cromwell.engine.workflow.lifecycle.execution.callcaching.FetchCachedResultsActor.{CachedOutputLookupFailed, CachedOutputLookupSucceeded} import scala.concurrent.ExecutionContext import scala.util.{Failure, Success} object FetchCachedResultsActor { - def props(cacheHit: CacheHit, replyTo: ActorRef, callCache: CallCache): Props = - Props(new FetchCachedResultsActor(cacheHit, replyTo, callCache)) + def props(callCachingEntryId: CallCachingEntryId, replyTo: ActorRef, callCache: CallCache): Props = + Props(new FetchCachedResultsActor(callCachingEntryId, replyTo, callCache)).withDispatcher(EngineDispatcher) sealed trait CachedResultResponse - case class CachedOutputLookupFailed(metaInfoId: MetaInfoId, failure: Throwable) extends CachedResultResponse + case class CachedOutputLookupFailed(callCachingEntryId: CallCachingEntryId, failure: Throwable) extends CachedResultResponse case class CachedOutputLookupSucceeded(simpletons: Seq[WdlValueSimpleton], callOutputFiles: Map[String,String], - returnCode: Option[Int], cacheHit: CacheHit, cacheHitDetails: String) extends CachedResultResponse + returnCode: Option[Int], cacheHit: CallCachingEntryId, cacheHitDetails: String) extends CachedResultResponse } -class FetchCachedResultsActor(cacheHit: CacheHit, replyTo: ActorRef, callCache: CallCache) +class FetchCachedResultsActor(cacheResultId: CallCachingEntryId, replyTo: ActorRef, callCache: CallCache) extends Actor with ActorLogging { { implicit val ec: ExecutionContext = context.dispatcher - val cacheResultId = cacheHit.cacheResultId callCache.fetchCachedResult(cacheResultId) onComplete { case 
Success(Some(result)) => val simpletons = result.callCachingSimpletonEntries map toSimpleton val jobDetritusFiles = result.callCachingDetritusEntries map { jobDetritusEntry => - jobDetritusEntry.detritusKey -> jobDetritusEntry.detritusValue + jobDetritusEntry.detritusKey -> jobDetritusEntry.detritusValue.toRawString } val sourceCacheDetails = Seq(result.callCachingEntry.workflowExecutionUuid, result.callCachingEntry.callFullyQualifiedName, @@ -39,7 +39,7 @@ class FetchCachedResultsActor(cacheHit: CacheHit, replyTo: ActorRef, callCache: replyTo ! CachedOutputLookupSucceeded(simpletons, jobDetritusFiles.toMap, result.callCachingEntry.returnCode, - cacheHit, sourceCacheDetails) + cacheResultId, sourceCacheDetails) case Success(None) => val reason = new RuntimeException(s"Cache hit vanished between discovery and retrieval: $cacheResultId") replyTo ! CachedOutputLookupFailed(cacheResultId, reason) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala index 1d3eedd9f..d44006f7c 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala @@ -1,10 +1,12 @@ package cromwell.engine.workflow.lifecycle -import wdl4s._ +import akka.actor.ActorRef +import wdl4s.wdl._ -package object execution { - def splitFqn(fullyQualifiedName: FullyQualifiedName): (String, String) = { - val lastIndex = fullyQualifiedName.lastIndexOf(".") - (fullyQualifiedName.substring(0, lastIndex), fullyQualifiedName.substring(lastIndex + 1)) - } +package execution { + + import cromwell.core.CallKey + + final case class JobRunning(key: CallKey, inputs: EvaluatedTaskInputs, executionActor: Option[ActorRef]) + final case class JobStarting(callKey: CallKey) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/CallPreparation.scala 
b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/CallPreparation.scala new file mode 100644 index 000000000..3ec044208 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/CallPreparation.scala @@ -0,0 +1,45 @@ +package cromwell.engine.workflow.lifecycle.execution.preparation + +import akka.actor.Props +import cromwell.backend.BackendJobDescriptor +import cromwell.core.{CallKey, JobKey} +import cromwell.engine.EngineWorkflowDescriptor +import cromwell.engine.workflow.lifecycle.execution.OutputStore +import wdl4s.wdl.exception.VariableLookupException +import wdl4s.wdl.expression.WdlStandardLibraryFunctions +import wdl4s.wdl.values.WdlValue +import wdl4s.wdl.{Declaration, Scatter} + +import scala.util.{Failure, Try} + +object CallPreparation { + sealed trait CallPreparationActorCommands + case object Start extends CallPreparationActorCommands + + trait CallPreparationActorResponse + + case class BackendJobPreparationSucceeded(jobDescriptor: BackendJobDescriptor, bjeaProps: Props) extends CallPreparationActorResponse + + case class JobCallPreparationFailed(jobKey: JobKey, throwable: Throwable) extends CallPreparationActorResponse + case class CallPreparationFailed(jobKey: JobKey, throwable: Throwable) extends CallPreparationActorResponse + + def resolveAndEvaluateInputs(callKey: CallKey, + workflowDescriptor: EngineWorkflowDescriptor, + expressionLanguageFunctions: WdlStandardLibraryFunctions, + outputStore: OutputStore): Try[Map[Declaration, WdlValue]] = { + val call = callKey.scope + val scatterMap = callKey.index flatMap { i => + // Will need update for nested scatters + call.ancestry collectFirst { case s: Scatter => Map(s -> i) } + } getOrElse Map.empty[Scatter, Int] + + call.evaluateTaskInputs( + workflowDescriptor.backendDescriptor.knownValues, + expressionLanguageFunctions, + outputStore.fetchNodeOutputEntries, + scatterMap + ) recoverWith { + case t: Throwable => 
Failure[Map[Declaration, WdlValue]](new VariableLookupException(s"Couldn't resolve all inputs for ${callKey.scope.fullyQualifiedName} at index ${callKey.index}.", List(t))) + } + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationActor.scala new file mode 100644 index 000000000..7b3a57335 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationActor.scala @@ -0,0 +1,212 @@ +package cromwell.engine.workflow.lifecycle.execution.preparation + +import akka.actor.{ActorRef, FSM, Props} +import cromwell.backend._ +import cromwell.backend.validation.{DockerValidation, RuntimeAttributesKeys} +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.core.callcaching._ +import cromwell.core.logging.WorkflowLogging +import cromwell.docker.DockerHashActor.DockerHashSuccessResponse +import cromwell.docker._ +import cromwell.engine.workflow.WorkflowDockerLookupActor.{WorkflowDockerLookupFailure, WorkflowDockerTerminalFailure} +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActorData +import cromwell.engine.workflow.lifecycle.execution.preparation.CallPreparation._ +import cromwell.engine.workflow.lifecycle.execution.preparation.JobPreparationActor._ +import cromwell.services.keyvalue.KeyValueServiceActor.{KvGet, KvJobKey, KvResponse, ScopedKey} +import wdl4s.wdl._ +import wdl4s.wdl.values.WdlValue + +import scala.concurrent.duration._ +import scala.language.postfixOps +import scala.util.{Failure, Success, Try} + +class JobPreparationActor(executionData: WorkflowExecutionActorData, + jobKey: BackendJobDescriptorKey, + factory: BackendLifecycleActorFactory, + val workflowDockerLookupActor: ActorRef, + initializationData: Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + ioActor: ActorRef, + 
backendSingletonActor: Option[ActorRef]) + extends FSM[JobPreparationActorState, JobPreparationActorData] with WorkflowLogging { + + override lazy val workflowIdForLogging = workflowDescriptor.id + + private[preparation] lazy val noResponseTimeout: FiniteDuration = 3 minutes + + private lazy val workflowDescriptor = executionData.workflowDescriptor + private[preparation] lazy val expressionLanguageFunctions = factory.expressionLanguageFunctions(workflowDescriptor.backendDescriptor, jobKey, initializationData) + private[preparation] lazy val dockerHashCredentials = factory.dockerHashCredentials(initializationData) + private[preparation] lazy val runtimeAttributeDefinitions = factory.runtimeAttributeDefinitions(initializationData) + private[preparation] lazy val hasDockerDefinition = runtimeAttributeDefinitions.exists(_.name == DockerValidation.instance.key) + + startWith(Idle, JobPreparationActorNoData) + + when(Idle) { + case Event(Start, JobPreparationActorNoData) => + val keyLookupRequests = kvStoreKeysToPrefetch + if (keyLookupRequests.nonEmpty) { + lookupKeyValueEntries(keyLookupRequests) + } else { + evaluateInputsAndFetchDockerHashes(KeyValueLookupResults(Map.empty)) + } + } + + when(FetchingKeyValueStoreEntries) { + case Event(kvResponse: KvResponse, JobPreparationKeyLookupData(keyLookups)) => + keyLookups.withResponse(kvResponse.key, kvResponse) match { + case newPartialLookup: PartialKeyValueLookups => stay using JobPreparationKeyLookupData(newPartialLookup) + case finished: KeyValueLookupResults => evaluateInputsAndFetchDockerHashes(finished) + } + } + + when(WaitingForDockerHash) { + case Event(DockerHashSuccessResponse(dockerHash, _), data: JobPreparationHashLookupData) => + handleDockerHashSuccess(dockerHash, data) + case Event(WorkflowDockerLookupFailure(reason, _), data: JobPreparationHashLookupData) => + workflowLogger.warn("Docker lookup failed", reason) + handleDockerHashFailed(data) + case Event(WorkflowDockerTerminalFailure(reason, _), _: 
JobPreparationHashLookupData) => + sendFailureAndStop(reason) + } + + whenUnhandled { + case Event(unexpectedMessage, _) => + workflowLogger.warn(s"JobPreparation actor received an unexpected message in state $stateName: $unexpectedMessage") + stay() + } + + private[preparation] lazy val kvStoreKeysToPrefetch = factory.requestedKeyValueStoreKeys + private[preparation] def scopedKey(key: String) = ScopedKey(workflowDescriptor.id, KvJobKey(jobKey), key) + private[preparation] def lookupKeyValueEntries(lookups: Seq[String]) = { + val keysToLookup: Seq[ScopedKey] = lookups map scopedKey + keysToLookup foreach { serviceRegistryActor ! KvGet(_) } + goto(FetchingKeyValueStoreEntries) using JobPreparationKeyLookupData(PartialKeyValueLookups(Map.empty, keysToLookup)) + } + + private [preparation] def evaluateInputsAndAttributes = { + for { + evaluatedInputs <- resolveAndEvaluateInputs(jobKey, workflowDescriptor, expressionLanguageFunctions, executionData.outputStore) + runtimeAttributes <- prepareRuntimeAttributes(evaluatedInputs) + } yield (evaluatedInputs, runtimeAttributes) + } + + private def evaluateInputsAndFetchDockerHashes(kvStoreLookupResults: KeyValueLookupResults) = { + evaluateInputsAndAttributes match { + case Success((inputs, attributes)) => fetchDockerHashes(kvStoreLookupResults, inputs, attributes) + case Failure(failure) => sendFailureAndStop(failure) + } + } + + private def fetchDockerHashes(kvStoreLookupResults: KeyValueLookupResults, inputs: Map[Declaration, WdlValue], attributes: Map[LocallyQualifiedName, WdlValue]) = { + def sendDockerRequest(dockerImageId: DockerImageIdentifierWithoutHash) = { + val dockerHashRequest = DockerHashRequest(dockerImageId, dockerHashCredentials) + val newData = JobPreparationHashLookupData(kvStoreLookupResults, dockerHashRequest, inputs, attributes) + workflowDockerLookupActor ! 
dockerHashRequest + goto(WaitingForDockerHash) using newData + } + + def handleDockerValue(value: String) = DockerImageIdentifier.fromString(value) match { + case Success(dockerImageId: DockerImageIdentifierWithoutHash) if hasDockerDefinition => sendDockerRequest(dockerImageId) + case Success(_: DockerImageIdentifierWithoutHash) if !hasDockerDefinition => + // If the backend doesn't support docker - no need to lookup and we're ok for call caching + val response = prepareBackendDescriptor(inputs, attributes, NoDocker, kvStoreLookupResults.unscoped) + sendResponseAndStop(response) + case Success(dockerImageId: DockerImageIdentifierWithHash) => + // If the docker value already has a hash - no need to lookup and we're ok for call caching + val response = prepareBackendDescriptor(inputs, attributes, DockerWithHash(dockerImageId.fullName), kvStoreLookupResults.unscoped) + sendResponseAndStop(response) + case Failure(failure) => sendFailureAndStop(failure) + } + + attributes.get(RuntimeAttributesKeys.DockerKey) match { + case Some(dockerValue) => handleDockerValue(dockerValue.valueString) + case None => + // If there is no docker attribute at all - we're ok for call caching + val response = prepareBackendDescriptor(inputs, attributes, NoDocker, kvStoreLookupResults.unscoped) + sendResponseAndStop(response) + } + } + + private def handleDockerHashSuccess(dockerHashResult: DockerHashResult, data: JobPreparationHashLookupData) = { + val hashValue = data.dockerHashRequest.dockerImageID.withHash(dockerHashResult) + val response = prepareBackendDescriptor(data.inputs, data.attributes, DockerWithHash(hashValue.fullName), data.keyLookupResults.unscoped) + sendResponseAndStop(response) + } + + private def handleDockerHashFailed(data: JobPreparationHashLookupData) = { + val floatingDockerTag = data.dockerHashRequest.dockerImageID.fullName + val response = prepareBackendDescriptor(data.inputs, data.attributes, FloatingDockerTagWithoutHash(floatingDockerTag), 
data.keyLookupResults.unscoped) + sendResponseAndStop(response) + } + + private def sendResponseAndStop(response: CallPreparationActorResponse) = { + context.parent ! response + stay() + } + + private def sendFailureAndStop(failure: Throwable) = { + sendResponseAndStop(CallPreparationFailed(jobKey, failure)) + } + + // 'jobExecutionProps' is broken into a separate function for TestJobPreparationActor to override: + private[preparation] def jobExecutionProps(jobDescriptor: BackendJobDescriptor, + initializationData: Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + ioActor: ActorRef, + backendSingletonActor: Option[ActorRef]) = factory.jobExecutionActorProps(jobDescriptor, initializationData, serviceRegistryActor, ioActor, backendSingletonActor) + + private[preparation] def prepareBackendDescriptor(inputEvaluation: Map[Declaration, WdlValue], + runtimeAttributes: Map[LocallyQualifiedName, WdlValue], + maybeCallCachingEligible: MaybeCallCachingEligible, + prefetchedJobStoreEntries: Map[String, KvResponse]): BackendJobPreparationSucceeded = { + val jobDescriptor = BackendJobDescriptor(workflowDescriptor.backendDescriptor, jobKey, runtimeAttributes, inputEvaluation, maybeCallCachingEligible, prefetchedJobStoreEntries) + BackendJobPreparationSucceeded(jobDescriptor, jobExecutionProps(jobDescriptor, initializationData, serviceRegistryActor, ioActor, backendSingletonActor)) + } + + private [preparation] def prepareRuntimeAttributes(inputEvaluation: Map[Declaration, WdlValue]): Try[Map[LocallyQualifiedName, WdlValue]] = { + import RuntimeAttributeDefinition.{addDefaultsToAttributes, evaluateRuntimeAttributes} + val curriedAddDefaultsToAttributes = addDefaultsToAttributes(runtimeAttributeDefinitions, workflowDescriptor.backendDescriptor.workflowOptions) _ + + for { + unevaluatedRuntimeAttributes <- Try(jobKey.call.task.runtimeAttributes) + evaluatedRuntimeAttributes <- evaluateRuntimeAttributes(unevaluatedRuntimeAttributes, expressionLanguageFunctions, 
inputEvaluation) + } yield curriedAddDefaultsToAttributes(evaluatedRuntimeAttributes) + } +} + +object JobPreparationActor { + + sealed trait JobPreparationActorData + case object JobPreparationActorNoData extends JobPreparationActorData + case class JobPreparationKeyLookupData(keyLookups: PartialKeyValueLookups) extends JobPreparationActorData + private final case class JobPreparationHashLookupData(keyLookupResults: KeyValueLookupResults, + dockerHashRequest: DockerHashRequest, + inputs: Map[Declaration, WdlValue], + attributes: Map[LocallyQualifiedName, WdlValue]) extends JobPreparationActorData + + sealed trait JobPreparationActorState + case object Idle extends JobPreparationActorState + case object WaitingForDockerHash extends JobPreparationActorState + case object FetchingKeyValueStoreEntries extends JobPreparationActorState + + def props(executionData: WorkflowExecutionActorData, + jobKey: BackendJobDescriptorKey, + factory: BackendLifecycleActorFactory, + workflowDockerLookupActor: ActorRef, + initializationData: Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + ioActor: ActorRef, + backendSingletonActor: Option[ActorRef]) = { + // Note that JobPreparationActor doesn't run on the engine dispatcher as it mostly executes backend-side code + // (WDL expression evaluation using Backend's expressionLanguageFunctions) + Props(new JobPreparationActor(executionData, + jobKey, + factory, + workflowDockerLookupActor = workflowDockerLookupActor, + initializationData, + serviceRegistryActor = serviceRegistryActor, + ioActor = ioActor, + backendSingletonActor = backendSingletonActor)).withDispatcher(EngineDispatcher) + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/KeyValueLookups.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/KeyValueLookups.scala new file mode 100644 index 000000000..2f3dec446 --- /dev/null +++ 
b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/KeyValueLookups.scala @@ -0,0 +1,24 @@ +package cromwell.engine.workflow.lifecycle.execution.preparation + +import cromwell.services.keyvalue.KeyValueServiceActor.{KvResponse, ScopedKey} + +/** + * Handles the determination of when we know key lookups are successful. + */ +private sealed trait KeyValueLookups + +private[preparation] final case class PartialKeyValueLookups(responses: Map[ScopedKey, KvResponse], awaiting: Seq[ScopedKey]) { + def withResponse(key: ScopedKey, response: KvResponse) = { + val newResponses = responses + (key -> response) + val newAwaiting = awaiting diff List(key) + if (newAwaiting.isEmpty) { + KeyValueLookupResults(newResponses) + } else { + PartialKeyValueLookups(newResponses, newAwaiting) + } + } +} + +private final case class KeyValueLookupResults(values: Map[ScopedKey, KvResponse]) { + def unscoped: Map[String, KvResponse] = values map { case (k, v) => k.key -> v } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/SubWorkflowPreparationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/SubWorkflowPreparationActor.scala new file mode 100644 index 000000000..11dddea5e --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/SubWorkflowPreparationActor.scala @@ -0,0 +1,60 @@ +package cromwell.engine.workflow.lifecycle.execution.preparation + +import akka.actor.{Actor, Props} +import cromwell.backend.BackendJobBreadCrumb +import cromwell.core.Dispatcher._ +import cromwell.core.WorkflowId +import cromwell.core.logging.WorkflowLogging +import cromwell.engine.EngineWorkflowDescriptor +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.SubWorkflowKey +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActorData +import 
cromwell.engine.workflow.lifecycle.execution.preparation.CallPreparation.{CallPreparationFailed, Start, _} +import cromwell.engine.workflow.lifecycle.execution.preparation.SubWorkflowPreparationActor.SubWorkflowPreparationSucceeded +import wdl4s.wdl._ +import wdl4s.wdl.values.WdlValue + +class SubWorkflowPreparationActor(executionData: WorkflowExecutionActorData, + callKey: SubWorkflowKey, + subWorkflowId: WorkflowId) extends Actor with WorkflowLogging { + + private val workflowDescriptor = executionData.workflowDescriptor + lazy val outputStore = executionData.outputStore + lazy val expressionLanguageFunctions = executionData.expressionLanguageFunctions + + lazy val workflowIdForLogging = workflowDescriptor.id + + def prepareExecutionActor(inputEvaluation: Map[Declaration, WdlValue]): CallPreparationActorResponse = { + val oldBackendDescriptor = workflowDescriptor.backendDescriptor + + val newBackendDescriptor = oldBackendDescriptor.copy( + id = subWorkflowId, + workflow = callKey.scope.calledWorkflow, + knownValues = workflowDescriptor.knownValues ++ (inputEvaluation map { case (k, v) => k.fullyQualifiedName -> v }), + breadCrumbs = oldBackendDescriptor.breadCrumbs :+ BackendJobBreadCrumb(workflowDescriptor.workflow, workflowDescriptor.id, callKey) + ) + val engineDescriptor = workflowDescriptor.copy(backendDescriptor = newBackendDescriptor, parentWorkflow = Option(workflowDescriptor)) + SubWorkflowPreparationSucceeded(engineDescriptor, inputEvaluation) + } + + override def receive = { + case Start => + val evaluatedInputs = resolveAndEvaluateInputs(callKey, workflowDescriptor, expressionLanguageFunctions, outputStore) + val response = evaluatedInputs map { prepareExecutionActor } + context.parent ! 
(response recover { case f => CallPreparationFailed(callKey, f) }).get + context stop self + + case unhandled => workflowLogger.warn(self.path.name + " received an unhandled message: " + unhandled) + } +} + +object SubWorkflowPreparationActor { + case class SubWorkflowPreparationSucceeded(workflowDescriptor: EngineWorkflowDescriptor, inputs: EvaluatedTaskInputs) extends CallPreparationActorResponse + + def props(executionData: WorkflowExecutionActorData, + key: SubWorkflowKey, + subWorkflowId: WorkflowId) = { + // Note that JobPreparationActor doesn't run on the engine dispatcher as it mostly executes backend-side code + // (WDL expression evaluation using Backend's expressionLanguageFunctions) + Props(new SubWorkflowPreparationActor(executionData, key, subWorkflowId)).withDispatcher(EngineDispatcher) + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/lifecycle.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/lifecycle.scala new file mode 100644 index 000000000..bb0d8fa80 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/lifecycle.scala @@ -0,0 +1,7 @@ +package cromwell.engine.workflow.lifecycle + +case object EngineLifecycleActorAbortCommand + +trait EngineLifecycleStateCompleteResponse + +trait EngineLifecycleActorAbortedResponse extends EngineLifecycleStateCompleteResponse diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/package.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/package.scala deleted file mode 100644 index 1f240dc7b..000000000 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/package.scala +++ /dev/null @@ -1,8 +0,0 @@ -package cromwell.engine.workflow - -package object lifecycle { - case object EngineLifecycleActorAbortCommand - - trait EngineLifecycleStateCompleteResponse - trait EngineLifecycleActorAbortedResponse extends EngineLifecycleStateCompleteResponse -} diff --git 
a/engine/src/main/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActor.scala b/engine/src/main/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActor.scala new file mode 100644 index 000000000..8d0850c54 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActor.scala @@ -0,0 +1,135 @@ +package cromwell.engine.workflow.tokens + +import akka.actor.{Actor, ActorLogging, ActorRef, Props, Terminated} +import cromwell.core.JobExecutionToken +import JobExecutionToken._ +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor._ +import cromwell.engine.workflow.tokens.TokenPool.TokenPoolPop + +import scala.collection.immutable.Queue + +class JobExecutionTokenDispenserActor extends Actor with ActorLogging { + + /** + * Lazily created token pool. We only create a pool for a token type when we need it + */ + var tokenPools: Map[JobExecutionTokenType, TokenPool] = Map.empty + var tokenAssignments: Map[ActorRef, JobExecutionToken] = Map.empty + + override def receive: Actor.Receive = { + case JobExecutionTokenRequest(tokenType) => sendTokenRequestResult(sender, tokenType) + case JobExecutionTokenReturn(token) => unassign(sender, token) + case Terminated(terminee) => onTerminate(terminee) + } + + private def sendTokenRequestResult(sndr: ActorRef, tokenType: JobExecutionTokenType): Unit = { + if (tokenAssignments.contains(sndr)) { + sndr ! JobExecutionTokenDispensed(tokenAssignments(sndr)) + } else { + context.watch(sndr) + val updatedTokenPool = getTokenPool(tokenType).pop() match { + case TokenPoolPop(newTokenPool, Some(token)) => + assignAndSendToken(sndr, token) + newTokenPool + case TokenPoolPop(sizedTokenPoolAndQueue: SizedTokenPoolAndActorQueue, None) => + val (poolWithActorEnqueued, positionInQueue) = sizedTokenPoolAndQueue.enqueue(sndr) + sndr ! 
JobExecutionTokenDenied(positionInQueue) + poolWithActorEnqueued + case TokenPoolPop(someOtherTokenPool, None) => + //If this has happened, somebody's been playing around in this class and not covered this case: + throw new RuntimeException(s"Unexpected token pool type didn't return a token: ${someOtherTokenPool.getClass.getSimpleName}") + } + + tokenPools += tokenType -> updatedTokenPool + } + } + + private def getTokenPool(tokenType: JobExecutionTokenType): TokenPool = tokenPools.getOrElse(tokenType, createNewPool(tokenType)) + + private def createNewPool(tokenType: JobExecutionTokenType): TokenPool = { + val newPool = TokenPool(tokenType) match { + case s: SizedTokenPool => SizedTokenPoolAndActorQueue(s, Queue.empty) + case anythingElse => anythingElse + } + tokenPools += tokenType -> newPool + newPool + } + + private def assignAndSendToken(actor: ActorRef, token: JobExecutionToken) = { + tokenAssignments += actor -> token + actor ! JobExecutionTokenDispensed(token) + } + + private def unassign(actor: ActorRef, token: JobExecutionToken): Unit = { + if (tokenAssignments.contains(actor) && tokenAssignments(actor) == token) { + tokenAssignments -= actor + + val pool = getTokenPool(token.jobExecutionTokenType) match { + case SizedTokenPoolAndActorQueue(innerPool, queue) if queue.nonEmpty => + val (nextInLine, newQueue) = queue.dequeue + assignAndSendToken(nextInLine, token) + SizedTokenPoolAndActorQueue(innerPool, newQueue) + case other => + other.push(token) + } + + tokenPools += token.jobExecutionTokenType -> pool + context.unwatch(actor) + () + } else { + log.error("Job execution token returned from incorrect actor: {}", token) + } + } + + private def onTerminate(terminee: ActorRef): Unit = { + tokenAssignments.get(terminee) match { + case Some(token) => + log.debug("Actor {} stopped without returning its Job Execution Token. 
Reclaiming it!", terminee) + self.tell(msg = JobExecutionTokenReturn(token), sender = terminee) + case None => + log.debug("Actor {} stopped while we were still watching it... but it doesn't have a token. Removing it from any queues if necessary", terminee) + tokenPools = tokenPools map { + case (tokenType, SizedTokenPoolAndActorQueue(pool, queue)) => tokenType -> SizedTokenPoolAndActorQueue(pool, queue.filterNot(_ == terminee)) + case (tokenType, other) => tokenType -> other + } + } + context.unwatch(terminee) + () + } +} + +object JobExecutionTokenDispenserActor { + + def props = Props(new JobExecutionTokenDispenserActor).withDispatcher(EngineDispatcher) + + case class JobExecutionTokenRequest(jobExecutionTokenType: JobExecutionTokenType) + case class JobExecutionTokenReturn(jobExecutionToken: JobExecutionToken) + + sealed trait JobExecutionTokenRequestResult + case class JobExecutionTokenDispensed(jobExecutionToken: JobExecutionToken) extends JobExecutionTokenRequestResult + case class JobExecutionTokenDenied(positionInQueue: Integer) extends JobExecutionTokenRequestResult + + case class SizedTokenPoolAndActorQueue(sizedPool: SizedTokenPool, queue: Queue[ActorRef]) extends TokenPool { + override def currentLoans = sizedPool.currentLoans + override def push(jobExecutionToken: JobExecutionToken) = SizedTokenPoolAndActorQueue(sizedPool.push(jobExecutionToken), queue) + override def pop() = { + val underlyingPop = sizedPool.pop() + TokenPoolPop(SizedTokenPoolAndActorQueue(underlyingPop.newTokenPool.asInstanceOf[SizedTokenPool], queue), underlyingPop.poppedItem) + } + + /** + * Enqueues an actor (or just finds its current position) + * + * @return The actor's position in the queue + */ + def enqueue(actor: ActorRef): (SizedTokenPoolAndActorQueue, Int) = { + queue.indexOf(actor) match { + case -1 => + val newQueue = queue :+ actor + (SizedTokenPoolAndActorQueue(sizedPool, newQueue), newQueue.size - 1) // Convert from 1-indexed to 0-indexed + case index => (this, 
index) + } + } + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/tokens/TokenPool.scala b/engine/src/main/scala/cromwell/engine/workflow/tokens/TokenPool.scala new file mode 100644 index 000000000..ee378856c --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/tokens/TokenPool.scala @@ -0,0 +1,50 @@ +package cromwell.engine.workflow.tokens + +import java.util.UUID + +import cromwell.core.JobExecutionToken +import JobExecutionToken.JobExecutionTokenType +import cromwell.engine.workflow.tokens.TokenPool.TokenPoolPop + +import scala.language.postfixOps + +trait TokenPool { + def currentLoans: Set[JobExecutionToken] + def pop(): TokenPoolPop + def push(jobExecutionToken: JobExecutionToken): TokenPool +} + +object TokenPool { + + case class TokenPoolPop(newTokenPool: TokenPool, poppedItem: Option[JobExecutionToken]) + + def apply(tokenType: JobExecutionTokenType): TokenPool = { + tokenType.maxPoolSize map { ps => + val pool = (1 to ps toList) map { _ => JobExecutionToken(tokenType, UUID.randomUUID()) } + SizedTokenPool(pool, Set.empty) + } getOrElse { + InfiniteTokenPool(tokenType, Set.empty) + } + } +} + +final case class SizedTokenPool(pool: List[JobExecutionToken], override val currentLoans: Set[JobExecutionToken]) extends TokenPool { + + override def pop(): TokenPoolPop = pool match { + case head :: tail => TokenPoolPop(SizedTokenPool(tail, currentLoans + head), Option(head)) + case Nil => TokenPoolPop(SizedTokenPool(List.empty, currentLoans), None) + } + + + override def push(token: JobExecutionToken): SizedTokenPool = { + if (currentLoans.contains(token)) { SizedTokenPool(pool :+ token, currentLoans - token) } else this + } +} + +final case class InfiniteTokenPool(tokenType: JobExecutionTokenType, override val currentLoans: Set[JobExecutionToken]) extends TokenPool { + override def pop() = { + val newToken = JobExecutionToken(tokenType, UUID.randomUUID()) + TokenPoolPop(InfiniteTokenPool(tokenType, currentLoans + newToken), 
Option(newToken)) + } + override def push(token: JobExecutionToken): InfiniteTokenPool = if (currentLoans.contains(token)) { InfiniteTokenPool(tokenType, currentLoans - token) } else this +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala similarity index 84% rename from engine/src/test/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala rename to engine/src/main/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala index 9ee29439d..0f04212f8 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala @@ -1,10 +1,10 @@ package cromwell.engine.workflow.workflowstore -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cats.data.NonEmptyList +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState import scala.concurrent.{ExecutionContext, Future} -import scalaz.NonEmptyList class InMemoryWorkflowStore extends WorkflowStore { @@ -14,9 +14,9 @@ class InMemoryWorkflowStore extends WorkflowStore { * Adds the requested WorkflowSourceFiles to the store and returns a WorkflowId for each one (in order) * for tracking purposes. 
*/ - override def add(sources: NonEmptyList[WorkflowSourceFiles])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { + override def add(sources: NonEmptyList[WorkflowSourceFilesCollection])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { val submittedWorkflows = sources map { SubmittedWorkflow(WorkflowId.randomId(), _, WorkflowStoreState.Submitted) } - workflowStore = workflowStore ++ submittedWorkflows.list.toList + workflowStore = workflowStore ++ submittedWorkflows.toList Future.successful(submittedWorkflows map { _.id }) } @@ -44,7 +44,7 @@ class InMemoryWorkflowStore extends WorkflowStore { override def initialize(implicit ec: ExecutionContext): Future[Unit] = Future.successful(()) } -final case class SubmittedWorkflow(id: WorkflowId, sources: WorkflowSourceFiles, state: WorkflowStoreState) { +final case class SubmittedWorkflow(id: WorkflowId, sources: WorkflowSourceFilesCollection, state: WorkflowStoreState) { def toWorkflowToStart: WorkflowToStart = { state match { case r: StartableState => WorkflowToStart(id, sources, r) diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala index 63a12a154..d8ca17ab6 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala @@ -2,20 +2,28 @@ package cromwell.engine.workflow.workflowstore import java.time.OffsetDateTime -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cats.data.NonEmptyList +import com.typesafe.config.ConfigFactory +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.database.sql.SqlConverters._ import cromwell.database.sql.WorkflowStoreSqlDatabase import cromwell.database.sql.tables.WorkflowStoreEntry import 
cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState +import eu.timepit.refined.api.Refined +import eu.timepit.refined.collection._ +import net.ceedubs.ficus.Ficus._ import scala.concurrent.{ExecutionContext, Future} -import scalaz.NonEmptyList case class SqlWorkflowStore(sqlDatabase: WorkflowStoreSqlDatabase) extends WorkflowStore { override def initialize(implicit ec: ExecutionContext): Future[Unit] = { - sqlDatabase.updateWorkflowState( - WorkflowStoreState.Running.toString, - WorkflowStoreState.Restartable.toString) + if (ConfigFactory.load().as[Option[Boolean]]("system.workflow-restart").getOrElse(true)) { + sqlDatabase.updateWorkflowState( + WorkflowStoreState.Running.toString, + WorkflowStoreState.Restartable.toString) + } else { + Future.successful(()) + } } override def remove(id: WorkflowId)(implicit ec: ExecutionContext): Future[Boolean] = { @@ -36,34 +44,46 @@ case class SqlWorkflowStore(sqlDatabase: WorkflowStoreSqlDatabase) extends Workf * Adds the requested WorkflowSourceFiles to the store and returns a WorkflowId for each one (in order) * for tracking purposes. */ - override def add(sources: NonEmptyList[WorkflowSourceFiles])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { + override def add(sources: NonEmptyList[WorkflowSourceFilesCollection])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { val asStoreEntries = sources map toWorkflowStoreEntry val returnValue = asStoreEntries map { workflowStore => WorkflowId.fromString(workflowStore.workflowExecutionUuid) } // The results from the Future aren't useful, so on completion map it into the precalculated return value instead. Magic! 
- sqlDatabase.addWorkflowStoreEntries(asStoreEntries.list.toList) map { _ => returnValue } + sqlDatabase.addWorkflowStoreEntries(asStoreEntries.toList) map { _ => returnValue } } private def fromWorkflowStoreEntry(workflowStoreEntry: WorkflowStoreEntry): WorkflowToStart = { - val sources = WorkflowSourceFiles( - workflowStoreEntry.workflowDefinition.toRawString, - workflowStoreEntry.workflowInputs.toRawString, - workflowStoreEntry.workflowOptions.toRawString) + val sources = WorkflowSourceFilesCollection( + workflowSource = workflowStoreEntry.workflowDefinition.toRawString, + workflowType = workflowStoreEntry.workflowType, + workflowTypeVersion = workflowStoreEntry.workflowTypeVersion, + inputsJson = workflowStoreEntry.workflowInputs.toRawString, + workflowOptionsJson = workflowStoreEntry.workflowOptions.toRawString, + labelsJson = workflowStoreEntry.customLabels.toRawString, + importsFile = workflowStoreEntry.importsZip.toBytesOption + ) WorkflowToStart( WorkflowId.fromString(workflowStoreEntry.workflowExecutionUuid), sources, fromDbStateStringToStartableState(workflowStoreEntry.workflowState)) } - private def toWorkflowStoreEntry(workflowSourceFiles: WorkflowSourceFiles): WorkflowStoreEntry = { + private def toWorkflowStoreEntry(workflowSourceFiles: WorkflowSourceFilesCollection): WorkflowStoreEntry = { + import eu.timepit.refined._ + val nonEmptyJsonString: String Refined NonEmpty = refineMV[NonEmpty]("{}") + WorkflowStoreEntry( - WorkflowId.randomId().toString, - workflowSourceFiles.wdlSource.toClob, - workflowSourceFiles.inputsJson.toClob, - workflowSourceFiles.workflowOptionsJson.toClob, - WorkflowStoreState.Submitted.toString, - OffsetDateTime.now.toSystemTimestamp + workflowExecutionUuid = WorkflowId.randomId().toString, + workflowDefinition = workflowSourceFiles.workflowSource.toClobOption, + workflowType = workflowSourceFiles.workflowType, + workflowTypeVersion = workflowSourceFiles.workflowTypeVersion, + workflowInputs = 
workflowSourceFiles.inputsJson.toClobOption, + workflowOptions = workflowSourceFiles.workflowOptionsJson.toClobOption, + customLabels = workflowSourceFiles.labelsJson.toClob(default = nonEmptyJsonString), + workflowState = WorkflowStoreState.Submitted.toString, + submissionTime = OffsetDateTime.now.toSystemTimestamp, + importsZip = workflowSourceFiles.importsZipFileOption.toBlobOption ) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala index f24cd99cb..f4734f7bb 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala @@ -1,10 +1,10 @@ package cromwell.engine.workflow.workflowstore -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cats.data.NonEmptyList +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState import scala.concurrent.{ExecutionContext, Future} -import scalaz.NonEmptyList trait WorkflowStore { @@ -14,7 +14,7 @@ trait WorkflowStore { * Adds the requested WorkflowSourceFiles to the store and returns a WorkflowId for each one (in order) * for tracking purposes. 
*/ - def add(sources: NonEmptyList[WorkflowSourceFiles])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] + def add(sources: NonEmptyList[WorkflowSourceFilesCollection])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] /** * Retrieves up to n workflows which have not already been pulled into the engine and sets their pickedUp diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala index 3655b6938..dad70fb96 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala @@ -1,210 +1,38 @@ package cromwell.engine.workflow.workflowstore -import java.time.OffsetDateTime +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import cats.data.NonEmptyList +import cromwell.core._ +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.database.sql.SqlDatabase +import cromwell.util.GracefulShutdownHelper +import cromwell.util.GracefulShutdownHelper.ShutdownCommand -import akka.actor.{ActorLogging, ActorRef, LoggingFSM, Props} -import cromwell.core.{WorkflowId, WorkflowMetadataKeys, WorkflowSourceFiles} -import cromwell.engine.workflow.WorkflowManagerActor -import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException -import cromwell.engine.workflow.workflowstore.WorkflowStoreActor._ -import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState -import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} -import cromwell.services.metadata.MetadataService.{MetadataPutAcknowledgement, PutMetadataAction} -import org.apache.commons.lang3.exception.ExceptionUtils +final case class WorkflowStoreActor private(store: WorkflowStore, serviceRegistryActor: ActorRef, database: SqlDatabase) extends Actor with ActorLogging with 
GracefulShutdownHelper { + import WorkflowStoreActor._ -import scala.concurrent.{ExecutionContext, Future} -import scala.language.postfixOps -import scala.util.{Failure, Success} -import scalaz.NonEmptyList + lazy val workflowStoreSubmitActor: ActorRef = context.actorOf(WorkflowStoreSubmitActor.props(store, serviceRegistryActor), "WorkflowStoreSubmitActor") + lazy val workflowStoreEngineActor: ActorRef = context.actorOf(WorkflowStoreEngineActor.props(store, serviceRegistryActor, database), "WorkflowStoreEngineActor") -case class WorkflowStoreActor(store: WorkflowStore, serviceRegistryActor: ActorRef) - extends LoggingFSM[WorkflowStoreActorState, WorkflowStoreActorData] with ActorLogging { - - implicit val ec: ExecutionContext = context.dispatcher - - startWith(Unstarted, WorkflowStoreActorData(None, List.empty)) - self ! InitializerCommand - - when(Unstarted) { - case Event(InitializerCommand, _) => - val work = store.initialize map { _ => - log.debug("Workflow store initialization successful") - } - addWorkCompletionHooks(InitializerCommand, work) - goto(Working) using stateData.withCurrentCommand(InitializerCommand, sender) - case Event(x: WorkflowStoreActorCommand, _) => - stay using stateData.withPendingCommand(x, sender) - } - - when(Idle) { - case Event(cmd: WorkflowStoreActorCommand, _) => - if (stateData.currentOperation.nonEmpty || stateData.pendingOperations.nonEmpty) { - log.error("Non-empty WorkflowStoreActorData when in Idle state: {}", stateData) - } - startNewWork(cmd, sender, stateData.withCurrentCommand(cmd, sender)) - } - - when(Working) { - case Event(WorkDone, data) => - val newData = data.pop - newData.currentOperation match { - case None => goto(Idle) using newData - case Some(WorkflowStoreActorCommandWithSender(cmd, sndr)) => startNewWork(cmd, sndr, newData) - } - case Event(cmd: WorkflowStoreActorCommand, data) => stay using data.withPendingCommand(cmd, sender) - } - - whenUnhandled { - case Event(MetadataPutAcknowledgement(_), _) => - stay 
// Ignored - case Event(msg, _) => - log.warning("Unexpected message to WorkflowStoreActor in state {} with data {}: {}", stateName, stateData, msg) - stay - } - - onTransition { - case fromState -> toState => - log.debug("WorkflowStore moving from {} (using {}) to {} (using {})", fromState, stateData, toState, nextStateData) - } - - private def startNewWork(command: WorkflowStoreActorCommand, sndr: ActorRef, nextData: WorkflowStoreActorData) = { - val work: Future[Any] = command match { - case cmd @ SubmitWorkflow(sourceFiles) => - store.add(NonEmptyList(sourceFiles)) map { ids => - val id = ids.head - registerSubmissionWithMetadataService(id, sourceFiles) - sndr ! WorkflowSubmittedToStore(id) - log.info("Workflow {} submitted.", id) - } - case cmd @ BatchSubmitWorkflows(sources) => - store.add(sources) map { ids => - val assignedSources = ids.zip(sources) - assignedSources foreach { case (id, sourceFiles) => registerSubmissionWithMetadataService(id, sourceFiles) } - sndr ! WorkflowsBatchSubmittedToStore(ids) - log.info("Workflows {} submitted.", ids.list.toList.mkString(", ")) - } - case cmd @ FetchRunnableWorkflows(n) => - newWorkflowMessage(n) map { nwm => - nwm match { - case NewWorkflowsToStart(workflows) => log.info("{} new workflows fetched", workflows.size) - case NoNewWorkflowsToStart => log.debug("No workflows fetched") - case _ => log.error("Unexpected response from newWorkflowMessage({}): {}", n, nwm) - } - sndr ! nwm - } - case cmd @ AbortWorkflow(id, manager) => - store.remove(id) map { removed => - if (removed) { - log.debug(s"Workflow $id aborted and removed from the workflow store.") - manager ! WorkflowManagerActor.AbortWorkflowCommand(id, sndr) - } else { - sndr ! 
WorkflowAbortFailed(id, new WorkflowNotFoundException(s"Couldn't abort $id because no workflow with that ID is in progress")) - } - } - case cmd @ RemoveWorkflow(id) => - store.remove(id) map { removed => - if (removed) { - log.debug("Workflow {} removed from store successfully.", id) - } else { - log.warning(s"Attempted to remove ID {} from the WorkflowStore but it didn't exist", id) - } - } - case oops => - log.error("Unexpected type of start work command: {}", oops.getClass.getSimpleName) - Future.successful(self ! WorkDone) - } - addWorkCompletionHooks(command, work) - goto(Working) using nextData - } - - private def addWorkCompletionHooks[A](command: WorkflowStoreActorCommand, work: Future[A]) = { - work.onComplete { - case Success(_) => - self ! WorkDone - case Failure(t) => - log.error("Error occurred during {}: {} because {}", command.getClass.getSimpleName, t.toString, ExceptionUtils.getStackTrace(t)) - self ! WorkDone - } - } - - /** - * Fetches at most n workflows, and builds the correct response message based on if there were any workflows or not - */ - private def newWorkflowMessage(maxWorkflows: Int): Future[WorkflowStoreActorResponse] = { - def fetchRunnableWorkflowsIfNeeded(maxWorkflowsInner: Int, state: StartableState) = { - if (maxWorkflows > 0) { - store.fetchRunnableWorkflows(maxWorkflowsInner, state) - } else { - Future.successful(List.empty[WorkflowToStart]) - } - } - - val runnableWorkflows = for { - restartableWorkflows <- fetchRunnableWorkflowsIfNeeded(maxWorkflows, WorkflowStoreState.Restartable) - submittedWorkflows <- fetchRunnableWorkflowsIfNeeded(maxWorkflows - restartableWorkflows.size, WorkflowStoreState.Submitted) - } yield restartableWorkflows ++ submittedWorkflows - - runnableWorkflows map { - case x :: xs => NewWorkflowsToStart(NonEmptyList.nels(x, xs: _*)) - case _ => NoNewWorkflowsToStart - } - } - - /** - * Takes the workflow id and sends it over to the metadata service w/ default empty values for inputs/outputs - */ - private 
def registerSubmissionWithMetadataService(id: WorkflowId, sourceFiles: WorkflowSourceFiles): Unit = { - val submissionEvents = List( - MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionTime), MetadataValue(OffsetDateTime.now.toString)), - MetadataEvent.empty(MetadataKey(id, None, WorkflowMetadataKeys.Inputs)), - MetadataEvent.empty(MetadataKey(id, None, WorkflowMetadataKeys.Outputs)), - - MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionSection, WorkflowMetadataKeys.SubmissionSection_Workflow), MetadataValue(sourceFiles.wdlSource)), - MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionSection, WorkflowMetadataKeys.SubmissionSection_Inputs), MetadataValue(sourceFiles.inputsJson)), - MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionSection, WorkflowMetadataKeys.SubmissionSection_Options), MetadataValue(sourceFiles.workflowOptionsJson)) - ) - - serviceRegistryActor ! PutMetadataAction(submissionEvents) + override def receive = { + case ShutdownCommand => waitForActorsAndShutdown(NonEmptyList.of(workflowStoreSubmitActor)) + case cmd: WorkflowStoreActorSubmitCommand => workflowStoreSubmitActor forward cmd + case cmd: WorkflowStoreActorEngineCommand => workflowStoreEngineActor forward cmd } } object WorkflowStoreActor { - - private[workflowstore] case class WorkflowStoreActorCommandWithSender(command: WorkflowStoreActorCommand, sender: ActorRef) - - private[workflowstore] case class WorkflowStoreActorData(currentOperation: Option[WorkflowStoreActorCommandWithSender], pendingOperations: List[WorkflowStoreActorCommandWithSender]) { - def withCurrentCommand(command: WorkflowStoreActorCommand, sender: ActorRef) = this.copy(currentOperation = Option(WorkflowStoreActorCommandWithSender(command, sender))) - def withPendingCommand(newCommand: WorkflowStoreActorCommand, sender: ActorRef) = this.copy(pendingOperations = this.pendingOperations :+ WorkflowStoreActorCommandWithSender(newCommand, sender)) - def pop = 
{ - if (pendingOperations.isEmpty) { WorkflowStoreActorData(None, List.empty) } - else { WorkflowStoreActorData(Option(pendingOperations.head), pendingOperations.tail) } - } - } - - private[workflowstore] sealed trait WorkflowStoreActorState - private[workflowstore] case object Unstarted extends WorkflowStoreActorState - private[workflowstore] case object Working extends WorkflowStoreActorState - private[workflowstore] case object Idle extends WorkflowStoreActorState - - sealed trait WorkflowStoreActorCommand - final case class SubmitWorkflow(source: WorkflowSourceFiles) extends WorkflowStoreActorCommand - final case class BatchSubmitWorkflows(sources: NonEmptyList[WorkflowSourceFiles]) extends WorkflowStoreActorCommand - final case class FetchRunnableWorkflows(n: Int) extends WorkflowStoreActorCommand - final case class RemoveWorkflow(id: WorkflowId) extends WorkflowStoreActorCommand - final case class AbortWorkflow(id: WorkflowId, manager: ActorRef) extends WorkflowStoreActorCommand - - private case object InitializerCommand extends WorkflowStoreActorCommand - private case object WorkDone - - sealed trait WorkflowStoreActorResponse - final case class WorkflowSubmittedToStore(workflowId: WorkflowId) extends WorkflowStoreActorResponse - final case class WorkflowsBatchSubmittedToStore(workflowIds: NonEmptyList[WorkflowId]) extends WorkflowStoreActorResponse - case object NoNewWorkflowsToStart extends WorkflowStoreActorResponse - final case class NewWorkflowsToStart(workflows: NonEmptyList[WorkflowToStart]) extends WorkflowStoreActorResponse - final case class WorkflowAborted(workflowId: WorkflowId) extends WorkflowStoreActorResponse - final case class WorkflowAbortFailed(workflowId: WorkflowId, reason: Throwable) extends WorkflowStoreActorResponse - - def props(workflowStoreDatabase: WorkflowStore, serviceRegistryActor: ActorRef) = { - Props(WorkflowStoreActor(workflowStoreDatabase, serviceRegistryActor)) + sealed trait WorkflowStoreActorEngineCommand + final case 
class FetchRunnableWorkflows(n: Int) extends WorkflowStoreActorEngineCommand + final case class AbortWorkflow(id: WorkflowId, manager: ActorRef) extends WorkflowStoreActorEngineCommand + case object InitializerCommand extends WorkflowStoreActorEngineCommand + case object WorkDone extends WorkflowStoreActorEngineCommand + + sealed trait WorkflowStoreActorSubmitCommand + final case class SubmitWorkflow(source: WorkflowSourceFilesCollection) extends WorkflowStoreActorSubmitCommand + final case class BatchSubmitWorkflows(sources: NonEmptyList[WorkflowSourceFilesCollection]) extends WorkflowStoreActorSubmitCommand + + def props(workflowStoreDatabase: WorkflowStore, serviceRegistryActor: ActorRef, database: SqlDatabase) = { + Props(WorkflowStoreActor(workflowStoreDatabase, serviceRegistryActor, database)).withDispatcher(EngineDispatcher) } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreEngineActor.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreEngineActor.scala new file mode 100644 index 000000000..e2a7ac9a1 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreEngineActor.scala @@ -0,0 +1,169 @@ +package cromwell.engine.workflow.workflowstore + +import akka.actor.{ActorLogging, ActorRef, LoggingFSM, Props} +import cats.data.NonEmptyList +import cromwell.core.Dispatcher._ +import cromwell.core.WorkflowId +import cromwell.database.sql.SqlDatabase +import cromwell.engine.workflow.WorkflowManagerActor +import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException +import cromwell.engine.workflow.workflowstore.WorkflowStoreActor._ +import cromwell.engine.workflow.workflowstore.WorkflowStoreEngineActor.{WorkflowStoreActorState, _} +import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState +import org.apache.commons.lang3.exception.ExceptionUtils + +import scala.concurrent.{ExecutionContext, Future} +import 
scala.util.{Failure, Success} + +final case class WorkflowStoreEngineActor private(store: WorkflowStore, serviceRegistryActor: ActorRef, database: SqlDatabase) + extends LoggingFSM[WorkflowStoreActorState, WorkflowStoreActorData] with ActorLogging { + + implicit val ec: ExecutionContext = context.dispatcher + + startWith(Unstarted, WorkflowStoreActorData(None, List.empty)) + self ! InitializerCommand + + when(Unstarted) { + case Event(InitializerCommand, _) => + val work = store.initialize map { _ => + log.debug("Workflow store initialization successful") + } + addWorkCompletionHooks(InitializerCommand, work) + goto(Working) using stateData.withCurrentCommand(InitializerCommand, sender) + case Event(x: WorkflowStoreActorEngineCommand, _) => + stay using stateData.withPendingCommand(x, sender) + } + + when(Idle) { + case Event(cmd: WorkflowStoreActorEngineCommand, _) => + if (stateData.currentOperation.nonEmpty || stateData.pendingOperations.nonEmpty) { + log.error("Non-empty WorkflowStoreActorData when in Idle state: {}", stateData) + } + startNewWork(cmd, sender, stateData.withCurrentCommand(cmd, sender)) + } + + when(Working) { + case Event(WorkDone, data) => + val newData = data.pop + newData.currentOperation match { + case None => goto(Idle) using newData + case Some(WorkflowStoreActorCommandWithSender(cmd, sndr)) => startNewWork(cmd, sndr, newData) + } + case Event(cmd: WorkflowStoreActorEngineCommand, data) => stay using data.withPendingCommand(cmd, sender) + } + + whenUnhandled { + case Event(msg, _) => + log.warning("Unexpected message to WorkflowStoreActor in state {} with data {}: {}", stateName, stateData, msg) + stay + } + + onTransition { + case fromState -> toState => + log.debug("WorkflowStore moving from {} (using {}) to {} (using {})", fromState, stateData, toState, nextStateData) + } + + private def startNewWork(command: WorkflowStoreActorEngineCommand, sndr: ActorRef, nextData: WorkflowStoreActorData) = { + val work: Future[Any] = command match { 
+ case FetchRunnableWorkflows(n) => + newWorkflowMessage(n) map { nwm => + nwm match { + case NewWorkflowsToStart(workflows) => log.info("{} new workflows fetched", workflows.toList.size) + case NoNewWorkflowsToStart => log.debug("No workflows fetched") + case _ => log.error("Unexpected response from newWorkflowMessage({}): {}", n, nwm) + } + sndr ! nwm + } + case AbortWorkflow(id, manager) => + store.remove(id) map { removed => + if (removed) { + manager ! WorkflowManagerActor.AbortWorkflowCommand(id, sndr) + log.debug(s"Workflow $id removed from the workflow store, abort requested.") + } else { + sndr ! WorkflowAbortFailed(id, new WorkflowNotFoundException(s"Couldn't abort $id because no workflow with that ID is in progress")) + } + } recover { + case t => + val message = s"Error aborting workflow $id: could not remove from workflow store" + log.error(t, message) + // A generic exception type like RuntimeException will produce a 500 at the API layer, which seems appropriate + // given we don't know much about what went wrong here. `t.getMessage` so the cause propagates to the client. + val e = new RuntimeException(s"$message: ${t.getMessage}", t) + sndr ! WorkflowAbortFailed(id, e) + } + case oops => + log.error("Unexpected type of start work command: {}", oops.getClass.getSimpleName) + Future.successful(self ! WorkDone) + } + addWorkCompletionHooks(command, work) + goto(Working) using nextData + } + + private def addWorkCompletionHooks[A](command: WorkflowStoreActorEngineCommand, work: Future[A]) = { + work.onComplete { + case Success(_) => + self ! WorkDone + case Failure(t) => + log.error("Error occurred during {}: {} because {}", command.getClass.getSimpleName, t.toString, ExceptionUtils.getStackTrace(t)) + self ! 
WorkDone + } + } + + /** + * Fetches at most n workflows, and builds the correct response message based on if there were any workflows or not + */ + private def newWorkflowMessage(maxWorkflows: Int): Future[WorkflowStoreEngineActorResponse] = { + def fetchRunnableWorkflowsIfNeeded(maxWorkflowsInner: Int, state: StartableState) = { + if (maxWorkflows > 0) { + store.fetchRunnableWorkflows(maxWorkflowsInner, state) + } else { + Future.successful(List.empty[WorkflowToStart]) + } + } + + val runnableWorkflows = for { + restartableWorkflows <- fetchRunnableWorkflowsIfNeeded(maxWorkflows, WorkflowStoreState.Restartable) + submittedWorkflows <- fetchRunnableWorkflowsIfNeeded(maxWorkflows - restartableWorkflows.size, WorkflowStoreState.Submitted) + } yield restartableWorkflows ++ submittedWorkflows + + runnableWorkflows map { + case x :: xs => NewWorkflowsToStart(NonEmptyList.of(x, xs: _*)) + case _ => NoNewWorkflowsToStart + } recover { + case e => + // Log the error but return a successful Future so as not to hang future workflow store polls. 
+ log.error(e, "Error trying to fetch new workflows") + NoNewWorkflowsToStart + } + } +} + +object WorkflowStoreEngineActor { + def props(workflowStoreDatabase: WorkflowStore, serviceRegistryActor: ActorRef, database: SqlDatabase) = { + Props(WorkflowStoreEngineActor(workflowStoreDatabase, serviceRegistryActor, database)).withDispatcher(EngineDispatcher) + } + + sealed trait WorkflowStoreEngineActorResponse + case object NoNewWorkflowsToStart extends WorkflowStoreEngineActorResponse + final case class NewWorkflowsToStart(workflows: NonEmptyList[WorkflowToStart]) extends WorkflowStoreEngineActorResponse + sealed abstract class WorkflowStoreEngineAbortResponse extends WorkflowStoreEngineActorResponse + final case class WorkflowAborted(workflowId: WorkflowId) extends WorkflowStoreEngineAbortResponse + final case class WorkflowAbortFailed(workflowId: WorkflowId, reason: Throwable) extends WorkflowStoreEngineAbortResponse + + + final case class WorkflowStoreActorCommandWithSender(command: WorkflowStoreActorEngineCommand, sender: ActorRef) + + final case class WorkflowStoreActorData(currentOperation: Option[WorkflowStoreActorCommandWithSender], pendingOperations: List[WorkflowStoreActorCommandWithSender]) { + def withCurrentCommand(command: WorkflowStoreActorEngineCommand, sender: ActorRef) = this.copy(currentOperation = Option(WorkflowStoreActorCommandWithSender(command, sender))) + def withPendingCommand(newCommand: WorkflowStoreActorEngineCommand, sender: ActorRef) = this.copy(pendingOperations = this.pendingOperations :+ WorkflowStoreActorCommandWithSender(newCommand, sender)) + def pop = { + if (pendingOperations.isEmpty) { WorkflowStoreActorData(None, List.empty) } + else { WorkflowStoreActorData(Option(pendingOperations.head), pendingOperations.tail) } + } + } + + sealed trait WorkflowStoreActorState + case object Unstarted extends WorkflowStoreActorState + case object Working extends WorkflowStoreActorState + case object Idle extends WorkflowStoreActorState +} 
diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreSubmitActor.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreSubmitActor.scala new file mode 100644 index 000000000..67e06b4df --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreSubmitActor.scala @@ -0,0 +1,144 @@ +package cromwell.engine.workflow.workflowstore + +import java.time.OffsetDateTime + +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import cats.data.NonEmptyList +import cats.instances.future._ +import cats.instances.list._ +import cats.syntax.traverse._ +import cromwell.core.Dispatcher._ +import cromwell.core._ +import cromwell.engine.workflow.lifecycle.execution.WorkflowMetadataHelper +import cromwell.engine.workflow.workflowstore.WorkflowStoreActor._ +import cromwell.engine.workflow.workflowstore.WorkflowStoreSubmitActor.{WorkflowSubmitFailed, WorkflowSubmittedToStore, WorkflowsBatchSubmittedToStore} +import cromwell.services.metadata.MetadataService.PutMetadataAction +import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} + +import scala.concurrent.{ExecutionContext, Future} +import scala.util.{Failure, Success} + +final case class WorkflowStoreSubmitActor(store: WorkflowStore, serviceRegistryActor: ActorRef) extends Actor with ActorLogging with WorkflowMetadataHelper with MonitoringCompanionHelper { + implicit val ec: ExecutionContext = context.dispatcher + + val workflowStoreReceive: Receive = { + case cmd: SubmitWorkflow => + addWork() + val sndr = sender() + + val futureId = for { + ids <- storeWorkflowSources(NonEmptyList.of(cmd.source)) + id = ids.head + _ <- registerSubmissionWithMetadataService(id, cmd.source) + } yield id + + futureId onComplete { + case Success(id) => + log.info("Workflow {} submitted.", id) + sndr ! 
WorkflowSubmittedToStore(id) + removeWork() + case Failure(throwable) => + log.error("Workflow {} submit failed.", throwable) + sndr ! WorkflowSubmitFailed(throwable) + removeWork() + } + + case cmd: BatchSubmitWorkflows => + addWork() + val sndr = sender() + + val futureIds = for { + ids <- storeWorkflowSources(cmd.sources) + _ <- (ids.toList zip cmd.sources.toList) traverse (registerSubmissionWithMetadataService _).tupled + } yield ids + + futureIds onComplete { + case Success(ids) => + log.info("Workflows {} submitted.", ids.toList.mkString(", ")) + sndr ! WorkflowsBatchSubmittedToStore(ids) + removeWork() + case Failure(throwable) => + log.error("Workflow {} submit failed.", throwable) + sndr ! WorkflowSubmitFailed(throwable) + removeWork() + } + } + + override def receive = workflowStoreReceive.orElse(monitoringReceive) + + private def storeWorkflowSources(sources: NonEmptyList[WorkflowSourceFilesCollection]): Future[NonEmptyList[WorkflowId]] = { + for { + processedSources <- processSources(sources, _.asPrettyJson) + workflowIds <- store.add(processedSources) + } yield workflowIds + } + + private def processSources(sources: NonEmptyList[WorkflowSourceFilesCollection], + processOptions: WorkflowOptions => WorkflowOptionsJson): Future[NonEmptyList[WorkflowSourceFilesCollection]] = { + val nelFutures: NonEmptyList[Future[WorkflowSourceFilesCollection]] = sources map processSource(processOptions) + val listFutures: List[Future[WorkflowSourceFilesCollection]] = nelFutures.toList + val futureLists: Future[List[WorkflowSourceFilesCollection]] = Future.sequence(listFutures) + futureLists.map(seq => NonEmptyList.fromList(seq).get) + } + + /** + * Runs processing on workflow source files before they are stored. 
+ * + * @param processOptions How to process the workflow options + * @param source Original workflow source + * @return Attempted updated workflow source + */ + private def processSource(processOptions: WorkflowOptions => WorkflowOptionsJson) + (source: WorkflowSourceFilesCollection): Future[WorkflowSourceFilesCollection] = { + val options = Future { + WorkflowOptions.fromJsonString(source.workflowOptionsJson) + }.flatMap { + case Success(s) => Future.successful(s) + case Failure(regrets) => Future.failed(regrets) + } + + options map {o => source.copyOptions(processOptions(o)) } + } + + /** + * Takes the workflow id and sends it over to the metadata service w/ default empty values for inputs/outputs + */ + private def registerSubmissionWithMetadataService( + id: WorkflowId, + originalSourceFiles: WorkflowSourceFilesCollection): Future[Unit] = { + processSource(_.clearEncryptedValues)(originalSourceFiles) map { sourceFiles => + val submissionEvents: List[MetadataEvent] = List( + MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionTime), MetadataValue(OffsetDateTime.now.toString)), + MetadataEvent.empty(MetadataKey(id, None, WorkflowMetadataKeys.Inputs)), + MetadataEvent.empty(MetadataKey(id, None, WorkflowMetadataKeys.Outputs)), + MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.Status), MetadataValue(WorkflowSubmitted)), + + MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionSection, WorkflowMetadataKeys.SubmissionSection_Workflow), MetadataValue(sourceFiles.workflowSource)), + MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionSection, WorkflowMetadataKeys.SubmissionSection_Inputs), MetadataValue(sourceFiles.inputsJson)), + MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionSection, WorkflowMetadataKeys.SubmissionSection_Options), MetadataValue(sourceFiles.workflowOptionsJson)), + MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionSection, 
WorkflowMetadataKeys.SubmissionSection_Labels), MetadataValue(sourceFiles.labelsJson)) + ) + + // Don't publish metadata for either workflow type or workflow type version if not defined. + val workflowTypeAndVersionEvents: List[Option[MetadataEvent]] = List( + sourceFiles.workflowType map { wt => MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionSection, WorkflowMetadataKeys.SubmissionSection_WorkflowType), MetadataValue(wt)) }, + sourceFiles.workflowTypeVersion map { wtv => MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionSection, WorkflowMetadataKeys.SubmissionSection_WorkflowTypeVersion), MetadataValue(wtv)) } + ) + + serviceRegistryActor ! PutMetadataAction(submissionEvents ++ workflowTypeAndVersionEvents.flatten) + () + } + } +} + +object WorkflowStoreSubmitActor { + def props(workflowStoreDatabase: WorkflowStore, serviceRegistryActor: ActorRef) = { + Props(WorkflowStoreSubmitActor(workflowStoreDatabase, serviceRegistryActor)).withDispatcher(ApiDispatcher) + } + + sealed trait WorkflowStoreSubmitActorResponse + final case class WorkflowSubmittedToStore(workflowId: WorkflowId) extends WorkflowStoreSubmitActorResponse + final case class WorkflowsBatchSubmittedToStore(workflowIds: NonEmptyList[WorkflowId]) extends WorkflowStoreSubmitActorResponse + + final case class WorkflowSubmitFailed(throwable: Throwable) extends WorkflowStoreSubmitActorResponse +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/package.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/package.scala deleted file mode 100644 index e8f90bd74..000000000 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/package.scala +++ /dev/null @@ -1,18 +0,0 @@ -package cromwell.engine.workflow - -import cromwell.core.{WorkflowId, WorkflowSourceFiles} -import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState - -package object workflowstore { - - sealed trait WorkflowStoreState {def 
isStartable: Boolean} - - object WorkflowStoreState { - case object Running extends WorkflowStoreState { override def isStartable = false } - sealed trait StartableState extends WorkflowStoreState { override def isStartable = true } - case object Submitted extends StartableState - case object Restartable extends StartableState - } - - final case class WorkflowToStart(id: WorkflowId, sources: WorkflowSourceFiles, state: StartableState) -} diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/workflowstore_.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/workflowstore_.scala new file mode 100644 index 000000000..61bc37ee0 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/workflowstore_.scala @@ -0,0 +1,15 @@ +package cromwell.engine.workflow.workflowstore + +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} +import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState + +sealed trait WorkflowStoreState {def isStartable: Boolean} + +object WorkflowStoreState { + case object Running extends WorkflowStoreState { override def isStartable = false } + sealed trait StartableState extends WorkflowStoreState { override def isStartable = true } + case object Submitted extends StartableState + case object Restartable extends StartableState +} + +final case class WorkflowToStart(id: WorkflowId, sources: WorkflowSourceFilesCollection, state: StartableState) diff --git a/engine/src/main/scala/cromwell/jobstore/EmptyJobStoreActor.scala b/engine/src/main/scala/cromwell/jobstore/EmptyJobStoreActor.scala new file mode 100644 index 000000000..598f65085 --- /dev/null +++ b/engine/src/main/scala/cromwell/jobstore/EmptyJobStoreActor.scala @@ -0,0 +1,16 @@ +package cromwell.jobstore + +import akka.actor.{Actor, Props} +import cromwell.jobstore.JobStoreActor._ +import cromwell.core.Dispatcher.EngineDispatcher + +class EmptyJobStoreActor extends Actor { + override def receive: Receive = 
{ + case w: JobStoreWriterCommand => sender ! JobStoreWriteSuccess(w) + case _: QueryJobCompletion => sender ! JobNotComplete + } +} + +object EmptyJobStoreActor { + def props: Props = Props(new EmptyJobStoreActor()).withDispatcher(EngineDispatcher) +} diff --git a/engine/src/main/scala/cromwell/jobstore/JobStore.scala b/engine/src/main/scala/cromwell/jobstore/JobStore.scala index eba177251..ad57bcaaf 100644 --- a/engine/src/main/scala/cromwell/jobstore/JobStore.scala +++ b/engine/src/main/scala/cromwell/jobstore/JobStore.scala @@ -1,11 +1,18 @@ package cromwell.jobstore import cromwell.core.WorkflowId -import wdl4s.TaskOutput +import cromwell.jobstore.JobStore.{JobCompletion, WorkflowCompletion} +import wdl4s.wdl.TaskOutput import scala.concurrent.{ExecutionContext, Future} trait JobStore { - def writeToDatabase(jobCompletions: Map[JobStoreKey, JobResult], workflowCompletions: List[WorkflowId])(implicit ec: ExecutionContext): Future[Unit] + def writeToDatabase(workflowCompletions: Seq[WorkflowCompletion], jobCompletions: Seq[JobCompletion], batchSize: Int)(implicit ec: ExecutionContext): Future[Unit] def readJobResult(jobStoreKey: JobStoreKey, taskOutputs: Seq[TaskOutput])(implicit ec: ExecutionContext): Future[Option[JobResult]] } + +object JobStore { + sealed trait Completion + case class WorkflowCompletion(workflowId: WorkflowId) extends Completion + case class JobCompletion(key: JobStoreKey, result: JobResult) extends Completion +} diff --git a/engine/src/main/scala/cromwell/jobstore/JobStoreActor.scala b/engine/src/main/scala/cromwell/jobstore/JobStoreActor.scala index c5910c2be..73b031dfd 100644 --- a/engine/src/main/scala/cromwell/jobstore/JobStoreActor.scala +++ b/engine/src/main/scala/cromwell/jobstore/JobStoreActor.scala @@ -1,20 +1,28 @@ package cromwell.jobstore -import akka.actor.{Actor, Props} +import akka.actor.{Actor, ActorLogging, Props} +import cats.data.NonEmptyList +import cromwell.core.Dispatcher.EngineDispatcher import 
cromwell.core.WorkflowId -import cromwell.jobstore.JobStoreActor.{JobStoreReaderCommand, JobStoreWriterCommand} -import wdl4s.TaskOutput +import cromwell.jobstore.JobStore.{Completion, JobCompletion, WorkflowCompletion} +import cromwell.util.GracefulShutdownHelper +import cromwell.util.GracefulShutdownHelper.ShutdownCommand +import wdl4s.wdl.TaskOutput +import scala.concurrent.duration._ +import scala.language.postfixOps /** * Joins the service registry API to the JobStoreReaderActor and JobStoreWriterActor. * * This level of indirection is a tiny bit awkward but allows the database to be injected. */ -class JobStoreActor(database: JobStore) extends Actor { - val jobStoreWriterActor = context.actorOf(JobStoreWriterActor.props(database)) - val jobStoreReaderActor = context.actorOf(JobStoreReaderActor.props(database)) +class JobStoreActor(jobStore: JobStore, dbBatchSize: Int, dbFlushRate: FiniteDuration) extends Actor with ActorLogging with GracefulShutdownHelper { + import JobStoreActor._ + val jobStoreWriterActor = context.actorOf(JobStoreWriterActor.props(jobStore, dbBatchSize, dbFlushRate), "JobStoreWriterActor") + val jobStoreReaderActor = context.actorOf(JobStoreReaderActor.props(jobStore), "JobStoreReaderActor") override def receive: Receive = { + case ShutdownCommand => waitForActorsAndShutdown(NonEmptyList.of(jobStoreWriterActor)) case command: JobStoreWriterCommand => jobStoreWriterActor.tell(command, sender()) case command: JobStoreReaderCommand => jobStoreReaderActor.tell(command, sender()) } @@ -23,9 +31,15 @@ class JobStoreActor(database: JobStore) extends Actor { object JobStoreActor { sealed trait JobStoreCommand - sealed trait JobStoreWriterCommand extends JobStoreCommand - case class RegisterJobCompleted(jobKey: JobStoreKey, jobResult: JobResult) extends JobStoreWriterCommand - case class RegisterWorkflowCompleted(workflowId: WorkflowId) extends JobStoreWriterCommand + sealed trait JobStoreWriterCommand extends JobStoreCommand { + def completion: 
Completion + } + case class RegisterJobCompleted(jobKey: JobStoreKey, jobResult: JobResult) extends JobStoreWriterCommand { + override def completion = JobCompletion(jobKey, jobResult) + } + case class RegisterWorkflowCompleted(workflowId: WorkflowId) extends JobStoreWriterCommand { + override def completion = WorkflowCompletion(workflowId) + } sealed trait JobStoreWriterResponse case class JobStoreWriteSuccess(originalCommand: JobStoreWriterCommand) extends JobStoreWriterResponse @@ -50,5 +64,11 @@ object JobStoreActor { case class JobStoreReadFailure(reason: Throwable) extends JobStoreReaderResponse - def props(database: JobStore) = Props(new JobStoreActor(database)) + def props(database: JobStore) = Props(new JobStoreActor(database, dbBatchSize, dbFlushRate)).withDispatcher(EngineDispatcher) + + val dbFlushRate = 1 second + + // `dbBatchSize` applies to simpletons only. Batching job store entry writes while returning the inserted IDs works at the + // Slick API level, but didn't actually batch the SQL. Unfortunately these IDs are required to assign into the simpletons. 
+ val dbBatchSize = 1000 } diff --git a/engine/src/main/scala/cromwell/jobstore/JobStoreReaderActor.scala b/engine/src/main/scala/cromwell/jobstore/JobStoreReaderActor.scala index 151b0a711..9065a8f59 100644 --- a/engine/src/main/scala/cromwell/jobstore/JobStoreReaderActor.scala +++ b/engine/src/main/scala/cromwell/jobstore/JobStoreReaderActor.scala @@ -2,12 +2,13 @@ package cromwell.jobstore import akka.actor.{Actor, ActorLogging, Props} import akka.event.LoggingReceive +import cromwell.core.Dispatcher.EngineDispatcher import cromwell.jobstore.JobStoreActor.{JobComplete, JobNotComplete, JobStoreReadFailure, QueryJobCompletion} import scala.util.{Failure, Success} object JobStoreReaderActor { - def props(database: JobStore) = Props(new JobStoreReaderActor(database)) + def props(database: JobStore) = Props(new JobStoreReaderActor(database)).withDispatcher(EngineDispatcher) } class JobStoreReaderActor(database: JobStore) extends Actor with ActorLogging { diff --git a/engine/src/main/scala/cromwell/jobstore/JobStoreWriterActor.scala b/engine/src/main/scala/cromwell/jobstore/JobStoreWriterActor.scala index cf7b37acb..bb91ae81e 100644 --- a/engine/src/main/scala/cromwell/jobstore/JobStoreWriterActor.scala +++ b/engine/src/main/scala/cromwell/jobstore/JobStoreWriterActor.scala @@ -1,95 +1,69 @@ package cromwell.jobstore -import akka.actor.{ActorRef, LoggingFSM, Props} +import akka.actor.{LoggingFSM, Props} +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.core.actor.BatchingDbWriter._ +import cromwell.core.actor.{BatchingDbWriter, BatchingDbWriterActor} +import cromwell.jobstore.JobStore.{JobCompletion, WorkflowCompletion} import cromwell.jobstore.JobStoreActor._ +import scala.concurrent.duration._ +import scala.language.postfixOps import scala.util.{Failure, Success} -/** - * Singleton actor to coordinate writing job statuses to the database. 
- * - * State: Represents an actor either doing nothing, or currently writing to the database - * Data: If currently writing, the actor stores pending updates in the data. When one write completes, any further writes are written - */ -case class JobStoreWriterActor(jsd: JobStore) extends LoggingFSM[JobStoreWriterState, JobStoreWriterData] { - implicit val ec = context.dispatcher - - startWith(Pending, JobStoreWriterData.empty) +case class JobStoreWriterActor(jsd: JobStore, dbBatchSize: Int, override val dbFlushRate: FiniteDuration) extends LoggingFSM[BatchingDbWriterState, BatchingDbWriter.BatchingDbWriterData] with BatchingDbWriterActor { - when(Pending) { - case Event(command: JobStoreWriterCommand, stateData) => - val newData = writeNextOperationToDatabase(stateData.withNewOperation(sender, command)) - goto(WritingToDatabase) using newData - } - - when(WritingToDatabase) { - case Event(command: JobStoreWriterCommand, stateData) => - stay using stateData.withNewOperation(sender, command) - case Event(WriteComplete, stateData) => - val newData = writeNextOperationToDatabase(stateData) - goto(if (newData.isEmpty) Pending else WritingToDatabase) using newData - } + implicit val ec = context.dispatcher - whenUnhandled { - case Event(someMessage, stateData) => - log.error(s"JobStoreWriter: Unexpected message received in state $stateName: $someMessage") - stay() - } + startWith(WaitingToWrite, NoData) - onTransition { - case (oldState, newState) => - log.debug(s"Transitioning from $oldState to $newState") + when(WaitingToWrite) { + case Event(command: JobStoreWriterCommand, curData) => + curData.addData(CommandAndReplyTo(command, sender)) match { + case newData: HasData[_] if newData.length >= dbBatchSize => goto(WritingToDb) using newData + case newData => stay() using newData + } + case Event(ScheduledFlushToDb, curData) => + log.debug("Initiating periodic job store flush to DB") + goto(WritingToDb) using curData } - def writeNextOperationToDatabase(data: 
JobStoreWriterData): JobStoreWriterData = { - - val newData = data.rolledOver - - val workflowCompletions = newData.currentOperation collect { - case (_, RegisterWorkflowCompleted(wfid)) => wfid - } - - val jobCompletions = newData.currentOperation collect { - case (_, RegisterJobCompleted(jobStoreKey, jobResult)) if !workflowCompletions.contains(jobStoreKey.workflowId) => (jobStoreKey, jobResult) - } - - if (!(workflowCompletions.isEmpty && jobCompletions.isEmpty)) { - jsd.writeToDatabase(jobCompletions.toMap, workflowCompletions) onComplete { - case Success(_) => - newData.currentOperation foreach { case (actor, message) => - val msg = JobStoreWriteSuccess(message) - actor ! msg - } - self ! WriteComplete - case Failure(reason) => - log.error(s"Failed to write to database: $reason") - newData.currentOperation foreach { case (actor, message) => actor ! JobStoreWriteFailure(reason) } - self ! WriteComplete + when(WritingToDb) { + case Event(ScheduledFlushToDb, _) => stay + case Event(command: JobStoreWriterCommand, curData) => stay using curData.addData(CommandAndReplyTo(command, sender)) + case Event(FlushBatchToDb, NoData) => + log.debug("Attempted job store flush to DB but had nothing to write") + goto(WaitingToWrite) + case Event(FlushBatchToDb, HasData(data)) => + log.debug("Flushing {} job store commands to the DB", data.length) + val completions = data.toVector.collect({ case CommandAndReplyTo(c: JobStoreWriterCommand, _) => c.completion }) + + if (completions.nonEmpty) { + val workflowCompletions = completions collect { case w: WorkflowCompletion => w } + val completedWorkflowIds = workflowCompletions map { _.workflowId } toSet + // Filter job completions that also have a corresponding workflow completion; these would just be + // immediately deleted anyway. 
+ val jobCompletions = completions.toList collect { case j: JobCompletion if !completedWorkflowIds.contains(j.key.workflowId) => j } + + jsd.writeToDatabase(workflowCompletions, jobCompletions, dbBatchSize) onComplete { + case Success(_) => + data map { case CommandAndReplyTo(c: JobStoreWriterCommand, r) => r ! JobStoreWriteSuccess(c) } + self ! DbWriteComplete + case Failure(regerts) => + log.error("Failed to properly job store entries to database", regerts) + data map { case CommandAndReplyTo(_, r) => r ! JobStoreWriteFailure(regerts) } + self ! DbWriteComplete + } } - } - - newData + stay using NoData + case Event(DbWriteComplete, _) => + log.debug("Flush of job store commands complete") + goto(WaitingToWrite) } } object JobStoreWriterActor { - def props(jobStoreDatabase: JobStore): Props = Props(new JobStoreWriterActor(jobStoreDatabase)) -} -object JobStoreWriterData { - def empty = JobStoreWriterData(List.empty, List.empty) + def props(jobStoreDatabase: JobStore, dbBatchSize: Int, dbFlushRate: FiniteDuration): Props = Props(new JobStoreWriterActor(jobStoreDatabase, dbBatchSize, dbFlushRate)).withDispatcher(EngineDispatcher) } - -case class JobStoreWriterData(currentOperation: List[(ActorRef, JobStoreWriterCommand)], nextOperation: List[(ActorRef, JobStoreWriterCommand)]) { - def isEmpty = nextOperation.isEmpty && currentOperation.isEmpty - def withNewOperation(sender: ActorRef, command: JobStoreWriterCommand) = this.copy(nextOperation = this.nextOperation :+ (sender, command)) - def rolledOver = JobStoreWriterData(this.nextOperation, List.empty) -} - -sealed trait JobStoreWriterState -case object Pending extends JobStoreWriterState -case object WritingToDatabase extends JobStoreWriterState - -sealed trait JobStoreWriterInternalMessage -case object WriteComplete extends JobStoreWriterInternalMessage diff --git a/engine/src/main/scala/cromwell/jobstore/SqlJobStore.scala b/engine/src/main/scala/cromwell/jobstore/SqlJobStore.scala index 2ef0614c0..37504beb0 100644 
--- a/engine/src/main/scala/cromwell/jobstore/SqlJobStore.scala +++ b/engine/src/main/scala/cromwell/jobstore/SqlJobStore.scala @@ -1,28 +1,40 @@ package cromwell.jobstore +import cats.instances.future._ +import cats.instances.list._ +import cats.syntax.traverse._ + import cromwell.Simpletons._ +import cromwell.backend.async.JobAlreadyFailedInJobStore import cromwell.core.ExecutionIndex._ -import cromwell.core.WorkflowId import cromwell.core.simpleton.WdlValueBuilder import cromwell.core.simpleton.WdlValueSimpleton._ -import cromwell.database.sql.JobStoreSqlDatabase +import cromwell.database.sql.SqlConverters._ +import cromwell.database.sql.SqlDatabase import cromwell.database.sql.joins.JobStoreJoin import cromwell.database.sql.tables.{JobStoreEntry, JobStoreSimpletonEntry} -import wdl4s.TaskOutput +import cromwell.jobstore.JobStore.{JobCompletion, WorkflowCompletion} +import org.slf4j.LoggerFactory +import wdl4s.wdl.TaskOutput import scala.concurrent.{ExecutionContext, Future} -class SqlJobStore(sqlDatabase: JobStoreSqlDatabase) extends JobStore { - override def writeToDatabase(jobCompletions: Map[JobStoreKey, JobResult], workflowCompletions: List[WorkflowId])(implicit ec: ExecutionContext): Future[Unit] = { +class SqlJobStore(sqlDatabase: SqlDatabase) extends JobStore { + val log = LoggerFactory.getLogger(classOf[SqlJobStore]) + + override def writeToDatabase(workflowCompletions: Seq[WorkflowCompletion], jobCompletions: Seq[JobCompletion], batchSize: Int)(implicit ec: ExecutionContext): Future[Unit] = { + val completedWorkflowIds = workflowCompletions.toList.map(_.workflowId.toString) for { - _ <- sqlDatabase.addJobStores(jobCompletions.toSeq map toDatabase) - _ <- sqlDatabase.removeJobStores(workflowCompletions.map(_.toString)) + _ <- sqlDatabase.addJobStores(jobCompletions map toDatabase, batchSize) + _ <- completedWorkflowIds traverse sqlDatabase.removeWorkflowStoreEntry + _ <- completedWorkflowIds traverse sqlDatabase.removeDockerHashStoreEntries + _ <- 
sqlDatabase.removeJobStores(completedWorkflowIds) } yield () } - private def toDatabase(jobCompletion: (JobStoreKey, JobResult)): JobStoreJoin = { + private def toDatabase(jobCompletion: JobCompletion): JobStoreJoin = { jobCompletion match { - case (key, JobResultSuccess(returnCode, jobOutputs)) => + case JobCompletion(key, JobResultSuccess(returnCode, jobOutputs)) => val entry = JobStoreEntry( key.workflowId.toString, key.callFqn, @@ -35,11 +47,11 @@ class SqlJobStore(sqlDatabase: JobStoreSqlDatabase) extends JobStore { val jobStoreResultSimpletons = jobOutputs.mapValues(_.wdlValue).simplify.map { wdlValueSimpleton => JobStoreSimpletonEntry( - wdlValueSimpleton.simpletonKey, wdlValueSimpleton.simpletonValue.valueString, + wdlValueSimpleton.simpletonKey, wdlValueSimpleton.simpletonValue.valueString.toClobOption, wdlValueSimpleton.simpletonValue.wdlType.toWdlString) } JobStoreJoin(entry, jobStoreResultSimpletons.toSeq) - case (key, JobResultFailure(returnCode, throwable, retryable)) => + case JobCompletion(key, JobResultFailure(returnCode, throwable, retryable)) => val entry = JobStoreEntry( key.workflowId.toString, key.callFqn, @@ -47,7 +59,7 @@ class SqlJobStore(sqlDatabase: JobStoreSqlDatabase) extends JobStore { key.attempt, jobSuccessful = false, returnCode, - Option(throwable.getMessage), + throwable.getMessage.toClobOption, Option(retryable)) JobStoreJoin(entry, Seq.empty) } @@ -62,8 +74,10 @@ class SqlJobStore(sqlDatabase: JobStoreSqlDatabase) extends JobStore { val simpletons = simpletonEntries map toSimpleton val jobOutputs = WdlValueBuilder.toJobOutputs(taskOutputs, simpletons) JobResultSuccess(returnCode, jobOutputs) - case JobStoreEntry(_, _, _, _, false, returnCode, Some(exceptionMessage), Some(retryable), _) => - JobResultFailure(returnCode, new Exception(exceptionMessage), retryable) + case JobStoreEntry(_, _, _, _, false, returnCode, Some(_), Some(retryable), _) => + JobResultFailure(returnCode, + JobAlreadyFailedInJobStore(jobStoreKey.tag, 
entry.exceptionMessage.toRawString), + retryable) case bad => throw new Exception(s"Invalid contents of JobStore table: $bad") } diff --git a/engine/src/main/scala/cromwell/jobstore/jobstore_.scala b/engine/src/main/scala/cromwell/jobstore/jobstore_.scala new file mode 100644 index 000000000..5ba67b462 --- /dev/null +++ b/engine/src/main/scala/cromwell/jobstore/jobstore_.scala @@ -0,0 +1,13 @@ +package cromwell.jobstore + +import cromwell.core.{WorkflowId, _} + +case class JobStoreKey(workflowId: WorkflowId, callFqn: String, index: Option[Int], attempt: Int) { + private lazy val indexString = index map { _.toString } getOrElse "NA" + lazy val tag = s"$workflowId:$callFqn:$indexString:$attempt" +} + +sealed trait JobResult +case class JobResultSuccess(returnCode: Option[Int], jobOutputs: CallOutputs) extends JobResult +case class JobResultFailure(returnCode: Option[Int], reason: Throwable, retryable: Boolean) extends JobResult + diff --git a/engine/src/main/scala/cromwell/jobstore/package.scala b/engine/src/main/scala/cromwell/jobstore/package.scala index 2e82109e0..f96ca4041 100644 --- a/engine/src/main/scala/cromwell/jobstore/package.scala +++ b/engine/src/main/scala/cromwell/jobstore/package.scala @@ -1,14 +1,8 @@ package cromwell -import cromwell.core.{JobKey, JobOutputs, WorkflowId} +import cromwell.core.{JobKey, WorkflowId} package object jobstore { - case class JobStoreKey(workflowId: WorkflowId, callFqn: String, index: Option[Int], attempt: Int) - - sealed trait JobResult - case class JobResultSuccess(returnCode: Option[Int], jobOutputs: JobOutputs) extends JobResult - case class JobResultFailure(returnCode: Option[Int], reason: Throwable, retryable: Boolean) extends JobResult - implicit class EnhancedJobKey(val jobKey: JobKey) extends AnyVal { def toJobStoreKey(workflowId: WorkflowId): JobStoreKey = JobStoreKey(workflowId, jobKey.scope.fullyQualifiedName, jobKey.index, jobKey.attempt) } diff --git 
a/engine/src/main/scala/cromwell/logging/TerminalLayout.scala b/engine/src/main/scala/cromwell/logging/TerminalLayout.scala index d18545d09..b157563dd 100644 --- a/engine/src/main/scala/cromwell/logging/TerminalLayout.scala +++ b/engine/src/main/scala/cromwell/logging/TerminalLayout.scala @@ -7,7 +7,7 @@ import ch.qos.logback.classic.Level import ch.qos.logback.classic.pattern.ThrowableProxyConverter import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.core.LayoutBase -import cromwell.util.TerminalUtil +import lenthall.util.TerminalUtil object TerminalLayout { val Converter = new ThrowableProxyConverter diff --git a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala index 3289e9df4..6b0e7d765 100644 --- a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala +++ b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala @@ -1,17 +1,41 @@ package cromwell.server -import akka.actor.SupervisorStrategy.Escalate -import akka.actor.{Actor, ActorInitializationException, ActorRef, OneForOneStrategy} +import akka.actor.SupervisorStrategy.{Escalate, Restart} +import akka.actor.{Actor, ActorInitializationException, ActorLogging, ActorRef, OneForOneStrategy} import akka.event.Logging +import akka.http.scaladsl.Http +import akka.pattern.GracefulStopSupport import akka.routing.RoundRobinPool +import akka.stream.ActorMaterializer import com.typesafe.config.ConfigFactory +import cromwell.core.actor.StreamActorHelper.ActorRestartException +import cromwell.core.io.Throttle +import cromwell.core.{Dispatcher, DockerConfiguration, DockerLocalLookup, DockerRemoteLookup} +import cromwell.docker.DockerHashActor +import cromwell.docker.DockerHashActor.DockerHashContext +import cromwell.docker.local.DockerCliFlow +import cromwell.docker.registryv2.flows.HttpFlowWithRetry.ContextWithRequest +import cromwell.docker.registryv2.flows.dockerhub.DockerHubFlow +import 
cromwell.docker.registryv2.flows.gcr.GoogleFlow +import cromwell.docker.registryv2.flows.quay.QuayFlow +import cromwell.engine.backend.{BackendSingletonCollection, CromwellBackends} +import cromwell.engine.io.IoActor import cromwell.engine.workflow.WorkflowManagerActor +import cromwell.engine.workflow.WorkflowManagerActor.AbortAllWorkflowsCommand import cromwell.engine.workflow.lifecycle.CopyWorkflowLogsActor -import cromwell.engine.workflow.lifecycle.execution.callcaching.{CallCache, CallCacheReadActor} +import cromwell.engine.workflow.lifecycle.execution.callcaching.{CallCache, CallCacheReadActor, CallCacheWriteActor} +import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import cromwell.engine.workflow.workflowstore.{SqlWorkflowStore, WorkflowStore, WorkflowStoreActor} import cromwell.jobstore.{JobStore, JobStoreActor, SqlJobStore} import cromwell.services.{ServiceRegistryActor, SingletonServicesStore} -import lenthall.config.ScalaConfig.EnhancedScalaConfig +import cromwell.subworkflowstore.{SqlSubWorkflowStore, SubWorkflowStoreActor} +import cromwell.util.GracefulShutdownHelper +import net.ceedubs.ficus.Ficus._ + +import scala.concurrent.Await +import scala.concurrent.duration._ +import scala.language.postfixOps +import scala.util.{Failure, Success, Try} /** * An actor which serves as the lord protector for the rest of Cromwell, allowing us to have more fine grain @@ -22,39 +46,112 @@ import lenthall.config.ScalaConfig.EnhancedScalaConfig * * If any of the actors created by CromwellRootActor fail to initialize the ActorSystem will die, which means that * Cromwell will fail to start in a bad state regardless of the entry point. + * + * READ THIS: If you add a "system-level" actor here, make sure to consider what should be its + * position in the shutdown process and modify CromwellShutdown accordingly. 
*/ - abstract class CromwellRootActor extends Actor { +abstract class CromwellRootActor(gracefulShutdown: Boolean, abortJobsOnTerminate: Boolean)(implicit materializer: ActorMaterializer) extends Actor with ActorLogging with GracefulShutdownHelper { import CromwellRootActor._ private val logger = Logging(context.system, this) private val config = ConfigFactory.load() + private implicit val system = context.system - lazy val serviceRegistryActor: ActorRef = context.actorOf(ServiceRegistryActor.props(config), "ServiceRegistryActor") - lazy val numberOfWorkflowLogCopyWorkers = config.getConfig("system").getIntOr("number-of-workflow-log-copy-workers", default=DefaultNumberOfWorkflowLogCopyWorkers) + val serverMode: Boolean - lazy val workflowLogCopyRouter: ActorRef = context.actorOf(RoundRobinPool(numberOfWorkflowLogCopyWorkers) - .withSupervisorStrategy(CopyWorkflowLogsActor.strategy) - .props(CopyWorkflowLogsActor.props(serviceRegistryActor)), - "WorkflowLogCopyRouter") + lazy val systemConfig = config.getConfig("system") + lazy val serviceRegistryActor: ActorRef = context.actorOf(ServiceRegistryActor.props(config), "ServiceRegistryActor") + lazy val numberOfWorkflowLogCopyWorkers = systemConfig.as[Option[Int]]("number-of-workflow-log-copy-workers").getOrElse(DefaultNumberOfWorkflowLogCopyWorkers) lazy val workflowStore: WorkflowStore = SqlWorkflowStore(SingletonServicesStore.databaseInterface) - lazy val workflowStoreActor = context.actorOf(WorkflowStoreActor.props(workflowStore, serviceRegistryActor), "WorkflowStoreActor") + lazy val workflowStoreActor = context.actorOf(WorkflowStoreActor.props(workflowStore, serviceRegistryActor, SingletonServicesStore.databaseInterface), "WorkflowStoreActor") lazy val jobStore: JobStore = new SqlJobStore(SingletonServicesStore.databaseInterface) lazy val jobStoreActor = context.actorOf(JobStoreActor.props(jobStore), "JobStoreActor") + lazy val subWorkflowStore = new SqlSubWorkflowStore(SingletonServicesStore.databaseInterface) + 
lazy val subWorkflowStoreActor = context.actorOf(SubWorkflowStoreActor.props(subWorkflowStore), "SubWorkflowStoreActor") + + // Io Actor + lazy val throttleElements = systemConfig.as[Option[Int]]("io.number-of-requests").getOrElse(100000) + lazy val throttlePer = systemConfig.as[Option[FiniteDuration]]("io.per").getOrElse(100 seconds) + lazy val ioThrottle = Throttle(throttleElements, throttlePer, throttleElements) + lazy val ioActor = context.actorOf(IoActor.props(1000, Option(ioThrottle)), "IoActor") + + lazy val workflowLogCopyRouter: ActorRef = context.actorOf(RoundRobinPool(numberOfWorkflowLogCopyWorkers) + .withSupervisorStrategy(CopyWorkflowLogsActor.strategy) + .props(CopyWorkflowLogsActor.props(serviceRegistryActor, ioActor)), + "WorkflowLogCopyRouter") + lazy val callCache: CallCache = new CallCache(SingletonServicesStore.databaseInterface) - lazy val callCacheReadActor = context.actorOf(RoundRobinPool(25) + + lazy val numberOfCacheReadWorkers = config.getConfig("system").as[Option[Int]]("number-of-cache-read-workers").getOrElse(DefaultNumberOfCacheReadWorkers) + lazy val callCacheReadActor = context.actorOf(RoundRobinPool(numberOfCacheReadWorkers) .props(CallCacheReadActor.props(callCache)), "CallCacheReadActor") + lazy val callCacheWriteActor = context.actorOf(CallCacheWriteActor.props(callCache), "CallCacheWriteActor") + + // Docker Actor + lazy val ioEc = context.system.dispatchers.lookup(Dispatcher.IoDispatcher) + lazy val dockerConf = DockerConfiguration.instance + // Sets the number of requests that the docker actor will accept before it starts backpressuring (modulo the number of in flight requests) + lazy val dockerActorQueueSize = 500 + + lazy val dockerHttpPool = Http().superPool[ContextWithRequest[DockerHashContext]]() + lazy val googleFlow = new GoogleFlow(dockerHttpPool, dockerConf.gcrApiQueriesPer100Seconds)(ioEc, materializer, system.scheduler) + lazy val dockerHubFlow = new DockerHubFlow(dockerHttpPool)(ioEc, materializer, 
system.scheduler) + lazy val quayFlow = new QuayFlow(dockerHttpPool)(ioEc, materializer, system.scheduler) + lazy val dockerCliFlow = new DockerCliFlow()(ioEc, system.scheduler) + lazy val dockerFlows = dockerConf.method match { + case DockerLocalLookup => Seq(dockerCliFlow) + case DockerRemoteLookup => Seq(dockerHubFlow, googleFlow, quayFlow) + } + + lazy val dockerHashActor = context.actorOf(DockerHashActor.props(dockerFlows, dockerActorQueueSize, + dockerConf.cacheEntryTtl, dockerConf.cacheSize)(materializer), "DockerHashActor") + + lazy val backendSingletons = CromwellBackends.instance.get.backendLifecycleActorFactories map { + case (name, factory) => name -> (factory.backendSingletonActorProps map context.actorOf) + } + lazy val backendSingletonCollection = BackendSingletonCollection(backendSingletons) + + lazy val jobExecutionTokenDispenserActor = context.actorOf(JobExecutionTokenDispenserActor.props) + lazy val workflowManagerActor = context.actorOf( WorkflowManagerActor.props( - workflowStoreActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor), + workflowStoreActor, ioActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, callCacheWriteActor, + dockerHashActor, jobExecutionTokenDispenserActor, backendSingletonCollection, serverMode), "WorkflowManagerActor") + if (gracefulShutdown) { + // If abortJobsOnTerminate is true, aborting all workflows will be handled by the graceful shutdown process + CromwellShutdown.registerShutdownTasks( + abortJobsOnTerminate, + actorSystem = context.system, + workflowManagerActor = workflowManagerActor, + logCopyRouter = workflowLogCopyRouter, + jobStoreActor = jobStoreActor, + workflowStoreActor = workflowStoreActor, + subWorkflowStoreActor = subWorkflowStoreActor, + callCacheWriteActor = callCacheWriteActor, + ioActor = ioActor, + dockerHashActor = dockerHashActor, + serviceRegistryActor = serviceRegistryActor, + materializer = 
materializer + ) + } else if (abortJobsOnTerminate) { + // If gracefulShutdown is false but abortJobsOnTerminate is true, set up a classic JVM shutdown hook + sys.addShutdownHook { + Try(Await.result(gracefulStop(workflowManagerActor, AbortTimeout, AbortAllWorkflowsCommand), AbortTimeout)) match { + case Success(_) => logger.info("All workflows aborted") + case Failure(f) => logger.error("Failed to abort workflows", f) + } + } + } + override def receive = { - case _ => logger.error("CromwellRootActor is receiving a message. It prefers to be left alone!") + case message => logger.error(s"Unknown message received by CromwellRootActor: $message") } /** @@ -62,13 +159,16 @@ import lenthall.config.ScalaConfig.EnhancedScalaConfig * of Cromwell by passing a Throwable to the guardian. */ override val supervisorStrategy = OneForOneStrategy() { - case actorInitializationException: ActorInitializationException => throw new RuntimeException( - s"Unable to create actor for ActorRef ${actorInitializationException.getActor}", - actorInitializationException.getCause) + case _: ActorInitializationException => Escalate + case _: ActorRestartException => Restart case t => super.supervisorStrategy.decider.applyOrElse(t, (_: Any) => Escalate) } } -object CromwellRootActor { +object CromwellRootActor extends GracefulStopSupport { + import net.ceedubs.ficus.Ficus._ + val AbortTimeout = ConfigFactory.load().as[FiniteDuration]("akka.coordinated-shutdown.phases.abort-all-workflows.timeout") val DefaultNumberOfWorkflowLogCopyWorkers = 10 + val DefaultCacheTTL = 20 minutes + val DefaultNumberOfCacheReadWorkers = 25 } diff --git a/engine/src/main/scala/cromwell/server/CromwellServer.scala b/engine/src/main/scala/cromwell/server/CromwellServer.scala index c3772f3f7..c060dce86 100644 --- a/engine/src/main/scala/cromwell/server/CromwellServer.scala +++ b/engine/src/main/scala/cromwell/server/CromwellServer.scala @@ -1,70 +1,66 @@ package cromwell.server -import 
java.util.concurrent.TimeoutException +import akka.actor.{ActorContext, ActorLogging, Props} +import akka.http.scaladsl.Http +import akka.http.scaladsl.server.Directives._ +import akka.http.scaladsl.server.Route +import akka.stream.ActorMaterializer +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.webservice.{CromwellApiService, SwaggerService} -import akka.actor.Props -import akka.util.Timeout -import com.typesafe.config.{Config, ConfigFactory} -import cromwell.services.ServiceRegistryActor -import cromwell.webservice.{APIResponse, CromwellApiService, SwaggerService} -import lenthall.spray.SprayCanHttpService._ -import spray.http.HttpHeaders.`Content-Type` -import spray.http.MediaTypes._ -import spray.http.{ContentType, MediaTypes, _} -import lenthall.spray.WrappedRoute._ -import lenthall.config.ScalaConfig._ -import cromwell.webservice.WorkflowJsonSupport._ -import spray.json._ - -import scala.concurrent.{Await, Future} -import scala.concurrent.duration._ +import scala.concurrent.Future import scala.util.{Failure, Success} // Note that as per the language specification, this is instantiated lazily and only used when necessary (i.e. 
server mode) object CromwellServer { - implicit val timeout = Timeout(5.seconds) - import scala.concurrent.ExecutionContext.Implicits.global + def run(gracefulShutdown: Boolean, abortJobsOnTerminate: Boolean)(cromwellSystem: CromwellSystem): Future[Any] = { + implicit val actorSystem = cromwellSystem.actorSystem + implicit val materializer = cromwellSystem.materializer + actorSystem.actorOf(CromwellServerActor.props(cromwellSystem, gracefulShutdown, abortJobsOnTerminate), "cromwell-service") + actorSystem.whenTerminated + } +} +class CromwellServerActor(cromwellSystem: CromwellSystem, gracefulShutdown: Boolean, abortJobsOnTerminate: Boolean)(override implicit val materializer: ActorMaterializer) + extends CromwellRootActor(gracefulShutdown, abortJobsOnTerminate) + with CromwellApiService + with SwaggerService + with ActorLogging { + implicit val actorSystem = context.system + override implicit val ec = context.dispatcher + override def actorRefFactory: ActorContext = context - def run(cromwellSystem: CromwellSystem): Future[Any] = { - implicit val actorSystem = cromwellSystem.actorSystem + override val serverMode = true + + val webserviceConf = cromwellSystem.conf.getConfig("webservice") + val interface = webserviceConf.getString("interface") + val port = webserviceConf.getInt("port") + + /** + * /api routes have special meaning to devops' proxy servers. NOTE: the oauth mentioned on the /api endpoints in + * cromwell.yaml is broken unless the swagger index.html is patched. Copy/paste the code from rawls or cromiam if + * actual cromwell+swagger+oauth+/api support is needed. 
+ */ + val apiRoutes: Route = pathPrefix("api")(concat(workflowRoutes)) + val nonApiRoutes: Route = concat(engineRoutes, swaggerUiResourceRoute) + val allRoutes: Route = concat(apiRoutes, nonApiRoutes) - val service = actorSystem.actorOf(CromwellServerActor.props(cromwellSystem.conf), "cromwell-service") - val webserviceConf = cromwellSystem.conf.getConfig("webservice") + val serverBinding = Http().bindAndHandle(allRoutes, interface, port) - val interface = webserviceConf.getString("interface") - val port = webserviceConf.getInt("port") - val futureBind = service.bind(interface = interface, port = port) - futureBind andThen { - case Success(_) => - actorSystem.log.info("Cromwell service started...") - Await.result(actorSystem.whenTerminated, Duration.Inf) - case Failure(throwable) => - /* + CromwellShutdown.registerUnbindTask(actorSystem, serverBinding) + + serverBinding onComplete { + case Success(_) => actorSystem.log.info("Cromwell service started...") + case Failure(e) => + /* TODO: If/when CromwellServer behaves like a better async citizen, we may be less paranoid about our async log messages not appearing due to the actor system shutdown. For now, synchronously print to the stderr so that the user has some idea of why the server failed to start up. 
- */ - Console.err.println(s"Binding failed interface $interface port $port") - throwable.printStackTrace(Console.err) - cromwellSystem.shutdownActorSystem() - } - } -} - -class CromwellServerActor(config: Config) extends CromwellRootActor with CromwellApiService with SwaggerService { - implicit def executionContext = actorRefFactory.dispatcher - - override def actorRefFactory = context - override def receive = handleTimeouts orElse runRoute(possibleRoutes) - - val possibleRoutes = workflowRoutes.wrapped("api", config.getBooleanOr("api.routeUnwrapped")) ~ swaggerUiResourceRoute - val timeoutError = APIResponse.error(new TimeoutException("The server was not able to produce a timely response to your request.")).toJson.prettyPrint - - def handleTimeouts: Receive = { - case Timedout(_: HttpRequest) => - sender() ! HttpResponse(StatusCodes.InternalServerError, HttpEntity(ContentType(MediaTypes.`application/json`), timeoutError)) + */ + Console.err.println(s"Binding failed interface $interface port $port") + e.printStackTrace(Console.err) + cromwellSystem.shutdownActorSystem() } /* @@ -75,7 +71,7 @@ class CromwellServerActor(config: Config) extends CromwellRootActor with Cromwel } object CromwellServerActor { - def props(config: Config): Props = { - Props(new CromwellServerActor(config)) + def props(cromwellSystem: CromwellSystem, gracefulShutdown: Boolean, abortJobsOnTerminate: Boolean)(implicit materializer: ActorMaterializer): Props = { + Props(new CromwellServerActor(cromwellSystem, gracefulShutdown, abortJobsOnTerminate)).withDispatcher(EngineDispatcher) } } diff --git a/engine/src/main/scala/cromwell/server/CromwellShutdown.scala b/engine/src/main/scala/cromwell/server/CromwellShutdown.scala new file mode 100644 index 000000000..bbb11193a --- /dev/null +++ b/engine/src/main/scala/cromwell/server/CromwellShutdown.scala @@ -0,0 +1,208 @@ +package cromwell.server + +import java.util.concurrent.atomic.AtomicBoolean + +import akka.Done +import akka.actor.{ActorRef, 
ActorSystem, CoordinatedShutdown, _} +import akka.http.scaladsl.Http +import akka.pattern.{AskTimeoutException, GracefulStopSupport} +import akka.routing.Broadcast +import akka.stream.ActorMaterializer +import akka.util.Timeout +import cats.instances.future._ +import cats.syntax.functor._ +import cromwell.engine.workflow.WorkflowManagerActor.{AbortAllWorkflowsCommand, PreventNewWorkflowsFromStarting} +import cromwell.services.SingletonServicesStore +import cromwell.util.GracefulShutdownHelper.ShutdownCommand +import org.slf4j.LoggerFactory + +import scala.concurrent.duration._ +import scala.concurrent.{ExecutionContext, Future} +import scala.util.{Failure, Success} + +/** + * Collection of methods and objects used to control Cromwell graceful shutdown process. + */ +object CromwellShutdown extends GracefulStopSupport { + private val logger = LoggerFactory.getLogger("CromwellShutdown") + + // Includes DB writing actors, I/O Actor and DockerHashActor + private val PhaseStopIoActivity = "stop-io-activity" + // Shutdown phase allocated when "abort-jobs-on-terminate" is true to give time to the system to abort all workflows + // This phase is at the same level as PhaseServiceRequestsDone in the dependency graph. + private val PhaseAbortAllWorkflows = "abort-all-workflows" + + private val _shutdownInProgress: AtomicBoolean = new AtomicBoolean(false) + private var _coordinatedShutdown: Option[CoordinatedShutdown] = None + + /** + * Single instance of CoordinatedShutdown. Assumes only one ActorSystem. + */ + def instance(implicit actorSystem: ActorSystem): CoordinatedShutdown = synchronized { + _coordinatedShutdown match { + case Some(v) => v + case None => + val coordinatedShutdown = CoordinatedShutdown(actorSystem) + _coordinatedShutdown = Option(coordinatedShutdown) + coordinatedShutdown + } + } + + /** + * Returns true of the coordinated shutdown process is in progress. False otherwise. 
+ */ + def shutdownInProgress(): Boolean = _shutdownInProgress.get() + + /** + * Register a task to unbind from the port during the ServiceUnbind phase. + */ + def registerUnbindTask(actorSystem: ActorSystem, serverBinding: Future[Http.ServerBinding]) = { + instance(actorSystem).addTask(CoordinatedShutdown.PhaseServiceUnbind, "UnbindingServerPort") { () => + // At this point it's still safe to schedule work on the actor system's dispatcher + implicit val ec = actorSystem.dispatcher + for { + binding <- serverBinding + _ <- binding.unbind() + _ = logger.info("Http server unbound.") + } yield Done + } + } + + /** + * Register tasks on the coordinated shutdown instance allowing a controlled, ordered shutdown process + * meant to prevent data loss and ensure consistency. + * Calling this method will add a JVM shutdown hook. + */ + def registerShutdownTasks( + abortJobsOnTerminate: Boolean, + actorSystem: ActorSystem, + workflowManagerActor: ActorRef, + logCopyRouter: ActorRef, + jobStoreActor: ActorRef, + workflowStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheWriteActor: ActorRef, + ioActor: ActorRef, + dockerHashActor: ActorRef, + serviceRegistryActor: ActorRef, + materializer: ActorMaterializer + ): Unit = { + + val coordinatedShutdown = this.instance(actorSystem) + + def shutdownActor(actor: ActorRef, + phase: String, + message: AnyRef, + customTimeout: Option[FiniteDuration] = None)(implicit executionContext: ExecutionContext) = { + coordinatedShutdown.addTask(phase, s"stop${actor.path.name.capitalize}") { () => + val action = gracefulStop(actor, customTimeout.getOrElse(coordinatedShutdown.timeout(phase)), message) + + action onComplete { + case Success(_) => logger.info(s"${actor.path.name} stopped") + case Failure(_: AskTimeoutException) => + logger.error(s"Timed out trying to gracefully stop ${actor.path.name}. 
Forcefully stopping it.") + actorSystem.stop(actor) + case Failure(f) => logger.error(s"An error occurred trying to gracefully stop ${actor.path.name}.", f) + } + + action map { _ => Done } + } + } + + implicit val ec = actorSystem.dispatcher + + // 1) Stop starting new workflows. This will stop the WMA from sending pull messages to the WorkflowStore + coordinatedShutdown.addTask(CoordinatedShutdown.PhaseBeforeServiceUnbind, "stopWorkflowPulling") { () => + import akka.pattern.ask + _shutdownInProgress.set(true) + implicit val timeout = Timeout(coordinatedShutdown.timeout(CoordinatedShutdown.PhaseBeforeServiceUnbind)) + (workflowManagerActor ? PreventNewWorkflowsFromStarting) map { _ => + logger.info("Workflow polling stopped") + Done + } + } + + /* 2) The socket is unbound from the port in the CoordinatedShutdown.PhaseServiceUnbind + * See cromwell.engine.server.CromwellServer + */ + + /* 3) Finish processing all requests: + * - Stop the WorkflowStore: The port is not bound anymore so we can't have new submissions. + * Process what's left in the message queue and stop. + * Note that it's possible that some submissions are still asynchronously being prepared at the + * akka http API layer (CromwellApiService) to be sent to the WorkflowStore. + * Those submissions might be lost if the WorkflowStore shuts itself down when it's finished processing its current work. + * In that case the "ask" over in the CromwellAPIService will fail with a AskTimeoutException and should be handled appropriately. + * This process still ensures that no submission can make it to the database without a response being sent back to the client. + * + * - Stop WorkflowManagerActor: We've already stopped starting new workflows but the Running workflows are still + * going. This is tricky because all the actor hierarchy under the WMA can be in a variety of state combinations. 
+ * Specifically there is an asynchronous gap in several cases between emission of messages towards engine level + * actors (job store, cache store, etc...) and emission of messages towards the metadata service for the same logical + * event (e.g: job complete). + * The current behavior upon restart however is to re-play the graph, skipping execution of completed jobs (determined by + * engine job store) but still re-submitting all related metadata events. This is likely sub-optimal, but is used here + * to simply stop the WMA (which will trigger all its descendants to be stopped recursively) without more coordination. + * Indeed even if the actor is stopped in between the above mentioned gap, metadata will be re-submitted anyway on restart, + * even for completed jobs. + * + * - Stop the LogCopyRouter: it can generate metadata events and must therefore be stopped before the service registry. + * Wrap the ShutdownCommand in a Broadcast message so the router forwards it to all its routees + * Use the ShutdownCommand because a PoisonPill could stop the routees in the middle of "transaction" + * with the IoActor. The routees handle the ShutdownCommand properly and shutdown only when they have + * no outstanding requests to the IoActor. When all routees are dead the router automatically stops itself. + */ + shutdownActor(workflowStoreActor, CoordinatedShutdown.PhaseServiceRequestsDone, ShutdownCommand) + shutdownActor(logCopyRouter, CoordinatedShutdown.PhaseServiceRequestsDone, Broadcast(ShutdownCommand)) + + /* + * Aborting is only a special case of shutdown. Instead of sending a PoisonPill, send a AbortAllWorkflowsCommand + * Also attach this task to a special shutdown phase allowing for a longer timeout. 
+ */ + if (abortJobsOnTerminate) { + val abortTimeout = coordinatedShutdown.timeout(PhaseAbortAllWorkflows) + shutdownActor(workflowManagerActor, PhaseAbortAllWorkflows, AbortAllWorkflowsCommand, Option(abortTimeout)) + } else { + // This is a pretty rough shutdown of the WMA, depending on which route we go (full let it crash, or more fine grained + // state management) we might want to revisit this way of terminating the WMA and its descendants. + shutdownActor(workflowManagerActor, CoordinatedShutdown.PhaseServiceRequestsDone, PoisonPill) + } + + /* 4) Shutdown connection pools + * This will close all akka http opened connection pools tied to the actor system. + * The pools stop accepting new work but are given a chance to execute the work submitted prior to the shutdown call. + * When this future returns, all outstanding connections to client will be terminated. + * Note that this also includes connection pools like the one used to lookup docker hashes. + */ + coordinatedShutdown.addTask(CoordinatedShutdown.PhaseServiceStop, "TerminatingConnections") { () => + Http(actorSystem).shutdownAllConnectionPools() as { + logger.info("Connection pools shut down") + Done + } + } + + /* 5) Stop system level actors that require writing to the database or I/O + * - SubWorkflowStoreActor + * - JobStoreActor + * - CallCacheWriteActor + * - ServiceRegistryActor + * - DockerHashActor + * - IoActor + */ + List(subWorkflowStoreActor, jobStoreActor, callCacheWriteActor, serviceRegistryActor, dockerHashActor, ioActor) foreach { + shutdownActor(_, PhaseStopIoActivity, ShutdownCommand) + } + + // 6) Close database and stream materializer + coordinatedShutdown.addTask(CoordinatedShutdown.PhaseBeforeActorSystemTerminate, "closeDatabase") { () => + SingletonServicesStore.databaseInterface.close() + logger.info("Database closed") + Future.successful(Done) + } + coordinatedShutdown.addTask(CoordinatedShutdown.PhaseBeforeActorSystemTerminate, "shutdownMaterializer") { () => + 
materializer.shutdown() + logger.info("Stream materializer shut down") + Future.successful(Done) + } + } +} diff --git a/engine/src/main/scala/cromwell/server/CromwellSystem.scala b/engine/src/main/scala/cromwell/server/CromwellSystem.scala index d125b8c9c..862c53d7d 100644 --- a/engine/src/main/scala/cromwell/server/CromwellSystem.scala +++ b/engine/src/main/scala/cromwell/server/CromwellSystem.scala @@ -1,18 +1,47 @@ package cromwell.server -import akka.actor.ActorSystem +import akka.actor.{ActorSystem, Terminated} +import akka.http.scaladsl.Http +import akka.stream.ActorMaterializer import com.typesafe.config.ConfigFactory import cromwell.engine.backend.{BackendConfiguration, CromwellBackends} -import org.slf4j.LoggerFactory +import cromwell.services.SingletonServicesStore + +import scala.concurrent.Future trait CromwellSystem { + /* + Initialize the services-store (aka a collection of DAOs), if they haven't been already, before starting any services + such as Metadata refresh and especially HTTP server binding. Some services like HTTP binding terminate the app when + they failed to start, while the DAOs were still in the middle of non-atomic transaction such as a database upgrade. 
+ + This is the closest we have to: + - https://github.com/broadinstitute/firecloud-orchestration/blob/3c9482b/DEVNOTES.md + - https://github.com/broadinstitute/clio/blob/7253ec0/clio-server/src/main/scala/org/broadinstitute/clio/server/service/ServerService.scala#L58-L66 + - https://en.wikipedia.org/wiki/Data_access_object + */ + SingletonServicesStore + protected def systemName = "cromwell-system" - protected def newActorSystem(): ActorSystem = ActorSystem(systemName) val conf = ConfigFactory.load() - val logger = LoggerFactory.getLogger(getClass.getName) + + protected def newActorSystem(): ActorSystem = ActorSystem(systemName, conf) implicit final lazy val actorSystem = newActorSystem() + implicit final lazy val materializer = ActorMaterializer() + implicit private final lazy val ec = actorSystem.dispatcher - def shutdownActorSystem(): Unit = actorSystem.terminate() + def shutdownActorSystem(): Future[Terminated] = { + Http().shutdownAllConnectionPools() flatMap { _ => + shutdownMaterializerAndActorSystem() + } recoverWith { + case _ => shutdownMaterializerAndActorSystem() + } + } + + private def shutdownMaterializerAndActorSystem() = { + materializer.shutdown() + actorSystem.terminate() + } CromwellBackends.initBackends(BackendConfiguration.AllBackendEntries) } diff --git a/engine/src/main/scala/cromwell/subworkflowstore/EmptySubWorkflowStoreActor.scala b/engine/src/main/scala/cromwell/subworkflowstore/EmptySubWorkflowStoreActor.scala new file mode 100644 index 000000000..29b545f70 --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/EmptySubWorkflowStoreActor.scala @@ -0,0 +1,18 @@ +package cromwell.subworkflowstore + +import akka.actor.{Actor, ActorLogging, Props} +import cromwell.subworkflowstore.SubWorkflowStoreActor._ +import cromwell.core.Dispatcher.EngineDispatcher + +class EmptySubWorkflowStoreActor extends Actor with ActorLogging { + override def receive: Receive = { + case register: RegisterSubWorkflow => sender() ! 
SubWorkflowStoreRegisterSuccess(register) + case query: QuerySubWorkflow => sender() ! SubWorkflowNotFound(query) + case _: WorkflowComplete => // No-op! + case unknown => log.error(s"SubWorkflowStoreActor received unknown message: $unknown") + } +} + +object EmptySubWorkflowStoreActor { + def props: Props = Props(new EmptySubWorkflowStoreActor()).withDispatcher(EngineDispatcher) +} diff --git a/engine/src/main/scala/cromwell/subworkflowstore/SqlSubWorkflowStore.scala b/engine/src/main/scala/cromwell/subworkflowstore/SqlSubWorkflowStore.scala new file mode 100644 index 000000000..64f21275f --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/SqlSubWorkflowStore.scala @@ -0,0 +1,31 @@ +package cromwell.subworkflowstore +import cromwell.database.sql.SubWorkflowStoreSqlDatabase +import cromwell.database.sql.tables.SubWorkflowStoreEntry + +import scala.concurrent.{ExecutionContext, Future} + +class SqlSubWorkflowStore(subWorkflowStoreSqlDatabase: SubWorkflowStoreSqlDatabase) extends SubWorkflowStore { + override def addSubWorkflowStoreEntry(rootWorkflowExecutionUuid: String, + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Unit] = { + subWorkflowStoreSqlDatabase.addSubWorkflowStoreEntry( + rootWorkflowExecutionUuid, + parentWorkflowExecutionUuid, + callFullyQualifiedName, + jobIndex, + jobAttempt, + subWorkflowExecutionUuid + ) + } + + override def querySubWorkflowStore(parentWorkflowExecutionUuid: String, callFqn: String, jobIndex: Int, jobAttempt: Int)(implicit ec: ExecutionContext): Future[Option[SubWorkflowStoreEntry]] = { + subWorkflowStoreSqlDatabase.querySubWorkflowStore(parentWorkflowExecutionUuid, callFqn, jobIndex, jobAttempt) + } + + override def removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Int] = { + 
subWorkflowStoreSqlDatabase.removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid) + } +} diff --git a/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStore.scala b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStore.scala new file mode 100644 index 000000000..8ad92fa9b --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStore.scala @@ -0,0 +1,19 @@ +package cromwell.subworkflowstore + +import cromwell.database.sql.tables.SubWorkflowStoreEntry + +import scala.concurrent.{ExecutionContext, Future} + +trait SubWorkflowStore { + def addSubWorkflowStoreEntry(rootWorkflowExecutionUuid: String, + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Unit] + + def querySubWorkflowStore(parentWorkflowExecutionUuid: String, callFqn: String, jobIndex: Int, jobAttempt: Int) + (implicit ec: ExecutionContext): Future[Option[SubWorkflowStoreEntry]] + + def removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Int] +} diff --git a/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStoreActor.scala b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStoreActor.scala new file mode 100644 index 000000000..c5d76c0b8 --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStoreActor.scala @@ -0,0 +1,81 @@ +package cromwell.subworkflowstore + +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.core.ExecutionIndex._ +import cromwell.core.{JobKey, MonitoringCompanionHelper, WorkflowId} +import cromwell.database.sql.tables.SubWorkflowStoreEntry +import cromwell.subworkflowstore.SubWorkflowStoreActor._ + +import scala.concurrent.ExecutionContext +import scala.util.{Failure, Success} + +class SubWorkflowStoreActor(database: SubWorkflowStore) extends 
Actor with ActorLogging with MonitoringCompanionHelper { + + implicit val ec: ExecutionContext = context.dispatcher + + val subWorkflowStoreReceive: Receive = { + case register: RegisterSubWorkflow => registerSubWorkflow(sender(), register) + case query: QuerySubWorkflow => querySubWorkflow(sender(), query) + case complete: WorkflowComplete => workflowComplete(sender(), complete) + } + + override def receive = subWorkflowStoreReceive.orElse(monitoringReceive) + + private def registerSubWorkflow(replyTo: ActorRef, command: RegisterSubWorkflow) = { + addWork() + database.addSubWorkflowStoreEntry( + command.rootWorkflowExecutionUuid.toString, + command.parentWorkflowExecutionUuid.toString, + command.jobKey.scope.fullyQualifiedName, + command.jobKey.index.fromIndex, + command.jobKey.attempt, + command.subWorkflowExecutionUuid.toString + ) onComplete { + case Success(_) => + replyTo ! SubWorkflowStoreRegisterSuccess(command) + removeWork() + case Failure(ex) => + replyTo ! SubWorkflowStoreFailure(command, ex) + removeWork() + } + } + + private def querySubWorkflow(replyTo: ActorRef, command: QuerySubWorkflow) = { + val jobKey = command.jobKey + database.querySubWorkflowStore(command.parentWorkflowExecutionUuid.toString, jobKey.scope.fullyQualifiedName, jobKey.index.fromIndex, jobKey.attempt) onComplete { + case Success(Some(result)) => replyTo ! SubWorkflowFound(result) + case Success(None) => replyTo ! SubWorkflowNotFound(command) + case Failure(ex) => replyTo ! SubWorkflowStoreFailure(command, ex) + } + } + + private def workflowComplete(replyTo: ActorRef, command: WorkflowComplete) = { + addWork() + database.removeSubWorkflowStoreEntries(command.workflowExecutionUuid.toString) onComplete { + case Success(_) => removeWork() + case Failure(ex) => + replyTo ! 
SubWorkflowStoreFailure(command, ex) + removeWork() + } + } + +} + +object SubWorkflowStoreActor { + sealed trait SubWorkflowStoreActorCommand + case class RegisterSubWorkflow(rootWorkflowExecutionUuid: WorkflowId, parentWorkflowExecutionUuid: WorkflowId, jobKey: JobKey, subWorkflowExecutionUuid: WorkflowId) extends SubWorkflowStoreActorCommand + case class QuerySubWorkflow(parentWorkflowExecutionUuid: WorkflowId, jobKey: JobKey) extends SubWorkflowStoreActorCommand + case class WorkflowComplete(workflowExecutionUuid: WorkflowId) extends SubWorkflowStoreActorCommand + + sealed trait SubWorkflowStoreActorResponse + case class SubWorkflowStoreRegisterSuccess(command: RegisterSubWorkflow) extends SubWorkflowStoreActorResponse + case class SubWorkflowFound(subWorkflowStoreEntry: SubWorkflowStoreEntry) extends SubWorkflowStoreActorResponse + case class SubWorkflowNotFound(command: QuerySubWorkflow) extends SubWorkflowStoreActorResponse + + case class SubWorkflowStoreFailure(command: SubWorkflowStoreActorCommand, failure: Throwable) extends SubWorkflowStoreActorResponse + + def props(database: SubWorkflowStore) = Props( + new SubWorkflowStoreActor(database) + ).withDispatcher(EngineDispatcher) +} diff --git a/engine/src/main/scala/cromwell/webservice/ApiDataModels.scala b/engine/src/main/scala/cromwell/webservice/ApiDataModels.scala index c910ecce9..759abe62d 100644 --- a/engine/src/main/scala/cromwell/webservice/ApiDataModels.scala +++ b/engine/src/main/scala/cromwell/webservice/ApiDataModels.scala @@ -1,10 +1,10 @@ package cromwell.webservice +import lenthall.exception.MessageAggregation import spray.json._ -import wdl4s.values.WdlValue -import wdl4s.{FullyQualifiedName, ExceptionWithErrors} +import wdl4s.wdl.FullyQualifiedName +import wdl4s.wdl.values.WdlValue -import scala.language.postfixOps case class WorkflowStatusResponse(id: String, status: String) @@ -19,14 +19,12 @@ case class CallOutputResponse(id: String, callFqn: String, outputs: Map[FullyQua case class 
WorkflowMetadataQueryParameters(outputs: Boolean = true, timings: Boolean = true) object APIResponse { - import WorkflowJsonSupport._ - import spray.httpx.SprayJsonSupport._ private def constructFailureResponse(status: String, ex: Throwable) ={ ex match { - case exceptionWithErrors: ExceptionWithErrors => - FailureResponse(status, exceptionWithErrors.message, - Option(JsArray(exceptionWithErrors.errors.list.toList.map(JsString(_)).toVector))) + case exceptionWithErrors: MessageAggregation => + FailureResponse(status, exceptionWithErrors.getMessage, + Option(JsArray(exceptionWithErrors.errorMessages.toList.map(JsString(_)).toVector))) case e: Throwable => FailureResponse(status, e.getMessage, None) } } diff --git a/engine/src/main/scala/cromwell/webservice/CromwellApiHandler.scala b/engine/src/main/scala/cromwell/webservice/CromwellApiHandler.scala deleted file mode 100644 index d7afa5834..000000000 --- a/engine/src/main/scala/cromwell/webservice/CromwellApiHandler.scala +++ /dev/null @@ -1,76 +0,0 @@ -package cromwell.webservice - -import akka.actor.{Actor, ActorRef, Props} -import akka.event.Logging -import com.typesafe.config.ConfigFactory -import cromwell.core._ -import cromwell.engine.workflow.WorkflowManagerActor -import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException -import cromwell.engine.workflow.workflowstore.WorkflowStoreActor -import cromwell.webservice.PerRequest.RequestComplete -import cromwell.webservice.metadata.WorkflowQueryPagination -import spray.http.{StatusCodes, Uri} -import spray.httpx.SprayJsonSupport._ - -import scala.language.postfixOps -import scalaz.NonEmptyList - -object CromwellApiHandler { - def props(requestHandlerActor: ActorRef): Props = { - Props(new CromwellApiHandler(requestHandlerActor)) - } - - sealed trait ApiHandlerMessage - - final case class ApiHandlerWorkflowSubmit(source: WorkflowSourceFiles) extends ApiHandlerMessage - final case class ApiHandlerWorkflowSubmitBatch(sources: 
NonEmptyList[WorkflowSourceFiles]) extends ApiHandlerMessage - final case class ApiHandlerWorkflowQuery(uri: Uri, parameters: Seq[(String, String)]) extends ApiHandlerMessage - final case class ApiHandlerWorkflowStatus(id: WorkflowId) extends ApiHandlerMessage - final case class ApiHandlerWorkflowOutputs(id: WorkflowId) extends ApiHandlerMessage - final case class ApiHandlerWorkflowAbort(id: WorkflowId, manager: ActorRef) extends ApiHandlerMessage - final case class ApiHandlerCallOutputs(id: WorkflowId, callFqn: String) extends ApiHandlerMessage - final case class ApiHandlerCallStdoutStderr(id: WorkflowId, callFqn: String) extends ApiHandlerMessage - final case class ApiHandlerWorkflowStdoutStderr(id: WorkflowId) extends ApiHandlerMessage - final case class ApiHandlerCallCaching(id: WorkflowId, parameters: QueryParameters, callName: Option[String]) extends ApiHandlerMessage - case object ApiHandlerEngineStats extends ApiHandlerMessage -} - -class CromwellApiHandler(requestHandlerActor: ActorRef) extends Actor with WorkflowQueryPagination { - import CromwellApiHandler._ - import WorkflowJsonSupport._ - - val log = Logging(context.system, classOf[CromwellApiHandler]) - val conf = ConfigFactory.load() - - def callNotFound(callFqn: String, id: WorkflowId) = { - RequestComplete(StatusCodes.NotFound, APIResponse.error( - new RuntimeException(s"Call $callFqn not found for workflow '$id'."))) - } - - private def error(t: Throwable)(f: Throwable => RequestComplete[_]): Unit = context.parent ! f(t) - - override def receive = { - case ApiHandlerEngineStats => requestHandlerActor ! WorkflowManagerActor.EngineStatsCommand - case stats: EngineStatsActor.EngineStats => context.parent ! RequestComplete(StatusCodes.OK, stats) - case ApiHandlerWorkflowAbort(id, manager) => requestHandlerActor ! WorkflowStoreActor.AbortWorkflow(id, manager) - case WorkflowStoreActor.WorkflowAborted(id) => - context.parent ! 
RequestComplete(StatusCodes.OK, WorkflowAbortResponse(id.toString, WorkflowAborted.toString)) - case WorkflowStoreActor.WorkflowAbortFailed(_, e) => - error(e) { - case _: IllegalStateException => RequestComplete(StatusCodes.Forbidden, APIResponse.error(e)) - case _: WorkflowNotFoundException => RequestComplete(StatusCodes.NotFound, APIResponse.error(e)) - case _ => RequestComplete(StatusCodes.InternalServerError, APIResponse.error(e)) - } - - case ApiHandlerWorkflowSubmit(source) => requestHandlerActor ! WorkflowStoreActor.SubmitWorkflow(source) - - case WorkflowStoreActor.WorkflowSubmittedToStore(id) => - context.parent ! RequestComplete(StatusCodes.Created, WorkflowSubmitResponse(id.toString, WorkflowSubmitted.toString)) - - case ApiHandlerWorkflowSubmitBatch(sources) => requestHandlerActor ! WorkflowStoreActor.BatchSubmitWorkflows(sources) - - case WorkflowStoreActor.WorkflowsBatchSubmittedToStore(ids) => - val responses = ids map { id => WorkflowSubmitResponse(id.toString, WorkflowSubmitted.toString) } - context.parent ! 
RequestComplete(StatusCodes.OK, responses.list.toList) - } -} diff --git a/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala b/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala index 170ed3020..754f5d793 100644 --- a/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala +++ b/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala @@ -1,207 +1,300 @@ package cromwell.webservice -import akka.actor._ -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import java.util.UUID + +import akka.actor.{ActorRef, ActorRefFactory} +import akka.http.scaladsl.server.Directives._ + +import scala.concurrent.{ExecutionContext, Future} +import akka.http.scaladsl.model._ +import akka.http.scaladsl.model.Multipart.BodyPart +import akka.stream.ActorMaterializer import cromwell.engine.backend.BackendConfiguration +import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ +import cromwell.core.{WorkflowAborted, WorkflowId, WorkflowSubmitted} +import cromwell.core.Dispatcher.ApiDispatcher +import cromwell.engine.workflow.workflowstore.{WorkflowStoreActor, WorkflowStoreEngineActor, WorkflowStoreSubmitActor} +import akka.pattern.{AskTimeoutException, ask} +import akka.util.{ByteString, Timeout} +import net.ceedubs.ficus.Ficus._ +import cromwell.engine.workflow.WorkflowManagerActor import cromwell.services.metadata.MetadataService._ -import cromwell.webservice.WorkflowJsonSupport._ -import cromwell.webservice.metadata.MetadataBuilderActor -import lenthall.spray.SwaggerUiResourceHttpService -import spray.http.MediaTypes._ -import spray.http._ -import spray.httpx.SprayJsonSupport._ -import spray.json._ -import spray.routing._ +import cromwell.webservice.metadata.{MetadataBuilderActor, WorkflowQueryPagination} +import cromwell.webservice.metadata.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse, MetadataBuilderActorResponse} +import WorkflowJsonSupport._ +import akka.http.scaladsl.server.Route +import 
cats.data.NonEmptyList +import cats.data.Validated.{Invalid, Valid} +import com.typesafe.config.ConfigFactory +import cromwell.core.labels.Labels +import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActor.{BuiltCallCacheDiffResponse, CachedCallNotFoundException, CallCacheDiffActorResponse, FailedCallCacheDiffResponse} +import cromwell.engine.workflow.lifecycle.execution.callcaching.{CallCacheDiffActor, CallCacheDiffQueryParameter} +import cromwell.engine.workflow.workflowstore.WorkflowStoreEngineActor.WorkflowStoreEngineAbortResponse +import cromwell.server.CromwellShutdown +import cromwell.webservice.LabelsManagerActor._ +import lenthall.exception.AggregatedMessageException +import spray.json.JsObject -import scalaz.NonEmptyList +import scala.concurrent.duration._ +import scala.util.{Failure, Success, Try} -trait SwaggerService extends SwaggerUiResourceHttpService { - override def swaggerServiceName = "cromwell" +trait CromwellApiService { + import cromwell.webservice.CromwellApiService._ - override def swaggerUiVersion = "2.1.1" -} + implicit def actorRefFactory: ActorRefFactory + implicit val materializer: ActorMaterializer + implicit val ec: ExecutionContext -trait CromwellApiService extends HttpService with PerRequestCreator { - val workflowManagerActor: ActorRef val workflowStoreActor: ActorRef + val workflowManagerActor: ActorRef val serviceRegistryActor: ActorRef - def metadataBuilderProps: Props = MetadataBuilderActor.props(serviceRegistryActor) - - def handleMetadataRequest(message: AnyRef): Route = { - requestContext => - perRequest(requestContext, metadataBuilderProps, message) - } - - private def failBadRequest(exception: Exception, statusCode: StatusCode = StatusCodes.BadRequest) = respondWithMediaType(`application/json`) { - complete(statusCode, APIResponse.fail(exception).toJson.prettyPrint) - } - - val workflowRoutes = queryRoute ~ 
queryPostRoute ~ workflowOutputsRoute ~ submitRoute ~ submitBatchRoute ~ - workflowLogsRoute ~ abortRoute ~ metadataRoute ~ timingRoute ~ statusRoute ~ backendRoute ~ statsRoute + // Derive timeouts (implicit and not) from akka http's request timeout since there's no point in being higher than that + implicit val duration = ConfigFactory.load().as[FiniteDuration]("akka.http.server.request-timeout") + implicit val timeout: Timeout = duration - private def withRecognizedWorkflowId(possibleWorkflowId: String)(recognizedWorkflowId: WorkflowId => Route): Route = { - def callback(requestContext: RequestContext) = new ValidationCallback { - // The submitted value is malformed as a UUID and therefore not possibly recognized. - override def onMalformed(possibleWorkflowId: String): Unit = { - val exception = new RuntimeException(s"Invalid workflow ID: '$possibleWorkflowId'.") - failBadRequest(exception)(requestContext) - } - - override def onUnrecognized(possibleWorkflowId: String): Unit = { - val exception = new RuntimeException(s"Unrecognized workflow ID: $possibleWorkflowId") - failBadRequest(exception, StatusCodes.NotFound)(requestContext) - } - - override def onFailure(possibleWorkflowId: String, throwable: Throwable): Unit = { - val exception = new RuntimeException(s"Failed lookup attempt for workflow ID $possibleWorkflowId", throwable) - failBadRequest(exception)(requestContext) + val engineRoutes = concat( + path("engine" / Segment / "stats") { version => + get { + onComplete(workflowManagerActor.ask(WorkflowManagerActor.EngineStatsCommand).mapTo[EngineStatsActor.EngineStats]) { + case Success(stats) => complete(stats) + case Failure(_) => new RuntimeException("Unable to gather engine stats").failRequest(StatusCodes.InternalServerError) + } } + }, - override def onRecognized(workflowId: WorkflowId): Unit = { - recognizedWorkflowId(workflowId)(requestContext) - } + path("engine" / Segment / "version") { version => + get { complete(versionResponse) } } + ) - 
requestContext => { - val message = ValidateWorkflowIdAndExecute(possibleWorkflowId, callback(requestContext)) - serviceRegistryActor ! message - } - } - - def statusRoute = + val workflowRoutes = + path("workflows" / Segment / "backends") { version => + get { complete(backendResponse) } + } ~ path("workflows" / Segment / Segment / "status") { (version, possibleWorkflowId) => + get { metadataBuilderRequest(possibleWorkflowId, (w: WorkflowId) => GetStatus(w)) } + } ~ + path("workflows" / Segment / Segment / "outputs") { (version, possibleWorkflowId) => + get { metadataBuilderRequest(possibleWorkflowId, (w: WorkflowId) => WorkflowOutputs(w)) } + } ~ + path("workflows" / Segment / Segment / "logs") { (version, possibleWorkflowId) => + get { metadataBuilderRequest(possibleWorkflowId, (w: WorkflowId) => GetLogs(w)) } + } ~ + path("workflows" / Segment / "query") { _ => get { - withRecognizedWorkflowId(possibleWorkflowId) { id => - handleMetadataRequest(GetStatus(id)) + parameterSeq { parameters => + extractUri { uri => + metadataQueryRequest(parameters, uri) + } + } + } ~ + post { + entity(as[Seq[Map[String, String]]]) { parameterMap => + extractUri { uri => + metadataQueryRequest(parameterMap.flatMap(_.toSeq), uri) + } } } - } + } ~ + encodeResponse { + path("workflows" / Segment / Segment / "metadata") { (version, possibleWorkflowId) => + parameters(('includeKey.*, 'excludeKey.*, 'expandSubWorkflows.as[Boolean].?)) { (includeKeys, excludeKeys, expandSubWorkflowsOption) => + val includeKeysOption = NonEmptyList.fromList(includeKeys.toList) + val excludeKeysOption = NonEmptyList.fromList(excludeKeys.toList) + val expandSubWorkflows = expandSubWorkflowsOption.getOrElse(false) - def queryRoute = - path("workflows" / Segment / "query") { version => + (includeKeysOption, excludeKeysOption) match { + case (Some(_), Some(_)) => + val e = new IllegalArgumentException("includeKey and excludeKey may not be specified together") + e.failRequest(StatusCodes.BadRequest) + case (_, 
_) => metadataBuilderRequest(possibleWorkflowId, (w: WorkflowId) => GetSingleWorkflowMetadataAction(w, includeKeysOption, excludeKeysOption, expandSubWorkflows)) + } + } + } + } ~ + path("workflows" / Segment / "callcaching" / "diff") { version => parameterSeq { parameters => get { - requestContext => - perRequest(requestContext, metadataBuilderProps, WorkflowQuery(requestContext.request.uri, parameters)) + CallCacheDiffQueryParameter.fromParameters(parameters) match { + case Valid(queryParameter) => + val diffActor = actorRefFactory.actorOf(CallCacheDiffActor.props(serviceRegistryActor), "CallCacheDiffActor-" + UUID.randomUUID()) + onComplete(diffActor.ask(queryParameter).mapTo[CallCacheDiffActorResponse]) { + case Success(r: BuiltCallCacheDiffResponse) => complete(r.response) + case Success(r: FailedCallCacheDiffResponse) => r.reason.errorRequest(StatusCodes.InternalServerError) + case Failure(_: AskTimeoutException) if CromwellShutdown.shutdownInProgress() => serviceShuttingDownResponse + case Failure(e: CachedCallNotFoundException) => e.errorRequest(StatusCodes.NotFound) + case Failure(e) => e.errorRequest(StatusCodes.InternalServerError) + } + case Invalid(errors) => + val e = AggregatedMessageException("Wrong parameters for call cache diff query", errors.toList) + e.errorRequest(StatusCodes.BadRequest) + } } } - } - - def queryPostRoute = - path("workflows" / Segment / "query") { version => - entity(as[Seq[Map[String, String]]]) { parameterMap => - post { - requestContext => - perRequest(requestContext, metadataBuilderProps, WorkflowQuery(requestContext.request.uri, parameterMap.flatMap(_.toSeq))) - } + } ~ + path("workflows" / Segment / Segment / "timing") { (version, possibleWorkflowId) => + onComplete(validateWorkflowId(possibleWorkflowId)) { + case Success(_) => getFromResource("workflowTimings/workflowTimings.html") + case Failure(e) => e.failRequest(StatusCodes.InternalServerError) } - } - - def abortRoute = + } ~ path("workflows" / Segment / Segment / 
"abort") { (version, possibleWorkflowId) => post { - withRecognizedWorkflowId(possibleWorkflowId) { id => - requestContext => perRequest(requestContext, CromwellApiHandler.props(workflowStoreActor), CromwellApiHandler.ApiHandlerWorkflowAbort(id, workflowManagerActor)) + val response = validateWorkflowId(possibleWorkflowId) flatMap { w => + workflowStoreActor.ask(WorkflowStoreActor.AbortWorkflow(w, workflowManagerActor)).mapTo[WorkflowStoreEngineAbortResponse] } - } - } - def submitRoute = - path("workflows" / Segment) { version => - post { - formFields("wdlSource", "workflowInputs".?, "workflowOptions".?) { (wdlSource, workflowInputs, workflowOptions) => - requestContext => - val workflowSourceFiles = WorkflowSourceFiles(wdlSource, workflowInputs.getOrElse("{}"), workflowOptions.getOrElse("{}")) - perRequest(requestContext, CromwellApiHandler.props(workflowStoreActor), CromwellApiHandler.ApiHandlerWorkflowSubmit(workflowSourceFiles)) + onComplete(response) { + case Success(WorkflowStoreEngineActor.WorkflowAborted(id)) => complete(WorkflowAbortResponse(id.toString, WorkflowAborted.toString)) + case Success(WorkflowStoreEngineActor.WorkflowAbortFailed(_, e: IllegalStateException)) => e.errorRequest(StatusCodes.Forbidden) + case Success(WorkflowStoreEngineActor.WorkflowAbortFailed(_, e: WorkflowNotFoundException)) => e.errorRequest(StatusCodes.NotFound) + case Success(WorkflowStoreEngineActor.WorkflowAbortFailed(_, e)) => e.errorRequest(StatusCodes.InternalServerError) + case Failure(_: AskTimeoutException) if CromwellShutdown.shutdownInProgress() => serviceShuttingDownResponse + case Failure(e: UnrecognizedWorkflowException) => e.failRequest(StatusCodes.NotFound) + case Failure(e: InvalidWorkflowException) => e.failRequest(StatusCodes.BadRequest) + case Failure(e) => e.errorRequest(StatusCodes.InternalServerError) } } - } + } ~ + path("workflows" / Segment / Segment / "labels") { (version, possibleWorkflowId) => + entity(as[Map[String, String]]) { parameterMap => + 
patch { + Labels.validateMapOfLabels(parameterMap) match { + case Valid(labels) => + val response = validateWorkflowId(possibleWorkflowId) flatMap { id => + val lma = actorRefFactory.actorOf(LabelsManagerActor.props(serviceRegistryActor).withDispatcher(ApiDispatcher)) + lma.ask(LabelsAddition(LabelsData(id, labels))).mapTo[LabelsManagerActorResponse] + } + onComplete(response) { + case Success(r: BuiltLabelsManagerResponse) => complete(r.response) + case Success(e: FailedLabelsManagerResponse) => e.reason.failRequest(StatusCodes.InternalServerError) + case Failure(e) => e.errorRequest(StatusCodes.InternalServerError) - def submitBatchRoute = - path("workflows" / Segment / "batch") { version => - post { - formFields("wdlSource", "workflowInputs", "workflowOptions".?) { - (wdlSource, workflowInputs, workflowOptions) => - requestContext => - import spray.json._ - workflowInputs.parseJson match { - case JsArray(Seq(x, xs@_*)) => - val nelInputses = NonEmptyList.nels(x, xs: _*) - val sources = nelInputses.map(inputs => WorkflowSourceFiles(wdlSource, inputs.compactPrint, workflowOptions.getOrElse("{}"))) - perRequest(requestContext, CromwellApiHandler.props(workflowStoreActor), CromwellApiHandler.ApiHandlerWorkflowSubmitBatch(sources)) - case JsArray(_) => failBadRequest(new RuntimeException("Nothing was submitted")) - case _ => reject } + case Invalid(e) => + val iae = new IllegalArgumentException(e.toList.mkString(",")) + iae.failRequest(StatusCodes.BadRequest) + } } } - } - - def workflowOutputsRoute = - path("workflows" / Segment / Segment / "outputs") { (version, possibleWorkflowId) => - get { - withRecognizedWorkflowId(possibleWorkflowId) { id => - handleMetadataRequest(WorkflowOutputs(id)) + } ~ + path("workflows" / Segment) { version => + post { + entity(as[Multipart.FormData]) { formData => + submitRequest(formData, true) } } + } ~ + path("workflows" / Segment / "batch") { version => + post { + entity(as[Multipart.FormData]) { formData => + 
submitRequest(formData, false) + } } } - def workflowLogsRoute = - path("workflows" / Segment / Segment / "logs") { (version, possibleWorkflowId) => - get { - withRecognizedWorkflowId(possibleWorkflowId) { id => - handleMetadataRequest(GetLogs(id)) - } + private def submitRequest(formData: Multipart.FormData, isSingleSubmission: Boolean): Route = { + val allParts: Future[Map[String, ByteString]] = formData.parts.mapAsync[(String, ByteString)](1) { + case b: BodyPart => b.toStrict(duration).map(strict => b.name -> strict.entity.data) + }.runFold(Map.empty[String, ByteString])((map, tuple) => map + tuple) + + def toResponse(workflowId: WorkflowId): WorkflowSubmitResponse = { + WorkflowSubmitResponse(workflowId.toString, WorkflowSubmitted.toString) + } + + def askSubmit(command: WorkflowStoreActor.WorkflowStoreActorSubmitCommand): Route = { + // NOTE: Do not blindly copy the akka-http -to- ask-actor pattern below without knowing the pros and cons. + onComplete(workflowStoreActor.ask(command).mapTo[WorkflowStoreSubmitActor.WorkflowStoreSubmitActorResponse]) { + case Success(w) => + w match { + case WorkflowStoreSubmitActor.WorkflowSubmittedToStore(workflowId) => + complete((StatusCodes.Created, toResponse(workflowId))) + case WorkflowStoreSubmitActor.WorkflowsBatchSubmittedToStore(workflowIds) => + complete((StatusCodes.Created, workflowIds.toList map toResponse)) + case WorkflowStoreSubmitActor.WorkflowSubmitFailed(throwable) => + throwable.failRequest(StatusCodes.BadRequest) + } + case Failure(_: AskTimeoutException) if CromwellShutdown.shutdownInProgress() => serviceShuttingDownResponse + case Failure(e) => + e.failRequest(StatusCodes.InternalServerError) } } - def metadataRoute = - path("workflows" / Segment / Segment / "metadata") { (version, possibleWorkflowId) => - parameterMultiMap { parameters => - // import scalaz_ & Scalaz._ add too many slow implicits, on top of the spray and json implicits - import scalaz.syntax.std.list._ - val includeKeysOption = 
parameters.getOrElse("includeKey", List.empty).toNel - val excludeKeysOption = parameters.getOrElse("excludeKey", List.empty).toNel - (includeKeysOption, excludeKeysOption) match { - case (Some(_), Some(_)) => - failBadRequest(new IllegalArgumentException("includeKey and excludeKey may not be specified together")) - case _ => - withRecognizedWorkflowId(possibleWorkflowId) { id => - handleMetadataRequest(GetSingleWorkflowMetadataAction(id, includeKeysOption, excludeKeysOption)) - } + onComplete(allParts) { + case Success(data) => + PartialWorkflowSources.fromSubmitRoute(data, allowNoInputs = isSingleSubmission) match { + case Success(workflowSourceFiles) if isSingleSubmission && workflowSourceFiles.size == 1 => + askSubmit(WorkflowStoreActor.SubmitWorkflow(workflowSourceFiles.head)) + // Catches the case where someone has gone through the single submission endpoint w/ more than one workflow + case Success(_) if isSingleSubmission => + val e = new IllegalArgumentException("To submit more than one workflow at a time, use the batch endpoint.") + e.failRequest(StatusCodes.BadRequest) + case Success(workflowSourceFiles) => + askSubmit(WorkflowStoreActor.BatchSubmitWorkflows(NonEmptyList.fromListUnsafe(workflowSourceFiles.toList))) + case Failure(t) => t.failRequest(StatusCodes.BadRequest) } - } + case Failure(e) => e.failRequest(StatusCodes.InternalServerError) } + } - def timingRoute = - path("workflows" / Segment / Segment / "timing") { (version, possibleWorkflowId) => - withRecognizedWorkflowId(possibleWorkflowId) { id => - getFromResource("workflowTimings/workflowTimings.html") - } + private def validateWorkflowId(possibleWorkflowId: String): Future[WorkflowId] = { + Try(WorkflowId.fromString(possibleWorkflowId)) match { + case Success(w) => + serviceRegistryActor.ask(ValidateWorkflowId(w)).mapTo[WorkflowValidationResponse] map { + case RecognizedWorkflowId => w + case UnrecognizedWorkflowId => throw UnrecognizedWorkflowException(s"Unrecognized workflow ID: $w") + 
case FailedToCheckWorkflowId(t) => throw t + } + case Failure(_) => Future.failed(InvalidWorkflowException(s"Invalid workflow ID: '$possibleWorkflowId'.")) } + } - def statsRoute = - path("engine" / Segment / "stats") { version => - get { - requestContext => - perRequest(requestContext, CromwellApiHandler.props(workflowManagerActor), CromwellApiHandler.ApiHandlerEngineStats) - } + private def metadataBuilderRequest(possibleWorkflowId: String, request: WorkflowId => ReadAction): Route = { + val metadataBuilderActor = actorRefFactory.actorOf(MetadataBuilderActor.props(serviceRegistryActor).withDispatcher(ApiDispatcher), MetadataBuilderActor.uniqueActorName) + val response = validateWorkflowId(possibleWorkflowId) flatMap { w => metadataBuilderActor.ask(request(w)).mapTo[MetadataBuilderActorResponse] } + + onComplete(response) { + case Success(r: BuiltMetadataResponse) => complete(r.response) + case Success(r: FailedMetadataResponse) => r.reason.errorRequest(StatusCodes.InternalServerError) + case Failure(_: AskTimeoutException) if CromwellShutdown.shutdownInProgress() => serviceShuttingDownResponse + case Failure(e: UnrecognizedWorkflowException) => e.failRequest(StatusCodes.NotFound) + case Failure(e: InvalidWorkflowException) => e.failRequest(StatusCodes.BadRequest) + case Failure(e) => e.errorRequest(StatusCodes.InternalServerError) } + } - def backendRoute = - path("workflows" / Segment / "backends") { version => - get { - complete { - // Note that this is not using our standard per-request scheme, since the result is pre-calculated already - backendResponse + protected[this] def metadataQueryRequest(parameters: Seq[(String, String)], uri: Uri): Route = { + val response = serviceRegistryActor.ask(WorkflowQuery(parameters)).mapTo[MetadataQueryResponse] + + onComplete(response) { + case Success(w: WorkflowQuerySuccess) => + val headers = WorkflowQueryPagination.generateLinkHeaders(uri, w.meta) + respondWithHeaders(headers) { + complete(w.response) } - } + case 
Success(w: WorkflowQueryFailure) => w.reason.failRequest(StatusCodes.BadRequest) + case Failure(_: AskTimeoutException) if CromwellShutdown.shutdownInProgress() => serviceShuttingDownResponse + case Failure(e) => e.errorRequest(StatusCodes.InternalServerError) } + } +} - val backendResponse = JsObject(Map( - "supportedBackends" -> BackendConfiguration.AllBackendEntries.map(_.name).sorted.toJson, - "defaultBackend" -> BackendConfiguration.DefaultBackendEntry.name.toJson - )) +object CromwellApiService { + import spray.json._ -} + implicit class EnhancedThrowable(val e: Throwable) extends AnyVal { + def failRequest(statusCode: StatusCode): Route = complete((statusCode, APIResponse.fail(e).toJson.prettyPrint)) + def errorRequest(statusCode: StatusCode): Route = complete((statusCode, APIResponse.error(e).toJson.prettyPrint)) + } + + final case class BackendResponse(supportedBackends: List[String], defaultBackend: String) + final case class UnrecognizedWorkflowException(message: String) extends Exception(message) + final case class InvalidWorkflowException(message: String) extends Exception(message) + + val backendResponse = BackendResponse(BackendConfiguration.AllBackendEntries.map(_.name).sorted, BackendConfiguration.DefaultBackendEntry.name) + val versionResponse = JsObject(Map("cromwell" -> ConfigFactory.load("cromwell-version.conf").getConfig("version").getString("cromwell").toJson)) + val serviceShuttingDownResponse = new Exception("Cromwell service is shutting down.").failRequest(StatusCodes.ServiceUnavailable) +} diff --git a/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala b/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala index 3b83955a3..047eeccb9 100644 --- a/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala +++ b/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala @@ -19,9 +19,10 @@ final case class EngineStatsActor(workflowActors: List[ActorRef], replyTo: Actor private var jobCounts = 
Map.empty[ActorRef, Int] /* - It's possible that WorkflowActors might disappear behind us and never manage to write us back. - Instead of waiting longingly, watching a mailbox which might never receive some love instead wait - a specified period of time and assume anything which was going to reply already has + * FIXME + * Because of sub workflows there is currently no reliable way to know if we received responses from all running WEAs. + * For now, we always wait for the timeout duration before responding to give a chance to all WEAs to respond (even nested ones). + * This could be improved by having WEAs wait for their sub WEAs before sending back the response. */ val scheduledMsg = context.system.scheduler.scheduleOnce(timeout, self, ShutItDown) @@ -31,7 +32,6 @@ final case class EngineStatsActor(workflowActors: List[ActorRef], replyTo: Actor override def receive = { case JobCount(count) => jobCounts += (sender -> count) - if (jobCounts.size == workflowActors.size) reportStats() case ShutItDown => reportStats() case wompWomp => log.error("Unexpected message to EngineStatsActor: {}", wompWomp) @@ -59,5 +59,5 @@ object EngineStatsActor { final case class EngineStats(workflows: Int, jobs: Int) - val MaxTimeToWait = 30 seconds + val MaxTimeToWait = 3 seconds } diff --git a/engine/src/main/scala/cromwell/webservice/LabelsManagerActor.scala b/engine/src/main/scala/cromwell/webservice/LabelsManagerActor.scala new file mode 100644 index 000000000..7a950ba01 --- /dev/null +++ b/engine/src/main/scala/cromwell/webservice/LabelsManagerActor.scala @@ -0,0 +1,68 @@ +package cromwell.webservice + +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import cromwell.core.labels.Labels +import cromwell.core.{Dispatcher, WorkflowId, WorkflowMetadataKeys} +import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} +import cromwell.services.metadata.MetadataService._ +import cromwell.webservice.LabelsManagerActor._ +import spray.json.{DefaultJsonProtocol, 
JsObject, JsString} + +import scala.language.postfixOps + + +object LabelsManagerActor { + + def props(serviceRegistryActor: ActorRef) = Props(new LabelsManagerActor(serviceRegistryActor)).withDispatcher(Dispatcher.ApiDispatcher) + + final case class LabelsData(workflowId: WorkflowId, labels: Labels) + + sealed trait LabelsMessage { + def data: LabelsData + } + + sealed trait LabelsAction extends LabelsMessage + final case class LabelsAddition(data: LabelsData) extends LabelsAction + + sealed trait LabelsResponse extends LabelsMessage + + def processLabelsResponse(workflowId: WorkflowId, labels: Map[String, String]): JsObject = { + JsObject(Map( + WorkflowMetadataKeys.Id -> JsString(workflowId.toString), + WorkflowMetadataKeys.Labels -> JsObject(labels mapValues JsString.apply) + )) + } + + def metadataEventsToLabels(events: Iterable[MetadataEvent]): Map[String, String] = { + events collect { case MetadataEvent(MetadataKey(_, _, key), Some(MetadataValue(value, _)), _) => key.split("\\:").last -> value } toMap + } + + def labelsToMetadataEvents(labels: Labels, workflowId: WorkflowId): Iterable[MetadataEvent] = { + labels.value map { l => MetadataEvent(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Labels}:${l.key}"), MetadataValue(l.value)) } + } + + sealed abstract class LabelsManagerActorResponse + final case class BuiltLabelsManagerResponse(response: JsObject) extends LabelsManagerActorResponse + final case class FailedLabelsManagerResponse(reason: Throwable) extends LabelsManagerActorResponse +} + +class LabelsManagerActor(serviceRegistryActor: ActorRef) extends Actor with ActorLogging with DefaultJsonProtocol { + + implicit val ec = context.dispatcher + + private var wfId: Option[WorkflowId] = None + private var target: ActorRef = ActorRef.noSender + + def receive = { + case LabelsAddition(data) => + wfId = Option(data.workflowId) + target = sender() + serviceRegistryActor ! 
PutMetadataActionAndRespond(labelsToMetadataEvents(data.labels, data.workflowId), self) + case MetadataWriteSuccess(events) => + target ! BuiltLabelsManagerResponse(processLabelsResponse(wfId.get, metadataEventsToLabels(events))) + context stop self + case MetadataWriteFailure(failure, events @ _) => + target ! FailedLabelsManagerResponse(new RuntimeException(s"Unable to update labels for ${wfId.get} due to ${failure.getMessage}")) + context stop self + } +} diff --git a/engine/src/main/scala/cromwell/webservice/PartialWorkflowSources.scala b/engine/src/main/scala/cromwell/webservice/PartialWorkflowSources.scala new file mode 100644 index 000000000..b4e1a82dd --- /dev/null +++ b/engine/src/main/scala/cromwell/webservice/PartialWorkflowSources.scala @@ -0,0 +1,142 @@ +package cromwell.webservice + +import akka.util.ByteString +import cromwell.core.{WorkflowOptions, WorkflowOptionsJson, WorkflowSourceFilesCollection} +import wdl4s.wdl.{WorkflowJson, WorkflowSource} +import cats.data.Validated.{Invalid, Valid} +import cats.syntax.validated._ +import cats.syntax.cartesian._ +import lenthall.validation.ErrorOr.ErrorOr +import cromwell.core._ +import org.slf4j.LoggerFactory +import spray.json.{JsObject, JsValue} + +import scala.util.Try + +final case class PartialWorkflowSources(workflowSource: Option[WorkflowSource], + workflowType: Option[WorkflowType], + workflowTypeVersion: Option[WorkflowTypeVersion], + workflowInputs: Vector[WorkflowJson], + workflowInputsAux: Map[Int, WorkflowJson], + workflowOptions: Option[WorkflowOptionsJson], + customLabels: Option[WorkflowJson], + zippedImports: Option[Array[Byte]]) + +object PartialWorkflowSources { + val log = LoggerFactory.getLogger(classOf[PartialWorkflowSources]) + + def empty = PartialWorkflowSources( + workflowSource = None, + // TODO do not hardcode, especially not out here at the boundary layer good gravy + workflowType = Option("WDL"), + workflowTypeVersion = None, + workflowInputs = Vector.empty, + 
workflowInputsAux = Map.empty, + workflowOptions = None, + customLabels = None, + zippedImports = None + ) + + def fromSubmitRoute(formData: Map[String, ByteString], allowNoInputs: Boolean): Try[Seq[WorkflowSourceFilesCollection]] = { + val partialSources = Try(formData.foldLeft(PartialWorkflowSources.empty) { (partialSources: PartialWorkflowSources, kv: (String, ByteString)) => + val name = kv._1 + val data = kv._2 + + if (name == "wdlSource" || name == "workflowSource") { + if (name == "wdlSource") deprecationWarning(out = "wdlSource", in = "workflowSource") + partialSources.copy(workflowSource = Option(data.utf8String)) + } else if (name == "workflowType") { + partialSources.copy(workflowType = Option(data.utf8String)) + } else if (name == "workflowTypeVersion") { + partialSources.copy(workflowTypeVersion = Option(data.utf8String)) + } else if (name == "workflowInputs") { + partialSources.copy(workflowInputs = workflowInputs(data.utf8String)) + } else if (name.startsWith("workflowInputs_")) { + val index = name.stripPrefix("workflowInputs_").toInt + partialSources.copy(workflowInputsAux = partialSources.workflowInputsAux + (index -> data.utf8String)) + } else if (name == "workflowOptions") { + partialSources.copy(workflowOptions = Option(data.utf8String)) + } else if (name == "wdlDependencies" || name == "workflowDependencies") { + if (name == "wdlDependencies") deprecationWarning(out = "wdlDependencies", in = "workflowDependencies") + partialSources.copy(zippedImports = Option(data.toArray)) + } else if (name == "customLabels") { + partialSources.copy(customLabels = Option(data.utf8String)) + } else { + throw new IllegalArgumentException(s"Unexpected body part name: $name") + } + }) + + partialSourcesToSourceCollections(partialSources.tryToErrorOr, allowNoInputs).errorOrToTry + } + + private def workflowInputs(data: String): Vector[WorkflowJson] = { + import spray.json._ + data.parseJson match { + case JsArray(Seq(x, xs@_*)) => (Vector(x) ++ 
xs).map(_.compactPrint) + case JsArray(_) => Vector.empty + case v: JsValue => Vector(v.compactPrint) + } + } + + private def partialSourcesToSourceCollections(partialSources: ErrorOr[PartialWorkflowSources], allowNoInputs: Boolean): ErrorOr[Seq[WorkflowSourceFilesCollection]] = { + def validateInputs(pws: PartialWorkflowSources): ErrorOr[Seq[WorkflowJson]] = + (pws.workflowInputs.isEmpty, allowNoInputs) match { + case (true, true) => Vector("{}").validNel + case (true, false) => "No inputs were provided".invalidNel + case _ => + val sortedInputAuxes = pws.workflowInputsAux.toSeq.sortBy { case (index, _) => index } map { case(_, inputJson) => Option(inputJson) } + (pws.workflowInputs map { workflowInputSet: WorkflowJson => mergeMaps(Seq(Option(workflowInputSet)) ++ sortedInputAuxes).toString }).validNel + } + + def validateOptions(options: Option[WorkflowOptionsJson]): ErrorOr[WorkflowOptions] = + WorkflowOptions.fromJsonString(options.getOrElse("{}")).tryToErrorOr leftMap { _ map { i => s"Invalid workflow options provided: $i" } } + + def validateWorkflowSource(partialSource: PartialWorkflowSources): ErrorOr[WorkflowJson] = partialSource.workflowSource match { + case Some(src) => src.validNel + case _ => s"Incomplete workflow submission: $partialSource".invalidNel + } + + partialSources match { + case Valid(partialSource) => + (validateWorkflowSource(partialSource) |@| validateInputs(partialSource) |@| validateOptions(partialSource.workflowOptions)) map { + case (wfSource, wfInputs, wfOptions) => + wfInputs.map(inputsJson => WorkflowSourceFilesCollection( + workflowSource = wfSource, + workflowType = partialSource.workflowType, + workflowTypeVersion = partialSource.workflowTypeVersion, + inputsJson = inputsJson, + workflowOptionsJson = wfOptions.asPrettyJson, + labelsJson = partialSource.customLabels.getOrElse("{}"), + importsFile = partialSource.zippedImports)) } + case Invalid(err) => err.invalid + } + } + + private def deprecationWarning(out: String, in: 
String): Unit = { + val warning = + s""" + |The '$out' parameter name has been deprecated in favor of '$in'. + |Support for '$out' will be removed from future versions of Cromwell. + |Please switch to using '$in' in future submissions. + """.stripMargin + log.warn(warning) + } + + def mergeMaps(allInputs: Seq[Option[String]]): JsObject = { + val convertToMap = allInputs.map(x => toMap(x)) + JsObject(convertToMap reduce (_ ++ _)) + } + + private def toMap(someInput: Option[String]): Map[String, JsValue] = { + import spray.json._ + someInput match { + case Some(inputs: String) => inputs.parseJson match { + case JsObject(inputMap) => inputMap + case _ => + throw new RuntimeException(s"Submitted inputs couldn't be processed, please check for syntactical errors") + } + case None => Map.empty + } + } +} + diff --git a/engine/src/main/scala/cromwell/webservice/PerRequest.scala b/engine/src/main/scala/cromwell/webservice/PerRequest.scala deleted file mode 100644 index 0ae86447a..000000000 --- a/engine/src/main/scala/cromwell/webservice/PerRequest.scala +++ /dev/null @@ -1,119 +0,0 @@ -package cromwell.webservice - -import java.util.UUID - -import akka.actor.SupervisorStrategy.Stop -import akka.actor.{OneForOneStrategy, _} -import cromwell.core.Dispatcher.ApiDispatcher -import cromwell.webservice.PerRequest._ -import spray.http.StatusCodes._ -import spray.http._ -import spray.httpx.marshalling.ToResponseMarshaller -import spray.routing.RequestContext - -import scala.concurrent.duration._ -import scala.language.postfixOps - -/** - * This actor controls the lifecycle of a request. It is responsible for forwarding the initial message - * to a target handling actor. This actor waits for the target actor to signal completion (via a message), - * timeout, or handle an exception. It is this actors responsibility to respond to the request and - * shutdown itself and child actors. 
- * - * Request completion can be signaled in 2 ways: - * 1) with just a response object - * 2) with a RequestComplete message which can specify http status code as well as the response - */ -trait PerRequest extends Actor { - import context._ - - def r: RequestContext - def target: ActorRef - def message: AnyRef - def timeout: Duration - - setReceiveTimeout(timeout) - target ! message - - def receive = { - // The [Any] type parameter appears to be required for version of Scala > 2.11.2, - // the @ unchecked is required to muzzle erasure warnings. - case message: RequestComplete[Any] @ unchecked => complete(message.response)(message.marshaller) - case message: RequestCompleteWithHeaders[Any] @ unchecked => complete(message.response, message.headers:_*)(message.marshaller) - case ReceiveTimeout => complete(GatewayTimeout) - case x => - system.log.error("Unsupported response message sent to PreRequest actor: " + Option(x).getOrElse("null").toString) - complete(InternalServerError) - } - - /** - * Complete the request sending the given response and status code - * @param response to send to the caller - * @param marshaller to use for marshalling the response - * @tparam T the type of the response - * @return - */ - private def complete[T](response: T, headers: HttpHeader*)(implicit marshaller: ToResponseMarshaller[T]) = { - val additionalHeaders = None - r.withHttpResponseHeadersMapped(h => h ++ headers ++ additionalHeaders).complete(response) - stop(self) - } - - override val supervisorStrategy = - OneForOneStrategy() { - case e => - system.log.error(e, "error processing request: " + r.request.uri) - r.complete(InternalServerError, e.getMessage) - Stop - } -} - -object PerRequest { - sealed trait PerRequestMessage - /** - * Report complete, follows same pattern as spray.routing.RequestContext.complete; examples of how to call - * that method should apply here too. E.g. 
even though this method has only one parameter, it can be called - * with 2 where the first is a StatusCode: RequestComplete(StatusCode.Created, response) - */ - case class RequestComplete[T](response: T)(implicit val marshaller: ToResponseMarshaller[T]) extends PerRequestMessage - - /** - * Report complete with response headers. To response with a special status code the first parameter can be a - * tuple where the first element is StatusCode: RequestCompleteWithHeaders((StatusCode.Created, results), header). - * Note that this is here so that RequestComplete above can behave like spray.routing.RequestContext.complete. - */ - case class RequestCompleteWithHeaders[T](response: T, headers: HttpHeader*)(implicit val marshaller: ToResponseMarshaller[T]) extends PerRequestMessage - - /** allows for pattern matching with extraction of marshaller */ - private object RequestComplete_ { - def unapply[T](requestComplete: RequestComplete[T]) = Some((requestComplete.response, requestComplete.marshaller)) - } - - /** allows for pattern matching with extraction of marshaller */ - private object RequestCompleteWithHeaders_ { - def unapply[T](requestComplete: RequestCompleteWithHeaders[T]) = Some((requestComplete.response, requestComplete.headers, requestComplete.marshaller)) - } - - case class WithProps(r: RequestContext, props: Props, message: AnyRef, timeout: Duration, name: String) extends PerRequest { - lazy val target = context.actorOf(props.withDispatcher(ApiDispatcher), name) - } -} - -/** - * Provides factory methods for creating per request actors - */ -trait PerRequestCreator { - implicit def actorRefFactory: ActorRefFactory - - def perRequest(r: RequestContext, - props: Props, message: AnyRef, - timeout: Duration = 1 minutes, - name: String = PerRequestCreator.endpointActorName) = { - actorRefFactory.actorOf(Props(WithProps(r, props, message, timeout, name)).withDispatcher(ApiDispatcher), name) - } -} - -object PerRequestCreator { - // This scheme was changed away 
from the Agora System.nanoTime approach due to actor naming collisions (!) - def endpointActorName = List("Endpoint", java.lang.Thread.currentThread.getStackTrace()(1).getMethodName, UUID.randomUUID()).mkString("-") -} diff --git a/engine/src/main/scala/cromwell/webservice/SwaggerService.scala b/engine/src/main/scala/cromwell/webservice/SwaggerService.scala new file mode 100644 index 000000000..0af4edd9b --- /dev/null +++ b/engine/src/main/scala/cromwell/webservice/SwaggerService.scala @@ -0,0 +1,7 @@ +package cromwell.webservice + +trait SwaggerService extends SwaggerUiResourceHttpService { + override def swaggerServiceName = "cromwell" + + override def swaggerUiVersion = "2.1.1" +} diff --git a/engine/src/main/scala/cromwell/webservice/SwaggerUiHttpService.scala b/engine/src/main/scala/cromwell/webservice/SwaggerUiHttpService.scala new file mode 100644 index 000000000..ea0eb98ff --- /dev/null +++ b/engine/src/main/scala/cromwell/webservice/SwaggerUiHttpService.scala @@ -0,0 +1,161 @@ +package cromwell.webservice + +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.server.Route +import com.typesafe.config.Config +import net.ceedubs.ficus.Ficus._ +import akka.http.scaladsl.server.Directives._ + +/** + * Serves up the swagger UI from org.webjars/swagger-ui. + */ +trait SwaggerUiHttpService { + /** + * @return The version of the org.webjars/swagger-ui artifact. For example "2.1.1". + */ + def swaggerUiVersion: String + + /** + * Informs the swagger UI of the base of the application url, as hosted on the server. + * If your entire app is served under "http://myserver/myapp", then the base URL is "/myapp". + * If the app is served at the root of the application, leave this value as the empty string. + * + * @return The base URL used by the application, or the empty string if there is no base URL. For example "/myapp". + */ + def swaggerUiBaseUrl: String = "" + + /** + * @return The path to the swagger UI html documents. 
For example "swagger" + */ + def swaggerUiPath: String = "swagger" + + /** + * The path to the actual swagger documentation in either yaml or json, to be rendered by the swagger UI html. + * + * @return The path to the api documentation to render in the swagger UI. + * For example "api-docs" or "swagger/lenthall.yaml". + */ + def swaggerUiDocsPath: String = "api-docs" + + /** + * @return When true, if someone requests / (or /baseUrl if setup), redirect to the swagger UI. + */ + def swaggerUiFromRoot: Boolean = true + + private def routeFromRoot: Route = get { + pathEndOrSingleSlash { + // Redirect / to the swagger UI + redirect(s"$swaggerUiBaseUrl/$swaggerUiPath", StatusCodes.TemporaryRedirect) + } + } + + /** + * Serves up the swagger UI only. Redirects requests to the root of the UI path to the index.html. + * + * @return Route serving the swagger UI. + */ + final def swaggerUiRoute: Route = { + val route = get { + pathPrefix(separateOnSlashes(swaggerUiPath)) { + // when the user hits the doc url, redirect to the index.html with api docs specified on the url + pathEndOrSingleSlash { + redirect( + s"$swaggerUiBaseUrl/$swaggerUiPath/index.html?url=$swaggerUiBaseUrl/$swaggerUiDocsPath", + StatusCodes.TemporaryRedirect) + } ~ getFromResourceDirectory(s"META-INF/resources/webjars/swagger-ui/$swaggerUiVersion") + } + } + if (swaggerUiFromRoot) route ~ routeFromRoot else route + } + +} + +/** + * Extends the SwaggerUiHttpService to gets UI configuration values from a provided Typesafe Config. + */ +trait SwaggerUiConfigHttpService extends SwaggerUiHttpService { + /** + * @return The swagger UI config. 
+ */ + def swaggerUiConfig: Config + + override def swaggerUiVersion = swaggerUiConfig.getString("uiVersion") + + abstract override def swaggerUiBaseUrl = swaggerUiConfig.as[Option[String]]("baseUrl").getOrElse(super.swaggerUiBaseUrl) + + abstract override def swaggerUiPath = swaggerUiConfig.as[Option[String]]("uiPath").getOrElse(super.swaggerUiPath) + + abstract override def swaggerUiDocsPath = swaggerUiConfig.as[Option[String]]("docsPath").getOrElse(super.swaggerUiDocsPath) +} + +/** + * An extension of HttpService to serve up a resource containing the swagger api as yaml or json. The resource + * directory and path on the classpath must match the path for route. The resource can be any file type supported by the + * swagger UI, but defaults to "yaml". This is an alternative to spray-swagger's SwaggerHttpService. + */ +trait SwaggerResourceHttpService { + /** + * @return The directory for the resource under the classpath, and in the url + */ + def swaggerDirectory: String = "swagger" + + /** + * @return Name of the service, used to map the documentation resource at "/uiPath/serviceName.resourceType". + */ + def swaggerServiceName: String + + /** + * @return The type of the resource, usually "yaml" or "json". + */ + def swaggerResourceType: String = "yaml" + + /** + * Swagger UI sends HTTP OPTIONS before ALL requests, and expects a status 200 / OK. When true (the default) the + * swaggerResourceRoute will return 200 / OK for requests for OPTIONS. + * + * See also: + * - https://github.com/swagger-api/swagger-ui/issues/1209 + * - https://github.com/swagger-api/swagger-ui/issues/161 + * - https://groups.google.com/forum/#!topic/swagger-swaggersocket/S6_I6FBjdZ8 + * + * @return True if status code 200 should be returned for HTTP OPTIONS requests for the swagger resource. + */ + def swaggerAllOptionsOk: Boolean = true + + /** + * @return The path to the swagger docs. 
+ */ + protected def swaggerDocsPath = s"$swaggerDirectory/$swaggerServiceName.$swaggerResourceType" + + /** + * @return A route that returns the swagger resource. + */ + final def swaggerResourceRoute: Route = { + val swaggerDocsDirective = path(separateOnSlashes(swaggerDocsPath)) + val route = get { + swaggerDocsDirective { + // Return /uiPath/serviceName.resourceType from the classpath resources. + getFromResource(swaggerDocsPath) + } + } + + if (swaggerAllOptionsOk) { + route ~ options { + // Also return status 200 / OK for all OPTIONS requests. + complete(StatusCodes.OK) + } + } else route + } +} + +/** + * Extends the SwaggerUiHttpService and SwaggerResourceHttpService to serve up both. + */ +trait SwaggerUiResourceHttpService extends SwaggerUiHttpService with SwaggerResourceHttpService { + override def swaggerUiDocsPath = swaggerDocsPath + + /** + * @return A route that redirects to the swagger UI and returns the swagger resource. + */ + final def swaggerUiResourceRoute: Route = swaggerUiRoute ~ swaggerResourceRoute +} diff --git a/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala b/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala index a294c58fc..a813278dc 100644 --- a/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala +++ b/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala @@ -1,13 +1,17 @@ package cromwell.webservice +import java.nio.file.Paths import java.time.OffsetDateTime -import cromwell.core.WorkflowSourceFiles +import cromwell.core._ import cromwell.engine._ import cromwell.services.metadata.MetadataService -import MetadataService.{WorkflowQueryResponse, WorkflowQueryResult} +import MetadataService._ import cromwell.util.JsonFormatting.WdlValueJsonFormatter import WdlValueJsonFormatter._ +import better.files.File +import cromwell.webservice.CromwellApiService.BackendResponse +import cromwell.webservice.metadata.MetadataBuilderActor.BuiltMetadataResponse import 
spray.json.{DefaultJsonProtocol, JsString, JsValue, RootJsonFormat} object WorkflowJsonSupport extends DefaultJsonProtocol { @@ -17,8 +21,20 @@ object WorkflowJsonSupport extends DefaultJsonProtocol { implicit val workflowOutputResponseProtocol = jsonFormat2(WorkflowOutputResponse) implicit val callOutputResponseProtocol = jsonFormat3(CallOutputResponse) implicit val engineStatsProtocol = jsonFormat2(EngineStatsActor.EngineStats) + implicit val BackendResponseFormat = jsonFormat2(BackendResponse) + implicit val BuiltStatusResponseFormat = jsonFormat1(BuiltMetadataResponse) implicit val callAttempt = jsonFormat2(CallAttempt) - implicit val workflowSourceData = jsonFormat3(WorkflowSourceFiles) + implicit val workflowSourceData = jsonFormat6(WorkflowSourceFilesWithoutImports) + + implicit object fileJsonFormat extends RootJsonFormat[File] { + override def write(obj: File) = JsString(obj.path.toAbsolutePath.toString) + override def read(json: JsValue): File = json match { + case JsString(str) => Paths.get(str) + case unknown => throw new NotImplementedError(s"Cannot parse $unknown to a File") + } + } + + implicit val workflowSourceDataWithImports = jsonFormat7(WorkflowSourceFilesWithDependenciesZip) implicit val errorResponse = jsonFormat3(FailureResponse) implicit val successResponse = jsonFormat3(SuccessResponse) @@ -31,7 +47,6 @@ object WorkflowJsonSupport extends DefaultJsonProtocol { } } - implicit val unqualifiedFailureEventEntry = jsonFormat2(FailureEventEntry) implicit val workflowQueryResult = jsonFormat5(WorkflowQueryResult) implicit val workflowQueryResponse = jsonFormat1(WorkflowQueryResponse) } diff --git a/engine/src/main/scala/cromwell/webservice/metadata/IndexedJsonValue.scala b/engine/src/main/scala/cromwell/webservice/metadata/IndexedJsonValue.scala deleted file mode 100644 index 4f1ba9bcc..000000000 --- a/engine/src/main/scala/cromwell/webservice/metadata/IndexedJsonValue.scala +++ /dev/null @@ -1,50 +0,0 @@ -package cromwell.webservice.metadata - 
-import java.time.OffsetDateTime - -import spray.json._ - -import scalaz.{Monoid, Semigroup} -// This is useful, do not remove -import scalaz.Scalaz._ - -private object IndexedJsonValue { - private implicit val dateTimeOrdering: Ordering[OffsetDateTime] = scala.Ordering.fromLessThan(_ isBefore _) - private val timestampedJsValueOrdering: Ordering[TimestampedJsValue] = scala.Ordering.by(_.timestamp) - - implicit val TimestampedJsonMonoid: Monoid[TimestampedJsValue] = new Monoid[TimestampedJsValue] { - def append(f1: TimestampedJsValue, f2: => TimestampedJsValue): TimestampedJsValue = { - (f1, f2) match { - case (o1: TimestampedJsObject, o2: TimestampedJsObject) => - val sg = implicitly[Semigroup[Map[String, TimestampedJsValue]]] - TimestampedJsObject(sg.append(o1.v, o2.v), dateTimeOrdering.max(o1.timestamp, o2.timestamp)) - case (o1: TimestampedJsList, o2: TimestampedJsList) => - val sg = implicitly[Semigroup[Map[Int, TimestampedJsValue]]] - TimestampedJsList(sg.append(o1.v, o2.v), dateTimeOrdering.max(o1.timestamp, o2.timestamp)) - case (o1, o2) => timestampedJsValueOrdering.max(o1, o2) - } - } - - override def zero: TimestampedJsValue = TimestampedJsObject(Map.empty, OffsetDateTime.now) - } -} - -/** Customized version of Json data structure, to account for timestamped values and lazy array creation */ -private sealed trait TimestampedJsValue { - def toJson: JsValue - def timestamp: OffsetDateTime -} - -private case class TimestampedJsList(v: Map[Int, TimestampedJsValue], timestamp: OffsetDateTime) extends TimestampedJsValue { - override val toJson = JsArray(v.values.toVector map { _.toJson }) -} - -private case class TimestampedJsObject(v: Map[String, TimestampedJsValue], timestamp: OffsetDateTime) extends TimestampedJsValue { - override val toJson = JsObject(v mapValues { _.toJson }) -} - -private class TimestampedJsPrimitive(val v: JsValue, val timestamp: OffsetDateTime) extends TimestampedJsValue { - override val toJson = v -} - -private case class 
TimestampedEmptyJson(override val timestamp: OffsetDateTime) extends TimestampedJsPrimitive(JsObject(Map.empty[String, JsValue]), timestamp) \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala b/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala index 35ea1aaa5..30ec2757a 100644 --- a/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala +++ b/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala @@ -1,33 +1,42 @@ package cromwell.webservice.metadata -import java.time.OffsetDateTime +import java.util.UUID import akka.actor.{ActorRef, LoggingFSM, Props} +import cromwell.webservice.metadata.MetadataComponent._ import cromwell.core.Dispatcher.ApiDispatcher import cromwell.core.ExecutionIndex.ExecutionIndex import cromwell.core.{WorkflowId, WorkflowMetadataKeys, WorkflowState} import cromwell.services.ServiceRegistryActor.ServiceRegistryFailure import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ -import cromwell.webservice.PerRequest.{RequestComplete, RequestCompleteWithHeaders} -import cromwell.webservice.metadata.IndexedJsonValue._ -import cromwell.webservice.metadata.MetadataBuilderActor.{Idle, MetadataBuilderActorState, WaitingForMetadataService} -import cromwell.webservice.{APIResponse, WorkflowJsonSupport} +import cromwell.webservice.metadata.MetadataBuilderActor._ import org.slf4j.LoggerFactory -import spray.http.{StatusCodes, Uri} -import spray.httpx.SprayJsonSupport._ import spray.json._ -import scala.collection.immutable.TreeMap -import scala.language.postfixOps -import scala.util.{Failure, Success, Try} -import scalaz.std.list._ -import scalaz.syntax.foldable._ object MetadataBuilderActor { + sealed abstract class MetadataBuilderActorResponse + case class BuiltMetadataResponse(response: JsObject) extends MetadataBuilderActorResponse + case class FailedMetadataResponse(reason: 
Throwable) extends MetadataBuilderActorResponse + sealed trait MetadataBuilderActorState case object Idle extends MetadataBuilderActorState case object WaitingForMetadataService extends MetadataBuilderActorState + case object WaitingForSubWorkflows extends MetadataBuilderActorState + + case class MetadataBuilderActorData( + originalQuery: MetadataQuery, + originalEvents: Seq[MetadataEvent], + subWorkflowsMetadata: Map[String, JsValue], + waitFor: Int + ) { + def withSubWorkflow(id: String, metadata: JsValue) = { + this.copy(subWorkflowsMetadata = subWorkflowsMetadata + ((id, metadata))) + } + + def isComplete = subWorkflowsMetadata.size == waitFor + } def props(serviceRegistryActor: ActorRef) = { Props(new MetadataBuilderActor(serviceRegistryActor)).withDispatcher(ApiDispatcher) @@ -35,110 +44,21 @@ object MetadataBuilderActor { val log = LoggerFactory.getLogger("MetadataBuilder") - private val KeySeparator = MetadataKey.KeySeparator - private val bracketMatcher = """\[(\d*)\]""".r - private val startMatcher = """^([^\[]+)\[""".r private val AttemptKey = "attempt" private val ShardKey = "shardIndex" - /** Types of element supported in a dotted key notation */ - private sealed trait KeyElement { - def toIndexedJson(value: TimestampedJsValue): TimestampedJsValue - } - - private case class ListElement(name: String, indexes: List[String]) extends KeyElement { - def toIndexedJson(innerValue: TimestampedJsValue) = { - if (indexes.isEmpty) { - TimestampedJsObject(Map(name -> innerValue), innerValue.timestamp) - } else { - /* - * The last index is the one that the innerValue should have in the innerList. - * From there lists are fold into one another until we reach the first index. 
- * e.g l[1][2] = "a" means - * "l": [ - * [ - * "a" <- will have index 2 in the inner list - * ] <- inner list: will have index 1 in the outer list - * ] - * - * Important note: Indexes are used for sorting purposes ONLY - * An index of 2 DOES NOT guarantee that a value will be at index 2 in a list - */ - val list = innerValue match { - // Empty value in a list means empty list - case TimestampedEmptyJson(timestamp) => TimestampedJsList(Map.empty, timestamp) - case nonEmptyValue => - /* - * This creates a (possibly nested) list, by folding over the indexes. - * The resulting list will be as deep as there are elements in "indexes" - * First we create the deepest list, that will contain innerValue (the actual value we want in the list) - * e.g with l[1][2] = "a". indexes will be List[1, 2]. innerValue will be "a". - * innerList is TimestampedJsList(Map(2 -> "a"), [timestamp of a]) - */ - val innerList = TimestampedJsList(TreeMap(indexes.last.toInt -> innerValue), nonEmptyValue.timestamp) - /* Then, stating with this innerList, we wrap around it as many lists as (indexes.length - 1) (because we used the last index for the innerValue above) - * Continuing with this example, result will be TimestampedJsList(Map(1 -> TimestampedJsList(Map(2 -> "a")))) - */ - indexes.init.foldRight(innerList)((index, acc) => { - TimestampedJsList(TreeMap(index.toInt -> acc), acc.timestamp) - }) - } - - TimestampedJsObject(Map(name -> list), list.timestamp) - } - } - } - private case class ObjectElement(name: String) extends KeyElement { - def toIndexedJson(value: TimestampedJsValue) = TimestampedJsObject(Map(name -> value), value.timestamp) - } - - private def parseKeyChunk(chunk: String): KeyElement = { - startMatcher.findFirstMatchIn(chunk) match { - case Some(listNameRegex) => - val indexes = bracketMatcher.findAllMatchIn(chunk).map(_.group(1)).toList - ListElement(listNameRegex.group(1), indexes) - case _ => ObjectElement(chunk) - } - } - - private def 
metadataValueToIndexedJson(value: Option[MetadataValue], timestamp: OffsetDateTime): TimestampedJsValue = { - value map { someValue => - val coerced: Try[TimestampedJsPrimitive] = someValue.valueType match { - case MetadataInt => Try(new TimestampedJsPrimitive(JsNumber(someValue.value.toInt), timestamp)) - case MetadataNumber => Try(new TimestampedJsPrimitive(JsNumber(someValue.value.toDouble), timestamp)) - case MetadataBoolean => Try(new TimestampedJsPrimitive(JsBoolean(someValue.value.toBoolean), timestamp)) - case MetadataString => Try(new TimestampedJsPrimitive(JsString(someValue.value), timestamp)) - } - - coerced match { - case Success(v) => v - case Failure(e) => - log.warn(s"Failed to coerce ${someValue.value} to ${someValue.valueType}. Falling back to String.", e) - new TimestampedJsPrimitive(JsString(someValue.value), timestamp) - } - } getOrElse TimestampedEmptyJson(timestamp) - } - - private def keyValueToIndexedJson(str: String, value: Option[MetadataValue], timestamp: OffsetDateTime): TimestampedJsValue = { - val innerValue: TimestampedJsValue = metadataValueToIndexedJson(value, timestamp) - str.split(KeySeparator).foldRight(innerValue)((chunk, acc) => { parseKeyChunk(chunk).toIndexedJson(acc) }) - } - + /** + * Metadata for a call attempt + */ private case class MetadataForAttempt(attempt: Int, metadata: JsObject) - /** There's one TimestampedJsValue per attempt, hence the list. */ - private case class MetadataForIndex(index: Int, metadata: List[JsObject]) - implicit val dateTimeOrdering: Ordering[OffsetDateTime] = scala.Ordering.fromLessThan(_ isBefore _) - - /** Sort events by timestamp, transform them into TimestampedJsValues, and merge them together. 
*/ - private def eventsToIndexedJson(events: Seq[MetadataEvent]): TimestampedJsValue = { - // The `List` has a `Foldable` instance defined in scope, and because the `List`'s elements have a `Monoid` instance - // defined in scope, `suml` can derive a sane `TimestampedJsValue` value even if the `List` of events is empty. - events.toList map { e => keyValueToIndexedJson(e.key.key, e.value, e.offsetDateTime) } suml - } + /** + * Metadata objects of all attempts for one shard + */ + private case class MetadataForIndex(index: Int, metadata: List[JsObject]) - private def eventsToAttemptMetadata(attempt: Int, events: Seq[MetadataEvent]) = { - val withAttemptField = JsObject(eventsToIndexedJson(events).toJson.asJsObject.fields + (AttemptKey -> JsNumber(attempt))) + private def eventsToAttemptMetadata(subWorkflowMetadata: Map[String, JsValue])(attempt: Int, events: Seq[MetadataEvent]) = { + val withAttemptField = JsObject(MetadataComponent(events, subWorkflowMetadata).toJson.asJsObject.fields + (AttemptKey -> JsNumber(attempt))) MetadataForAttempt(attempt, withAttemptField) } @@ -148,61 +68,85 @@ object MetadataBuilderActor { MetadataForIndex(index.getOrElse(-1), metadata) } - private def reduceWorkflowEvents(workflowEvents: Seq[MetadataEvent]): Seq[MetadataEvent] = { - // This handles state specially so a sensible final value is returned irrespective of the order in which raw state - // events were recorded in the journal. - val (workflowStatusEvents, workflowNonStatusEvents) = workflowEvents partition(_.key.key == WorkflowMetadataKeys.Status) - - val ordering = implicitly[Ordering[WorkflowState]] - // This orders by value in WorkflowState CRDT resolution, not necessarily the chronologically most recent state. 
- val sortedStateEvents = workflowStatusEvents.filter(_.value.isDefined) sortWith { case (a, b) => ordering.gt(a.value.get.toWorkflowState, b.value.get.toWorkflowState) } - workflowNonStatusEvents ++ sortedStateEvents.headOption.toList - } - - private def parseWorkflowEventsToTimestampedJsValue(events: Seq[MetadataEvent], includeCallsIfEmpty: Boolean): JsObject = { - // Partition if sequence of events in a pair of (Workflow level events, Call level events) + private def buildMetadataJson(events: Seq[MetadataEvent], includeCallsIfEmpty: Boolean, expandedValues: Map[String, JsValue]): JsObject = { + // Partition events into workflow level and call level events val (workflowLevel, callLevel) = events partition { _.key.jobKey.isEmpty } - val foldedWorkflowValues = eventsToIndexedJson(reduceWorkflowEvents(workflowLevel)).toJson.asJsObject - + val workflowLevelJson = MetadataComponent(workflowLevel, Map.empty).toJson.asJsObject + + /* + * Map( + * "fqn" -> Seq[Events], + * "fqn2" -> Seq[Events], + * ... + * ) + * Note that groupBy will preserve the ordering of the events in the Seq, which means that as long as the DB sorts them by timestamp, we can always assume the last one is the newest one. + * This is guaranteed by the groupBy invariant and the fact that filter preservers the ordering. (See scala doc for groupBy and filter) + */ val callsGroupedByFQN = callLevel groupBy { _.key.jobKey.get.callFqn } + /* + * Map( + * "fqn" -> Map( //Shard index + * Option(0) -> Seq[Events], + * Option(1) -> Seq[Events] + * ... + * ), + * ... + * ) + */ val callsGroupedByFQNAndIndex = callsGroupedByFQN mapValues { _ groupBy { _.key.jobKey.get.index } } + /* + * Map( + * "fqn" -> Map( + * Option(0) -> Map( //Attempt + * 1 -> Seq[Events], + * 2 -> Seq[Events], + * ... + * ), + * ... + * ), + * ... 
+ * ) + */ val callsGroupedByFQNAndIndexAndAttempt = callsGroupedByFQNAndIndex mapValues { _ mapValues { _ groupBy { _.key.jobKey.get.attempt } } } - val callsMap = callsGroupedByFQNAndIndexAndAttempt mapValues { eventsForIndex => - eventsForIndex mapValues { eventsForAttempt => - eventsForAttempt map Function.tupled(eventsToAttemptMetadata) - } map { Function.tupled(attemptMetadataToIndexMetadata) } - } mapValues { md => JsArray(md.toVector.sortBy(_.index) flatMap { _.metadata }) } + val eventsToAttemptFunction = Function.tupled(eventsToAttemptMetadata(expandedValues) _) + val attemptToIndexFunction = (attemptMetadataToIndexMetadata _).tupled + + val callsMap = callsGroupedByFQNAndIndexAndAttempt mapValues { _ mapValues { _ map eventsToAttemptFunction } map attemptToIndexFunction } mapValues { md => + JsArray(md.toVector.sortBy(_.index) flatMap { _.metadata }) + } val wrappedCalls = JsObject(Map(WorkflowMetadataKeys.Calls -> JsObject(callsMap))) val callData = if (callsMap.isEmpty && !includeCallsIfEmpty) Nil else wrappedCalls.fields - JsObject(foldedWorkflowValues.fields ++ callData) + JsObject(workflowLevelJson.fields ++ callData) } - private def parseWorkflowEvents(includeCallsIfEmpty: Boolean)(events: Seq[MetadataEvent]): JsObject = parseWorkflowEventsToTimestampedJsValue(events, includeCallsIfEmpty) + private def parseWorkflowEvents(includeCallsIfEmpty: Boolean, expandedValues: Map[String, JsValue])(events: Seq[MetadataEvent]): JsObject = { + buildMetadataJson(events, includeCallsIfEmpty, expandedValues) + } /** * Parse a Seq of MetadataEvent into a full Json metadata response. 
*/ - private def parse(events: Seq[MetadataEvent]): JsObject = { - JsObject(events.groupBy(_.key.workflowId.toString) mapValues parseWorkflowEvents(includeCallsIfEmpty = true)) + private def parse(events: Seq[MetadataEvent], expandedValues: Map[String, JsValue]): JsObject = { + JsObject(events.groupBy(_.key.workflowId.toString) mapValues parseWorkflowEvents(includeCallsIfEmpty = true, expandedValues)) } - implicit class EnhancedMetadataValue(val value: MetadataValue) extends AnyVal { - def toWorkflowState: WorkflowState = WorkflowState.fromString(value.value) - } + def uniqueActorName: String = List("MetadataBuilderActor", UUID.randomUUID()).mkString("-") } -class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[MetadataBuilderActorState, Unit] - with DefaultJsonProtocol with WorkflowQueryPagination { +class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[MetadataBuilderActorState, Option[MetadataBuilderActorData]] + with DefaultJsonProtocol { + import MetadataBuilderActor._ - import WorkflowJsonSupport._ + private var target: ActorRef = ActorRef.noSender - startWith(Idle, ()) + startWith(Idle, None) val tag = self.path.name when(Idle) { case Event(action: MetadataServiceAction, _) => + target = sender() serviceRegistryActor ! action goto(WaitingForMetadataService) } @@ -213,48 +157,100 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me } when(WaitingForMetadataService) { - case Event(MetadataLookupResponse(query, metadata), _) => - context.parent ! RequestComplete(StatusCodes.OK, processMetadataResponse(query, metadata)) - allDone case Event(StatusLookupResponse(w, status), _) => - context.parent ! RequestComplete(StatusCodes.OK, processStatusResponse(w, status)) - allDone - case Event(failure: ServiceRegistryFailure, _) => - val response = APIResponse.fail(new RuntimeException("Can't find metadata service")) - context.parent ! 
RequestComplete(StatusCodes.InternalServerError, response) - allDone - case Event(WorkflowQuerySuccess(uri: Uri, response, metadata), _) => - context.parent ! RequestCompleteWithHeaders(response, generateLinkHeaders(uri, metadata):_*) - allDone - case Event(failure: WorkflowQueryFailure, _) => - context.parent ! RequestComplete(StatusCodes.BadRequest, APIResponse.fail(failure.reason)) + target ! BuiltMetadataResponse(processStatusResponse(w, status)) allDone case Event(WorkflowOutputsResponse(id, events), _) => // Add in an empty output event if there aren't already any output events. val hasOutputs = events exists { _.key.key.startsWith(WorkflowMetadataKeys.Outputs + ":") } val updatedEvents = if (hasOutputs) events else MetadataEvent.empty(MetadataKey(id, None, WorkflowMetadataKeys.Outputs)) +: events - context.parent ! RequestComplete(StatusCodes.OK, workflowMetadataResponse(id, updatedEvents, includeCallsIfEmpty = false)) + target ! BuiltMetadataResponse(workflowMetadataResponse(id, updatedEvents, includeCallsIfEmpty = false, Map.empty)) allDone case Event(LogsResponse(w, l), _) => - context.parent ! RequestComplete(StatusCodes.OK, workflowMetadataResponse(w, l, includeCallsIfEmpty = false)) + target ! BuiltMetadataResponse(workflowMetadataResponse(w, l, includeCallsIfEmpty = false, Map.empty)) + allDone + case Event(MetadataLookupResponse(query, metadata), None) => processMetadataResponse(query, metadata) + case Event(_: ServiceRegistryFailure, _) => + target ! FailedMetadataResponse(new RuntimeException("Can't find metadata service")) allDone case Event(failure: MetadataServiceFailure, _) => - context.parent ! RequestComplete(StatusCodes.InternalServerError, APIResponse.error(failure.reason)) + target ! 
FailedMetadataResponse(failure.reason) allDone case Event(unexpectedMessage, stateData) => - val response = APIResponse.fail(new RuntimeException(s"MetadataBuilderActor $tag(WaitingForMetadataService, $stateData) got an unexpected message: $unexpectedMessage")) - context.parent ! RequestComplete(StatusCodes.InternalServerError, response) + target ! FailedMetadataResponse(new RuntimeException(s"MetadataBuilderActor $tag(WaitingForMetadataService, $stateData) got an unexpected message: $unexpectedMessage")) context stop self stay() } - def processMetadataResponse(query: MetadataQuery, eventsList: Seq[MetadataEvent]): JsObject = { + when(WaitingForSubWorkflows) { + case Event(mbr: MetadataBuilderActorResponse, Some(data)) => + processSubWorkflowMetadata(mbr, data) + } + + whenUnhandled { + case Event(message, data) => + log.error(s"Received unexpected message $message in state $stateName with data $data") + stay() + } + + def processSubWorkflowMetadata(metadataResponse: MetadataBuilderActorResponse, data: MetadataBuilderActorData) = { + metadataResponse match { + case BuiltMetadataResponse(js) => + js.fields.get(WorkflowMetadataKeys.Id) match { + case Some(subId: JsString) => + val newData = data.withSubWorkflow(subId.value, js) + + if (newData.isComplete) { + buildAndStop(data.originalQuery, data.originalEvents, newData.subWorkflowsMetadata) + } else { + stay() using Option(newData) + } + case _ => failAndDie(new RuntimeException("Received unexpected response while waiting for sub workflow metadata.")) + } + case FailedMetadataResponse(e) => failAndDie(new RuntimeException("Failed to retrieve metadata for a sub workflow.", e)) + } + } + + def failAndDie(reason: Throwable) = { + target ! FailedMetadataResponse(reason) + context stop self + stay() + } + + def buildAndStop(query: MetadataQuery, eventsList: Seq[MetadataEvent], expandedValues: Map[String, JsValue]) = { + target ! 
BuiltMetadataResponse(processMetadataEvents(query, eventsList, expandedValues)) + allDone + } + + def processMetadataResponse(query: MetadataQuery, eventsList: Seq[MetadataEvent]) = { + if (query.expandSubWorkflows) { + // Scan events for sub workflow ids + val subWorkflowIds = eventsList.collect({ + case MetadataEvent(key, value, _) if key.key.endsWith(CallMetadataKeys.SubWorkflowId) => value map { _.value } + }).flatten + + // If none is found just proceed to build metadata + if (subWorkflowIds.isEmpty) buildAndStop(query, eventsList, Map.empty) + else { + // Otherwise spin up a metadata builder actor for each sub workflow + subWorkflowIds foreach { subId => + val subMetadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), uniqueActorName) + subMetadataBuilder ! GetMetadataQueryAction(query.copy(workflowId = WorkflowId.fromString(subId))) + } + goto(WaitingForSubWorkflows) using Option(MetadataBuilderActorData(query, eventsList, Map.empty, subWorkflowIds.size)) + } + } else { + buildAndStop(query, eventsList, Map.empty) + } + } + + def processMetadataEvents(query: MetadataQuery, eventsList: Seq[MetadataEvent], expandedValues: Map[String, JsValue]): JsObject = { // Should we send back some message ? Or even fail the request instead ? 
if (eventsList.isEmpty) JsObject(Map.empty[String, JsValue]) else { query match { - case MetadataQuery(w, _, _, _, _) => workflowMetadataResponse(w, eventsList) - case _ => MetadataBuilderActor.parse(eventsList) + case MetadataQuery(w, _, _, _, _, _) => workflowMetadataResponse(w, eventsList, includeCallsIfEmpty = true, expandedValues) + case _ => MetadataBuilderActor.parse(eventsList, expandedValues) } } } @@ -266,7 +262,10 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me )) } - private def workflowMetadataResponse(workflowId: WorkflowId, eventsList: Seq[MetadataEvent], includeCallsIfEmpty: Boolean = true) = { - JsObject(MetadataBuilderActor.parseWorkflowEvents(includeCallsIfEmpty)(eventsList).fields + ("id" -> JsString(workflowId.toString))) + private def workflowMetadataResponse(workflowId: WorkflowId, + eventsList: Seq[MetadataEvent], + includeCallsIfEmpty: Boolean, + expandedValues: Map[String, JsValue]): JsObject = { + JsObject(MetadataBuilderActor.parseWorkflowEvents(includeCallsIfEmpty, expandedValues)(eventsList).fields + ("id" -> JsString(workflowId.toString))) } } diff --git a/engine/src/main/scala/cromwell/webservice/metadata/MetadataComponent.scala b/engine/src/main/scala/cromwell/webservice/metadata/MetadataComponent.scala new file mode 100644 index 000000000..ee70d412a --- /dev/null +++ b/engine/src/main/scala/cromwell/webservice/metadata/MetadataComponent.scala @@ -0,0 +1,156 @@ +package cromwell.webservice.metadata + +import cats.{Monoid, Semigroup} +import cats.instances.map._ +import cats.instances.list._ +import cats.syntax.foldable._ +import cromwell.core.{ExecutionStatus, WorkflowMetadataKeys, WorkflowState} +import cromwell.services.metadata._ +import spray.json.{JsArray, _} + +import scala.collection.immutable.TreeMap +import scala.language.postfixOps +import scala.util.{Random, Try} + +object MetadataComponent { + implicit val MetadataComponentMonoid: Monoid[MetadataComponent] = new 
Monoid[MetadataComponent] { + private lazy val stringKeyMapSg = implicitly[Semigroup[Map[String, MetadataComponent]]] + private lazy val intKeyMapSg = implicitly[Semigroup[Map[Int, MetadataComponent]]] + + def combine(f1: MetadataComponent, f2: MetadataComponent): MetadataComponent = { + (f1, f2) match { + case (MetadataObject(v1), MetadataObject(v2)) => MetadataObject(stringKeyMapSg.combine(v1, v2)) + case (MetadataList(v1), MetadataList(v2)) => MetadataList(intKeyMapSg.combine(v1, v2)) + // If there's a custom ordering, use it + case (v1 @ MetadataPrimitive(_, Some(o1)), v2 @ MetadataPrimitive(_, Some(o2))) if o1 == o2 => o1.max(v1, v2) + // Otherwise assume it's ordered by default and take the new one + case (_, o2) => o2 + } + } + + override def empty: MetadataComponent = MetadataObject.empty + } + + val metadataPrimitiveJsonWriter: JsonWriter[MetadataPrimitive] = JsonWriter.func2Writer[MetadataPrimitive] { + case MetadataPrimitive(MetadataValue(value, MetadataInt), _) => Try(value.toInt) map JsNumber.apply getOrElse JsString(value) + case MetadataPrimitive(MetadataValue(value, MetadataNumber), _) => Try(value.toDouble) map JsNumber.apply getOrElse JsString(value) + case MetadataPrimitive(MetadataValue(value, MetadataBoolean), _) => Try(value.toBoolean) map JsBoolean.apply getOrElse JsString(value) + case MetadataPrimitive(MetadataValue(value, MetadataString), _) => JsString(value) + } + + implicit val metadataComponentJsonWriter: JsonWriter[MetadataComponent] = JsonWriter.func2Writer[MetadataComponent] { + case MetadataList(values) => JsArray(values.values.toVector map { _.toJson(this.metadataComponentJsonWriter) }) + case MetadataObject(values) => JsObject(values.mapValues(_.toJson(this.metadataComponentJsonWriter))) + case primitive: MetadataPrimitive => metadataPrimitiveJsonWriter.write(primitive) + case MetadataEmpty => JsObject.empty + case MetadataNull => JsNull + case MetadataJsonComponent(jsValue) => jsValue + } + + /* ******************************* 
*/ + /* *** Metadata Events Parsing *** */ + /* ******************************* */ + + private val KeySeparator = MetadataKey.KeySeparator + // Split on every unescaped KeySeparator + val KeySplitter = s"(? MetadataObject(Map(chunk -> innerValue)) + // If there's a bracket it's a named list. e.g.: "executionEvents[0][1]" + case bracketIndex => + // Name: "executionEvents" + val objectName = chunk.substring(0, bracketIndex) + + // Empty value means empty list + if (innerValue == MetadataEmpty) MetadataObject(Map(objectName -> MetadataList.empty)) + else { + // Brackets: "[0][1]" + val brackets = chunk.substring(bracketIndex) + // Indices as a list: List(0, 1) + val listIndices = for { + m <- bracketMatcher.findAllMatchIn(brackets) + // It's possible for a bracket pair to be empty, in which case we just give it a random number + asInt = if (m.group(1).isEmpty) Random.nextInt() else m.group(1).toInt + } yield asInt + // Fold into a MetadataList: MetadataList(0 -> MetadataList(1 -> innerValue)) + val metadataList = listIndices.toList.foldRight(innerValue)((index, acc) => MetadataList(TreeMap(index -> acc))) + + MetadataObject(Map(objectName -> metadataList)) + } + } + } + + private def customOrdering(event: MetadataEvent): Option[Ordering[MetadataPrimitive]] = event match { + case MetadataEvent(MetadataKey(_, Some(_), key), _, _) if key == CallMetadataKeys.ExecutionStatus => Option(MetadataPrimitive.ExecutionStatusOrdering) + case MetadataEvent(MetadataKey(_, None, key), _, _) if key == WorkflowMetadataKeys.Status => Option(MetadataPrimitive.WorkflowStateOrdering) + case _ => None + } + + private def toMetadataComponent(subWorkflowMetadata: Map[String, JsValue])(event: MetadataEvent) = { + lazy val primitive = event.value map { MetadataPrimitive(_, customOrdering(event)) } getOrElse MetadataEmpty + lazy val originalKeyAndPrimitive = (event.key.key, primitive) + + val keyAndPrimitive: (String, MetadataComponent) = if 
(event.key.key.endsWith(CallMetadataKeys.SubWorkflowId)) { + (for { + metadataValue <- event.value + subWorkflowMetadata <- subWorkflowMetadata.get(metadataValue.value) + keyWithSubWorkflowMetadata = event.key.key.replace(CallMetadataKeys.SubWorkflowId, CallMetadataKeys.SubWorkflowMetadata) + subWorkflowComponent = MetadataJsonComponent(subWorkflowMetadata) + } yield (keyWithSubWorkflowMetadata, subWorkflowComponent)) getOrElse originalKeyAndPrimitive + } else originalKeyAndPrimitive + + fromMetadataKeyAndPrimitive(keyAndPrimitive._1, keyAndPrimitive._2) + } + + /** Sort events by timestamp, transform them into MetadataComponent, and merge them together. */ + def apply(events: Seq[MetadataEvent], subWorkflowMetadata: Map[String, JsValue] = Map.empty): MetadataComponent = { + // The `List` has a `Foldable` instance defined in scope, and because the `List`'s elements have a `Monoid` instance + // defined in scope, `combineAll` can derive a sane `TimestampedJsValue` value even if the `List` of events is empty. 
+ events.toList map toMetadataComponent(subWorkflowMetadata) combineAll + } + + def fromMetadataKeyAndPrimitive(metadataKey: String, innerComponent: MetadataComponent) = { + import MetadataKey._ + metadataKey.split(KeySplitter).map(_.unescapeMeta).foldRight(innerComponent)(parseKeyChunk) + } +} + +sealed trait MetadataComponent +case object MetadataEmpty extends MetadataComponent +case object MetadataNull extends MetadataComponent + +// Metadata Object +object MetadataObject { + def empty = new MetadataObject(Map.empty) + def apply(kvPair: (String, MetadataComponent)*) = { + new MetadataObject(kvPair.toMap) + } +} + +case class MetadataObject(v: Map[String, MetadataComponent]) extends MetadataComponent + +// Metadata List +object MetadataList { + def empty = new MetadataList(Map.empty) + def apply(components: List[MetadataComponent]) = new MetadataList(components.zipWithIndex.map({case (c, i) => i -> c}).toMap) +} +case class MetadataList(v: Map[Int, MetadataComponent]) extends MetadataComponent + +// Metadata Primitive +object MetadataPrimitive { + val ExecutionStatusOrdering: Ordering[MetadataPrimitive] = Ordering.by { primitive: MetadataPrimitive => + ExecutionStatus.withName(primitive.v.value) + } + + val WorkflowStateOrdering: Ordering[MetadataPrimitive] = Ordering.by { primitive: MetadataPrimitive => + WorkflowState.withName(primitive.v.value) + } +} +case class MetadataPrimitive(v: MetadataValue, customOrdering: Option[Ordering[MetadataPrimitive]] = None) extends MetadataComponent + +// Metadata Component that owns an already computed JsValue +case class MetadataJsonComponent(jsValue: JsValue) extends MetadataComponent diff --git a/engine/src/main/scala/cromwell/webservice/metadata/WorkflowQueryPagination.scala b/engine/src/main/scala/cromwell/webservice/metadata/WorkflowQueryPagination.scala index 8f6650e88..9ae121067 100644 --- a/engine/src/main/scala/cromwell/webservice/metadata/WorkflowQueryPagination.scala +++ 
b/engine/src/main/scala/cromwell/webservice/metadata/WorkflowQueryPagination.scala @@ -1,33 +1,36 @@ package cromwell.webservice.metadata +import akka.http.scaladsl.model.Uri.Query +import akka.http.scaladsl.model.headers.{Link, LinkParams} +import akka.http.scaladsl.model.{HttpHeader, Uri} import cromwell.services.metadata.MetadataService.QueryMetadata -import spray.http.HttpHeaders.Link -import spray.http.{HttpHeader, Uri} + /** * Attempts to add query parameters for pagination. * - * NOTE: This trait is effectively broken, as the returned links are not suitable for use by cromwell clients. + * NOTE: This is effectively broken, as the returned links are not suitable for use by cromwell clients. * - * The trait discards the search parameters for GETs, for example it drops parameters such as "start" and "end". Also + * This discards the search parameters for GETs, for example it drops parameters such as "start" and "end". Also * generates links incompatible with POSTs, as the endpoints read parameters from the HTTP body during POST, __not__ * from the URI. * - * This trait may need to receive an entire `spray.http.HttpRequest` and not just the `spray.http.Uri` to ensure that - * it doesn't generate links for POST. + * This may need to receive an entire `HttpRequest` and not just the `Uri` to ensure that it doesn't generate links for POST. * * The existing `CromwellApiServiceSpec` should be updated to verify the expected behavior for both GET and POST. * * Left behind for legacy reasons, but don't believe anyone has ever used these non-functional links. 
+ * + * Note: As of 6/7/17 the above is confirmed by JG, but leaving it mostly as-is for now */ -trait WorkflowQueryPagination { +object WorkflowQueryPagination { - protected def generatePaginationParams(page: Int, pageSize: Int): String = { - s"page=$page&pagesize=$pageSize" + private def generatePaginationParams(page: Int, pageSize: Int): Query = { + Query(s"page=$page&pagesize=$pageSize") } //Generates link headers for pagination navigation https://tools.ietf.org/html/rfc5988#page-6 - protected def generateLinkHeaders(uri: Uri, metadata: Option[QueryMetadata]): Seq[HttpHeader] = { + def generateLinkHeaders(uri: Uri, metadata: Option[QueryMetadata]): List[HttpHeader] = { //strip off the query params val baseUrl = uri.scheme + ":" + uri.authority + uri.path metadata match { @@ -35,22 +38,21 @@ trait WorkflowQueryPagination { (meta.page, meta.pageSize) match { case (Some(p), Some(ps)) => - val firstLink = Link(Uri(baseUrl).withQuery(generatePaginationParams(1, ps)), Link.first) + val firstLink = Link(Uri(baseUrl).withQuery(generatePaginationParams(1, ps)), LinkParams.first) val prevPage = math.max(p - 1, 1) - val prevLink = Link(Uri(baseUrl).withQuery(generatePaginationParams(prevPage, ps)), Link.prev) + val prevLink = Link(Uri(baseUrl).withQuery(generatePaginationParams(prevPage, ps)), LinkParams.prev) val lastPage = math.ceil(meta.totalRecords.getOrElse(1).toDouble / ps.toDouble).toInt - val lastLink = Link(Uri(baseUrl).withQuery(generatePaginationParams(lastPage, ps)), Link.last) + val lastLink = Link(Uri(baseUrl).withQuery(generatePaginationParams(lastPage, ps)), LinkParams.last) val nextPage = math.min(p + 1, lastPage) - val nextLink = Link(Uri(baseUrl).withQuery(generatePaginationParams(nextPage, ps)), Link.next) - - Seq(firstLink, prevLink, nextLink, lastLink) + val nextLink = Link(Uri(baseUrl).withQuery(generatePaginationParams(nextPage, ps)), LinkParams.next) - case _ => Seq() + List(firstLink, prevLink, nextLink, lastLink) + case _ => List.empty } - case 
None => Seq() + case None => List.empty } } } diff --git a/engine/src/main/scala/cromwell/webservice/package.scala b/engine/src/main/scala/cromwell/webservice/package.scala index 1fd8ec1d4..5b22ab108 100644 --- a/engine/src/main/scala/cromwell/webservice/package.scala +++ b/engine/src/main/scala/cromwell/webservice/package.scala @@ -1,42 +1,5 @@ package cromwell package object webservice { - case class QueryParameter(key: String, value: String) type QueryParameters = Seq[QueryParameter] - - object Patterns { - val WorkflowName = """ - (?x) # Turn on comments and whitespace insensitivity. - - ( # Begin capture. - - [a-zA-Z][a-zA-Z0-9_]* # WDL identifier naming pattern of an initial alpha character followed by zero - # or more alphanumeric or underscore characters. - - ) # End capture. - """.trim.r - - val CallFullyQualifiedName = """ - (?x) # Turn on comments and whitespace insensitivity. - - ( # Begin outer capturing group for FQN. - - (?:[a-zA-Z][a-zA-Z0-9_]*) # Inner noncapturing group for top-level workflow name. This is the WDL - # identifier naming pattern of an initial alpha character followed by zero - # or more alphanumeric or underscore characters. - - (?:\.[a-zA-Z][a-zA-Z0-9_]*){1} # Inner noncapturing group for call name, a literal dot followed by a WDL - # identifier. Currently this is quantified to {1} since the call name is - # mandatory and nested workflows are not supported. This could be changed - # to + or a different quantifier if these assumptions change. - - ) # End outer capturing group for FQN. - - - (?: # Begin outer noncapturing group for shard. - \. # Literal dot. - (\d+) # Captured shard digits. - )? # End outer optional noncapturing group for shard. - """.trim.r // The trim is necessary as (?x) must be at the beginning of the regex. 
- } } diff --git a/engine/src/main/scala/cromwell/webservice/webservice_.scala b/engine/src/main/scala/cromwell/webservice/webservice_.scala new file mode 100644 index 000000000..d68ba0bdb --- /dev/null +++ b/engine/src/main/scala/cromwell/webservice/webservice_.scala @@ -0,0 +1,39 @@ +package cromwell.webservice + +case class QueryParameter(key: String, value: String) + +object Patterns { + val WorkflowName = """ + (?x) # Turn on comments and whitespace insensitivity. + + ( # Begin capture. + + [a-zA-Z][a-zA-Z0-9_]* # WDL identifier naming pattern of an initial alpha character followed by zero + # or more alphanumeric or underscore characters. + + ) # End capture. + """.trim.r + + val CallFullyQualifiedName = """ + (?x) # Turn on comments and whitespace insensitivity. + + ( # Begin outer capturing group for FQN. + + (?:[a-zA-Z][a-zA-Z0-9_]*) # Inner noncapturing group for top-level workflow name. This is the WDL + # identifier naming pattern of an initial alpha character followed by zero + # or more alphanumeric or underscore characters. + + (?:\.[a-zA-Z][a-zA-Z0-9_]*){1} # Inner noncapturing group for call name, a literal dot followed by a WDL + # identifier. Currently this is quantified to {1} since the call name is + # mandatory and nested workflows are not supported. This could be changed + # to + or a different quantifier if these assumptions change. + + ) # End outer capturing group for FQN. + + + (?: # Begin outer noncapturing group for shard. + \. # Literal dot. + (\d+) # Captured shard digits. + )? # End outer optional noncapturing group for shard. + """.trim.r // The trim is necessary as (?x) must be at the beginning of the regex. 
+} diff --git a/engine/src/test/resources/swagger/testservice.json b/engine/src/test/resources/swagger/testservice.json new file mode 100644 index 000000000..3ef100ba2 --- /dev/null +++ b/engine/src/test/resources/swagger/testservice.json @@ -0,0 +1,29 @@ +{ + "swagger": "2.0", + "info": { + "title": "Test Service API", + "description": "Test Service API", + "version": "1.2.3" + }, + "produces": [ + "application/json" + ], + "paths": { + "/hello": { + "get": { + "responses": { + "200": { + "description": "Says hello via get" + } + } + }, + "post": { + "responses": { + "200": { + "description": "Says hello via post" + } + } + } + } + } +} diff --git a/engine/src/test/resources/swagger/testservice.yaml b/engine/src/test/resources/swagger/testservice.yaml new file mode 100644 index 000000000..186a0b913 --- /dev/null +++ b/engine/src/test/resources/swagger/testservice.yaml @@ -0,0 +1,17 @@ +swagger: '2.0' +info: + title: Test Service API + description: Test Service API + version: 1.2.3 +produces: + - application/json +paths: + /hello: + get: + responses: + '200': + description: Says hello via get + post: + responses: + '200': + description: Says hello via post diff --git a/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala b/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala index 06f437de6..e02b87c4c 100644 --- a/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala +++ b/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala @@ -1,13 +1,12 @@ package cromwell import akka.testkit._ -import wdl4s.types.{WdlArrayType, WdlStringType} -import wdl4s.values.{WdlArray, WdlString} +import wdl4s.wdl.types.{WdlArrayType, WdlStringType} +import wdl4s.wdl.values.{WdlArray, WdlString} import cromwell.util.SampleWdl -import scala.language.postfixOps -class ArrayOfArrayCoercionSpec extends CromwellTestkitSpec { +class ArrayOfArrayCoercionSpec extends CromwellTestKitWordSpec { "A workflow that has an Array[Array[File]] input " should { "accept an 
Array[Array[String]] as the value for the input" in { runWdlAndAssertOutputs( diff --git a/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala b/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala index dada6157f..51f998e72 100644 --- a/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala @@ -1,46 +1,27 @@ package cromwell -import java.nio.file.Files -import java.util.UUID - import akka.testkit._ -import better.files._ +import cromwell.core.path.DefaultPathBuilder import cromwell.util.SampleWdl -import wdl4s.NamespaceWithWorkflow -import wdl4s.expression.NoFunctions -import wdl4s.types.{WdlArrayType, WdlFileType, WdlStringType} -import wdl4s.values.{WdlArray, WdlFile, WdlInteger, WdlString} - -import scala.language.postfixOps +import wdl4s.wdl.expression.NoFunctions +import wdl4s.wdl.types.{WdlArrayType, WdlFileType, WdlStringType} +import wdl4s.wdl.values.{WdlArray, WdlFile, WdlString} +import wdl4s.wdl.{ImportResolver, WdlNamespaceWithWorkflow} -class ArrayWorkflowSpec extends CromwellTestkitSpec { - val tmpDir = Files.createTempDirectory("ArrayWorkflowSpec") - val ns = NamespaceWithWorkflow.load(SampleWdl.ArrayLiteral(tmpDir).wdlSource("")) +class ArrayWorkflowSpec extends CromwellTestKitWordSpec { + val tmpDir = DefaultPathBuilder.createTempDirectory("ArrayWorkflowSpec") + val ns = WdlNamespaceWithWorkflow.load(SampleWdl.ArrayLiteral(tmpDir).workflowSource(), Seq.empty[ImportResolver]).get val expectedArray = WdlArray(WdlArrayType(WdlFileType), Seq(WdlFile("f1"), WdlFile("f2"), WdlFile("f3"))) - "A task which contains a parameter " should { - "accept an array for the value" in { - runWdlAndAssertOutputs( - sampleWdl = SampleWdl.ArrayIO, - eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), - expectedOutputs = Map( - "wf.count_lines.count" -> WdlInteger(3), - "wf.count_lines_array.count" -> WdlInteger(3), - "wf.serialize.contents" -> WdlString("str1\nstr2\nstr3") - ) 
- ) - } - } - "A static Array[File] declaration" should { "be a valid declaration" in { - val declaration = ns.workflow.declarations.find {_.name == "arr"}.getOrElse { + val declaration = ns.workflow.declarations.find {_.unqualifiedName == "arr"}.getOrElse { fail("Expected declaration 'arr' to be found") } val expression = declaration.expression.getOrElse { fail("Expected an expression for declaration 'arr'") } - val value = expression.evaluate((s:String) => fail("No lookups"), NoFunctions).getOrElse { + val value = expression.evaluate((_: String) => fail("No lookups"), NoFunctions).getOrElse { fail("Expected expression for 'arr' to evaluate") } value shouldEqual WdlArray(WdlArrayType(WdlStringType), Seq(WdlString("f1"), WdlString("f2"), WdlString("f3"))) @@ -49,7 +30,7 @@ class ArrayWorkflowSpec extends CromwellTestkitSpec { val catTask = ns.findTask("cat").getOrElse { fail("Expected to find task 'cat'") } - val command = catTask.instantiateCommand(Map("files" -> expectedArray), NoFunctions).getOrElse { + val command = catTask.instantiateCommand(catTask.inputsFromMap(Map("cat.files" -> expectedArray)), NoFunctions).getOrElse { fail("Expected instantiation to work") } command shouldEqual "cat -s f1 f2 f3" @@ -65,9 +46,8 @@ class ArrayWorkflowSpec extends CromwellTestkitSpec { ) ) ) - val uuid = UUID.randomUUID() - val pwd = File(".") - val sampleWdl = SampleWdl.ArrayLiteral(pwd.path) + val pwd = DefaultPathBuilder.get(".") + val sampleWdl = SampleWdl.ArrayLiteral(pwd) runWdlAndAssertOutputs( sampleWdl, eventFilter = EventFilter.info(pattern = "Starting calls: wf.cat", occurrences = 1), diff --git a/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala b/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala index 5718ad337..24fa0ec22 100644 --- a/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala @@ -5,17 +5,13 @@ import java.util.UUID import akka.testkit._ import 
com.typesafe.config.ConfigFactory import cromwell.CallCachingWorkflowSpec._ -import cromwell.core.Tags.DockerTest -import cromwell.core.Tags._ -import cromwell.engine.workflow.WorkflowManagerActor -import cromwell.engine.workflow.workflowstore.{InMemoryWorkflowStore, WorkflowStoreActor} +import cromwell.core.Tags.{DockerTest, _} import cromwell.util.SampleWdl -import wdl4s.types.{WdlArrayType, WdlIntegerType, WdlStringType} -import wdl4s.values.{WdlArray, WdlFile, WdlInteger, WdlString} +import wdl4s.wdl.types.{WdlArrayType, WdlIntegerType, WdlStringType} +import wdl4s.wdl.values.{WdlArray, WdlFile, WdlInteger, WdlString} -import scala.language.postfixOps -class CallCachingWorkflowSpec extends CromwellTestkitSpec { +class CallCachingWorkflowSpec extends CromwellTestKitWordSpec { def cacheHitMessageForCall(name: String) = s"Call Caching: Cache hit. Using UUID\\(.{8}\\):$name\\.*" val expectedOutputs = Map( @@ -146,11 +142,11 @@ class CallCachingWorkflowSpec extends CromwellTestkitSpec { FIXME: This test had been constructing a custom WorkflowManagerActor. 
I don't believe this is still necessary but this test is being ignored so I'm not sure */ - val workflowId = runWdlAndAssertOutputs( - sampleWdl = SampleWdl.CallCachingWorkflow(UUID.randomUUID().toString), - eventFilter = EventFilter.info(pattern = cacheHitMessageForCall("a"), occurrences = 1), - expectedOutputs = expectedOutputs, - config = CallCachingWorkflowSpec.callCachingConfig) +// val workflowId = runWdlAndAssertOutputs( +// sampleWdl = SampleWdl.CallCachingWorkflow(UUID.randomUUID().toString), +// eventFilter = EventFilter.info(pattern = cacheHitMessageForCall("a"), occurrences = 1), +// expectedOutputs = expectedOutputs, +// config = CallCachingWorkflowSpec.callCachingConfig) // val status = messageAndWait[WorkflowManagerStatusSuccess](WorkflowStatus(workflowId)).state // status shouldEqual WorkflowSucceeded diff --git a/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala b/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala index c43346330..643a9c725 100644 --- a/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala +++ b/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala @@ -1,21 +1,20 @@ package cromwell -import java.nio.file.{Files, Paths} - import akka.testkit.EventFilter +import cromwell.core.path.DefaultPathBuilder import cromwell.util.SampleWdl import org.scalatest.prop.TableDrivenPropertyChecks._ import org.scalatest.prop.Tables.Table import scala.language.postfixOps -class CopyWorkflowOutputsSpec extends CromwellTestkitSpec { +class CopyWorkflowOutputsSpec extends CromwellTestKitWordSpec { "CopyWorkflowOutputsCall" should { "copy workflow outputs" in { val workflowOutputsPath = "copy-workflow-outputs" - val tmpDir = Files.createTempDirectory(workflowOutputsPath).toAbsolutePath + val tmpDir = DefaultPathBuilder.createTempDirectory(workflowOutputsPath).toAbsolutePath val outputs = Table( ("call", "file"), @@ -29,24 +28,23 @@ class CopyWorkflowOutputsSpec extends CromwellTestkitSpec { sampleWdl = 
SampleWdl.WorkflowOutputsWithFiles, eventFilter = EventFilter.info( pattern = "transition from FinalizingWorkflowState to WorkflowSucceededState", occurrences = 1), - runtime = "", workflowOptions = s""" { "final_workflow_outputs_dir": "$tmpDir" } """, - expectedOutputs = Seq("A.out", "A.out2", "B.outs") map { o => ("wfoutputs." + o) -> CromwellTestkitSpec.AnyValueIsFine } toMap, + expectedOutputs = Seq("A.out", "A.out2", "B.outs") map { o => ("wfoutputs." + o) -> CromwellTestKitSpec.AnyValueIsFine } toMap, allowOtherOutputs = false ) forAll(outputs) { (call, file) => - val path = tmpDir.resolve(Paths.get("wfoutputs", workflowId.id.toString, call, "execution", file)) + val path = tmpDir / "wfoutputs" / workflowId.id.toString / call / "execution" / file path.toFile should exist } - val path = tmpDir.resolve(Paths.get("wfoutputs", workflowId.id.toString, "call-C", "execution", "out")) + val path = tmpDir / "wfoutputs" / workflowId.id.toString / "call-C" / "execution" / "out" path.toFile shouldNot exist } "copy scattered workflow outputs" in { val workflowOutputsPath = "copy-workflow-outputs" - val tmpDir = Files.createTempDirectory(workflowOutputsPath).toAbsolutePath + val tmpDir = DefaultPathBuilder.createTempDirectory(workflowOutputsPath).toAbsolutePath val shards = 0 to 9 val outputNames = List("B1", "B2") @@ -62,15 +60,14 @@ class CopyWorkflowOutputsSpec extends CromwellTestkitSpec { sampleWdl = SampleWdl.WorkflowScatterOutputsWithFileArrays, eventFilter = EventFilter.info( pattern = "transition from FinalizingWorkflowState to WorkflowSucceededState", occurrences = 1), - runtime = "", workflowOptions = s""" { "final_workflow_outputs_dir": "$tmpDir" } """, - expectedOutputs = Map("wfoutputs.A.outs" -> CromwellTestkitSpec.AnyValueIsFine), + expectedOutputs = Map("wfoutputs.A.outs" -> CromwellTestKitSpec.AnyValueIsFine), allowOtherOutputs = false ) forAll(outputs) { (call, file) => - val path = tmpDir.resolve(Paths.get("wfoutputs", workflowId.id.toString, call, 
file)) - Files.exists(path) shouldBe true + val path = tmpDir / "wfoutputs" / workflowId.id.toString / call / file + path.toFile should exist } } } diff --git a/engine/src/test/scala/cromwell/CromwellSpec.scala b/engine/src/test/scala/cromwell/CromwellSpec.scala index f1ff598d7..da1720f63 100644 --- a/engine/src/test/scala/cromwell/CromwellSpec.scala +++ b/engine/src/test/scala/cromwell/CromwellSpec.scala @@ -1,7 +1,6 @@ package cromwell import com.typesafe.config.ConfigFactory -import org.scalatest.Tag object CromwellSpec { val BackendConfText = diff --git a/engine/src/test/scala/cromwell/CromwellTestkitSpec.scala b/engine/src/test/scala/cromwell/CromwellTestKitSpec.scala similarity index 76% rename from engine/src/test/scala/cromwell/CromwellTestkitSpec.scala rename to engine/src/test/scala/cromwell/CromwellTestKitSpec.scala index c6f6b3045..318feeaa6 100644 --- a/engine/src/test/scala/cromwell/CromwellTestkitSpec.scala +++ b/engine/src/test/scala/cromwell/CromwellTestKitSpec.scala @@ -1,54 +1,63 @@ package cromwell -import java.nio.file.Paths import java.util.UUID +import java.util.concurrent.atomic.AtomicInteger -import akka.actor.{Actor, ActorRef, ActorSystem, Props} +import akka.actor.{Actor, ActorRef, ActorSystem, Props, Terminated} import akka.pattern.ask +import akka.stream.ActorMaterializer import akka.testkit._ import com.typesafe.config.{Config, ConfigFactory} -import cromwell.CromwellTestkitSpec._ +import cromwell.CromwellTestKitSpec._ import cromwell.backend._ import cromwell.core._ +import cromwell.core.path.BetterFileMethods.Cmds +import cromwell.core.path.DefaultPathBuilder +import cromwell.docker.DockerHashActor.DockerHashSuccessResponse +import cromwell.docker.{DockerHashRequest, DockerHashResult} import cromwell.engine.backend.BackendConfigurationEntry import cromwell.engine.workflow.WorkflowManagerActor.RetrieveNewWorkflows -import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor.{CacheLookupRequest, 
CacheResultMatchesForHashes} -import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CallCacheHashes -import cromwell.engine.workflow.workflowstore.WorkflowStoreActor.WorkflowSubmittedToStore +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor.{CacheLookupNoHit, CacheLookupRequest} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheWriteActor.SaveCallCacheHashes +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheWriteSuccess +import cromwell.engine.workflow.workflowstore.WorkflowStoreSubmitActor.WorkflowSubmittedToStore import cromwell.engine.workflow.workflowstore.{InMemoryWorkflowStore, WorkflowStoreActor} import cromwell.jobstore.JobStoreActor.{JobStoreWriteSuccess, JobStoreWriterCommand} import cromwell.server.{CromwellRootActor, CromwellSystem} import cromwell.services.ServiceRegistryActor -import cromwell.services.metadata.MetadataQuery import cromwell.services.metadata.MetadataService._ +import cromwell.subworkflowstore.EmptySubWorkflowStoreActor import cromwell.util.SampleWdl -import cromwell.webservice.PerRequest.RequestComplete import cromwell.webservice.metadata.MetadataBuilderActor +import cromwell.webservice.metadata.MetadataBuilderActor.{BuiltMetadataResponse, FailedMetadataResponse, MetadataBuilderActorResponse} import org.scalactic.Equality +import org.scalatest._ import org.scalatest.concurrent.{Eventually, ScalaFutures} import org.scalatest.time.{Millis, Seconds, Span} -import org.scalatest.{BeforeAndAfterAll, Matchers, OneInstancePerTest, WordSpecLike} -import spray.http.StatusCode import spray.json._ -import wdl4s.Call -import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} -import wdl4s.types._ -import wdl4s.values._ +import wdl4s.wdl.WdlTaskCall +import wdl4s.wdl.expression.{NoFunctions, WdlStandardLibraryFunctions} +import wdl4s.wdl.types._ +import wdl4s.wdl.values._ import scala.concurrent.duration._ -import 
scala.concurrent.{Await, ExecutionContext} +import scala.concurrent.{Await, ExecutionContext, Future} import scala.language.postfixOps import scala.util.matching.Regex case class TestBackendLifecycleActorFactory(configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - serviceRegistryActor: ActorRef): Option[Props] = None + ioActor: ActorRef, + calls: Set[WdlTaskCall], + serviceRegistryActor: ActorRef, + restarting: Boolean): Option[Props] = None override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef): Props = { + serviceRegistryActor: ActorRef, + ioActor: ActorRef, + backendSingletonActor: Option[ActorRef]): Props = { throw new NotImplementedError("this is not implemented") } @@ -62,7 +71,7 @@ case class TestBackendLifecycleActorFactory(configurationDescriptor: BackendConf case class OutputNotFoundException(outputFqn: String, actualOutputs: String) extends RuntimeException(s"Expected output $outputFqn was not found in: '$actualOutputs'") case class LogNotFoundException(log: String) extends RuntimeException(s"Expected log $log was not found") -object CromwellTestkitSpec { +object CromwellTestKitSpec { val ConfigText = """ |akka { @@ -90,7 +99,7 @@ object CromwellTestkitSpec { | } | | # A dispatcher for engine actors - | # Because backends behaviour is unpredictable (potentially blocking, slow) the engine runs + | # Because backends behavior is unpredictable (potentially blocking, slow) the engine runs | # on its own dispatcher to prevent backends from affecting its performance. | engine-dispatcher { | type = Dispatcher @@ -109,7 +118,7 @@ object CromwellTestkitSpec { | # Some of our tests fire off a message, then expect a particular event message within 3s (the default). 
| # Especially on CI, the metadata test does not seem to be returning in time. So, overriding the timeouts | # with slightly higher values. Alternatively, could also adjust the akka.test.timefactor only in CI. - | filter-leeway = 5s + | filter-leeway = 10s | single-expect-default = 5s | default-timeout = 10s | } @@ -120,31 +129,22 @@ object CromwellTestkitSpec { val TimeoutDuration = 60 seconds + private val testWorkflowManagerSystemCount = new AtomicInteger() + class TestWorkflowManagerSystem extends CromwellSystem { - override protected def systemName: String = "test-system" - override protected def newActorSystem() = ActorSystem(systemName, ConfigFactory.parseString(CromwellTestkitSpec.ConfigText)) + override protected def systemName: String = "test-system-" + testWorkflowManagerSystemCount.incrementAndGet() + override protected def newActorSystem() = ActorSystem(systemName, ConfigFactory.parseString(CromwellTestKitSpec.ConfigText)) /** * Do NOT shut down the test actor system inside the normal flow. * The actor system will be externally shutdown outside the block. */ - override def shutdownActorSystem() = {} + // -Ywarn-value-discard + override def shutdownActorSystem(): Future[Terminated] = { Future.successful(null) } def shutdownTestActorSystem() = super.shutdownActorSystem() } /** - * Loans a test actor system. NOTE: This should be run OUTSIDE of a wait block, never within one. - */ - def withTestWorkflowManagerSystem[T](block: CromwellSystem => T): T = { - val testWorkflowManagerSystem = new CromwellTestkitSpec.TestWorkflowManagerSystem - try { - block(testWorkflowManagerSystem) - } finally { - TestKit.shutdownActorSystem(testWorkflowManagerSystem.actorSystem, TimeoutDuration) - } - } - - /** * Wait for exactly one occurrence of the specified info pattern in the specified block. The block is in its own * parameter list for usage syntax reasons. */ @@ -186,6 +186,21 @@ object CromwellTestkitSpec { * the actual value was. 
*/ lazy val AnyValueIsFine: WdlValue = WdlString("Today you are you! That is truer than true! There is no one alive who is you-er than you!") + + def replaceVariables(wdlValue: WdlValue, workflowId: WorkflowId): WdlValue = { + wdlValue match { + case WdlString(value) => WdlString(replaceVariables(value, workflowId)) + case _ => wdlValue + } + } + + def replaceVariables(value: String, workflowId: WorkflowId): String = { + val variables = Map("PWD" -> Cmds.pwd, "UUID" -> workflowId) + variables.foldLeft(value) { + case (result, (variableName, variableValue)) => result.replace(s"<<$variableName>>", s"$variableValue") + } + } + lazy val DefaultConfig = ConfigFactory.load lazy val DefaultLocalBackendConfig = ConfigFactory.parseString( """ @@ -261,25 +276,30 @@ object CromwellTestkitSpec { ServiceRegistryActorSystem.actorOf(ServiceRegistryActor.props(ConfigFactory.load()), "ServiceRegistryActor") } - class TestCromwellRootActor(config: Config) extends CromwellRootActor { + class TestCromwellRootActor(config: Config)(implicit materializer: ActorMaterializer) extends CromwellRootActor(false, false) { + override val serverMode = true override lazy val serviceRegistryActor = ServiceRegistryActorInstance override lazy val workflowStore = new InMemoryWorkflowStore - def submitWorkflow(sources: WorkflowSourceFiles): WorkflowId = { + def submitWorkflow(sources: WorkflowSourceFilesWithoutImports): WorkflowId = { val submitMessage = WorkflowStoreActor.SubmitWorkflow(sources) val result = Await.result(workflowStoreActor.ask(submitMessage)(TimeoutDuration), Duration.Inf).asInstanceOf[WorkflowSubmittedToStore].workflowId workflowManagerActor ! 
RetrieveNewWorkflows result } } + + def defaultTwms = new CromwellTestKitSpec.TestWorkflowManagerSystem() } -abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new CromwellTestkitSpec.TestWorkflowManagerSystem()) extends TestKit(twms.actorSystem) - with DefaultTimeout with ImplicitSender with WordSpecLike with Matchers with BeforeAndAfterAll with ScalaFutures with OneInstancePerTest with Eventually { +abstract class CromwellTestKitWordSpec extends CromwellTestKitSpec with WordSpecLike +abstract class CromwellTestKitSpec(val twms: TestWorkflowManagerSystem = defaultTwms) extends TestKit(twms.actorSystem) + with DefaultTimeout with ImplicitSender with Matchers with ScalaFutures with Eventually with Suite with OneInstancePerTest with BeforeAndAfterAll { - override protected def afterAll() = { twms.shutdownTestActorSystem() } + override protected def afterAll() = { twms.shutdownTestActorSystem(); () } - implicit val defaultPatience = PatienceConfig(timeout = Span(30, Seconds), interval = Span(100, Millis)) + implicit val defaultPatience = PatienceConfig(timeout = Span(200, Seconds), interval = Span(1000, Millis)) implicit val ec = system.dispatcher + implicit val materializer = twms.materializer val dummyServiceRegistryActor = system.actorOf(Props.empty) val dummyLogCopyRouter = system.actorOf(Props.empty) @@ -294,9 +314,10 @@ abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new Cro } } - // Allow to use shouldEqual between 2 WdlValues while acknowledging for edge cases and checking for WdlType compatibilty + // Allow to use shouldEqual between 2 WdlValues while acknowledging for edge cases and checking for WdlType compatibility implicit val wdlEquality = new Equality[WdlValue] { - def fileEquality(f1: String, f2: String) = Paths.get(f1).getFileName == Paths.get(f2).getFileName + def fileEquality(f1: String, f2: String) = + DefaultPathBuilder.get(f1).getFileName == DefaultPathBuilder.get(f2).getFileName override def 
areEqual(a: WdlValue, b: Any): Boolean = { val typeEquality = b match { @@ -324,17 +345,24 @@ abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new Cro } private def buildCromwellRootActor(config: Config) = { - TestActorRef(new TestCromwellRootActor(config), name = "TestCromwellRootActor") + TestActorRef(new TestCromwellRootActor(config), name = "TestCromwellRootActor" + UUID.randomUUID().toString) } def runWdl(sampleWdl: SampleWdl, runtime: String = "", workflowOptions: String = "{}", + customLabels: String = "{}", terminalState: WorkflowState = WorkflowSucceeded, config: Config = DefaultConfig, patienceConfig: PatienceConfig = defaultPatience)(implicit ec: ExecutionContext): Map[FullyQualifiedName, WdlValue] = { val rootActor = buildCromwellRootActor(config) - val sources = WorkflowSourceFiles(sampleWdl.wdlSource(runtime), sampleWdl.wdlJson, workflowOptions) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = sampleWdl.workflowSource(runtime), + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = sampleWdl.workflowJson, + workflowOptionsJson = workflowOptions, + labelsJson = customLabels) val workflowId = rootActor.underlyingActor.submitWorkflow(sources) eventually { verifyWorkflowState(rootActor.underlyingActor.serviceRegistryActor, workflowId, terminalState) } (config = patienceConfig, pos = implicitly[org.scalactic.source.Position]) val outcome = getWorkflowOutputsFromMetadata(workflowId, rootActor.underlyingActor.serviceRegistryActor) @@ -365,7 +393,7 @@ abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new Cro expectedOutputs foreach { case (outputFqn, expectedValue) => val actualValue = outputs.getOrElse(outputFqn, throw OutputNotFoundException(outputFqn, actualOutputNames)) - if (expectedValue != AnyValueIsFine) actualValue shouldEqual expectedValue + if (expectedValue != AnyValueIsFine) actualValue shouldEqual replaceVariables(expectedValue, workflowId) } if 
(!allowOtherOutputs) { outputs foreach { case (actualFqn, actualValue) => @@ -378,22 +406,6 @@ abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new Cro workflowId } - def getWorkflowMetadata(workflowId: WorkflowId, serviceRegistryActor: ActorRef, key: Option[String] = None)(implicit ec: ExecutionContext): JsObject = { - // MetadataBuilderActor sends its response to context.parent, so we can't just use an ask to talk to it here - val message = GetMetadataQueryAction(MetadataQuery(workflowId, None, key, None, None)) - val parentProbe = TestProbe() - - TestActorRef(MetadataBuilderActor.props(serviceRegistryActor), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") ! message - val metadata = parentProbe.expectMsgPF(TimeoutDuration) { - // Because of type erasure the scala compiler can't check that the RequestComplete generic type will be (StatusCode, JsObject), which would generate a warning - // As long as Metadata sends back a JsObject this is safe - case response: RequestComplete[(StatusCode, JsObject)] @unchecked => response.response._2 - } - - system.stop(parentProbe.ref) - metadata - } - /** * Verifies that a state is correct. // TODO: There must be a better way...? 
*/ @@ -407,10 +419,20 @@ abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new Cro } getWorkflowState(workflowId, serviceRegistryActor) should equal (expectedState) + () } private def getWorkflowOutputsFromMetadata(id: WorkflowId, serviceRegistryActor: ActorRef): Map[FullyQualifiedName, WdlValue] = { - getWorkflowMetadata(id, serviceRegistryActor, None).getFields(WorkflowMetadataKeys.Outputs).toList match { + val mba = system.actorOf(MetadataBuilderActor.props(serviceRegistryActor)) + val response = mba.ask(WorkflowOutputs(id)).mapTo[MetadataBuilderActorResponse] collect { + case BuiltMetadataResponse(r) => r + case FailedMetadataResponse(e) => throw e + } + val jsObject = Await.result(response, TimeoutDuration) + + system.stop(mba) + + jsObject.getFields(WorkflowMetadataKeys.Outputs).toList match { case head::_ => head.asInstanceOf[JsObject].fields.map( x => (x._1, jsValueToWdlValue(x._2))) case _ => Map.empty } @@ -441,16 +463,40 @@ class AlwaysHappyJobStoreActor extends Actor { } } +object AlwaysHappySubWorkflowStoreActor { + def props: Props = Props(new EmptySubWorkflowStoreActor) +} + object AlwaysHappyJobStoreActor { def props: Props = Props(new AlwaysHappyJobStoreActor) } class EmptyCallCacheReadActor extends Actor { override def receive: Receive = { - case CacheLookupRequest(CallCacheHashes(hashes)) => sender ! CacheResultMatchesForHashes(hashes, Set.empty) + case _: CacheLookupRequest => sender ! CacheLookupNoHit + } +} + +class EmptyCallCacheWriteActor extends Actor { + override def receive: Receive = { + case SaveCallCacheHashes => sender ! CallCacheWriteSuccess } } object EmptyCallCacheReadActor { def props: Props = Props(new EmptyCallCacheReadActor) } + +object EmptyCallCacheWriteActor { + def props: Props = Props(new EmptyCallCacheWriteActor) +} + +class EmptyDockerHashActor extends Actor { + override def receive: Receive = { + case request: DockerHashRequest => sender ! 
DockerHashSuccessResponse(DockerHashResult("alg", "hash"), request) + } +} + +object EmptyDockerHashActor { + def props: Props = Props(new EmptyDockerHashActor) +} diff --git a/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala b/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala index 9671706ef..65a541df6 100644 --- a/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala @@ -1,20 +1,19 @@ package cromwell -import wdl4s.types.{WdlFileType, WdlStringType} -import wdl4s.{NamespaceWithWorkflow, WorkflowInput} +import wdl4s.wdl.types.{WdlFileType, WdlStringType} +import wdl4s.wdl.{ImportResolver, WdlNamespaceWithWorkflow, WorkflowInput} import cromwell.util.SampleWdl import org.scalatest.{Matchers, WordSpecLike} -import scala.language.postfixOps class DeclarationWorkflowSpec extends Matchers with WordSpecLike { "A workflow with declarations in it" should { "compute inputs properly" in { - NamespaceWithWorkflow.load(SampleWdl.DeclarationsWorkflow.wdlSource(runtime="")).workflow.inputs shouldEqual Map( - "two_step.cat.file" -> WorkflowInput("two_step.cat.file", WdlFileType, postfixQuantifier = None), - "two_step.cgrep.str_decl" -> WorkflowInput("two_step.cgrep.str_decl", WdlStringType, postfixQuantifier = None), - "two_step.cgrep.pattern" -> WorkflowInput("two_step.cgrep.pattern", WdlStringType, postfixQuantifier = None), - "two_step.flags_suffix" -> WorkflowInput("two_step.flags_suffix", WdlStringType, postfixQuantifier = None) + WdlNamespaceWithWorkflow.load(SampleWdl.DeclarationsWorkflow.workflowSource(runtime=""), Seq.empty[ImportResolver]).get.workflow.inputs shouldEqual Map( + "two_step.cat.file" -> WorkflowInput("two_step.cat.file", WdlFileType), + "two_step.cgrep.str_decl" -> WorkflowInput("two_step.cgrep.str_decl", WdlStringType), + "two_step.cgrep.pattern" -> WorkflowInput("two_step.cgrep.pattern", WdlStringType), + "two_step.flags_suffix" -> 
WorkflowInput("two_step.flags_suffix", WdlStringType) ) } } diff --git a/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala b/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala index 3ddffcba7..6521286dc 100644 --- a/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala @@ -1,14 +1,12 @@ package cromwell import akka.testkit._ -import wdl4s.values.{WdlFile, WdlString} import cromwell.util.SampleWdl +import wdl4s.wdl.values.{WdlFile, WdlString} -import scala.concurrent.ExecutionContext import scala.concurrent.duration._ -import scala.language.postfixOps -class FilePassingWorkflowSpec extends CromwellTestkitSpec { +class FilePassingWorkflowSpec extends CromwellTestKitWordSpec { "A workflow that passes files between tasks" should { "pass files properly" in { runWdlAndAssertOutputs( diff --git a/engine/src/test/scala/cromwell/MapWorkflowSpec.scala b/engine/src/test/scala/cromwell/MapWorkflowSpec.scala index 1176a4f99..296de9ad2 100644 --- a/engine/src/test/scala/cromwell/MapWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/MapWorkflowSpec.scala @@ -1,20 +1,19 @@ package cromwell import akka.testkit._ -import better.files._ +import cromwell.core.path.DefaultPathBuilder import cromwell.util.SampleWdl -import wdl4s.NamespaceWithWorkflow -import wdl4s.expression.{NoFunctions, WdlFunctions} -import wdl4s.types.{WdlFileType, WdlIntegerType, WdlMapType, WdlStringType} -import wdl4s.values._ +import wdl4s.wdl.expression.{NoFunctions, WdlFunctions} +import wdl4s.wdl.types.{WdlFileType, WdlIntegerType, WdlMapType, WdlStringType} +import wdl4s.wdl.values._ +import wdl4s.wdl.{ImportResolver, WdlNamespaceWithWorkflow} -import scala.language.postfixOps import scala.util.{Success, Try} -class MapWorkflowSpec extends CromwellTestkitSpec { - private val pwd = File(".") - private val sampleWdl = SampleWdl.MapLiteral(pwd.path) - val ns = NamespaceWithWorkflow.load(sampleWdl.wdlSource("")) 
+class MapWorkflowSpec extends CromwellTestKitWordSpec { + private val pwd = DefaultPathBuilder.get(".") + private val sampleWdl = SampleWdl.MapLiteral(pwd) + val ns = WdlNamespaceWithWorkflow.load(sampleWdl.workflowSource(), Seq.empty[ImportResolver]).get val expectedMap = WdlMap(WdlMapType(WdlFileType, WdlStringType), Map( WdlFile("f1") -> WdlString("alice"), WdlFile("f2") -> WdlString("bob"), @@ -24,7 +23,8 @@ class MapWorkflowSpec extends CromwellTestkitSpec { "A task which contains a parameter " should { "accept an array for the value" in { - val sampleWdl = SampleWdl.MapLiteral(pwd.path) + val sampleWdl = SampleWdl.MapLiteral(pwd) + val callDir = "<>/cromwell-executions/wf/<>/call-write_map/inputs<>" runWdlAndAssertOutputs( sampleWdl = sampleWdl, EventFilter.info(pattern = "Starting calls: wf.read_map:NA:1, wf.write_map:NA:1", occurrences = 1), @@ -34,7 +34,7 @@ class MapWorkflowSpec extends CromwellTestkitSpec { WdlString("y") -> WdlInteger(600), WdlString("z") -> WdlInteger(700) )), - "wf.write_map.contents" -> WdlString("f1\talice\nf2\tbob\nf3\tchuck") + "wf.write_map.contents" -> WdlString(s"$callDir/f1\talice\n$callDir/f2\tbob\n$callDir/f3\tchuck") ) ) sampleWdl.cleanup() @@ -43,13 +43,13 @@ class MapWorkflowSpec extends CromwellTestkitSpec { "A static Map[File, String] declaration" should { "be a valid declaration" in { - val declaration = ns.workflow.declarations.find {_.name == "map"}.getOrElse { + val declaration = ns.workflow.declarations.find {_.unqualifiedName == "map"}.getOrElse { fail("Expected declaration 'map' to be found") } val expression = declaration.expression.getOrElse { fail("Expected an expression for declaration 'map'") } - val value = expression.evaluate((s:String) => fail("No lookups"), NoFunctions).getOrElse { + val value = expression.evaluate((_: String) => fail("No lookups"), NoFunctions).getOrElse { fail("Expected expression for 'map' to evaluate") } expectedMap.wdlType.coerceRawValue(value).get shouldEqual expectedMap @@ -65,13 
+65,13 @@ class MapWorkflowSpec extends CromwellTestkitSpec { case _ => throw new UnsupportedOperationException("Only write_map should be called") } } - val command = writeMapTask.instantiateCommand(Map("file_to_name" -> expectedMap), new CannedFunctions).getOrElse { + val command = writeMapTask.instantiateCommand(writeMapTask.inputsFromMap(Map("file_to_name" -> expectedMap)), new CannedFunctions).getOrElse { fail("Expected instantiation to work") } command shouldEqual "cat /test/map/path" } "Coerce Map[String, String] to Map[String, Int] when running the workflow" in { - val sampleWdl = SampleWdl.MapLiteral(pwd.path) + val sampleWdl = SampleWdl.MapLiteral(pwd) runWdlAndAssertOutputs( sampleWdl, eventFilter = EventFilter.info(pattern = "Starting calls: wf.read_map:NA:1, wf.write_map:NA:1", occurrences = 1), diff --git a/engine/src/test/scala/cromwell/MetadataWatchActor.scala b/engine/src/test/scala/cromwell/MetadataWatchActor.scala index cc334b05f..0a47bc38b 100644 --- a/engine/src/test/scala/cromwell/MetadataWatchActor.scala +++ b/engine/src/test/scala/cromwell/MetadataWatchActor.scala @@ -1,16 +1,18 @@ package cromwell -import akka.actor.{Actor, Props} -import cromwell.services.metadata.{MetadataEvent, MetadataJobKey, MetadataString} +import akka.actor.{Actor, ActorLogging, Props} +import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.services.metadata.{MetadataEvent, MetadataJobKey, MetadataString, MetadataValue} import cromwell.services.metadata.MetadataService.PutMetadataAction import MetadataWatchActor._ +import cromwell.services.keyvalue.KeyValueServiceActor.{KvPut, KvPutSuccess} import scala.concurrent.Promise // This actor stands in for the service registry and watches for metadata messages that match the optional `matcher`. // If there is no predicate then use `ignoringBehavior` which ignores all messages. This is here because there is no // WorkflowManagerActor in this test that would spin up a real ServiceRegistry. 
-final case class MetadataWatchActor(promise: Promise[Unit], matchers: Matcher*) extends Actor { +final case class MetadataWatchActor(promise: Promise[Unit], matchers: Matcher*) extends Actor with ActorLogging { var unsatisfiedMatchers = matchers @@ -19,38 +21,57 @@ final case class MetadataWatchActor(promise: Promise[Unit], matchers: Matcher*) unsatisfiedMatchers = unsatisfiedMatchers.filterNot { m => m.matches(events) } if (unsatisfiedMatchers.isEmpty) { promise.trySuccess(()) + () } case PutMetadataAction(_) => // Superfluous message. Ignore - case _ => throw new Exception("Invalid message to MetadataWatchActor") + // Because the MetadataWatchActor is sometimes used in place of the ServiceRegistryActor, this allows WFs to continue: + case kvPut: KvPut => sender ! KvPutSuccess(kvPut) + case other => throw new Exception(s"Invalid message to MetadataWatchActor: $other") } } object MetadataWatchActor { - def props(promise: Promise[Unit], matchers: Matcher*): Props = Props(MetadataWatchActor(promise, matchers: _*)) + def props(promise: Promise[Unit], matchers: Matcher*): Props = Props(MetadataWatchActor(promise, matchers: _*)).withDispatcher(EngineDispatcher) trait Matcher { - def matches(events: Traversable[MetadataEvent]): Boolean + private var _fullEventList: List[MetadataEvent] = List.empty + final def matches(events: Traversable[MetadataEvent]): Boolean = { + _fullEventList ++= events + _matches(events) + } + def _matches(events: Traversable[MetadataEvent]): Boolean + private var _nearMisses: List[String] = List.empty + private def addNearMissInfo(miss: String) = _nearMisses :+= miss + def nearMissInformation = _nearMisses + def fullEventList = _fullEventList + + def checkMetadataValueContains(key: String, actual: MetadataValue, expected: String): Boolean = { + val result = actual.value.contains(expected) + if (!result) addNearMissInfo(s"Key $key had unexpected value.\nActual value: ${actual.value}\n\nDid not contain: $expected") + result + } } def 
metadataKeyAttemptChecker(attempt: Int): Option[MetadataJobKey] => Boolean = { case Some(jobKey) => jobKey.attempt == attempt case None => false } + final case class JobKeyMetadataKeyAndValueContainStringMatcher(jobKeyCheck: Option[MetadataJobKey] => Boolean, key: String, value: String) extends Matcher { - def matches(events: Traversable[MetadataEvent]): Boolean = { - events.exists(e => e.key.key.contains(key) && jobKeyCheck(e.key.jobKey) && e.value.exists { v => v.valueType == MetadataString && v.value.contains(value) }) + def _matches(events: Traversable[MetadataEvent]): Boolean = { + events.exists(e => e.key.key.contains(key) && jobKeyCheck(e.key.jobKey) && e.value.exists { v => v.valueType == MetadataString && checkMetadataValueContains(e.key.key, v, value) }) } } abstract class KeyMatchesRegexAndValueContainsStringMatcher(keyTemplate: String, value: String) extends Matcher { val templateRegex = keyTemplate.r - def matches(events: Traversable[MetadataEvent]): Boolean = { - events.exists(e => templateRegex.findFirstIn(e.key.key).isDefined && e.value.exists { v => v.value.contains(value) }) + def _matches(events: Traversable[MetadataEvent]): Boolean = { + events.exists(e => templateRegex.findFirstIn(e.key.key).isDefined && + e.value.exists { v => checkMetadataValueContains(e.key.key, v, value) }) } } - val failurePattern = """failures\[\d*\].message""" - final case class FailureMatcher(value: String) extends KeyMatchesRegexAndValueContainsStringMatcher(failurePattern, value) { - } + val failurePattern = """failures\[\d*\].*\:message""" + final case class FailureMatcher(value: String) extends KeyMatchesRegexAndValueContainsStringMatcher(failurePattern, value) { } } diff --git a/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala b/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala index ca7e2720b..4d0e4f51a 100644 --- a/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala +++ 
b/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala @@ -1,12 +1,11 @@ package cromwell import akka.testkit._ -import wdl4s.values.{WdlString, WdlFile} import cromwell.util.SampleWdl +import wdl4s.wdl.values.WdlString -import scala.language.postfixOps -class MultipleFilesWithSameNameWorkflowSpec extends CromwellTestkitSpec { +class MultipleFilesWithSameNameWorkflowSpec extends CromwellTestKitWordSpec { "A workflow with two file inputs that have the same name" should { "not clobber one file with the contents of another" in { runWdlAndAssertOutputs( diff --git a/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala b/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala index af3d68c29..f9db993f9 100644 --- a/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala @@ -1,21 +1,20 @@ package cromwell import org.scalatest.{Matchers, WordSpecLike} -import wdl4s.WdlNamespace -import wdl4s.expression.NoFunctions -import wdl4s.values.{WdlFile, WdlString} +import wdl4s.wdl.WdlNamespace +import wdl4s.wdl.expression.NoFunctions +import wdl4s.wdl.values.{WdlFile, WdlString} -import scala.language.postfixOps class OptionalParamWorkflowSpec extends Matchers with WordSpecLike { "A workflow with an optional parameter that has a prefix inside the tag" should { "not include that prefix if no value is specified" in { - val wf = """ + val wf = s""" |task find { | String? 
pattern | File root | command { - | find ${root} ${"-name " + pattern} + | find $${root} $${"-name " + pattern} | } |} | @@ -23,20 +22,18 @@ class OptionalParamWorkflowSpec extends Matchers with WordSpecLike { | call find |} """.stripMargin - val ns = WdlNamespace.load(wf) + val ns = WdlNamespace.loadUsingSource(wf, None, None).get val findTask = ns.findTask("find") getOrElse { fail("Expected to find task 'find'") } - val instantiateWithoutValue = findTask.instantiateCommand(Map("root" -> WdlFile("src")), NoFunctions) getOrElse { - fail("Expected instantiation to work") - } - instantiateWithoutValue shouldEqual "find src" + val instantiateWithoutValue = findTask.instantiateCommand(findTask.inputsFromMap(Map("find.root" -> WdlFile("src"))), NoFunctions) + instantiateWithoutValue.get shouldEqual "find src" - val instantiateWithValue = findTask.instantiateCommand(Map( - "root" -> WdlFile("src"), - "pattern" -> WdlString("*.java") - ), NoFunctions).getOrElse {fail("Expected instantiation to work")} + val instantiateWithValue = findTask.instantiateCommand(findTask.inputsFromMap(Map( + "find.root" -> WdlFile("src"), + "find.pattern" -> WdlString("*.java") + )), NoFunctions).getOrElse {fail("Expected instantiation to work")} instantiateWithValue shouldEqual "find src -name *.java" } } diff --git a/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala b/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala index 8daf9be69..1935e3409 100644 --- a/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala @@ -1,12 +1,11 @@ package cromwell import akka.testkit._ -import wdl4s.values.WdlString +import wdl4s.wdl.values.WdlString import cromwell.util.SampleWdl -import scala.language.postfixOps -class PostfixQuantifierWorkflowSpec extends CromwellTestkitSpec { +class PostfixQuantifierWorkflowSpec extends CromwellTestKitWordSpec { "A task which contains a parameter with a 
zero-or-more postfix quantifier" should { "accept an array of size 3" in { runWdlAndAssertOutputs( diff --git a/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala b/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala index b999e3623..acb0afbb9 100644 --- a/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala @@ -6,25 +6,29 @@ import cromwell.core.Tags._ import cromwell.core._ import cromwell.engine.workflow.WorkflowDescriptorBuilder -class RestartWorkflowSpec extends CromwellTestkitSpec with WorkflowDescriptorBuilder { +class RestartWorkflowSpec extends CromwellTestKitWordSpec with WorkflowDescriptorBuilder { - val actorSystem = ActorSystem("RestartWorkflowSpec", ConfigFactory.parseString(CromwellTestkitSpec.ConfigText)) + val actorSystem = ActorSystem("RestartWorkflowSpec", ConfigFactory.parseString(CromwellTestKitSpec.ConfigText)) //val localBackend = new OldStyleLocalBackend(CromwellTestkitSpec.DefaultLocalBackendConfigEntry, actorSystem) - val sources = WorkflowSourceFiles( - wdlSource="""task a {command{}} - |workflow w { - | call a - | call a as b - |} - """.stripMargin, - inputsJson="{}", - workflowOptionsJson="{}" + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = + """task a {command{}} + |workflow w { + | call a + | call a as b + |} + """.stripMargin, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = "{}", + workflowOptionsJson = "{}", + labelsJson = "{}" ) "RestartWorkflowSpec" should { "restart a call in Running state" taggedAs PostMVP ignore { - val id = WorkflowId.randomId() - val descriptor = createMaterializedEngineWorkflowDescriptor(id, sources) +// val id = WorkflowId.randomId() +// val descriptor = createMaterializedEngineWorkflowDescriptor(id, sources) // val a = ExecutionDatabaseKey("w.a", Option(-1), 1) // val b = ExecutionDatabaseKey("w.b", Option(-1), 1) // diff --git 
a/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala b/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala index 035004669..0f896de48 100644 --- a/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala @@ -2,13 +2,11 @@ package cromwell import akka.testkit._ import cromwell.core.Tags.DockerTest -import wdl4s.types.{WdlArrayType, WdlFileType, WdlIntegerType, WdlStringType} -import wdl4s.values.{WdlArray, WdlFile, WdlInteger, WdlString} +import wdl4s.wdl.types.{WdlArrayType, WdlFileType, WdlIntegerType, WdlStringType} +import wdl4s.wdl.values.{WdlArray, WdlFile, WdlInteger, WdlString} import cromwell.util.SampleWdl -import scala.language.postfixOps - -class ScatterWorkflowSpec extends CromwellTestkitSpec { +class ScatterWorkflowSpec extends CromwellTestKitWordSpec { "A workflow with a stand-alone scatter block in it" should { "run properly" in { runWdlAndAssertOutputs( @@ -29,7 +27,7 @@ class ScatterWorkflowSpec extends CromwellTestkitSpec { expectedOutputs = Map( "w.E.E_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(9, 9, 9, 9, 9, 9).map(WdlInteger(_))), "w.C.C_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(400, 500, 600, 800, 600, 500).map(WdlInteger(_))), - "w.A.A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "scott").map(WdlString)), + "w.A.A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "ruchi").map(WdlString)), "w.D.D_out" -> WdlInteger(34), "w.B.B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))) ) @@ -45,7 +43,7 @@ class ScatterWorkflowSpec extends CromwellTestkitSpec { "w.E.E_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(9, 9, 9, 9, 9, 9).map(WdlInteger(_))), "w.F.B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))), "w.C.C_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(400, 500, 600, 
800, 600, 500).map(WdlInteger(_))), - "w.A.A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "scott").map(WdlString)), + "w.A.A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "ruchi").map(WdlString)), "w.D.D_out" -> WdlInteger(34), "w.B.B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))) ) diff --git a/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala b/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala index 8925f1142..f67e4e3cd 100644 --- a/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala +++ b/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala @@ -7,16 +7,17 @@ import akka.testkit._ import com.typesafe.config.ConfigFactory import cromwell.MetadataWatchActor.{FailureMatcher, Matcher} import cromwell.SimpleWorkflowActorSpec._ -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.core.{SimpleIoActor, WorkflowId, WorkflowSourceFilesWithoutImports} +import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.WorkflowActor import cromwell.engine.workflow.WorkflowActor._ +import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import cromwell.util.SampleWdl import cromwell.util.SampleWdl.HelloWorld.Addressee import org.scalatest.BeforeAndAfter import scala.concurrent.duration._ import scala.concurrent.{Await, Promise} -import scala.language.postfixOps object SimpleWorkflowActorSpec { @@ -27,22 +28,36 @@ object SimpleWorkflowActorSpec { promise: Promise[Unit]) } -class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { +class SimpleWorkflowActorSpec extends CromwellTestKitWordSpec with BeforeAndAfter { private def buildWorkflowActor(sampleWdl: SampleWdl, rawInputsOverride: String, workflowId: WorkflowId, matchers: Matcher*): TestableWorkflowActorAndMetadataPromise = { - val workflowSources = 
WorkflowSourceFiles(sampleWdl.wdlSource(), rawInputsOverride, "{}") + val workflowSources = WorkflowSourceFilesWithoutImports( + workflowSource = sampleWdl.workflowSource(), + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = rawInputsOverride, + workflowOptionsJson = "{}", + labelsJson = "{}" + ) val promise = Promise[Unit]() val watchActor = system.actorOf(MetadataWatchActor.props(promise, matchers: _*), s"service-registry-$workflowId-${UUID.randomUUID()}") val supervisor = TestProbe() val workflowActor = TestFSMRef( factory = new WorkflowActor(workflowId, StartNewWorkflow, workflowSources, ConfigFactory.load(), + ioActor = system.actorOf(SimpleIoActor.props), serviceRegistryActor = watchActor, workflowLogCopyRouter = system.actorOf(Props.empty, s"workflow-copy-log-router-$workflowId-${UUID.randomUUID()}"), jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props), - callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props)), + subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props), + callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props), + callCacheWriteActor = system.actorOf(EmptyCallCacheWriteActor.props), + dockerHashActor = system.actorOf(EmptyDockerHashActor.props), + jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props), + backendSingletonCollection = BackendSingletonCollection(Map("Local" -> None)), + serverMode = true), supervisor = supervisor.ref, name = s"workflow-actor-$workflowId" ) @@ -58,10 +73,10 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { "A WorkflowActor" should { "start, run, succeed and die" in { - val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, _) = buildWorkflowActor(SampleWdl.HelloWorld, SampleWdl.HelloWorld.wdlJson, workflowId) + val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, _) = buildWorkflowActor(SampleWdl.HelloWorld, SampleWdl.HelloWorld.workflowJson, 
workflowId) val probe = TestProbe() probe watch workflowActor - startingCallsFilter("hello.hello") { + startingCallsFilter("wf_hello.hello") { workflowActor ! StartWorkflowCommand } @@ -72,7 +87,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "fail to construct with missing inputs" in { - val expectedError = "Required workflow input 'hello.hello.addressee' not specified." + val expectedError = "Required workflow input 'wf_hello.hello.addressee' not specified." val failureMatcher = FailureMatcher(expectedError) val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.HelloWorld, "{}", workflowId, failureMatcher) val probe = TestProbe() @@ -89,7 +104,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "fail to construct with inputs of the wrong type" in { - val expectedError = "Could not coerce value for 'hello.hello.addressee' into: WdlStringType" + val expectedError = "Could not coerce JsNumber value for 'wf_hello.hello.addressee' (3) into: WdlStringType" val failureMatcher = FailureMatcher(expectedError) val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.HelloWorld, s""" { "$Addressee" : 3} """, workflowId, failureMatcher) @@ -97,7 +112,13 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { val probe = TestProbe() probe watch workflowActor workflowActor ! StartWorkflowCommand - Await.result(promise.future, TestExecutionTimeout) + try { + Await.result(promise.future, TestExecutionTimeout) + } catch { + case _: Throwable => + val info = failureMatcher.nearMissInformation + fail(s"We didn't see the expected error message $expectedError within $TestExecutionTimeout. 
${info.mkString(", ")}") + } probe.expectTerminated(workflowActor, AwaitAlmostNothing) supervisor.expectMsgPF(AwaitAlmostNothing, "parent should get a failed response") { case x: WorkflowFailedResponse => @@ -108,12 +129,12 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "fail when a call fails" in { - val expectedError = "Call goodbye.goodbye: return code was 1" + val expectedError = "Job wf_goodbye.goodbye:NA:1 exited with return code 1 which has not been declared as a valid return code. See 'continueOnReturnCode' runtime attribute for more details." val failureMatcher = FailureMatcher(expectedError) - val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.GoodbyeWorld, SampleWdl.GoodbyeWorld.wdlJson, workflowId, failureMatcher) + val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.GoodbyeWorld, SampleWdl.GoodbyeWorld.workflowJson, workflowId, failureMatcher) val probe = TestProbe() probe watch workflowActor - startingCallsFilter("goodbye.goodbye") { + startingCallsFilter("wf_goodbye.goodbye") { workflowActor ! 
StartWorkflowCommand } Await.result(promise.future, TestExecutionTimeout) @@ -127,13 +148,22 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "gracefully handle malformed WDL" in { - val expectedError = "Input evaluation for Call test1.summary failedVariable 'Can't find bfile' not found" + val expectedError = "Variable 'bfile' not found" val failureMatcher = FailureMatcher(expectedError) - val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.CoercionNotDefined, SampleWdl.CoercionNotDefined.wdlJson, workflowId, failureMatcher) + val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.CoercionNotDefined, SampleWdl.CoercionNotDefined.workflowJson, workflowId, failureMatcher) val probe = TestProbe() probe watch workflowActor workflowActor ! StartWorkflowCommand - Await.result(promise.future, TestExecutionTimeout) + try { + Await.result(promise.future, TestExecutionTimeout) + } catch { + case _: Throwable => + val info = failureMatcher.nearMissInformation + val errorString = + if (info.nonEmpty) "We had a near miss: " + info.mkString(", ") + else s"The expected key was never seen. We saw: [\n ${failureMatcher.fullEventList.map(e => s"${e.key} -> ${e.value}").mkString("\n ")}\n]." + fail(s"We didn't see the expected error message '$expectedError' within $TestExecutionTimeout. 
$errorString}") + } probe.expectTerminated(workflowActor, AwaitAlmostNothing) supervisor.expectMsgPF(AwaitAlmostNothing, "parent should get a failed response") { case x: WorkflowFailedResponse => @@ -145,9 +175,9 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } private def startingCallsFilter[T](callNames: String*)(block: => T): T = { - import CromwellTestkitSpec.waitForInfo + import CromwellTestKitSpec.waitForInfo within(TestExecutionTimeout) { - waitForInfo(s"Starting calls: ${callNames.mkString("", ":NA:1, ", ":NA:1")}$$", 1) { + waitForInfo(s"Starting calls: ${callNames.mkString("", ":NA:1, ", ":NA:1")}$$") { block } } diff --git a/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala b/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala index 4a13be95d..2e63123ed 100644 --- a/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala +++ b/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala @@ -1,14 +1,12 @@ package cromwell import akka.testkit._ -import wdl4s.types.{WdlMapType, WdlStringType, WdlArrayType} -import wdl4s.values.{WdlMap, WdlArray, WdlString} -import cromwell.core.Tags.DockerTest import cromwell.util.SampleWdl +import wdl4s.wdl.types.{WdlMapType, WdlStringType} +import wdl4s.wdl.values.{WdlMap, WdlString} -import scala.language.postfixOps -class WdlFunctionsAtWorkflowLevelSpec extends CromwellTestkitSpec { +class WdlFunctionsAtWorkflowLevelSpec extends CromwellTestKitWordSpec { val outputMap = WdlMap(WdlMapType(WdlStringType, WdlStringType), Map( WdlString("k1") -> WdlString("v1"), WdlString("k2") -> WdlString("v2"), diff --git a/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala b/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala index 1cd7a7ef3..c9c6c5f8a 100644 --- a/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala +++ b/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala @@ -5,7 +5,7 @@ import cromwell.util.SampleWdl 
// TODO: These tests are (and were) somewhat unsatisfactory. They'd be much better if we use TestFSMRefs and TestProbes to simulate job completions against the WorkflowActor and make sure it only completes the workflow at the appropriate time. -class WorkflowFailSlowSpec extends CromwellTestkitSpec { +class WorkflowFailSlowSpec extends CromwellTestKitWordSpec { val FailFastOptions = """ |{ diff --git a/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala b/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala index bbdd67a6e..8669051e8 100644 --- a/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala +++ b/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala @@ -2,11 +2,10 @@ package cromwell import akka.testkit._ import cromwell.util.SampleWdl -import cromwell.CromwellTestkitSpec.AnyValueIsFine +import cromwell.CromwellTestKitSpec.AnyValueIsFine -import scala.language.postfixOps -class WorkflowOutputsSpec extends CromwellTestkitSpec { +class WorkflowOutputsSpec extends CromwellTestKitWordSpec { "Workflow outputs" should { "use all outputs if none are specified" in { runWdlAndAssertOutputs( diff --git a/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala b/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala index ec512107e..deb5c7365 100644 --- a/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala +++ b/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala @@ -1,28 +1,28 @@ package cromwell.engine -import java.nio.file.{FileSystem, FileSystems, Path} - -import cromwell.backend.wdl.{PureFunctions, ReadLikeFunctions, WriteFunctions} +import cromwell.backend.wdl.{ReadLikeFunctions, WriteFunctions} +import cromwell.core.path.{DefaultPathBuilder, Path, PathBuilder} import org.scalatest.prop.TableDrivenPropertyChecks._ import org.scalatest.prop.Tables.Table import org.scalatest.{FlatSpec, Matchers} -import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} -import wdl4s.values.{WdlFile, WdlInteger, 
WdlString, WdlValue} +import wdl4s.wdl.expression.{NoFunctions, PureStandardLibraryFunctionsLike, WdlStandardLibraryFunctions} +import wdl4s.wdl.values.{WdlFile, WdlInteger, WdlString, WdlValue} import scala.util.{Failure, Success, Try} class EngineFunctionsSpec extends FlatSpec with Matchers { - trait WdlStandardLibraryImpl extends WdlStandardLibraryFunctions with ReadLikeFunctions with WriteFunctions with PureFunctions { + trait WdlStandardLibraryImpl extends WdlStandardLibraryFunctions with ReadLikeFunctions with WriteFunctions with PureStandardLibraryFunctionsLike { private def fail(name: String) = Failure(new NotImplementedError(s"$name() not implemented yet")) + override def writeTempFile(path: String, prefix: String, suffix: String, content: String): String = super[WriteFunctions].writeTempFile(path, prefix, suffix, content) override def stdout(params: Seq[Try[WdlValue]]): Try[WdlFile] = fail("stdout") override def stderr(params: Seq[Try[WdlValue]]): Try[WdlFile] = fail("stderr") } def expectFailure(value: Try[WdlValue]) = value match { case Success(s) => fail(s"$s: Expected this function invocation to fail") - case Failure(ex) => // expected + case Failure(_) => // expected } "EngineFunctions" should "all initially be undefined" in { val stdFunctions = Seq( @@ -37,9 +37,9 @@ class EngineFunctionsSpec extends FlatSpec with Matchers { "sub" should "replace a string according to a pattern" in { class TestEngineFn extends WdlStandardLibraryImpl { - override def glob(path: String, pattern: String): Seq[String] = ??? - override def fileSystems: List[FileSystem] = List(FileSystems.getDefault) - override def writeDirectory: Path = ??? 
+ override def glob(path: String, pattern: String): Seq[String] = throw new NotImplementedError + override def pathBuilders: List[PathBuilder] = List(DefaultPathBuilder) + override def writeDirectory: Path = throw new NotImplementedError } val engineFn = new TestEngineFn diff --git a/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala b/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala index 6a4b0a077..0157adec2 100644 --- a/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala +++ b/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala @@ -1,8 +1,8 @@ package cromwell.engine -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitWordSpec -class WorkflowAbortSpec extends CromwellTestkitSpec { +class WorkflowAbortSpec extends CromwellTestKitWordSpec { // TODO: When re-enabled, this test also needs to check that child processes have actually been stopped. "A WorkflowManagerActor" should { @@ -14,7 +14,7 @@ class WorkflowAbortSpec extends CromwellTestkitSpec { // val waitThreshold = 10 // // // Start the workflow: -// val workflowId = messageAndWait[WorkflowId](SubmitWorkflow(TripleSleep.wdlSource(), TripleSleep.wdlJson, TripleSleep.rawInputs)) +// val workflowId = messageAndWait[WorkflowId](SubmitWorkflow(TripleSleep.workflowSource(), TripleSleep.WorkflowJson, TripleSleep.rawInputs)) // // def waitForStarted(currentAttempt: Int): Unit = { // val status = messageAndWait[Option[WorkflowState]](WorkflowStatus(workflowId)) diff --git a/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala b/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala index 9ecff4ba5..164c3fedb 100644 --- a/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala @@ -1,12 +1,11 @@ package cromwell.engine -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitWordSpec import 
cromwell.engine.workflow.WorkflowDescriptorBuilder import cromwell.util.SampleWdl -import scala.language.postfixOps -class WorkflowManagerActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuilder { +class WorkflowManagerActorSpec extends CromwellTestKitWordSpec with WorkflowDescriptorBuilder { override implicit val actorSystem = system "A WorkflowManagerActor" should { @@ -14,8 +13,8 @@ class WorkflowManagerActorSpec extends CromwellTestkitSpec with WorkflowDescript "run workflows in the correct directory" in { val outputs = runWdl(sampleWdl = SampleWdl.CurrentDirectory) - val outputName = "whereami.whereami.pwd" - val salutation = outputs.get(outputName).get + val outputName = "wf_whereami.whereami.pwd" + val salutation = outputs(outputName) val actualOutput = salutation.valueString.trim actualOutput should endWith("/call-whereami/execution") } diff --git a/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala b/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala index 43764e705..b86fb99df 100644 --- a/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala @@ -1,18 +1,33 @@ package cromwell.engine -import cromwell.CromwellTestkitSpec -import cromwell.core.WorkflowId +import cats.data.NonEmptyList +import cromwell.core.WorkflowSourceFilesCollection +import cromwell.database.sql.SqlDatabase import cromwell.engine.workflow.workflowstore.WorkflowStoreActor._ +import cromwell.engine.workflow.workflowstore.WorkflowStoreEngineActor.{NewWorkflowsToStart, NoNewWorkflowsToStart} +import cromwell.engine.workflow.workflowstore.WorkflowStoreSubmitActor.{WorkflowSubmittedToStore, WorkflowsBatchSubmittedToStore} import cromwell.engine.workflow.workflowstore._ +import cromwell.services.metadata.MetadataQuery +import cromwell.services.metadata.MetadataService.{GetMetadataQueryAction, MetadataLookupResponse} +import 
cromwell.services.metadata.impl.ReadMetadataActor +import cromwell.util.EncryptionSpec import cromwell.util.SampleWdl.HelloWorld -import org.scalatest.Matchers +import cromwell.{CromwellTestKitSpec, CromwellTestKitWordSpec} +import org.mockito.Mockito._ +import org.scalatest.concurrent.Eventually +import org.scalatest.{BeforeAndAfter, Matchers} +import org.specs2.mock.Mockito import scala.concurrent.duration._ +import scala.concurrent.{ExecutionContext, Future} import scala.language.postfixOps -import scalaz.NonEmptyList -class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { +class WorkflowStoreActorSpec extends CromwellTestKitWordSpec with Matchers with BeforeAndAfter with Mockito with Eventually { val helloWorldSourceFiles = HelloWorld.asWorkflowSources() + val helloCwlWorldSourceFiles = HelloWorld.asWorkflowSources(workflowType = Option("CWL"), workflowTypeVersion = Option("v1.0")) + + val database: SqlDatabase = mock[SqlDatabase] + when(database.removeDockerHashStoreEntries(any[String])(any[ExecutionContext])).thenReturn(Future.successful(1)) /** * Fold down a list of WorkflowToStart's, checking that their IDs are all unique @@ -31,83 +46,137 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { list.foldLeft((List.empty[WorkflowToStart], true))(folderFunction)._2 } + private def prettyOptions(workflowSourceFiles: WorkflowSourceFilesCollection): WorkflowSourceFilesCollection = { + import spray.json._ + workflowSourceFiles.copyOptions(workflowSourceFiles.workflowOptionsJson.parseJson.prettyPrint) + } + "The WorkflowStoreActor" should { "return an ID for a submitted workflow" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance, database)) storeActor ! 
SubmitWorkflow(helloWorldSourceFiles) expectMsgType[WorkflowSubmittedToStore](10 seconds) } "return 3 IDs for a batch submission of 3" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) - storeActor ! BatchSubmitWorkflows(NonEmptyList(helloWorldSourceFiles, helloWorldSourceFiles, helloWorldSourceFiles)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance, database)) + storeActor ! BatchSubmitWorkflows(NonEmptyList.of(helloWorldSourceFiles, helloWorldSourceFiles, helloWorldSourceFiles)) expectMsgPF(10 seconds) { - case WorkflowsBatchSubmittedToStore(ids) => ids.size shouldBe 3 + case WorkflowsBatchSubmittedToStore(ids) => ids.toList.size shouldBe 3 } } "fetch exactly N workflows" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) - storeActor ! BatchSubmitWorkflows(NonEmptyList(helloWorldSourceFiles, helloWorldSourceFiles, helloWorldSourceFiles)) - val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.list.toList - + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance, database)) + storeActor ! BatchSubmitWorkflows(NonEmptyList.of(helloWorldSourceFiles, helloWorldSourceFiles, helloCwlWorldSourceFiles)) + val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.toList storeActor ! 
FetchRunnableWorkflows(2) expectMsgPF(10 seconds) { case NewWorkflowsToStart(workflowNel) => - workflowNel.size shouldBe 2 - checkDistinctIds(workflowNel.list.toList) shouldBe true - workflowNel.foreach { + workflowNel.toList.size shouldBe 2 + checkDistinctIds(workflowNel.toList) shouldBe true + workflowNel map { case WorkflowToStart(id, sources, state) => insertedIds.contains(id) shouldBe true - sources shouldBe helloWorldSourceFiles + sources shouldBe prettyOptions(helloWorldSourceFiles) + state shouldBe WorkflowStoreState.Submitted + } + } + + storeActor ! FetchRunnableWorkflows(1) + expectMsgPF(10 seconds) { + case NewWorkflowsToStart(workflowNel) => + workflowNel.toList.size shouldBe 1 + checkDistinctIds(workflowNel.toList) shouldBe true + workflowNel map { + case WorkflowToStart(id, sources, state) => + insertedIds.contains(id) shouldBe true + sources shouldBe prettyOptions(helloCwlWorldSourceFiles) state shouldBe WorkflowStoreState.Submitted } } } - "return only the remaining workflows if N is larger than size" in { - val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) - storeActor ! BatchSubmitWorkflows(NonEmptyList(helloWorldSourceFiles, helloWorldSourceFiles, helloWorldSourceFiles)) - val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.list.toList + "fetch encrypted and cleared workflow options" in { + EncryptionSpec.assumeAes256Cbc() + val optionedSourceFiles = HelloWorld.asWorkflowSources(workflowOptions = + s"""|{ + | "key": "value", + | "refresh_token": "it's a secret" + |} + |""".stripMargin) - storeActor ! FetchRunnableWorkflows(100) + + val store = new InMemoryWorkflowStore + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance, database)) + val readMetadataActor = system.actorOf(ReadMetadataActor.props()) + storeActor ! 
BatchSubmitWorkflows(NonEmptyList.of(optionedSourceFiles)) + val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.toList + + storeActor ! FetchRunnableWorkflows(1) expectMsgPF(10 seconds) { case NewWorkflowsToStart(workflowNel) => - workflowNel.size shouldBe 3 - checkDistinctIds(workflowNel.list.toList) shouldBe true - workflowNel.foreach { + workflowNel.toList.size should be(1) + checkDistinctIds(workflowNel.toList) should be(true) + workflowNel.toList.foreach { case WorkflowToStart(id, sources, state) => - insertedIds.contains(id) shouldBe true - sources shouldBe helloWorldSourceFiles - state shouldBe WorkflowStoreState.Submitted + insertedIds.contains(id) should be(true) + sources.workflowSource should be(optionedSourceFiles.workflowSource) + sources.inputsJson should be(optionedSourceFiles.inputsJson) + state should be(WorkflowStoreState.Submitted) + + import spray.json._ + + val encryptedJsObject = sources.workflowOptionsJson.parseJson.asJsObject + encryptedJsObject.fields.keys should contain theSameElementsAs Seq("key", "refresh_token") + encryptedJsObject.fields("key") should be(JsString("value")) + encryptedJsObject.fields("refresh_token").asJsObject.fields.keys should contain theSameElementsAs + Seq("iv", "ciphertext") + + // We need to wait for workflow metadata to be flushed before we can successfully query for it + eventually(timeout(15 seconds), interval(5 seconds)) { + readMetadataActor ! 
GetMetadataQueryAction(MetadataQuery.forWorkflow(id)) + expectMsgPF(10 seconds) { + case MetadataLookupResponse(_, eventList) => + val optionsEvent = eventList.find(_.key.key == "submittedFiles:options").get + val clearedJsObject = optionsEvent.value.get.value.parseJson.asJsObject + clearedJsObject.fields.keys should contain theSameElementsAs Seq("key", "refresh_token") + clearedJsObject.fields("key") should be(JsString("value")) + clearedJsObject.fields("refresh_token") should be(JsString("cleared")) + } + } } } } - "remove workflows which exist" in { + "return only the remaining workflows if N is larger than size" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) - storeActor ! SubmitWorkflow(helloWorldSourceFiles) - val id = expectMsgType[WorkflowSubmittedToStore](10 seconds).workflowId - storeActor ! RemoveWorkflow(id) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance, database)) + storeActor ! BatchSubmitWorkflows(NonEmptyList.of(helloWorldSourceFiles, helloWorldSourceFiles, helloWorldSourceFiles)) + val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.toList + storeActor ! 
FetchRunnableWorkflows(100) expectMsgPF(10 seconds) { - case NoNewWorkflowsToStart => // Great - case x => fail(s"Unexpected response from supposedly empty WorkflowStore: $x") + case NewWorkflowsToStart(workflowNel) => + workflowNel.toList.size shouldBe 3 + checkDistinctIds(workflowNel.toList) shouldBe true + workflowNel map { + case WorkflowToStart(id, sources, state) => + insertedIds.contains(id) shouldBe true + sources shouldBe prettyOptions(helloWorldSourceFiles) + state shouldBe WorkflowStoreState.Submitted + } } } "remain responsive if you ask to remove a workflow it doesn't have" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) - val id = WorkflowId.randomId() - storeActor ! RemoveWorkflow(id) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance, database)) storeActor ! FetchRunnableWorkflows(100) expectMsgPF(10 seconds) { diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala b/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala index 4b6f55e4a..95d778dd2 100644 --- a/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala @@ -1,10 +1,10 @@ package cromwell.engine.backend.mock import akka.actor.{ActorRef, Props} -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobSucceededResponse} import cromwell.backend._ -import wdl4s.Call -import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} +import wdl4s.wdl.WdlTaskCall +import wdl4s.wdl.expression.{NoFunctions, WdlStandardLibraryFunctions} import scala.concurrent.Future @@ -14,7 +14,7 @@ object 
DefaultBackendJobExecutionActor { case class DefaultBackendJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor { override def execute: Future[BackendJobExecutionResponse] = { - Future.successful(SucceededResponse(jobDescriptor.key, Some(0), (jobDescriptor.call.task.outputs map taskOutputToJobOutput).toMap, None, Seq.empty)) + Future.successful(JobSucceededResponse(jobDescriptor.key, Some(0), (jobDescriptor.call.task.outputs map taskOutputToJobOutput).toMap, None, Seq.empty, dockerImageUsed = None)) } override def recover = execute @@ -22,15 +22,19 @@ case class DefaultBackendJobExecutionActor(override val jobDescriptor: BackendJo override def abort(): Unit = () } -class DefaultBackendLifecycleActorFactory(configurationDescriptor: BackendConfigurationDescriptor) +class DefaultBackendLifecycleActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - serviceRegistryActor: ActorRef): Option[Props] = None + ioActor: ActorRef, + calls: Set[WdlTaskCall], + serviceRegistryActor: ActorRef, + restarting: Boolean): Option[Props] = None override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef): Props = { + serviceRegistryActor: ActorRef, + ioActor: ActorRef, + backendSingletonActor: Option[ActorRef]): Props = { DefaultBackendJobExecutionActor.props(jobDescriptor, configurationDescriptor) } diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala index 60617f468..6bc7b04ad 100644 --- 
a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala @@ -2,7 +2,7 @@ package cromwell.engine.backend.mock import akka.actor.Props import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobExecutionActor} -import cromwell.backend.BackendJobExecutionActor.{FailedRetryableResponse, BackendJobExecutionResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse, JobFailedRetryableResponse} import scala.concurrent.Future @@ -10,15 +10,17 @@ object RetryableBackendJobExecutionActor { def props(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor) = Props(RetryableBackendJobExecutionActor(jobDescriptor, configurationDescriptor)) } -case class RetryableBackendJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor { +final case class RetryableBackendJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor { val attempts = 3 override def execute: Future[BackendJobExecutionResponse] = { - if (jobDescriptor.key.attempt < attempts) - Future.successful(FailedRetryableResponse(jobDescriptor.key, new RuntimeException("An apparent transient Exception!"), None)) - else - Future.successful(SucceededResponse(jobDescriptor.key, Some(0), (jobDescriptor.call.task.outputs map taskOutputToJobOutput).toMap, None, Seq.empty)) + if (jobDescriptor.key.attempt < attempts) { + Future.successful(JobFailedRetryableResponse(jobDescriptor.key, new RuntimeException("An apparent transient Exception!"), None)) + } + else { + Future.successful(JobFailedNonRetryableResponse(jobDescriptor.key, new 
RuntimeException("A permanent Exception! Yikes, what a pickle!"), None)) + } } override def recover = execute diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala index ec527db26..15639c3f1 100644 --- a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala @@ -2,18 +2,22 @@ package cromwell.engine.backend.mock import akka.actor.{ActorRef, Props} import cromwell.backend._ -import wdl4s.Call -import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} +import wdl4s.wdl.WdlTaskCall +import wdl4s.wdl.expression.{NoFunctions, WdlStandardLibraryFunctions} -class RetryableBackendLifecycleActorFactory(configurationDescriptor: BackendConfigurationDescriptor) +class RetryableBackendLifecycleActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - serviceRegistryActor: ActorRef): Option[Props] = None + ioActor: ActorRef, + calls: Set[WdlTaskCall], + serviceRegistryActor: ActorRef, + restarting: Boolean): Option[Props] = None override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef): Props = { + serviceRegistryActor: ActorRef, + ioActor: ActorRef, + backendSingletonActor: Option[ActorRef]): Props = { RetryableBackendJobExecutionActor.props(jobDescriptor, configurationDescriptor) } diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/package.scala b/engine/src/test/scala/cromwell/engine/backend/mock/package.scala index 4baeb9c33..6dbf7e925 100644 --- 
a/engine/src/test/scala/cromwell/engine/backend/mock/package.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/package.scala @@ -1,15 +1,15 @@ package cromwell.engine.backend import cromwell.core.JobOutput -import wdl4s.TaskOutput -import wdl4s.types._ -import wdl4s.values._ +import wdl4s.wdl.TaskOutput +import wdl4s.wdl.types._ +import wdl4s.wdl.values._ package object mock { // This is used by stubbed backends that are to be used in tests to prepare dummy outputs for job def taskOutputToJobOutput(taskOutput: TaskOutput) = - taskOutput.name -> JobOutput(sampleValue(taskOutput.wdlType)) + taskOutput.unqualifiedName -> JobOutput(sampleValue(taskOutput.wdlType)) private def sampleValue(wdlType: WdlType): WdlValue = wdlType match { case WdlIntegerType => WdlInteger(3) diff --git a/engine/src/test/scala/cromwell/engine/io/IoActorGcsBatchSpec.scala b/engine/src/test/scala/cromwell/engine/io/IoActorGcsBatchSpec.scala new file mode 100644 index 000000000..fa611133b --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/io/IoActorGcsBatchSpec.scala @@ -0,0 +1,103 @@ +package cromwell.engine.io + +import java.util.UUID + +import akka.stream.ActorMaterializer +import akka.testkit.{ImplicitSender, TestActorRef} +import cromwell.core.Tags.IntegrationTest +import cromwell.core.io._ +import cromwell.core.{TestKitSuite, WorkflowOptions} +import cromwell.filesystems.gcs.auth.ApplicationDefaultMode +import cromwell.filesystems.gcs.batch.{GcsBatchCopyCommand, GcsBatchCrc32Command, GcsBatchDeleteCommand, GcsBatchSizeCommand} +import cromwell.filesystems.gcs.{GcsPathBuilder, GcsPathBuilderFactory} +import org.scalatest.concurrent.Eventually +import org.scalatest.{FlatSpecLike, Matchers} + +import scala.concurrent.{Await, ExecutionContext} +import scala.concurrent.duration._ +import scala.language.postfixOps + +class IoActorGcsBatchSpec extends TestKitSuite with FlatSpecLike with Matchers with ImplicitSender with Eventually { + behavior of "IoActor [GCS Batch]" + + 
implicit val actorSystem = system + implicit val ec: ExecutionContext = system.dispatcher + implicit val materializer = ActorMaterializer() + + override def afterAll() = { + materializer.shutdown() + src.delete(swallowIOExceptions = true) + dst.delete(swallowIOExceptions = true) + srcRegional.delete(swallowIOExceptions = true) + dstMultiRegional.delete(swallowIOExceptions = true) + super.afterAll() + } + + lazy val gcsPathBuilder = GcsPathBuilderFactory(ApplicationDefaultMode("default"), "cromwell-test") + lazy val pathBuilder: GcsPathBuilder = Await.result(gcsPathBuilder.withOptions(WorkflowOptions.empty), 1 second) + + lazy val randomUUID = UUID.randomUUID().toString + + lazy val src = pathBuilder.build(s"gs://cloud-cromwell-dev/unit-test/$randomUUID/testFile.txt").get + lazy val dst = pathBuilder.build(s"gs://cloud-cromwell-dev/unit-test/$randomUUID/testFile-copy.txt").get + lazy val srcRegional = pathBuilder.build(s"gs://cloud-cromwell-dev-regional/unit-test/$randomUUID/testRegional.txt").get + lazy val dstMultiRegional = pathBuilder.build(s"gs://cloud-cromwell-dev/unit-test/$randomUUID/testFileRegional-copy.txt").get + + override def beforeAll() = { + // Write commands can't be batched, so for the sake of this test, just create a file in GCS synchronously here + src.write("hello") + srcRegional.write("hello") + super.beforeAll() + } + + it should "batch queries" taggedAs IntegrationTest in { + val testActor = TestActorRef(new IoActor(10, None)) + + val copyCommand = GcsBatchCopyCommand(src, dst, overwrite = false) + val sizeCommand = GcsBatchSizeCommand(src) + val hashCommand = GcsBatchCrc32Command(src) + + val deleteSrcCommand = GcsBatchDeleteCommand(src, swallowIOExceptions = false) + val deleteDstCommand = GcsBatchDeleteCommand(dst, swallowIOExceptions = false) + + testActor ! copyCommand + testActor ! sizeCommand + testActor ! 
hashCommand + + val received1 = receiveN(3, 10 seconds) + + received1.size shouldBe 3 + received1 forall { _.isInstanceOf[IoSuccess[_]] } shouldBe true + + received1 collect { + case IoSuccess(_: GcsBatchSizeCommand, fileSize: Long) => fileSize shouldBe 5 + } + + received1 collect { + case IoSuccess(_: GcsBatchCrc32Command, hash: String) => hash shouldBe "mnG7TA==" + } + + testActor ! deleteSrcCommand + testActor ! deleteDstCommand + + val received2 = receiveN(2, 10 seconds) + + received2.size shouldBe 2 + received2 forall { _.isInstanceOf[IoSuccess[_]] } shouldBe true + + src.exists shouldBe false + dst.exists shouldBe false + } + + it should "copy files across GCS storage classes" taggedAs IntegrationTest in { + val testActor = TestActorRef(new IoActor(10, None)) + + val copyCommand = GcsBatchCopyCommand(srcRegional, dstMultiRegional, overwrite = false) + + testActor ! copyCommand + + expectMsgClass(30 seconds, classOf[IoSuccess[_]]) + + dstMultiRegional.exists shouldBe true + } +} diff --git a/engine/src/test/scala/cromwell/engine/io/IoActorSpec.scala b/engine/src/test/scala/cromwell/engine/io/IoActorSpec.scala new file mode 100644 index 000000000..5b0ec5594 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/io/IoActorSpec.scala @@ -0,0 +1,174 @@ +package cromwell.engine.io + +import java.net.{SocketException, SocketTimeoutException} + +import akka.stream.ActorMaterializer +import akka.testkit.{ImplicitSender, TestActorRef} +import better.files.File.OpenOptions +import com.google.cloud.storage.StorageException +import cromwell.core.TestKitSuite +import cromwell.core.io.DefaultIoCommand._ +import cromwell.core.io._ +import cromwell.core.path.{DefaultPathBuilder, Path} +import cromwell.engine.io.gcs.GcsBatchFlow.BatchFailedException +import org.scalatest.{FlatSpecLike, Matchers} + +import scala.concurrent.ExecutionContext +import scala.concurrent.duration._ +import scala.language.postfixOps + +class IoActorSpec extends TestKitSuite with FlatSpecLike with 
Matchers with ImplicitSender { + behavior of "IoActor" + + implicit val actorSystem = system + implicit val ec: ExecutionContext = system.dispatcher + implicit val materializer = ActorMaterializer() + + override def afterAll() = { + materializer.shutdown() + super.afterAll() + } + + it should "copy a file" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + val dst: Path = src.parent.resolve(src.name + "-dst") + + val copyCommand = DefaultIoCopyCommand(src, dst, overwrite = true) + + testActor ! copyCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => response.command.isInstanceOf[IoCopyCommand] shouldBe true + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + dst.toFile should exist + src.delete() + dst.delete() + } + + it should "write to a file" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + + val writeCommand = DefaultIoWriteCommand(src, "hello", OpenOptions.default) + + testActor ! writeCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => response.command.isInstanceOf[IoWriteCommand] shouldBe true + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + src.contentAsString shouldBe "hello" + src.delete() + } + + it should "delete a file" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + + val deleteCommand = DefaultIoDeleteCommand(src, swallowIOExceptions = false) + + testActor ! 
deleteCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => response.command.isInstanceOf[IoDeleteCommand] shouldBe true + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + src.toFile shouldNot exist + } + + it should "read a file" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + src.write("hello") + + val readCommand = DefaultIoContentAsStringCommand(src) + + testActor ! readCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => + response.command.isInstanceOf[IoContentAsStringCommand] shouldBe true + response.result.asInstanceOf[String] shouldBe "hello" + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + src.delete() + } + + it should "return a file size" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + src.write("hello") + + val sizeCommand = DefaultIoSizeCommand(src) + + testActor ! sizeCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => + response.command.isInstanceOf[IoSizeCommand] shouldBe true + response.result.asInstanceOf[Long] shouldBe 5 + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + src.delete() + } + + it should "return a file md5 hash (local)" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + src.write("hello") + + val hashCommand = DefaultIoHashCommand(src) + + testActor ! 
hashCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => + response.command.isInstanceOf[IoHashCommand] shouldBe true + response.result.asInstanceOf[String] shouldBe "5d41402abc4b2a76b9719d911017c592" + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + src.delete() + } + + it should "touch a file (local)" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + src.write("hello") + + val touchCommand = DefaultIoTouchCommand(src) + + testActor ! touchCommand + expectMsgPF(5 seconds) { + case _: IoSuccess[_] => + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + src.delete() + } + + it should "have correct retryable exceptions" in { + val retryables = List( + new StorageException(500, "message"), + new StorageException(502, "message"), + new StorageException(503, "message"), + new StorageException(504, "message"), + new StorageException(408, "message"), + new StorageException(429, "message"), + BatchFailedException(new Exception), + new SocketException(), + new SocketTimeoutException() + ) + + retryables foreach { IoActor.isRetryable(_) shouldBe true } + retryables foreach { IoActor.isFatal(_) shouldBe false } + } +} diff --git a/engine/src/test/scala/cromwell/engine/io/nio/NioFlowSpec.scala b/engine/src/test/scala/cromwell/engine/io/nio/NioFlowSpec.scala new file mode 100644 index 000000000..e88669ef9 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/io/nio/NioFlowSpec.scala @@ -0,0 +1,216 @@ +package cromwell.engine.io.nio + +import java.nio.file.{FileAlreadyExistsException, NoSuchFileException} +import java.util.UUID + +import akka.actor.ActorRef +import akka.stream.ActorMaterializer +import akka.stream.scaladsl.{Keep, Sink, Source} +import com.google.cloud.storage.StorageException +import cromwell.core.io._ +import cromwell.core.path.DefaultPathBuilder +import cromwell.core.{CromwellFatalException, TestKitSuite} 
+import cromwell.engine.io.IoActor.DefaultCommandContext +import cromwell.engine.io.IoCommandContext +import org.scalatest.mockito.MockitoSugar +import org.scalatest.{AsyncFlatSpecLike, Matchers} + +import scala.concurrent.Future + +class NioFlowSpec extends TestKitSuite with AsyncFlatSpecLike with Matchers with MockitoSugar with DefaultIoCommandBuilder { + + behavior of "NioFlowSpec" + + val flow = new NioFlow(1, system.scheduler)(system.dispatcher, system).flow + + implicit val materializer = ActorMaterializer() + val replyTo = mock[ActorRef] + val readSink = Sink.head[(IoAck[_], IoCommandContext[_])] + + override def afterAll() = { + materializer.shutdown() + super.afterAll() + } + + it should "write to a Nio Path" in { + val testPath = DefaultPathBuilder.createTempFile() + val context = DefaultCommandContext(writeCommand(testPath, "hello", Seq.empty), replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { _ => + assert(testPath.contentAsString == "hello") + } + } + + it should "read from a Nio Path" in { + val testPath = DefaultPathBuilder.createTempFile() + testPath.write("hello") + + val context = DefaultCommandContext(contentAsStringCommand(testPath), replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => assert(success.result.asInstanceOf[String] == "hello") + case _ => fail("read returned an unexpected message") + } + } + + it should "get size from a Nio Path" in { + val testPath = DefaultPathBuilder.createTempFile() + testPath.write("hello") + + val context = DefaultCommandContext(sizeCommand(testPath), replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => assert(success.result.asInstanceOf[Long] == 5) + case _ => fail("size 
returned an unexpected message") + } + } + + it should "get hash from a Nio Path" in { + val testPath = DefaultPathBuilder.createTempFile() + testPath.write("hello") + + val context = DefaultCommandContext(hashCommand(testPath), replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => assert(success.result.asInstanceOf[String] == "5d41402abc4b2a76b9719d911017c592") + case _ => fail("hash returned an unexpected message") + } + } + + it should "copy Nio paths" in { + val testPath = DefaultPathBuilder.createTempFile() + val testCopyPath = testPath.sibling(UUID.randomUUID().toString) + + val context = DefaultCommandContext(copyCommand(testPath, testCopyPath, overwrite = false), replyTo) + + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (_: IoSuccess[_], _) => assert(testCopyPath.exists) + case _ => fail("copy returned an unexpected message") + } + } + + it should "copy Nio paths with overwrite true" in { + val testPath = DefaultPathBuilder.createTempFile() + testPath.write("goodbye") + + val testCopyPath = DefaultPathBuilder.createTempFile() + testCopyPath.write("hello") + + val context = DefaultCommandContext(copyCommand(testPath, testCopyPath, overwrite = true), replyTo) + + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (_: IoSuccess[_], _) => + assert(testCopyPath.exists) + assert(testCopyPath.contentAsString == "goodbye") + case _ => fail("copy returned an unexpected message") + } + } + + it should "copy Nio paths with overwrite false" in { + val testPath = DefaultPathBuilder.createTempFile() + val testCopyPath = DefaultPathBuilder.createTempFile() + + val context = DefaultCommandContext(copyCommand(testPath, testCopyPath, overwrite = false), replyTo) + + val testSource 
= Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (failure: IoFailure[_], _) => + assert(failure.failure.isInstanceOf[CromwellFatalException]) + assert(failure.failure.getCause.isInstanceOf[FileAlreadyExistsException]) + case _ => fail("copy returned an unexpected message") + } + } + + it should "delete a Nio path" in { + val testPath = DefaultPathBuilder.createTempFile() + val context = DefaultCommandContext(deleteCommand(testPath, swallowIoExceptions = false), replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (_: IoSuccess[_], _) => assert(!testPath.exists) + case _ => fail("delete returned an unexpected message") + } + } + + it should "delete a Nio path with swallowIoExceptions true" in { + val testPath = DefaultPathBuilder.build("/this/does/not/exist").get + + val context = DefaultCommandContext(deleteCommand(testPath, swallowIoExceptions = true), replyTo) + + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (_: IoSuccess[_], _) => assert(!testPath.exists) + case _ => fail("delete returned an unexpected message") + } + } + + it should "delete a Nio path with swallowIoExceptions false" in { + val testPath = DefaultPathBuilder.build("/this/does/not/exist").get + + val context = DefaultCommandContext(deleteCommand(testPath, swallowIoExceptions = false), replyTo) + + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (failure: IoFailure[_], _) => + assert(failure.failure.isInstanceOf[CromwellFatalException]) + assert(failure.failure.getCause.isInstanceOf[NoSuchFileException]) + case other @ _ => fail(s"delete returned an unexpected message") + } + } + + it should "retry on retryable exceptions" in { + val testPath = 
DefaultPathBuilder.build("does/not/matter").get + + val context = DefaultCommandContext(contentAsStringCommand(testPath), replyTo) + + val testSource = Source.single(context) + + val customFlow = new NioFlow(1, system.scheduler, 3)(system.dispatcher, system) { + private var tries = 0 + override def handleSingleCommand(ioSingleCommand: IoCommand[_]) = { + tries += 1 + if (tries < 3) Future.failed(new StorageException(500, "message")) + else Future.successful(IoSuccess(ioSingleCommand, "content")) + } + }.flow + + val stream = testSource.via(customFlow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => assert(success.result.asInstanceOf[String] == "content") + case _ => fail("read returned an unexpected message") + } + } + +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala index 46e4c00a8..049b228d8 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala @@ -1,27 +1,30 @@ package cromwell.engine.workflow -import java.nio.file.Path import java.time.OffsetDateTime import akka.actor._ import akka.pattern.ask -import akka.testkit.TestKit -import better.files._ +import akka.stream.ActorMaterializer +import akka.util.Timeout import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec._ -import cromwell.core.WorkflowSourceFiles +import cromwell.CromwellTestKitSpec._ +import cromwell._ +import cromwell.core.path.{DefaultPathBuilder, Path} +import cromwell.core.{SimpleIoActor, WorkflowSourceFilesCollection} +import cromwell.database.sql.SqlDatabase +import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.SingleWorkflowRunnerActor.RunWorkflow import cromwell.engine.workflow.SingleWorkflowRunnerActorSpec._ +import 
cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import cromwell.engine.workflow.workflowstore.{InMemoryWorkflowStore, WorkflowStoreActor} import cromwell.util.SampleWdl import cromwell.util.SampleWdl.{ExpressionsInInputs, GoodbyeWorld, ThreeStep} -import cromwell.{AlwaysHappyJobStoreActor, CromwellTestkitSpec, EmptyCallCacheReadActor} import org.scalatest.prop.{TableDrivenPropertyChecks, TableFor3} +import org.specs2.mock.Mockito import spray.json._ import scala.concurrent.Await -import scala.concurrent.duration.Duration -import scala.language.postfixOps +import scala.concurrent.duration._ import scala.util._ /** @@ -30,40 +33,53 @@ import scala.util._ * * Currently, as instance of the actor system are created via an instance of CromwellTestkitSpec, and the * SingleWorkflowRunnerActor also tests halting its actor system, each spec is currently in a separate instance of the - * CromwellTestkitSpec. + * CromwellTestKitSpec. */ object SingleWorkflowRunnerActorSpec { - def tempFile() = File.newTemporaryFile("metadata.", ".json") + def tempFile() = DefaultPathBuilder.createTempFile("metadata.", ".json") - def tempDir() = File.newTemporaryDirectory("metadata.dir.") + def tempDir() = DefaultPathBuilder.createTempDirectory("metadata.dir.") implicit class OptionJsValueEnhancer(val jsValue: Option[JsValue]) extends AnyVal { def toOffsetDateTime = OffsetDateTime.parse(jsValue.toStringValue) - def toStringValue = jsValue.get.asInstanceOf[JsString].value + def toStringValue = jsValue.getOrElse(JsString("{}")).asInstanceOf[JsString].value def toFields = jsValue.get.asJsObject.fields } - class TestSingleWorkflowRunnerActor(source: WorkflowSourceFiles, - metadataOutputPath: Option[Path]) - extends SingleWorkflowRunnerActor(source, metadataOutputPath) { - override lazy val serviceRegistryActor = CromwellTestkitSpec.ServiceRegistryActorInstance + class TestSingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, + metadataOutputPath: Option[Path])(implicit 
materializer: ActorMaterializer) + extends SingleWorkflowRunnerActor(source, metadataOutputPath, false, false) { + override lazy val serviceRegistryActor = CromwellTestKitSpec.ServiceRegistryActorInstance } } -abstract class SingleWorkflowRunnerActorSpec extends CromwellTestkitSpec { - private val workflowStore = system.actorOf(WorkflowStoreActor.props(new InMemoryWorkflowStore, dummyServiceRegistryActor)) +abstract class SingleWorkflowRunnerActorSpec extends CromwellTestKitWordSpec with Mockito { + private val workflowStore = system.actorOf(WorkflowStoreActor.props(new InMemoryWorkflowStore, dummyServiceRegistryActor, mock[SqlDatabase])) private val jobStore = system.actorOf(AlwaysHappyJobStoreActor.props) + private val ioActor = system.actorOf(SimpleIoActor.props) + private val subWorkflowStore = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) private val callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props) + private val callCacheWriteActor = system.actorOf(EmptyCallCacheWriteActor.props) + private val dockerHashActor = system.actorOf(EmptyDockerHashActor.props) + private val jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props) def workflowManagerActor(): ActorRef = { - system.actorOf(Props(new WorkflowManagerActor(ConfigFactory.load(), - workflowStore, - dummyServiceRegistryActor, - dummyLogCopyRouter, - jobStore, - callCacheReadActor)), "WorkflowManagerActor") + val params = WorkflowManagerActorParams(ConfigFactory.load(), + workflowStore = workflowStore, + ioActor = ioActor, + serviceRegistryActor = dummyServiceRegistryActor, + workflowLogCopyRouter = dummyLogCopyRouter, + jobStoreActor = jobStore, + subWorkflowStoreActor = subWorkflowStore, + callCacheReadActor = callCacheReadActor, + callCacheWriteActor = callCacheWriteActor, + dockerHashActor = dockerHashActor, + jobTokenDispenserActor = jobTokenDispenserActor, + backendSingletonCollection = BackendSingletonCollection(Map.empty), + serverMode = false) + 
system.actorOf(Props(new WorkflowManagerActor(params)), "WorkflowManagerActor") } def createRunnerActor(sampleWdl: SampleWdl = ThreeStep, managerActor: => ActorRef = workflowManagerActor(), @@ -74,8 +90,9 @@ abstract class SingleWorkflowRunnerActorSpec extends CromwellTestkitSpec { def singleWorkflowActor(sampleWdl: SampleWdl = ThreeStep, managerActor: => ActorRef = workflowManagerActor(), outputFile: => Option[Path] = None): Unit = { val actorRef = createRunnerActor(sampleWdl, managerActor, outputFile) - val futureResult = actorRef ? RunWorkflow + val futureResult = actorRef.ask(RunWorkflow)(timeout = new Timeout(TimeoutDuration)) Await.ready(futureResult, Duration.Inf) + () } } @@ -87,7 +104,6 @@ class SingleWorkflowRunnerActorNormalSpec extends SingleWorkflowRunnerActorSpec singleWorkflowActor() } } - TestKit.shutdownActorSystem(system, TimeoutDuration) } } } @@ -100,49 +116,49 @@ class SingleWorkflowRunnerActorWithMetadataSpec extends SingleWorkflowRunnerActo super.afterAll() } - private def doTheTest(wdlFile: SampleWdl, expectedCalls: TableFor3[String, Int, Int], workflowInputs: Int, workflowOutputs: Int) = { + private def doTheTest(wdlFile: SampleWdl, expectedCalls: TableFor3[String, Long, Long], workflowInputs: Long, workflowOutputs: Long) = { val testStart = OffsetDateTime.now within(TimeoutDuration) { singleWorkflowActor( sampleWdl = wdlFile, - outputFile = Option(metadataFile.path)) + outputFile = Option(metadataFile)) } - TestKit.shutdownActorSystem(system, TimeoutDuration) - - val metadataFileContent = metadataFile.contentAsString - val metadata = metadataFileContent.parseJson.asJsObject.fields - metadata.get("id") shouldNot be(empty) - metadata.get("status").toStringValue should be("Succeeded") - metadata.get("submission").toOffsetDateTime should be >= testStart - val workflowStart = metadata.get("start").toOffsetDateTime - workflowStart should be >= metadata.get("submission").toOffsetDateTime - val workflowEnd = metadata.get("end").toOffsetDateTime - 
workflowEnd should be >= metadata.get("start").toOffsetDateTime - metadata.get("inputs").toFields should have size workflowInputs - metadata.get("outputs").toFields should have size workflowOutputs - val calls = metadata.get("calls").toFields - calls should not be empty + eventually { + val metadataFileContent = metadataFile.contentAsString + val metadata = metadataFileContent.parseJson.asJsObject.fields + metadata.get("id") shouldNot be(empty) + metadata.get("status").toStringValue should be("Succeeded") + metadata.get("submission").toOffsetDateTime should be >= testStart + val workflowStart = metadata.get("start").toOffsetDateTime + workflowStart should be >= metadata.get("submission").toOffsetDateTime + val workflowEnd = metadata.get("end").toOffsetDateTime + workflowEnd should be >= metadata.get("start").toOffsetDateTime + metadata.get("inputs").toFields should have size workflowInputs + metadata.get("outputs").toFields should have size workflowOutputs + val calls = metadata.get("calls").toFields + calls should not be empty - forAll(expectedCalls) { (callName, numInputs, numOutputs) => - val callSeq = calls(callName).asInstanceOf[JsArray].elements - callSeq should have size 1 - val call = callSeq.head.asJsObject.fields - val inputs = call.get("inputs").toFields - inputs should have size numInputs - call.get("executionStatus").toStringValue should be("Done") - call.get("backend").toStringValue should be("Local") - call.get("backendStatus") should be(empty) - call.get("outputs").toFields should have size numOutputs - val callStart = call.get("start").toOffsetDateTime - callStart should be >= workflowStart - val callEnd = call.get("end").toOffsetDateTime - callEnd should be >= callStart - callEnd should be <= workflowEnd - call.get("jobId") shouldNot be(empty) - call("returnCode").asInstanceOf[JsNumber].value should be (0) - call.get("stdout") shouldNot be(empty) - call.get("stderr") shouldNot be(empty) - call("attempt").asInstanceOf[JsNumber].value should be (1) 
+ forAll(expectedCalls) { (callName, numInputs, numOutputs) => + val callSeq = calls(callName).asInstanceOf[JsArray].elements + callSeq should have size 1 + val call = callSeq.head.asJsObject.fields + val inputs = call.get("inputs").toFields + inputs should have size numInputs + call.get("executionStatus").toStringValue should be("Done") + call.get("backend").toStringValue should be("Local") + call.get("backendStatus").toStringValue should be("Done") + call.get("outputs").toFields should have size numOutputs + val callStart = call.get("start").toOffsetDateTime + callStart should be >= workflowStart + val callEnd = call.get("end").toOffsetDateTime + callEnd should be >= callStart + callEnd should be <= workflowEnd + call.get("jobId") shouldNot be(empty) + call("returnCode").asInstanceOf[JsNumber].value should be(0) + call.get("stdout") shouldNot be(empty) + call.get("stderr") shouldNot be(empty) + call("attempt").asInstanceOf[JsNumber].value should be(1) + } } } @@ -150,18 +166,18 @@ class SingleWorkflowRunnerActorWithMetadataSpec extends SingleWorkflowRunnerActo "successfully run a workflow outputting metadata" in { val expectedCalls = Table( ("callName", "numInputs", "numOutputs"), - ("three_step.wc", 1, 1), - ("three_step.ps", 0, 1), - ("three_step.cgrep", 2, 1)) + ("three_step.wc", 1L, 1L), + ("three_step.ps", 0L, 1L), + ("three_step.cgrep", 2L, 1L)) - doTheTest(ThreeStep, expectedCalls, 1, 3) + doTheTest(ThreeStep, expectedCalls, 1L, 3L) } "run a workflow outputting metadata with no remaining input expressions" in { val expectedCalls = Table( ("callName", "numInputs", "numOutputs"), - ("wf.echo", 1, 1), - ("wf.echo2", 1, 1)) - doTheTest(ExpressionsInInputs, expectedCalls, 2, 2) + ("wf.echo", 1L, 1L), + ("wf.echo2", 1L, 1L)) + doTheTest(ExpressionsInInputs, expectedCalls, 2L, 2L) } } } @@ -178,9 +194,8 @@ class SingleWorkflowRunnerActorWithMetadataOnFailureSpec extends SingleWorkflowR "fail to run a workflow and still output metadata" in { val testStart = 
OffsetDateTime.now within(TimeoutDuration) { - singleWorkflowActor(sampleWdl = GoodbyeWorld, outputFile = Option(metadataFile.path)) + singleWorkflowActor(sampleWdl = GoodbyeWorld, outputFile = Option(metadataFile)) } - TestKit.shutdownActorSystem(system, TimeoutDuration) val metadata = metadataFile.contentAsString.parseJson.asJsObject.fields metadata.get("id") shouldNot be(empty) @@ -195,14 +210,14 @@ class SingleWorkflowRunnerActorWithMetadataOnFailureSpec extends SingleWorkflowR val calls = metadata.get("calls").toFields calls should not be empty - val callSeq = calls("goodbye.goodbye").asInstanceOf[JsArray].elements + val callSeq = calls("wf_goodbye.goodbye").asInstanceOf[JsArray].elements callSeq should have size 1 val call = callSeq.head.asJsObject.fields val inputs = call.get("inputs").toFields inputs should have size 0 call.get("executionStatus").toStringValue should be("Failed") call.get("backend").toStringValue should be("Local") - call.get("backendStatus") should be(empty) + call.get("backendStatus").toStringValue should be("Done") call.get("outputs") shouldBe empty val callStart = call.get("start").toOffsetDateTime callStart should be >= workflowStart @@ -230,9 +245,9 @@ class SingleWorkflowRunnerActorWithBadMetadataSpec extends SingleWorkflowRunnerA "A SingleWorkflowRunnerActor" should { "successfully run a workflow requesting a bad metadata path" in { within(TimeoutDuration) { - val runner = createRunnerActor(outputFile = Option(metadataDir.path)) + val runner = createRunnerActor(outputFile = Option(metadataDir)) waitForErrorWithException(s"Specified metadata path is a directory, should be a file: $metadataDir") { - val futureResult = runner ? 
RunWorkflow + val futureResult = runner.ask(RunWorkflow)(30.seconds, implicitly) Await.ready(futureResult, Duration.Inf) futureResult.value.get match { case Success(_) => @@ -242,7 +257,6 @@ class SingleWorkflowRunnerActorWithBadMetadataSpec extends SingleWorkflowRunnerA } } } - TestKit.shutdownActorSystem(system, TimeoutDuration) } } } @@ -262,7 +276,6 @@ class SingleWorkflowRunnerActorFailureSpec extends SingleWorkflowRunnerActorSpec case Failure(e) => e.getMessage should include("expected error") } } - TestKit.shutdownActorSystem(system, TimeoutDuration) } } } @@ -278,7 +291,6 @@ class SingleWorkflowRunnerActorUnexpectedSpec extends SingleWorkflowRunnerActorS } assert(!system.whenTerminated.isCompleted) } - TestKit.shutdownActorSystem(system, TimeoutDuration) } } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala index cc3581234..23134cba1 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala @@ -3,22 +3,25 @@ package cromwell.engine.workflow import akka.actor.{Actor, ActorRef} import akka.testkit.{TestActorRef, TestFSMRef, TestProbe} import com.typesafe.config.{Config, ConfigFactory} -import cromwell.backend.AllBackendInitializationData -import cromwell.core.{ExecutionStore, OutputStore, WorkflowId, WorkflowSourceFiles} +import cromwell._ +import cromwell.backend.{AllBackendInitializationData, JobExecutionMap} +import cromwell.core._ +import cromwell.core.path.DefaultPathBuilder import cromwell.engine.EngineWorkflowDescriptor +import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.WorkflowActor._ -import cromwell.engine.workflow.lifecycle.EngineLifecycleActorAbortCommand +import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.MaterializeWorkflowDescriptorFailureResponse import 
cromwell.engine.workflow.lifecycle.WorkflowFinalizationActor.{StartFinalizationCommand, WorkflowFinalizationSucceededResponse} import cromwell.engine.workflow.lifecycle.WorkflowInitializationActor.{WorkflowInitializationAbortedResponse, WorkflowInitializationFailedResponse} import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{WorkflowExecutionAbortedResponse, WorkflowExecutionFailedResponse, WorkflowExecutionSucceededResponse} +import cromwell.engine.workflow.lifecycle.{CopyWorkflowLogsActor, EngineLifecycleActorAbortCommand} import cromwell.util.SampleWdl.ThreeStep -import cromwell.{AlwaysHappyJobStoreActor, CromwellTestkitSpec, EmptyCallCacheReadActor} import org.scalatest.BeforeAndAfter import org.scalatest.concurrent.Eventually import scala.concurrent.duration._ -class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuilder with BeforeAndAfter with Eventually { +class WorkflowActorSpec extends CromwellTestKitWordSpec with WorkflowDescriptorBuilder with BeforeAndAfter with Eventually { override implicit val actorSystem = system val mockServiceRegistryActor = TestActorRef(new Actor { @@ -27,18 +30,23 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild } }) + val mockDir = DefaultPathBuilder.get("/where/to/copy/wf/logs") + val mockWorkflowOptions = s"""{ "final_workflow_log_dir" : "$mockDir" }""" + var currentWorkflowId: WorkflowId = _ val currentLifecycleActor = TestProbe() - val wdlSources = ThreeStep.asWorkflowSources() - val descriptor = createMaterializedEngineWorkflowDescriptor(WorkflowId.randomId(), workflowSources = wdlSources) + val workflowSources = ThreeStep.asWorkflowSources(workflowOptions = mockWorkflowOptions) + val descriptor = createMaterializedEngineWorkflowDescriptor(WorkflowId.randomId(), workflowSources = workflowSources) val supervisorProbe = TestProbe() val deathwatch = TestProbe() val finalizationProbe = TestProbe() - + var copyWorkflowLogsProbe: TestProbe = _ val 
AwaitAlmostNothing = 100.milliseconds before { currentWorkflowId = WorkflowId.randomId() + + copyWorkflowLogsProbe = TestProbe() } private def createWorkflowActor(state: WorkflowActorState) = { @@ -47,12 +55,17 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild finalizationProbe = finalizationProbe, workflowId = currentWorkflowId, startMode = StartNewWorkflow, - workflowSources = wdlSources, + workflowSources = workflowSources, conf = ConfigFactory.load, + ioActor = system.actorOf(SimpleIoActor.props), serviceRegistryActor = mockServiceRegistryActor, - workflowLogCopyRouter = TestProbe().ref, + workflowLogCopyRouter = copyWorkflowLogsProbe.ref, jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props), - callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props) + subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props), + callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props), + callCacheWriteActor = system.actorOf(EmptyCallCacheWriteActor.props), + dockerHashActor = system.actorOf(EmptyDockerHashActor.props), + jobTokenDispenserActor = TestProbe().ref ), supervisor = supervisorProbe.ref) actor.setState(stateName = state, stateData = WorkflowActorData(Option(currentLifecycleActor.ref), Option(descriptor), @@ -60,12 +73,11 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild actor } - implicit val TimeoutDuration = CromwellTestkitSpec.TimeoutDuration + implicit val TimeoutDuration = CromwellTestKitSpec.TimeoutDuration "WorkflowActor" should { "run Finalization actor if Initialization fails" in { - val workflowId = WorkflowId.randomId() val actor = createWorkflowActor(InitializingWorkflowState) deathwatch watch actor actor ! 
WorkflowInitializationFailedResponse(Seq(new Exception("Materialization Failed"))) @@ -94,7 +106,7 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild "run Finalization if Execution fails" in { val actor = createWorkflowActor(ExecutingWorkflowState) deathwatch watch actor - actor ! WorkflowExecutionFailedResponse(ExecutionStore.empty, OutputStore.empty, Seq(new Exception("Execution Failed"))) + actor ! WorkflowExecutionFailedResponse(Map.empty, new Exception("Execution Failed")) finalizationProbe.expectMsg(StartFinalizationCommand) actor.stateName should be(FinalizingWorkflowState) actor ! WorkflowFinalizationSucceededResponse @@ -107,9 +119,9 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild deathwatch watch actor actor ! AbortWorkflowCommand eventually { actor.stateName should be(WorkflowAbortingState) } - currentLifecycleActor.expectMsgPF(CromwellTestkitSpec.TimeoutDuration) { + currentLifecycleActor.expectMsgPF(CromwellTestKitSpec.TimeoutDuration) { case EngineLifecycleActorAbortCommand => - actor ! WorkflowExecutionAbortedResponse(ExecutionStore.empty, OutputStore.empty) + actor ! WorkflowExecutionAbortedResponse(Map.empty) } finalizationProbe.expectMsg(StartFinalizationCommand) actor.stateName should be(FinalizingWorkflowState) @@ -121,7 +133,7 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild "run Finalization actor if Execution succeeds" in { val actor = createWorkflowActor(ExecutingWorkflowState) deathwatch watch actor - actor ! WorkflowExecutionSucceededResponse(ExecutionStore.empty, OutputStore.empty) + actor ! WorkflowExecutionSucceededResponse(Map.empty, Map.empty) finalizationProbe.expectMsg(StartFinalizationCommand) actor.stateName should be(FinalizingWorkflowState) actor ! 
WorkflowFinalizationSucceededResponse @@ -144,18 +156,35 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild finalizationProbe.expectNoMsg(AwaitAlmostNothing) deathwatch.expectTerminated(actor) } + + "copy workflow logs in the event of MaterializeWorkflowDescriptorFailureResponse" in { + val actor = createWorkflowActor(MaterializingWorkflowDescriptorState) + deathwatch watch actor + + copyWorkflowLogsProbe.expectNoMsg(AwaitAlmostNothing) + actor ! MaterializeWorkflowDescriptorFailureResponse(new Exception("Intentionally failing workflow materialization to test log copying")) + copyWorkflowLogsProbe.expectMsg(CopyWorkflowLogsActor.Copy(currentWorkflowId, mockDir)) + + finalizationProbe.expectNoMsg(AwaitAlmostNothing) + deathwatch.expectTerminated(actor) + } } } class MockWorkflowActor(val finalizationProbe: TestProbe, workflowId: WorkflowId, startMode: StartMode, - workflowSources: WorkflowSourceFiles, + workflowSources: WorkflowSourceFilesCollection, conf: Config, + ioActor: ActorRef, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, - callCacheReadActor: ActorRef) extends WorkflowActor(workflowId, startMode, workflowSources, conf, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor) { + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + dockerHashActor: ActorRef, + jobTokenDispenserActor: ActorRef) extends WorkflowActor(workflowId, startMode, workflowSources, conf, ioActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, callCacheWriteActor, dockerHashActor, jobTokenDispenserActor, BackendSingletonCollection(Map.empty), serverMode = true) { - override def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, outputStore: OutputStore) = finalizationProbe.ref + override def makeFinalizationActor(workflowDescriptor: 
EngineWorkflowDescriptor, jobExecutionMap: JobExecutionMap, worfklowOutputs: CallOutputs) = finalizationProbe.ref } diff --git a/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala b/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala index 73013e712..e4f5e1350 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala @@ -2,8 +2,8 @@ package cromwell.engine.workflow import akka.actor.{ActorSystem, Props} import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.CromwellTestKitSpec +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{MaterializeWorkflowDescriptorCommand, MaterializeWorkflowDescriptorFailureResponse, MaterializeWorkflowDescriptorSuccessResponse, WorkflowDescriptorMaterializationResult} @@ -12,16 +12,16 @@ import scala.concurrent.Await trait WorkflowDescriptorBuilder { - implicit val awaitTimeout = CromwellTestkitSpec.TimeoutDuration + implicit val awaitTimeout = CromwellTestKitSpec.TimeoutDuration implicit val actorSystem: ActorSystem - def createMaterializedEngineWorkflowDescriptor(id: WorkflowId, workflowSources: WorkflowSourceFiles): EngineWorkflowDescriptor = { + def createMaterializedEngineWorkflowDescriptor(id: WorkflowId, workflowSources: WorkflowSourceFilesCollection): EngineWorkflowDescriptor = { import akka.pattern.ask implicit val timeout = akka.util.Timeout(awaitTimeout) implicit val ec = actorSystem.dispatcher val serviceRegistryIgnorer = actorSystem.actorOf(Props.empty) - val actor = 
actorSystem.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryIgnorer, id), "MaterializeWorkflowDescriptorActor-" + id.id) + val actor = actorSystem.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryIgnorer, id, importLocalFilesystem = false), "MaterializeWorkflowDescriptorActor-" + id.id) val workflowDescriptorFuture = actor.ask( MaterializeWorkflowDescriptorCommand(workflowSources, ConfigFactory.load) ).mapTo[WorkflowDescriptorMaterializationResult] diff --git a/engine/src/test/scala/cromwell/engine/workflow/WorkflowDockerLookupActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/WorkflowDockerLookupActorSpec.scala new file mode 100644 index 000000000..d2255dcce --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/WorkflowDockerLookupActorSpec.scala @@ -0,0 +1,274 @@ +package cromwell.engine.workflow + +import akka.actor.{ActorRef, Props} +import akka.testkit.{ImplicitSender, TestActorRef, TestProbe} +import com.typesafe.config.ConfigFactory +import cromwell.core.actor.StreamIntegration.BackPressure +import cromwell.core.{TestKitSuite, WorkflowId} +import cromwell.database.slick.SlickDatabase +import cromwell.database.sql.tables.DockerHashStoreEntry +import cromwell.docker.DockerHashActor.{DockerHashFailedResponse, DockerHashSuccessResponse} +import cromwell.docker.{DockerHashRequest, DockerHashResult, DockerImageIdentifier, DockerImageIdentifierWithoutHash} +import cromwell.engine.workflow.WorkflowActor.{RestartExistingWorkflow, StartMode, StartNewWorkflow} +import cromwell.engine.workflow.WorkflowDockerLookupActor.{DockerHashActorTimeout, WorkflowDockerLookupFailure, WorkflowDockerTerminalFailure} +import cromwell.engine.workflow.WorkflowDockerLookupActorSpec._ +import cromwell.services.ServicesStore._ +import cromwell.services.SingletonServicesStore +import org.scalatest.{BeforeAndAfter, FlatSpecLike, Matchers} +import org.specs2.mock.Mockito + +import scala.concurrent.duration._ +import 
scala.concurrent.{ExecutionContext, Future} +import scala.language.postfixOps + + +class WorkflowDockerLookupActorSpec extends TestKitSuite("WorkflowDockerLookupActorSpecSystem") with FlatSpecLike with Matchers with ImplicitSender with BeforeAndAfter with Mockito { + var workflowId: WorkflowId = _ + var dockerHashingActor: TestProbe = _ + var numReads: Int = _ + var numWrites: Int = _ + + before { + workflowId = WorkflowId.randomId() + dockerHashingActor = TestProbe() + numReads = 0 + numWrites = 0 + } + + it should "wait and resubmit the docker request when it gets a backpressure message" in { + val backpressureWaitTime = 2 seconds + + val lookupActor = TestActorRef(Props(new TestWorkflowDockerLookupActor(workflowId, dockerHashingActor.ref, StartNewWorkflow, backpressureWaitTime)), self) + lookupActor ! LatestRequest + + dockerHashingActor.expectMsg(LatestRequest) + dockerHashingActor.reply(BackPressure(LatestRequest)) + // Give a couple of seconds of margin to account for test latency etc... + dockerHashingActor.expectMsg(backpressureWaitTime.+(2 seconds), LatestRequest) + } + + it should "not look up the same tag again after a successful lookup" in { + val db = dbWithWrite { + numWrites = numWrites + 1 + Future.successful(()) + } + + val lookupActor = TestActorRef(WorkflowDockerLookupActor.props(workflowId, dockerHashingActor.ref, StartNewWorkflow, db)) + lookupActor ! LatestRequest + + // The WorkflowDockerLookupActor should not have the hash for this tag yet and will need to query the dockerHashingActor. + dockerHashingActor.expectMsg(LatestRequest) + dockerHashingActor.reply(LatestSuccessResponse) + // The WorkflowDockerLookupActor should forward the success message to this actor. + expectMsg(LatestSuccessResponse) + numWrites should equal(1) + + // Now the WorkflowDockerLookupActor should now have this hash in its mappings and should not query the dockerHashingActor again. + lookupActor ! 
LatestRequest + dockerHashingActor.expectNoMsg() + // The WorkflowDockerLookupActor should forward the success message to this actor. + expectMsg(LatestSuccessResponse) + numWrites should equal(1) + } + + it should "soldier on after docker hashing actor timeouts" in { + val lookupActor = TestActorRef(WorkflowDockerLookupActor.props(workflowId, dockerHashingActor.ref, StartNewWorkflow)) + + lookupActor ! LatestRequest + lookupActor ! OlderRequest + + val timeout = DockerHashActorTimeout(LatestRequest) + + // The WorkflowDockerLookupActor should not have the hash for this tag yet and will need to query the dockerHashingActor. + dockerHashingActor.expectMsg(LatestRequest) + dockerHashingActor.expectMsg(OlderRequest) + dockerHashingActor.reply(timeout) + // Send a response for the older request after sending the timeout. This should cause a mapping to be entered in the + // WorkflowDockerLookupActor for the older request, which will keep the WorkflowDockerLookupActor from querying the + // DockerHashActor for this hash again. + dockerHashingActor.reply(OlderSuccessResponse) + + val results = receiveN(2, 2 seconds).toSet + val failedRequests = results collect { + case f: WorkflowDockerLookupFailure if f.request == LatestRequest => f.request + } + + failedRequests should equal(Set(LatestRequest)) + + // Try again. The hashing actor should receive the latest message and this time won't time out. + lookupActor ! LatestRequest + lookupActor ! 
OlderRequest + dockerHashingActor.expectMsg(LatestRequest) + dockerHashingActor.reply(LatestSuccessResponse) + + val responses = receiveN(2, 2 seconds).toSet + val hashResponses = responses collect { case msg: DockerHashSuccessResponse => msg } + // Success after transient timeout failures: + hashResponses should equal(Set(LatestSuccessResponse, OlderSuccessResponse)) + } + + it should "respond appropriately to docker hash lookup failures" in { + val lookupActor = TestActorRef(WorkflowDockerLookupActor.props(workflowId, dockerHashingActor.ref, StartNewWorkflow)) + lookupActor ! LatestRequest + lookupActor ! OlderRequest + + // The WorkflowDockerLookupActor should not have the hash for this tag yet and will need to query the dockerHashingActor. + dockerHashingActor.expectMsg(LatestRequest) + dockerHashingActor.expectMsg(OlderRequest) + val olderFailedResponse = DockerHashFailedResponse(new RuntimeException("Lookup failed"), OlderRequest) + + dockerHashingActor.reply(LatestSuccessResponse) + dockerHashingActor.reply(olderFailedResponse) + + val results = receiveN(2, 2 seconds).toSet + val mixedResponses = results collect { + case msg: DockerHashSuccessResponse => msg + // Scoop out the request here since we can't match the exception on the whole message. + case msg: WorkflowDockerLookupFailure if msg.reason.getMessage == "Failed to get docker hash for ubuntu:older Lookup failed" => msg.request + } + + Set(LatestSuccessResponse, OlderRequest) should equal(mixedResponses) + + // Try again, I have a good feeling about this. + lookupActor ! 
OlderRequest + dockerHashingActor.expectMsg(OlderRequest) + dockerHashingActor.reply(OlderSuccessResponse) + expectMsg(OlderSuccessResponse) + } + + it should "reuse previously looked up hashes following a restart" in { + val db = dbWithQuery { + Future.successful( + Seq(LatestStoreEntry(workflowId), OlderStoreEntry(workflowId))) + } + + val lookupActor = TestActorRef(WorkflowDockerLookupActor.props(workflowId, dockerHashingActor.ref, RestartExistingWorkflow, db)) + + lookupActor ! LatestRequest + lookupActor ! OlderRequest + + dockerHashingActor.expectNoMsg() + + val results = receiveN(2, 2 seconds).toSet + val successes = results collect { case result: DockerHashSuccessResponse => result } + + successes should equal(Set(LatestSuccessResponse, OlderSuccessResponse)) + } + + it should "not try to look up hashes if not restarting" in { + val db = dbWithWrite(Future.successful(())) + val lookupActor = TestActorRef(WorkflowDockerLookupActor.props(workflowId, dockerHashingActor.ref, StartNewWorkflow, db)) + + lookupActor ! LatestRequest + lookupActor ! OlderRequest + + dockerHashingActor.expectMsg(LatestRequest) + dockerHashingActor.expectMsg(OlderRequest) + dockerHashingActor.reply(LatestSuccessResponse) + dockerHashingActor.reply(OlderSuccessResponse) + + val results = receiveN(2, 2 seconds).toSet + val successes = results collect { case result: DockerHashSuccessResponse => result } + + successes should equal(Set(LatestSuccessResponse, OlderSuccessResponse)) + } + + it should "handle hash write errors appropriately" in { + val db = dbWithWrite { + numWrites = numWrites + 1 + if (numWrites == 1) Future.failed(new RuntimeException("Fake exception from a test.")) else Future.successful(()) + } + + val lookupActor = TestActorRef(WorkflowDockerLookupActor.props(workflowId, dockerHashingActor.ref, StartNewWorkflow, db)) + lookupActor ! LatestRequest + + // The WorkflowDockerLookupActor should not have the hash for this tag yet and will need to query the dockerHashingActor. 
+ dockerHashingActor.expectMsg(LatestRequest) + dockerHashingActor.reply(LatestSuccessResponse) + // The WorkflowDockerLookupActor is going to fail when it tries to write to that broken DB. + expectMsgClass(classOf[WorkflowDockerLookupFailure]) + numWrites should equal(1) + + lookupActor ! LatestRequest + // The WorkflowDockerLookupActor will query the dockerHashingActor again. + dockerHashingActor.expectMsg(LatestRequest) + dockerHashingActor.reply(LatestSuccessResponse) + // The WorkflowDockerLookupActor should forward the success message to this actor. + expectMsg(LatestSuccessResponse) + numWrites should equal(2) + } + + it should "emit a terminal failure message if failing to read hashes on restart" in { + val db = dbWithQuery { + numReads = numReads + 1 + Future.failed(new Exception("Don't worry this is just a dummy failure in a test")) + } + + val lookupActor = TestActorRef(WorkflowDockerLookupActor.props(workflowId, dockerHashingActor.ref, RestartExistingWorkflow, db)) + lookupActor ! LatestRequest + + dockerHashingActor.expectNoMsg() + expectMsgClass(classOf[WorkflowDockerTerminalFailure]) + numReads should equal(1) + } + + it should "emit a terminal failure message if unable to parse hashes read from the database on restart" in { + val db = dbWithQuery { + numReads = numReads + 1 + Future.successful(Seq( + DockerHashStoreEntry(workflowId.toString, Latest, "md5:AAAAA"), + // missing the "algorithm:" preceding the hash value so this should fail parsing. + DockerHashStoreEntry(workflowId.toString, Older, "BBBBB") + )) + } + + val lookupActor = TestActorRef(WorkflowDockerLookupActor.props(workflowId, dockerHashingActor.ref, RestartExistingWorkflow, db)) + lookupActor ! 
LatestRequest + + dockerHashingActor.expectNoMsg() + expectMsgClass(classOf[WorkflowDockerTerminalFailure]) + numReads should equal(1) + } + + def dbWithWrite(writeFn: => Future[Unit]): SlickDatabase = { + databaseInterface(write = _ => writeFn) + } + + def dbWithQuery(queryFn: => Future[Seq[DockerHashStoreEntry]]): SlickDatabase = { + databaseInterface(query = _ => queryFn) + } + + def databaseInterface(query: String => Future[Seq[DockerHashStoreEntry]] = abjectFailure, + write: DockerHashStoreEntry => Future[Unit] = abjectFailure): SlickDatabase = { + new SlickDatabase(DatabaseConfig) { + override def queryDockerHashStoreEntries(workflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Seq[DockerHashStoreEntry]] = query(workflowExecutionUuid) + + override def addDockerHashStoreEntry(dockerHashStoreEntry: DockerHashStoreEntry)(implicit ec: ExecutionContext): Future[Unit] = write(dockerHashStoreEntry) + }.initialized + } +} + + +object WorkflowDockerLookupActorSpec { + val Latest = "ubuntu:latest" + val Older = "ubuntu:older" + + val LatestImageId = DockerImageIdentifier.fromString(Latest).get.asInstanceOf[DockerImageIdentifierWithoutHash] + val OlderImageId = DockerImageIdentifier.fromString(Older).get.asInstanceOf[DockerImageIdentifierWithoutHash] + + val LatestRequest = DockerHashRequest(LatestImageId) + val OlderRequest = DockerHashRequest(OlderImageId) + + def LatestStoreEntry(workflowId: WorkflowId): DockerHashStoreEntry = DockerHashStoreEntry(workflowId.toString, Latest, "md5:AAAAAAAA") + def OlderStoreEntry(workflowId: WorkflowId): DockerHashStoreEntry = DockerHashStoreEntry(workflowId.toString, Older, "md5:BBBBBBBB") + + val LatestSuccessResponse = DockerHashSuccessResponse(DockerHashResult("md5", "AAAAAAAA"), LatestRequest) + val OlderSuccessResponse = DockerHashSuccessResponse(DockerHashResult("md5", "BBBBBBBB"), OlderRequest) + + val DatabaseConfig = ConfigFactory.load.getConfig("database") + + def abjectFailure[A, B]: A => Future[B] = _ => 
Future.failed(new RuntimeException("Should not be called!")) + + class TestWorkflowDockerLookupActor(workflowId: WorkflowId, dockerHashingActor: ActorRef, startMode: StartMode, override val backpressureTimeout: FiniteDuration) + extends WorkflowDockerLookupActor(workflowId, dockerHashingActor, startMode, SingletonServicesStore.databaseInterface) +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/CachingConfigSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/CachingConfigSpec.scala index 50c0218e7..499e6f0c2 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/CachingConfigSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/CachingConfigSpec.scala @@ -1,12 +1,12 @@ package cromwell.engine.workflow.lifecycle +import cats.data.Validated.{Invalid, Valid} import com.typesafe.config.{Config, ConfigFactory} import cromwell.core.WorkflowOptions import cromwell.core.callcaching.CallCachingMode -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.{Assertion, FlatSpec, Matchers} import scala.collection.JavaConverters._ -import scalaz.{Failure => ScalazFailure, Success => ScalazSuccess} import scala.util.{Success, Try} class CachingConfigSpec extends FlatSpec with Matchers { @@ -51,20 +51,20 @@ class CachingConfigSpec extends FlatSpec with Matchers { val writeCacheOffCombinations = allCombinations -- writeCacheOnCombinations val readCacheOffCombinations = allCombinations -- readCacheOnCombinations - validateCallCachingMode("write cache on options", writeCacheOnCombinations) { mode => mode.writeToCache should be(true) } - validateCallCachingMode("read cache on options", readCacheOnCombinations) { mode => mode.readFromCache should be(true) } - validateCallCachingMode("write cache off options", writeCacheOffCombinations) { mode => mode.writeToCache should be(false) } - validateCallCachingMode("read cache off options", readCacheOffCombinations) { mode => mode.readFromCache should be(false) 
} + validateCallCachingMode("write cache on options", writeCacheOnCombinations) { _.writeToCache should be(true) } + validateCallCachingMode("read cache on options", readCacheOnCombinations) { _.readFromCache should be(true) } + validateCallCachingMode("write cache off options", writeCacheOffCombinations) { _.writeToCache should be(false) } + validateCallCachingMode("read cache off options", readCacheOffCombinations) { _.readFromCache should be(false) } - private def validateCallCachingMode(testName: String, combinations: Set[(Config, Try[WorkflowOptions])])(verificationFunction: CallCachingMode => Unit) = { + private def validateCallCachingMode(testName: String, combinations: Set[(Config, Try[WorkflowOptions])])(verificationFunction: CallCachingMode => Assertion) = { it should s"correctly identify $testName" in { combinations foreach { case (config, Success(wfOptions)) => MaterializeWorkflowDescriptorActor.validateCallCachingMode(wfOptions, config) match { - case ScalazSuccess(activity) => verificationFunction(activity) - case ScalazFailure(errors) => - val errorsList = errors.list.toList.mkString(", ") + case Valid(activity) => verificationFunction(activity) + case Invalid(errors) => + val errorsList = errors.toList.mkString(", ") fail(s"Failure generating Call Config Mode: $errorsList") } case x => fail(s"Unexpected test tuple: $x") diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala index 9414647bd..9cc7cff8d 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala @@ -3,8 +3,9 @@ package cromwell.engine.workflow.lifecycle import akka.actor.Props import akka.testkit.TestDuration import com.typesafe.config.ConfigFactory -import 
cromwell.CromwellTestkitSpec -import cromwell.core.{WorkflowId, WorkflowOptions, WorkflowSourceFiles} +import cromwell.CromwellTestKitWordSpec +import cromwell.core.labels.{Label, Labels} +import cromwell.core.{WorkflowId, WorkflowOptions, WorkflowSourceFilesWithoutImports} import cromwell.engine.backend.{BackendConfigurationEntry, CromwellBackends} import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{MaterializeWorkflowDescriptorCommand, MaterializeWorkflowDescriptorFailureResponse, MaterializeWorkflowDescriptorSuccessResponse} import cromwell.util.SampleWdl.HelloWorld @@ -12,12 +13,11 @@ import org.scalatest.BeforeAndAfter import org.scalatest.mockito.MockitoSugar import spray.json.DefaultJsonProtocol._ import spray.json._ -import wdl4s.values.{WdlInteger, WdlString} +import wdl4s.wdl.values.WdlString import scala.concurrent.duration._ -import scala.language.postfixOps -class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with BeforeAndAfter with MockitoSugar { +class MaterializeWorkflowDescriptorActorSpec extends CromwellTestKitWordSpec with BeforeAndAfter with MockitoSugar { val workflowId = WorkflowId.randomId() val minimumConf = ConfigFactory.parseString( @@ -39,42 +39,51 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be """.stripMargin) val unstructuredFile = "fubar badness!" 
val validOptionsFile =""" { "write_to_cache": "true" } """ + val validCustomLabelsFile="""{ "label1": "value1", "label2": "value2" }""" + val badCustomLabelsFile="""{ "Label1": "valu£1", "--label2": "valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue" }""" val validInputsJson = HelloWorld.rawInputs.toJson.toString() - val wdlSourceWithDocker = HelloWorld.wdlSource(""" runtime { docker: "ubuntu:latest" } """) - val wdlSourceNoDocker = HelloWorld.wdlSource(""" runtime { } """) + val workflowSourceWithDocker = HelloWorld.workflowSource(""" runtime { docker: "ubuntu:latest" } """) + val workflowSourceNoDocker = HelloWorld.workflowSource(""" runtime { } """) val Timeout = 10.second.dilated - val NoBehaviourActor = system.actorOf(Props.empty) + val NoBehaviorActor = system.actorOf(Props.empty) before { } after { - system.stop(NoBehaviourActor) + system.stop(NoBehaviorActor) } "MaterializeWorkflowDescriptorActor" should { "accept valid WDL, inputs and options files" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdlSourceNoDocker, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = workflowSourceNoDocker, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = validInputsJson, + workflowOptionsJson = validOptionsFile, + labelsJson = validCustomLabelsFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => wfDesc.id shouldBe workflowId - wfDesc.name shouldBe "hello" + wfDesc.name shouldBe "wf_hello" wfDesc.namespace.tasks.size shouldBe 1 - wfDesc.workflowInputs.head shouldBe ("hello.hello.addressee", WdlString("world")) - wfDesc.backendDescriptor.inputs.head shouldBe ("hello.hello.addressee", WdlString("world")) - wfDesc.getWorkflowOption(WorkflowOptions.WriteToCache) shouldBe Some("true") + wfDesc.knownValues.head shouldBe (("wf_hello.hello.addressee", WdlString("world"))) + wfDesc.backendDescriptor.knownValues.head shouldBe (("wf_hello.hello.addressee", WdlString("world"))) + wfDesc.getWorkflowOption(WorkflowOptions.WriteToCache) shouldBe Option("true") wfDesc.getWorkflowOption(WorkflowOptions.ReadFromCache) shouldBe None + wfDesc.backendDescriptor.customLabels shouldBe Labels("label1" -> "value1", "label2" -> "value2") // Default backend assignment is "Local": wfDesc.backendAssignments foreach { case (call, assignment) if call.task.name.equals("hello") => assignment shouldBe "Local" - case (call, assignment) => fail(s"Unexpected call: ${call.task.name}") + case (call, _) => fail(s"Unexpected call: ${call.task.name}") } - wfDesc.engineFilesystems.size shouldBe 1 + wfDesc.pathBuilders.size shouldBe 1 case MaterializeWorkflowDescriptorFailureResponse(reason) => fail(s"Materialization failed with $reason") case unknown => fail(s"Unexpected materialization response: $unknown") @@ -84,50 +93,6 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be system.stop(materializeWfActor) } - // Note to whoever comes next: I don't really know why this distinction exists. I've added this test but would - // not be at all upset if the whole thing gets removed. 
- "differently construct engine workflow inputs and backend inputs" in { - val wdl = - """ - |task bar { command { echo foobar } } - |workflow foo { - | Int i - | Int j = 5 - |} - """.stripMargin - val inputs = - """ - |{ "foo.i": "17" } - """.stripMargin - - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, inputs, validOptionsFile) - materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) - - within(Timeout) { - expectMsgPF() { - case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => - - - wfDesc.workflowInputs foreach { - case ("foo.i", wdlValue) => wdlValue shouldBe WdlInteger(17) - case ("foo.j", wdlValue) => fail("Workflow declarations should not appear as workflow inputs") - case (x, y) => fail(s"Unexpected input $x -> $y") - } - - wfDesc.backendDescriptor.inputs foreach { - case ("foo.i", wdlValue) => wdlValue shouldBe WdlInteger(17) - case ("foo.j", wdlValue) => wdlValue shouldBe WdlInteger(5) - case (x, y) => fail(s"Unexpected input $x -> $y") - } - case MaterializeWorkflowDescriptorFailureResponse(reason) => fail(s"Unexpected materialization failure: $reason") - case unknown => fail(s"Unexpected materialization response: $unknown") - } - } - - system.stop(materializeWfActor) - } - "assign default runtime attributes" ignore { val wdl = """ @@ -150,8 +115,14 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | } |} """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, "{}", defaultDocker) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = wdl, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = 
"{}", + workflowOptionsJson = defaultDocker, + labelsJson = validCustomLabelsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -197,14 +168,20 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be val cromwellBackends = CromwellBackends(fauxBackendEntries) // Run the test: - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, cromwellBackends)) - val sources = WorkflowSourceFiles(wdl, "{}", "{}") + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, cromwellBackends, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = wdl, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = "{}", + workflowOptionsJson = "{}", + labelsJson = validCustomLabelsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, differentDefaultBackendConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => - wfDesc.namespace.workflow.calls foreach { + wfDesc.namespace.workflow.taskCalls foreach { case call if call.task.name.equals("a") => wfDesc.backendAssignments(call) shouldBe "SpecifiedBackend" case call if call.task.name.equals("b") => @@ -232,8 +209,14 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be |} """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, "{}", "{}") + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = wdl, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = "{}", + workflowOptionsJson = "{}", + labelsJson = 
"{}") materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, differentDefaultBackendConf) within(Timeout) { @@ -241,7 +224,7 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be case MaterializeWorkflowDescriptorFailureResponse(reason) => if (!reason.getMessage.contains("Backend for call foo.a ('NoSuchBackend') not registered in configuration file")) fail(s"Unexpected failure message from MaterializeWorkflowDescriptorActor: ${reason.getMessage}") - case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") + case _: MaterializeWorkflowDescriptorSuccessResponse => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") } @@ -251,15 +234,21 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be } "reject an invalid WDL source" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(unstructuredFile, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = unstructuredFile, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = validInputsJson, + workflowOptionsJson = validOptionsFile, + labelsJson = validCustomLabelsFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorFailureResponse(reason) => - reason.getMessage should startWith("Workflow input processing failed.\nUnable to load namespace from workflow: ERROR: Finished parsing without consuming all tokens.") - case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") + reason.getMessage should startWith("Workflow input processing failed:\nUnable to load namespace from workflow: ERROR: Finished parsing without consuming all tokens.") + case _: MaterializeWorkflowDescriptorSuccessResponse => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") } @@ -275,15 +264,21 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | |# no workflow foo { ... } block!! """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(noWorkflowWdl, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = noWorkflowWdl, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = validInputsJson, + workflowOptionsJson = validOptionsFile, + labelsJson = validCustomLabelsFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorFailureResponse(reason) => - reason.getMessage should startWith("Workflow input processing failed.\nUnable to load namespace from workflow: Namespace does not have a local workflow to run") - case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") + reason.getMessage should startWith("Workflow input processing failed:\nUnable to load namespace from workflow: Namespace does not have a local workflow to run") + case _: MaterializeWorkflowDescriptorSuccessResponse => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") } @@ -300,15 +295,21 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | |workflow foo { } """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val badWdlSources = WorkflowSourceFiles(noWorkflowWdl, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val badWdlSources = WorkflowSourceFilesWithoutImports( + workflowSource = noWorkflowWdl, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = validInputsJson, + workflowOptionsJson = validOptionsFile, + labelsJson = validCustomLabelsFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(badWdlSources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorFailureResponse(reason) => - reason.getMessage should startWith("Workflow input processing failed.\nUnable to load namespace from workflow: Namespace does not have a local workflow to run") - case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") + reason.getMessage should startWith("Workflow input processing failed:\nUnable to load namespace from workflow: Namespace does not have a local workflow to run") + case _: MaterializeWorkflowDescriptorSuccessResponse => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") } @@ -319,15 +320,77 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be "reject an invalid options file" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdlSourceNoDocker, validInputsJson, unstructuredFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = workflowSourceNoDocker, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = validInputsJson, + workflowOptionsJson = unstructuredFile, + labelsJson = validCustomLabelsFile) + materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, minimumConf) + + within(Timeout) { + expectMsgPF() { + case MaterializeWorkflowDescriptorFailureResponse(reason) => + reason.getMessage should startWith("Workflow input processing failed:\nWorkflow contains invalid options JSON") + case _: MaterializeWorkflowDescriptorSuccessResponse => fail("This materialization should not have succeeded!") + case unknown => + fail(s"Unexpected materialization response: $unknown") + } + } + + system.stop(materializeWfActor) + } + + "reject an unstructured labels file" in { + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = workflowSourceNoDocker, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = validInputsJson, + workflowOptionsJson = validOptionsFile, + labelsJson = unstructuredFile) + materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) + + within(Timeout) { + expectMsgPF() { + case MaterializeWorkflowDescriptorFailureResponse(reason) => + reason.getMessage should startWith( + """Workflow input processing failed: + |Workflow contains invalid labels JSON: Unexpected character 'u'""".stripMargin) + case _: MaterializeWorkflowDescriptorSuccessResponse => fail("This materialization should not have succeeded!") + case unknown => + fail(s"Unexpected materialization response: $unknown") + } + } + + system.stop(materializeWfActor) + } + + "reject invalid labels" in { + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = workflowSourceNoDocker, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = validInputsJson, + workflowOptionsJson = validOptionsFile, + labelsJson = badCustomLabelsFile) 
materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorFailureResponse(reason) => - reason.getMessage should startWith("Workflow input processing failed.\nWorkflow contains invalid options JSON") - case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") + val expectedMessage = + s"""Workflow input processing failed: + |Invalid label: `Label1` did not match the regex ${Label.LabelKeyRegex}. + |Invalid label: `valu£1` did not match the regex ${Label.LabelValueRegex}. + |Invalid label: `--label2` did not match the regex ${Label.LabelKeyRegex}. + |Invalid label: `valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue` is 65 characters. The maximum is 63.""".stripMargin + reason.getMessage shouldBe expectedMessage + case _: MaterializeWorkflowDescriptorSuccessResponse => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") } @@ -337,15 +400,21 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be } "reject an invalid workflow inputs file" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdlSourceNoDocker, unstructuredFile, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = workflowSourceNoDocker, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = unstructuredFile, + workflowOptionsJson = validOptionsFile, + labelsJson = validCustomLabelsFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorFailureResponse(reason) => - reason.getMessage should startWith("Workflow input processing failed.\nWorkflow contains invalid inputs JSON") - case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") + reason.getMessage should startWith("Workflow input processing failed:\nWorkflow contains invalid inputs JSON") + case _: MaterializeWorkflowDescriptorSuccessResponse => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") } @@ -355,16 +424,22 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be } "reject requests if any required inputs are missing" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) val noInputsJson = "{}" - val badOptionsSources = WorkflowSourceFiles(wdlSourceNoDocker, noInputsJson, validOptionsFile) + val badOptionsSources = WorkflowSourceFilesWithoutImports( + workflowSource = workflowSourceNoDocker, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = noInputsJson, + workflowOptionsJson = validOptionsFile, + labelsJson = validCustomLabelsFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(badOptionsSources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorFailureResponse(reason) => - reason.getMessage should startWith("Workflow input processing failed.\nRequired workflow input 'hello.hello.addressee' not specified") - case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") + reason.getMessage should startWith("Workflow input processing failed:\nRequired workflow input 'wf_hello.hello.addressee' not specified") + case _: MaterializeWorkflowDescriptorSuccessResponse => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") } @@ -382,15 +457,57 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | call bar |} """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, "{}", validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = wdl, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = "{}", + workflowOptionsJson = validOptionsFile, + labelsJson = validCustomLabelsFile) + materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, minimumConf) + + within(Timeout) { + expectMsgPF() { + case MaterializeWorkflowDescriptorFailureResponse(reason) => + reason.getMessage should startWith("Workflow input processing failed:\nUnable to load namespace from workflow: ERROR: Value for j is not coerceable into a Int") + case _: MaterializeWorkflowDescriptorSuccessResponse => fail("This materialization should not have succeeded!") + case unknown => fail(s"Unexpected materialization response: $unknown") + } + } + + system.stop(materializeWfActor) + } + + "identify all malformed input file names in an input json" in { + val wdl = + """ + |task bar { command { echo foobar } } + |workflow foo { + | File bad_one + | File good_one + | File bad_two + | + | call bar + |} + """.stripMargin + val jsonInput = Map("foo.bad_one" -> "\"gs://this/is/a/bad/gcs/path.txt", "foo.good_one" -> "\"/local/path/is/ok.txt", "foo.bad_two" -> "\"gs://another/bad/gcs/path.txt").toJson.toString + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviorActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports( + workflowSource = wdl, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = jsonInput, + workflowOptionsJson = validOptionsFile, + labelsJson = validCustomLabelsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorFailureResponse(reason) => - reason.getMessage should startWith("Workflow input processing failed.\nInvalid right-side type of 'foo.j'. 
Expecting Int, got String") - case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") + reason.getMessage should equal("Workflow input processing failed:\nInvalid value for File input 'foo.bad_one': \"gs://this/is/a/bad/gcs/path.txt starts with a '\"' " + + "\nInvalid value for File input 'foo.bad_two': \"gs://another/bad/gcs/path.txt starts with a '\"' ") + case _: MaterializeWorkflowDescriptorSuccessResponse => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStoreBenchmark.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStoreBenchmark.scala new file mode 100644 index 000000000..a70f68e49 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStoreBenchmark.scala @@ -0,0 +1,60 @@ +package cromwell.engine.workflow.lifecycle.execution + +import cromwell.backend.BackendJobDescriptorKey +import cromwell.core.ExecutionStatus.{apply => _, _} +import cromwell.core.{ExecutionStatus, JobKey} +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.CollectorKey +import cromwell.util.SampleWdl +import org.scalameter.api._ +import wdl4s.wdl.{WdlTaskCall, WdlNamespaceWithWorkflow} +import org.scalameter.picklers.Implicits._ + +/** + * Benchmarks the performance of the execution store using ScalaMeter (http://scalameter.github.io/) + * This is not run automatically by "sbt test". 
To run this test specifically, either use intellij integration, or run + * sbt "project engine" "benchmark:test-only cromwell.engine.workflow.lifecycle.execution.ExecutionStoreBenchmark" + * sbt benchmark:test will run all ScalaMeter tests + */ +object ExecutionStoreBenchmark extends Bench[Double] { + + /* Benchmark configuration */ + lazy val measurer = new Measurer.Default + lazy val executor = SeparateJvmsExecutor(new Executor.Warmer.Default, Aggregator.average, measurer) + lazy val reporter = new LoggingReporter[Double] + lazy val persistor = Persistor.None + + val wdl = WdlNamespaceWithWorkflow.load(SampleWdl.PrepareScatterGatherWdl().workflowSource(), Seq.empty).get + val prepareCall: WdlTaskCall = wdl.workflow.findCallByName("do_prepare").get.asInstanceOf[WdlTaskCall] + val scatterCall: WdlTaskCall = wdl.workflow.findCallByName("do_scatter").get.asInstanceOf[WdlTaskCall] + val scatter = wdl.workflow.namespace.scatters.head + + def makeKey(call: WdlTaskCall, executionStatus: ExecutionStatus)(index: Int) = { + BackendJobDescriptorKey(call, Option(index), 1) -> executionStatus + } + + // Generates numbers from 1000 to 10000 with 1000 gap: + // 1000, 2000, ..., 10000 + val sizes: Gen[Int] = Gen.range("size")(10000, 100000, 10000) + + // Generates executionStores using the given above sizes + // Each execution store contains X simulated shards of "prepareCall" in status Done and X simulated shards of "scatterCall" in status NotStarted + // This provides a good starting point to evaluate the speed of "runnableCalls", as it needs to iterate over all "NotStarted" keys, and for each one + // look for their upstreams keys in status "Done" + val executionStores: Gen[ExecutionStore] = for { + size <- sizes + doneMap = (0 until size map makeKey(prepareCall, ExecutionStatus.Done)).toMap + collectorKey = Map(CollectorKey(scatterCall, scatter, size) -> ExecutionStatus.NotStarted) + notStartedMap = (0 until size map makeKey(scatterCall, ExecutionStatus.NotStarted)).toMap ++ 
collectorKey + finalMap: Map[JobKey, ExecutionStatus] = doneMap ++ notStartedMap + } yield new ExecutionStore(finalMap, true) + + performance of "ExecutionStore" in { + // Measures how fast the execution store can find runnable calls with lots of "Done" calls and "NotStarted" calls. + // Other "shapes" would be valuable to get a better sense of how this method behaves in various situations (with Collector Keys etc...) + measure method "runnableCalls" in { + using(executionStores) in { es => + es.runnableScopes + } + } + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala new file mode 100644 index 000000000..f3466a7da --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala @@ -0,0 +1,219 @@ +package cromwell.engine.workflow.lifecycle.execution + +import java.util.UUID + +import akka.actor.Props +import akka.testkit.{TestFSMRef, TestProbe} +import cromwell.backend.{AllBackendInitializationData, BackendWorkflowDescriptor, JobExecutionMap} +import cromwell.core._ +import cromwell.core.callcaching.CallCachingOff +import cromwell.database.sql.tables.SubWorkflowStoreEntry +import cromwell.engine.backend.BackendSingletonCollection +import cromwell.engine.workflow.lifecycle.execution.SubWorkflowExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.preparation.CallPreparation +import cromwell.engine.workflow.lifecycle.execution.preparation.CallPreparation.CallPreparationFailed +import cromwell.engine.workflow.lifecycle.execution.preparation.SubWorkflowPreparationActor.SubWorkflowPreparationSucceeded +import cromwell.engine.{ContinueWhilePossible, EngineWorkflowDescriptor} +import cromwell.subworkflowstore.SubWorkflowStoreActor.{QuerySubWorkflow, 
SubWorkflowFound, SubWorkflowNotFound} +import org.scalatest.concurrent.Eventually +import org.scalatest.{FlatSpecLike, Matchers} +import org.specs2.mock.Mockito +import wdl4s.wdl._ + +import scala.concurrent.duration._ +import scala.language.postfixOps + +class SubWorkflowExecutionActorSpec extends TestKitSuite with FlatSpecLike with Matchers with Mockito with Eventually { + + behavior of "SubWorkflowExecutionActor" + + val serviceRegistryProbe = TestProbe() + val jobStoreProbe = TestProbe() + val subWorkflowStoreProbe = TestProbe() + val callCacheReadActorProbe = TestProbe() + val callCacheWriteActorProbe = TestProbe() + val dockerHashActorProbe = TestProbe() + val ioActorProbe = TestProbe() + val jobTokenDispenserProbe = TestProbe() + val preparationActor = TestProbe() + val subWorkflowActor = TestProbe() + val deathWatch = TestProbe() + val parentProbe = TestProbe() + val parentBackendDescriptor = mock[BackendWorkflowDescriptor] + val parentWorkflowId: WorkflowId = WorkflowId.randomId() + parentBackendDescriptor.id returns parentWorkflowId + val parentWorkflowDescriptor = EngineWorkflowDescriptor( + mock[WdlNamespaceWithWorkflow], + parentBackendDescriptor, + Map.empty, + ContinueWhilePossible, + List.empty, + CallCachingOff + ) + val subWorkflow = mock[WdlWorkflow] + subWorkflow.unqualifiedName returns "sub_wf" + val subWorkflowCall = mock[WdlWorkflowCall] + subWorkflowCall.fullyQualifiedName returns "foo.bar" + subWorkflowCall.callable returns subWorkflow + val subKey = SubWorkflowKey(subWorkflowCall, None, 1) + + val awaitTimeout: FiniteDuration = 10 seconds + + def buildEWEA(restart: Boolean = false) = { + new TestFSMRef[SubWorkflowExecutionActorState, SubWorkflowExecutionActorData, SubWorkflowExecutionActor](system, Props( + new SubWorkflowExecutionActor( + subKey, + WorkflowExecutionActorData.empty(parentWorkflowDescriptor), + Map.empty, + ioActorProbe.ref, + serviceRegistryProbe.ref, + jobStoreProbe.ref, + subWorkflowStoreProbe.ref, + 
callCacheReadActorProbe.ref, + callCacheWriteActorProbe.ref, + dockerHashActorProbe.ref, + jobTokenDispenserProbe.ref, + BackendSingletonCollection(Map.empty), + AllBackendInitializationData(Map.empty), + restart + ) { + override def createSubWorkflowPreparationActor(subWorkflowId: WorkflowId) = preparationActor.ref + override def createSubWorkflowActor(createSubWorkflowActor: EngineWorkflowDescriptor) = subWorkflowActor.ref + }), parentProbe.ref, s"SubWorkflowExecutionActorSpec-${UUID.randomUUID()}") + } + + it should "Check the sub workflow store when restarting" in { + val ewea = buildEWEA(restart = true) + ewea.setState(SubWorkflowPendingState) + + ewea ! Execute + subWorkflowStoreProbe.expectMsg(QuerySubWorkflow(parentWorkflowId, subKey)) + eventually { + ewea.stateName shouldBe SubWorkflowCheckingStoreState + } + } + + it should "Reuse sub workflow id if found in the store" in { + import cromwell.core.ExecutionIndex._ + + val ewea = buildEWEA(restart = true) + ewea.setState(SubWorkflowCheckingStoreState) + + val subWorkflowUuid = WorkflowId.randomId() + ewea ! SubWorkflowFound(SubWorkflowStoreEntry(Option(0), parentWorkflowId.toString, subKey.scope.fullyQualifiedName, subKey.index.fromIndex, subKey.attempt, subWorkflowUuid.toString, None)) + preparationActor.expectMsg(CallPreparation.Start) + parentProbe.expectMsg(JobStarting(subKey)) + + eventually { + ewea.stateName shouldBe SubWorkflowPreparingState + ewea.stateData.subWorkflowId shouldBe Some(subWorkflowUuid) + } + } + + it should "Fall back to a random Id if the sub workflow id is not found in the store" in { + val ewea = buildEWEA(restart = true) + ewea.setState(SubWorkflowCheckingStoreState) + + ewea ! 
SubWorkflowNotFound(QuerySubWorkflow(parentWorkflowId, subKey)) + preparationActor.expectMsg(CallPreparation.Start) + parentProbe.expectMsg(JobStarting(subKey)) + + eventually { + ewea.stateName shouldBe SubWorkflowPreparingState + ewea.stateData.subWorkflowId should not be empty + } + } + + it should "Prepare a sub workflow" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowPendingState) + + ewea ! Execute + preparationActor.expectMsg(CallPreparation.Start) + parentProbe.expectMsg(JobStarting(subKey)) + eventually { + ewea.stateName shouldBe SubWorkflowPreparingState + } + } + + it should "Run a sub workflow" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowPreparingState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + val subWorkflowId = WorkflowId.randomId() + val subBackendDescriptor = mock[BackendWorkflowDescriptor] + subBackendDescriptor.id returns subWorkflowId + val subWorkflowDescriptor = EngineWorkflowDescriptor( + mock[WdlNamespaceWithWorkflow], + subBackendDescriptor, + Map.empty, + ContinueWhilePossible, + List.empty, + CallCachingOff + ) + + ewea ! SubWorkflowPreparationSucceeded(subWorkflowDescriptor, Map.empty) + subWorkflowActor.expectMsg(WorkflowExecutionActor.ExecuteWorkflowCommand) + parentProbe.expectMsg(JobRunning(subKey, Map.empty, Option(subWorkflowActor.ref))) + eventually { + ewea.stateName shouldBe SubWorkflowRunningState + } + } + + it should "Fail a sub workflow if preparation failed" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowPreparingState) + deathWatch watch ewea + + val subWorkflowKey = mock[SubWorkflowKey] + val throwable: Exception = new Exception("Expected test exception") + val preparationFailedMessage: CallPreparationFailed = CallPreparationFailed(subWorkflowKey, throwable) + ewea ! 
preparationFailedMessage + parentProbe.expectMsg(SubWorkflowFailedResponse(subKey, Map.empty, throwable)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + + it should "Relay Workflow Successful message" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowRunningState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + deathWatch watch ewea + + val jobExecutionMap: JobExecutionMap = Map.empty + val outputs: CallOutputs = Map.empty[LocallyQualifiedName, JobOutput] + val workflowSuccessfulMessage = WorkflowExecutionSucceededResponse(jobExecutionMap, outputs) + ewea ! workflowSuccessfulMessage + parentProbe.expectMsg(SubWorkflowSucceededResponse(subKey, jobExecutionMap, outputs)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + + it should "Relay Workflow Failed message" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowRunningState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + deathWatch watch ewea + + val jobExecutionMap: JobExecutionMap = Map.empty + val expectedException: Exception = new Exception("Expected test exception") + + val workflowSuccessfulMessage = WorkflowExecutionFailedResponse(jobExecutionMap, expectedException) + ewea ! workflowSuccessfulMessage + parentProbe.expectMsg(SubWorkflowFailedResponse(subKey, jobExecutionMap, expectedException)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + + it should "Relay Workflow Aborted message" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowRunningState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + deathWatch watch ewea + + val jobExecutionMap: JobExecutionMap = Map.empty + val workflowAbortedMessage = WorkflowExecutionAbortedResponse(jobExecutionMap) + ewea ! 
workflowAbortedMessage + parentProbe.expectMsg(SubWorkflowAbortedResponse(subKey, jobExecutionMap)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala index 3d9ef1938..c60b31dae 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala @@ -3,21 +3,22 @@ package cromwell.engine.workflow.lifecycle.execution import akka.actor.{Actor, Props} import akka.testkit.{EventFilter, TestActorRef, TestDuration, TestProbe} import com.typesafe.config.ConfigFactory +import cromwell._ import cromwell.backend.AllBackendInitializationData -import cromwell.core.WorkflowId -import cromwell.engine.backend.{BackendConfigurationEntry, CromwellBackends} +import cromwell.core.{SimpleIoActor, WorkflowId} +import cromwell.engine.backend.{BackendConfigurationEntry, BackendSingletonCollection, CromwellBackends} import cromwell.engine.workflow.WorkflowDescriptorBuilder -import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.ExecuteWorkflowCommand +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{ExecuteWorkflowCommand, WorkflowExecutionFailedResponse} +import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import cromwell.services.ServiceRegistryActor import cromwell.services.metadata.MetadataService import cromwell.util.SampleWdl -import cromwell.{AlwaysHappyJobStoreActor, CromwellTestkitSpec, EmptyCallCacheReadActor, MetadataWatchActor} -import org.scalatest.BeforeAndAfter +import org.scalatest.{BeforeAndAfter, FlatSpecLike, Matchers} -import scala.concurrent.{Await, Promise} import scala.concurrent.duration._ +import scala.concurrent.{Await, Promise} 
-class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter with WorkflowDescriptorBuilder { +class WorkflowExecutionActorSpec extends CromwellTestKitSpec with FlatSpecLike with Matchers with BeforeAndAfter with WorkflowDescriptorBuilder { override implicit val actorSystem = system implicit val DefaultDuration = 20.seconds.dilated @@ -28,6 +29,9 @@ class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter } }) + val MockBackendName = "Mock" + val MockBackendSingletonCollection = BackendSingletonCollection(Map(MockBackendName -> None)) + val stubbedConfig = ConfigFactory.load().getConfig("backend.providers.Mock").getConfig("config") val runtimeSection = @@ -37,76 +41,97 @@ class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter |} """.stripMargin - "WorkflowExecutionActor" should { - "retry a job 2 times and succeed in the third attempt" in { - import MetadataWatchActor.metadataKeyAttemptChecker - val metadataSuccessPromise = Promise[Unit]() - val requiredMetadataMatchers: Seq[MetadataWatchActor.Matcher] = List( - MetadataWatchActor.JobKeyMetadataKeyAndValueContainStringMatcher(metadataKeyAttemptChecker(1), "executionStatus", "Preempted"), - MetadataWatchActor.JobKeyMetadataKeyAndValueContainStringMatcher(metadataKeyAttemptChecker(2), "executionStatus", "Preempted"), - MetadataWatchActor.JobKeyMetadataKeyAndValueContainStringMatcher(metadataKeyAttemptChecker(3), "executionStatus", "Done") - ) - val metadataWatcherProps = Props(MetadataWatchActor(metadataSuccessPromise, requiredMetadataMatchers: _*)) - val serviceRegistryActor = system.actorOf(ServiceRegistryActor.props(ConfigFactory.load(), overrides = Map(MetadataService.MetadataServiceName -> metadataWatcherProps))) - val jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props) - val MockBackendConfigEntry = BackendConfigurationEntry( - name = "Mock", - lifecycleActorFactoryClass = 
"cromwell.engine.backend.mock.RetryableBackendLifecycleActorFactory", - stubbedConfig - ) - CromwellBackends.initBackends(List(MockBackendConfigEntry)) - - val workflowId = WorkflowId.randomId() - val engineWorkflowDescriptor = createMaterializedEngineWorkflowDescriptor(workflowId, SampleWdl.HelloWorld.asWorkflowSources(runtime = runtimeSection)) - val callCacheReadActor = TestProbe() - - val workflowExecutionActor = system.actorOf( - WorkflowExecutionActor.props(workflowId, engineWorkflowDescriptor, serviceRegistryActor, jobStoreActor, - callCacheReadActor.ref, AllBackendInitializationData.empty, restarting = false), - "WorkflowExecutionActor") - - EventFilter.info(pattern = ".*Final Outputs", occurrences = 1).intercept { - EventFilter.info(pattern = "Starting calls: hello.hello", occurrences = 3).intercept { - workflowExecutionActor ! ExecuteWorkflowCommand + behavior of "WorkflowExecutionActor" + + it should "allow a backend to tell it to retry... up to a point" in { + import MetadataWatchActor.metadataKeyAttemptChecker + val metadataSuccessPromise = Promise[Unit]() + val requiredMetadataMatchers: Seq[MetadataWatchActor.Matcher] = List( + MetadataWatchActor.JobKeyMetadataKeyAndValueContainStringMatcher(metadataKeyAttemptChecker(1), "executionStatus", "RetryableFailure"), + MetadataWatchActor.JobKeyMetadataKeyAndValueContainStringMatcher(metadataKeyAttemptChecker(2), "executionStatus", "RetryableFailure"), + MetadataWatchActor.JobKeyMetadataKeyAndValueContainStringMatcher(metadataKeyAttemptChecker(3), "executionStatus", "Failed") + ) + val metadataWatcherActor = TestActorRef[MetadataWatchActor](Props(MetadataWatchActor(metadataSuccessPromise, requiredMetadataMatchers: _*))) + val serviceRegistryActor = system.actorOf(ServiceRegistryActor.props(ConfigFactory.load(), overrides = Map(MetadataService.MetadataServiceName -> metadataWatcherActor.props))) + val jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props) + val ioActor = 
system.actorOf(SimpleIoActor.props) + val subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) + val jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props) + val MockBackendConfigEntry = BackendConfigurationEntry( + name = "Mock", + lifecycleActorFactoryClass = "cromwell.engine.backend.mock.RetryableBackendLifecycleActorFactory", + stubbedConfig + ) + CromwellBackends.initBackends(List(MockBackendConfigEntry)) + + val workflowId = WorkflowId.randomId() + val engineWorkflowDescriptor = createMaterializedEngineWorkflowDescriptor(workflowId, SampleWdl.HelloWorld.asWorkflowSources(runtime = runtimeSection)) + val callCacheReadActor = TestProbe() + val callCacheWriteActor = TestProbe() + val dockerHashActor = TestProbe() + + val weaSupervisor = TestProbe() + val workflowExecutionActor = TestActorRef( + props = WorkflowExecutionActor.props(engineWorkflowDescriptor, ioActor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, + callCacheReadActor.ref, callCacheWriteActor.ref, dockerHashActor.ref, jobTokenDispenserActor, MockBackendSingletonCollection, AllBackendInitializationData.empty, restarting = false), + name = "WorkflowExecutionActor", + supervisor = weaSupervisor.ref) + + EventFilter.info(pattern = "Starting calls: wf_hello.hello", occurrences = 3).intercept { + workflowExecutionActor ! ExecuteWorkflowCommand + } + + weaSupervisor.expectMsgClass(classOf[WorkflowExecutionFailedResponse]) + + // Super-helpful debug in case the metadata watcher is still unhappy: + if(metadataWatcherActor.underlyingActor.unsatisfiedMatchers.nonEmpty) { + requiredMetadataMatchers foreach { matcher => + matcher.nearMissInformation.foreach { info => + System.out.println("A matcher had a near miss (it might still get a matching value later!): " + info.replace("\n", "...")) } } + } - // TODO: Yes, this might be slow... 
I'd advocate for refactoring away from the run-a-wdl style, but (shrug) - // (but in fact, this never really takes 2 minutes. That's just for safety) - Await.result(awaitable = metadataSuccessPromise.future, atMost = 2.minutes.dilated) + // TODO: Yes, this might be slow... I'd advocate for refactoring away from the run-a-wdl style, but (shrug) + // (but in fact, this never really takes 2 minutes. That's just for safety) + Await.result(awaitable = metadataSuccessPromise.future, atMost = 2.minutes.dilated) - system.stop(serviceRegistryActor) - } + system.stop(serviceRegistryActor) + } - "execute a workflow with scatters" in { - val serviceRegistry = mockServiceRegistryActor - val jobStore = system.actorOf(AlwaysHappyJobStoreActor.props) - val callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props) - - val MockBackendConfigEntry = BackendConfigurationEntry( - name = "Mock", - lifecycleActorFactoryClass = "cromwell.engine.backend.mock.DefaultBackendLifecycleActorFactory", - stubbedConfig - ) - CromwellBackends.initBackends(List(MockBackendConfigEntry)) - - val workflowId = WorkflowId.randomId() - val engineWorkflowDescriptor = createMaterializedEngineWorkflowDescriptor(workflowId, SampleWdl.SimpleScatterWdl.asWorkflowSources(runtime = runtimeSection)) - val workflowExecutionActor = system.actorOf( - WorkflowExecutionActor.props(workflowId, engineWorkflowDescriptor, serviceRegistry, jobStore, - callCacheReadActor, AllBackendInitializationData.empty, restarting = false), - "WorkflowExecutionActor") - - val scatterLog = "Starting calls: scatter0.inside_scatter:0:1, scatter0.inside_scatter:1:1, scatter0.inside_scatter:2:1, scatter0.inside_scatter:3:1, scatter0.inside_scatter:4:1" - - EventFilter.info(pattern = ".*Final Outputs", occurrences = 1).intercept { - EventFilter.info(pattern = scatterLog, occurrences = 1).intercept { - EventFilter.info(pattern = "Starting calls: scatter0.outside_scatter:NA:1", occurrences = 1).intercept { - workflowExecutionActor ! 
ExecuteWorkflowCommand - } + it should "execute a workflow with scatters" in { + val serviceRegistry = mockServiceRegistryActor + val jobStore = system.actorOf(AlwaysHappyJobStoreActor.props) + val subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) + val callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props) + val callCacheWriteActor = system.actorOf(EmptyCallCacheWriteActor.props) + val dockerHashActor = system.actorOf(EmptyDockerHashActor.props) + val ioActor = system.actorOf(SimpleIoActor.props) + val jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props) + + val MockBackendConfigEntry = BackendConfigurationEntry( + name = MockBackendName, + lifecycleActorFactoryClass = "cromwell.engine.backend.mock.DefaultBackendLifecycleActorFactory", + stubbedConfig + ) + CromwellBackends.initBackends(List(MockBackendConfigEntry)) + + val workflowId = WorkflowId.randomId() + val engineWorkflowDescriptor = createMaterializedEngineWorkflowDescriptor(workflowId, SampleWdl.SimpleScatterWdl.asWorkflowSources(runtime = runtimeSection)) + val workflowExecutionActor = system.actorOf( + WorkflowExecutionActor.props(engineWorkflowDescriptor, ioActor, serviceRegistry, jobStore, subWorkflowStoreActor, + callCacheReadActor, callCacheWriteActor, dockerHashActor, jobTokenDispenserActor, MockBackendSingletonCollection, AllBackendInitializationData.empty, restarting = false), + "WorkflowExecutionActor") + + val scatterLog = "Starting calls: scatter0.inside_scatter:0:1, scatter0.inside_scatter:1:1, scatter0.inside_scatter:2:1, scatter0.inside_scatter:3:1, scatter0.inside_scatter:4:1" + + EventFilter.info(pattern = ".*Final Outputs", occurrences = 1).intercept { + EventFilter.info(pattern = scatterLog, occurrences = 1).intercept { + EventFilter.info(pattern = "Starting calls: scatter0.outside_scatter:NA:1", occurrences = 1).intercept { + workflowExecutionActor ! 
ExecuteWorkflowCommand } } - system.stop(serviceRegistry) } + system.stop(serviceRegistry) } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActorSpec.scala new file mode 100644 index 000000000..801e5bcad --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheDiffActorSpec.scala @@ -0,0 +1,267 @@ +package cromwell.engine.workflow.lifecycle.execution.callcaching + +import akka.testkit.{ImplicitSender, TestFSMRef, TestProbe} +import cats.data.NonEmptyList +import cromwell.core.{TestKitSuite, WorkflowId} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActor._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffQueryParameter.CallCacheDiffQueryCall +import cromwell.services.metadata.MetadataService.{GetMetadataQueryAction, MetadataLookupResponse, MetadataServiceKeyLookupFailed} +import cromwell.services.metadata._ +import org.scalatest.concurrent.Eventually +import org.scalatest.{FlatSpecLike, Matchers} + +class CallCacheDiffActorSpec extends TestKitSuite with FlatSpecLike with Matchers with ImplicitSender with Eventually { + + behavior of "CallCacheDiffActor" + + val workflowIdA = WorkflowId.fromString("971652a6-139c-4ef3-96b5-aeb611a40dbf") + val workflowIdB = WorkflowId.fromString("bb85b3ec-e179-4f12-b90f-5191216da598") + + val callFqnA = "callFqnA" + val callFqnB = "callFqnB" + + val metadataJobKeyA = Option(MetadataJobKey(callFqnA, Option(1), 1)) + val metadataJobKeyB = Option(MetadataJobKey(callFqnB, None, 1)) + + val callA = CallCacheDiffQueryCall(workflowIdA.toString, callFqnA, Option(1)) + val callB = CallCacheDiffQueryCall(workflowIdB.toString, callFqnB, None) + + val queryA = MetadataQuery( + workflowIdA, + Option(MetadataQueryJobKey(callFqnA, Option(1), None)), + None, + 
Option(NonEmptyList.of("callCaching", "executionStatus")), + None, + expandSubWorkflows = false + ) + + val queryB = MetadataQuery( + workflowIdB, + Option(MetadataQueryJobKey(callFqnB, None, None)), + None, + Option(NonEmptyList.of("callCaching", "executionStatus")), + None, + expandSubWorkflows = false + ) + + val eventsA = List( + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "executionStatus"), MetadataValue("Done")), + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:allowResultReuse"), MetadataValue(true)), + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes: hash in only in A"), MetadataValue("hello")), + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes: hash in A and B with same value"), MetadataValue(1)), + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes: hash in A and B with different value"), MetadataValue("I'm the hash for A !")) + ) + + val eventsB = List( + MetadataEvent(MetadataKey(workflowIdB, metadataJobKeyB, "executionStatus"), MetadataValue("Failed")), + MetadataEvent(MetadataKey(workflowIdB, metadataJobKeyB, "callCaching:allowResultReuse"), MetadataValue(false)), + MetadataEvent(MetadataKey(workflowIdB, metadataJobKeyB, "callCaching:hashes: hash in only in B"), MetadataValue("hello")), + MetadataEvent(MetadataKey(workflowIdB, metadataJobKeyB, "callCaching:hashes: hash in A and B with same value"), MetadataValue(1)), + MetadataEvent(MetadataKey(workflowIdA, metadataJobKeyA, "callCaching:hashes: hash in A and B with different value"), MetadataValue("I'm the hash for B !")) + ) + + it should "send correct queries to MetadataService when receiving a CallCacheDiffRequest" in { + val mockServiceRegistryActor = TestProbe() + val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) + + actor ! 
CallCacheDiffQueryParameter(callA, callB) + + mockServiceRegistryActor.expectMsg(GetMetadataQueryAction(queryA)) + mockServiceRegistryActor.expectMsg(GetMetadataQueryAction(queryB)) + } + + it should "save response for callA and wait for callB" in { + val mockServiceRegistryActor = TestProbe() + val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) + + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, None, self)) + + val response = MetadataLookupResponse(queryA, eventsA) + actor ! response + + eventually { + actor.stateData shouldBe CallCacheDiffWithRequest(queryA, queryB, Some(response), None, self) + actor.stateName shouldBe WaitingForMetadata + } + } + + it should "save response for callB and wait for callA" in { + val mockServiceRegistryActor = TestProbe() + val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) + + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, None, self)) + + val response = MetadataLookupResponse(queryB, eventsB) + actor ! response + + eventually { + actor.stateData shouldBe CallCacheDiffWithRequest(queryA, queryB, None, Some(response), self) + actor.stateName shouldBe WaitingForMetadata + } + } + + it should "build the response when receiving response for A and already has B" in { + val mockServiceRegistryActor = TestProbe() + val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) + watch(actor) + val responseB = MetadataLookupResponse(queryB, eventsB) + + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, Option(responseB), self)) + + actor ! 
MetadataLookupResponse(queryA, eventsA) + + expectMsgClass(classOf[CallCacheDiffActorResponse]) + expectTerminated(actor) + } + + it should "build the response when receiving response for B and already has A" in { + val mockServiceRegistryActor = TestProbe() + val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) + watch(actor) + val responseA = MetadataLookupResponse(queryA, eventsA) + + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, Option(responseA), None, self)) + + actor ! MetadataLookupResponse(queryB, eventsB) + + expectMsgClass(classOf[CallCacheDiffActorResponse]) + expectTerminated(actor) + } + + it should "build a correct response" in { + import cromwell.services.metadata.MetadataService.MetadataLookupResponse + import spray.json._ + + val mockServiceRegistryActor = TestProbe() + val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) + watch(actor) + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, None, self)) + + actor ! MetadataLookupResponse(queryB, eventsB) + actor ! MetadataLookupResponse(queryA, eventsA) + + val expectedJson: JsObject = + s""" + |{ + | "callA":{ + | "executionStatus": "Done", + | "allowResultReuse": true, + | "callFqn": "callFqnA", + | "jobIndex": 1, + | "workflowId": "971652a6-139c-4ef3-96b5-aeb611a40dbf" + | }, + | "callB":{ + | "executionStatus": "Failed", + | "allowResultReuse": false, + | "callFqn": "callFqnB", + | "jobIndex": -1, + | "workflowId": "bb85b3ec-e179-4f12-b90f-5191216da598" + | }, + | "hashDifferential":[ + | { + | "hashKey": "hash in only in A", + | "callA":"hello", + | "callB":null + | }, + | { + | "hashKey": "hash in A and B with different value", + | "callA":"I'm the hash for A !", + | "callB":"I'm the hash for B !" 
+ | }, + | { + | "hashKey": "hash in only in B", + | "callA":null, + | "callB":"hello" + | } + | ] + |} + """.stripMargin.parseJson.asJsObject + + val expectedResponse = BuiltCallCacheDiffResponse(expectedJson) + + expectMsg(expectedResponse) + expectTerminated(actor) + } + + it should "fail properly" in { + import scala.concurrent.duration._ + import scala.language.postfixOps + + val mockServiceRegistryActor = TestProbe() + val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) + watch(actor) + val exception = new Exception("Query lookup failed - but it's ok ! this is a test !") + val responseA = MetadataServiceKeyLookupFailed(queryA, exception) + + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, None, self)) + + actor ! responseA + + expectMsgPF(1 second) { + case FailedCallCacheDiffResponse(e: Throwable) => + e.getMessage shouldBe "Query lookup failed - but it's ok ! this is a test !" + } + + expectTerminated(actor) + } + + it should "Respond with an appropriate message if hashes are missing" in { + import scala.concurrent.duration._ + import scala.language.postfixOps + + val mockServiceRegistryActor = TestProbe() + val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) + watch(actor) + val responseB = MetadataLookupResponse(queryB, eventsB.filterNot(_.key.key.contains("hashes"))) + + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, Option(responseB), self)) + + actor ! MetadataLookupResponse(queryA, eventsA.filterNot(_.key.key.contains("hashes"))) + + expectMsgPF(1 second) { + case FailedCallCacheDiffResponse(e) => + e.getMessage shouldBe "callA and callB have not finished yet, or were run on a previous version of Cromwell on which this endpoint was not supported." 
+ } + expectTerminated(actor) + } + + it should "Respond with CachedCallNotFoundException if a call is missing" in { + import scala.concurrent.duration._ + import scala.language.postfixOps + + val mockServiceRegistryActor = TestProbe() + val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) + watch(actor) + val responseB = MetadataLookupResponse(queryB, eventsB.filterNot(_.key.key.contains("hashes"))) + + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, Option(responseB), self)) + + actor ! MetadataLookupResponse(queryA, List.empty) + + expectMsgPF(1 second) { + case FailedCallCacheDiffResponse(e) => + e.getMessage shouldBe "Cannot find call 971652a6-139c-4ef3-96b5-aeb611a40dbf:callFqnA:1" + } + expectTerminated(actor) + } + + it should "Respond with CachedCallNotFoundException if both calls are missing" in { + import scala.concurrent.duration._ + import scala.language.postfixOps + + val mockServiceRegistryActor = TestProbe() + val actor = TestFSMRef(new CallCacheDiffActor(mockServiceRegistryActor.ref)) + watch(actor) + val responseB = MetadataLookupResponse(queryB, List.empty) + + actor.setState(WaitingForMetadata, CallCacheDiffWithRequest(queryA, queryB, None, Option(responseB), self)) + + actor ! 
MetadataLookupResponse(queryA, List.empty) + + expectMsgPF(1 second) { + case FailedCallCacheDiffResponse(e) => + e.getMessage shouldBe "Cannot find calls 971652a6-139c-4ef3-96b5-aeb611a40dbf:callFqnA:1, bb85b3ec-e179-4f12-b90f-5191216da598:callFqnB:-1" + } + expectTerminated(actor) + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheHashingJobActorDataSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheHashingJobActorDataSpec.scala new file mode 100644 index 000000000..1f1c874f1 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheHashingJobActorDataSpec.scala @@ -0,0 +1,92 @@ +package cromwell.engine.workflow.lifecycle.execution.callcaching + +import cats.data.NonEmptyList +import cromwell.backend._ +import cromwell.backend.standard.callcaching.StandardFileHashingActor.SingleFileHashRequest +import cromwell.core.TestKitSuite +import cromwell.core.callcaching._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheHashingJobActor.{CallCacheHashingJobActorData, CompleteFileHashingResult, NoFileHashesResult, PartialFileHashingResult} +import org.scalatest.concurrent.Eventually +import org.scalatest.prop.TableDrivenPropertyChecks +import org.scalatest.{FlatSpecLike, Matchers} + +class CallCacheHashingJobActorDataSpec extends TestKitSuite with FlatSpecLike with BackendSpec with Matchers with Eventually with TableDrivenPropertyChecks { + behavior of "CallCacheReadingJobActorData" + + val fileHash1 = HashResult(HashKey("key"), HashValue("value")) + val fileHash2 = HashResult(HashKey("key2"), HashValue("value2")) + val fileHash3 = HashResult(HashKey("key3"), HashValue("value3")) + val fileHashRequest1 = SingleFileHashRequest(null, fileHash1.hashKey, null, null) + val fileHashRequest2 = SingleFileHashRequest(null, fileHash2.hashKey, null, null) + val fileHashRequest3 = 
SingleFileHashRequest(null, fileHash3.hashKey, null, null) + + val testCases = Table( + ("dataBefore", "dataAfter", "result"), + // No fileHashRequestsRemaining + ( + CallCacheHashingJobActorData( + List.empty, List.empty, None + ), + CallCacheHashingJobActorData( + List.empty, List(fileHash1), None + ), + Option(NoFileHashesResult) + ), + // Last fileHashRequestsRemaining + ( + CallCacheHashingJobActorData( + List(List(fileHashRequest1)), List.empty, None + ), + CallCacheHashingJobActorData( + List.empty, List(fileHash1), None + ), + Option(CompleteFileHashingResult(Set(fileHash1), "6A02F950958AEDA3DBBF83FBB306A030")) + ), + // Last batch and not last value + ( + CallCacheHashingJobActorData( + List(List(fileHashRequest1, fileHashRequest2)), List.empty, None + ), + CallCacheHashingJobActorData( + List(List(fileHashRequest2)), List(fileHash1), None + ), + None + ), + // Not last batch but last value of this batch + ( + CallCacheHashingJobActorData( + List(List(fileHashRequest1), List(fileHashRequest2)), List.empty, None + ), + CallCacheHashingJobActorData( + List(List(fileHashRequest2)), List(fileHash1), None + ), + Option(PartialFileHashingResult(NonEmptyList.of(fileHash1))) + ), + // Not last batch and not last value of this batch + ( + CallCacheHashingJobActorData( + List(List(fileHashRequest1, fileHashRequest2), List(fileHashRequest3)), List.empty, None + ), + CallCacheHashingJobActorData( + List(List(fileHashRequest2), List(fileHashRequest3)), List(fileHash1), None + ), + None + ), + // Makes sure new hash is added at the front of the list + ( + CallCacheHashingJobActorData( + List(List(fileHashRequest1, fileHashRequest2), List(fileHashRequest3)), List(fileHash2), None + ), + CallCacheHashingJobActorData( + List(List(fileHashRequest2), List(fileHashRequest3)), List(fileHash1, fileHash2), None + ), + None + ) + ) + + it should "process new file hashes" in { + forAll(testCases) { case ((oldData, newData, result)) => + oldData.withFileHash(fileHash1) shouldBe 
(newData -> result) + } + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheHashingJobActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheHashingJobActorSpec.scala new file mode 100644 index 000000000..a3ea45458 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheHashingJobActorSpec.scala @@ -0,0 +1,313 @@ +package cromwell.engine.workflow.lifecycle.execution.callcaching + +import akka.actor.{ActorRef, Props} +import akka.testkit.{TestFSMRef, TestProbe} +import cats.data.NonEmptyList +import cromwell.backend._ +import cromwell.backend.standard.callcaching.StandardFileHashingActor.{FileHashResponse, SingleFileHashRequest} +import cromwell.core.callcaching.{HashingFailedMessage, _} +import cromwell.core.{LocallyQualifiedName, TestKitSuite} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheHashingJobActor.{CCHJAFileHashResponse, CallCacheHashingJobActorData, CompleteFileHashingResult, HashingFiles, InitialHashingResult, NextBatchOfFileHashesRequest, NoFileHashesResult, PartialFileHashingResult, WaitingForHashFileRequest} +import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CacheMiss +import org.mockito.Mockito._ +import org.scalatest.concurrent.Eventually +import org.scalatest.prop.TableDrivenPropertyChecks +import org.scalatest.{FlatSpecLike, Matchers} +import wdl4s.wdl.values.{WdlFile, WdlInteger, WdlString, WdlValue} +import wdl4s.wdl.{WdlTask, WdlTaskCall} + +class CallCacheHashingJobActorSpec extends TestKitSuite with FlatSpecLike with BackendSpec with Matchers with Eventually with TableDrivenPropertyChecks { + behavior of "CallCacheReadingJobActor" + + def templateJobDescriptor(inputs: Map[LocallyQualifiedName, WdlValue] = Map.empty) = { + val task = mock[WdlTask] + val call = mock[WdlTaskCall] + 
when(task.commandTemplateString).thenReturn("Do the stuff... now!!") + when(task.outputs).thenReturn(List.empty) + when(call.task).thenReturn(task) + val workflowDescriptor = mock[BackendWorkflowDescriptor] + val runtimeAttributes = Map( + "cpu" -> WdlInteger(1), + "memory" -> WdlString("3 GB"), + "continueOnReturnCode" -> WdlInteger(0), + "docker" -> WdlString("ubuntu:latest") + ) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, None, 1), runtimeAttributes, fqnMapToDeclarationMap(inputs), NoDocker, Map.empty) + jobDescriptor + } + + it should "die immediately if created without cache read actor and write to cache turned off" in { + val parent = TestProbe() + val testActor = TestFSMRef(new CallCacheHashingJobActor( + templateJobDescriptor(), + None, + None, + Set.empty, + "backedName", + Props.empty, + false, + DockerWithHash("ubuntu@sha256:blablablba") + ), parent.ref) + watch(testActor) + expectTerminated(testActor) + parent.expectMsgClass(classOf[InitialHashingResult]) + parent.expectMsg(CacheMiss) + } + + it should "send a correct InitialHashingResult upon starting" in { + val parent = TestProbe() + val inputs = Map( + "stringInput" -> WdlString("hello"), + "fileInput" -> WdlFile("world") + ) + // Do not include "memory" on purpose, even though it's in the map of runtime attributes. + // This way we can verify that only attributes with a RuntimeAttributeDefinition are used for hashing + // Vice versa include a "failOnStderr" definition even though it's not in the map. 
+ // This ensures that we still record the fact that there was no failOnStderr attribute with a "N/A" value + val runtimeAttributeDefinitions = Set( + RuntimeAttributeDefinition("docker", None, usedInCallCaching = true), + RuntimeAttributeDefinition("failOnStderr", None, usedInCallCaching = true), + RuntimeAttributeDefinition("continueOnReturnCode", Option(WdlInteger(0)), usedInCallCaching = true), + RuntimeAttributeDefinition("cpu", None, usedInCallCaching = false) + ) + val callCacheRead = TestProbe() + val jobDescriptor: BackendJobDescriptor = templateJobDescriptor(inputs) + val actorUnderTest = TestFSMRef(new CallCacheHashingJobActor( + jobDescriptor, + Option(callCacheRead.ref), + None, + runtimeAttributeDefinitions, + "backedName", + Props.empty, + true, + DockerWithHash("ubuntu@sha256:blablablba") + ), parent.ref) + + val expectedInitialHashes = Set( + // md5 of Do the stuff... now + HashResult(HashKey("command template"), HashValue("2259B15D9120F50C1BD4B2A3E2CE5A0E")), + // md5 of backendName + HashResult(HashKey("backend name"), HashValue("DC3D1A5AB4B8064660ADE07FFDECBFFE")), + // md5 of 2 + HashResult(HashKey("input count"), HashValue("C81E728D9D4C2F636F067F89CC14862C")), + // md5 of 0 + HashResult(HashKey("output count"), HashValue("CFCD208495D565EF66E7DFF9F98764DA")), + HashResult(HashKey("runtime attribute", "failOnStderr"), HashValue("N/A")), + // md5 of 1 + HashResult(HashKey(checkForHitOrMiss = false, "runtime attribute", "cpu"), HashValue("C4CA4238A0B923820DCC509A6F75849B")), + // md5 of 0 + HashResult(HashKey("runtime attribute", "continueOnReturnCode"), HashValue("CFCD208495D565EF66E7DFF9F98764DA")), + // md5 of "hello" (with quotes) + HashResult(HashKey("input", "String stringInput"), HashValue("5DEAEE1C1332199E5B5BC7C5E4F7F0C2")), + // md5 of ubuntu@sha256:blablablba - make sure we use the dockerWithHash and not the docker runtime attribute + HashResult(HashKey("runtime attribute", "docker"), HashValue("C811916EA68009B0EFE0A3A86D73280E")) + ) + 
val expectedAggregatedInitialHash = "F1A7118BED69B5A976A17C83FBA0D9D6" + val expectedInitialHashResult = InitialHashingResult(expectedInitialHashes, expectedAggregatedInitialHash) + parent.expectMsg(expectedInitialHashResult) + callCacheRead.expectMsg(expectedInitialHashResult) + actorUnderTest.stateName shouldBe WaitingForHashFileRequest + actorUnderTest.stateData shouldBe CallCacheHashingJobActorData( + List(SingleFileHashRequest(jobDescriptor.key, HashKey("input", "File fileInput"), WdlFile("world"), None)), + Option(callCacheRead.ref) + ) + } + + def makeCCHJA(callCacheReader: Option[ActorRef], + testFileHashingActor: ActorRef, + parent: ActorRef, + writeToCache: Boolean = true, + addFileHashMockResult: Option[(CallCacheHashingJobActorData, Option[CCHJAFileHashResponse])] = None) = { + TestFSMRef(new CallCacheHashingJobActor( + templateJobDescriptor(), + callCacheReader, + None, + Set.empty, + "backend", + Props.empty, + writeToCache = writeToCache, + DockerWithHash("ubuntu@256:blablabla") + ) { + override def makeFileHashingActor() = testFileHashingActor + override def addFileHash(hashResult: HashResult, data: CallCacheHashingJobActorData) = { + addFileHashMockResult.getOrElse(super.addFileHash(hashResult, data)) + } + }, parent) + } + + it should "send hash file requests when receiving a NextBatchOfFileHashesRequest" in { + val callCacheReadProbe = TestProbe() + val fileHashingActor = TestProbe() + + val cchja = makeCCHJA(Option(callCacheReadProbe.ref), fileHashingActor.ref, TestProbe().ref) + + val fileHashRequest1 = SingleFileHashRequest(null, null, null, null) + val fileHashRequest2 = SingleFileHashRequest(null, null, null, null) + cchja.setState( + WaitingForHashFileRequest, + CallCacheHashingJobActorData(List(List(fileHashRequest1, fileHashRequest2)), List.empty, None) + ) + + cchja ! 
NextBatchOfFileHashesRequest + + fileHashingActor.expectMsg(fileHashRequest1) + fileHashingActor.expectMsg(fileHashRequest2) + cchja.stateName shouldBe HashingFiles + } + + it should "send NoFileHashesResult and stop if there are no input files" in { + val parent = TestProbe() + val callCacheReadProbe = TestProbe() + + val cchja = makeCCHJA(Option(callCacheReadProbe.ref), TestProbe().ref, parent.ref) + parent.watch(cchja) + + cchja.setState( + WaitingForHashFileRequest, + CallCacheHashingJobActorData(List.empty, List.empty, Option(callCacheReadProbe.ref)) + ) + + cchja ! NextBatchOfFileHashesRequest + + callCacheReadProbe.expectMsgClass(classOf[InitialHashingResult]) + parent.expectMsgClass(classOf[InitialHashingResult]) + + callCacheReadProbe.expectMsg(NoFileHashesResult) + parent.expectMsg(NoFileHashesResult) + parent.expectTerminated(cchja) + } + + it should "send the PartialFileHashingResult when a batch is complete" in { + val callCacheReadProbe = TestProbe() + val hashResults = NonEmptyList.of(mock[HashResult]) + + val result: PartialFileHashingResult = PartialFileHashingResult(hashResults) + val newData: CallCacheHashingJobActorData = CallCacheHashingJobActorData(List.empty, List.empty, Option(callCacheReadProbe.ref)) + + val cchja = makeCCHJA(Option(callCacheReadProbe.ref), TestProbe().ref, TestProbe().ref, writeToCache = true, Option(newData -> Option(result))) + + cchja.setState(HashingFiles) + + cchja ! 
FileHashResponse(mock[HashResult]) + + callCacheReadProbe.expectMsgClass(classOf[InitialHashingResult]) + callCacheReadProbe.expectMsg(result) + cchja.stateName shouldBe WaitingForHashFileRequest + cchja.stateData shouldBe newData + } + + it should "send itself a NextBatchOfFileHashesRequest when a batch is complete and there is no CCReader" in { + val fileHashingActor = TestProbe() + val result: PartialFileHashingResult = PartialFileHashingResult(NonEmptyList.of(mock[HashResult])) + val fileHashRequest = SingleFileHashRequest(null, null, null, null) + val newData = CallCacheHashingJobActorData(List(List(fileHashRequest)), List.empty, None) + // still gives a CCReader when instantiating the actor, but not in the data (above) + // This ensures the check is done with the data and not the actor attribute, as the data will change if the ccreader dies but the actor attribute + // will stay Some(...) + val cchja = makeCCHJA(Option(TestProbe().ref), fileHashingActor.ref, TestProbe().ref, writeToCache = true, Option(newData -> Option(result))) + watch(cchja) + cchja.setState(HashingFiles) + + cchja ! 
FileHashResponse(mock[HashResult]) + + // This proves that the ccjha keeps hashing files even though there is no ccreader requesting more hashes + fileHashingActor.expectMsg(fileHashRequest) + cchja.stateName shouldBe HashingFiles + } + + it should "send FinalFileHashingResult to parent and CCReader and die" in { + val parent = TestProbe() + val callCacheReadProbe = TestProbe() + List(CompleteFileHashingResult(Set(mock[HashResult]), "AggregatedFileHash"), NoFileHashesResult) foreach { result => + val newData = CallCacheHashingJobActorData(List.empty, List.empty, Option(callCacheReadProbe.ref)) + val cchja = makeCCHJA(Option(callCacheReadProbe.ref), TestProbe().ref, parent.ref, writeToCache = true, Option(newData -> Option(result))) + + parent.expectMsgClass(classOf[InitialHashingResult]) + callCacheReadProbe.expectMsgClass(classOf[InitialHashingResult]) + + parent.watch(cchja) + cchja.setState(HashingFiles) + + cchja ! FileHashResponse(mock[HashResult]) + + callCacheReadProbe.expectMsg(result) + parent.expectMsg(result) + parent.expectTerminated(cchja) + } + } + + it should "wait for next file hash if the batch is not complete yet" in { + val callCacheReadProbe = TestProbe() + val parent = TestProbe() + val newData: CallCacheHashingJobActorData = CallCacheHashingJobActorData(List.empty, List.empty, Option(callCacheReadProbe.ref)) + val cchja = makeCCHJA(Option(callCacheReadProbe.ref), TestProbe().ref, parent.ref, writeToCache = true, Option(newData -> None)) + + parent.expectMsgClass(classOf[InitialHashingResult]) + callCacheReadProbe.expectMsgClass(classOf[InitialHashingResult]) + + cchja.setState(HashingFiles) + + cchja ! 
FileHashResponse(mock[HashResult]) + + callCacheReadProbe.expectNoMsg() + parent.expectNoMsg() + cchja.stateName shouldBe HashingFiles + } + + it should "stop if the read actor dies and writeToCache is off" in { + val callCacheReadProbe = TestProbe() + val parent = TestProbe() + val cchja = makeCCHJA(Option(callCacheReadProbe.ref), TestProbe().ref, parent.ref, writeToCache = false) + parent.watch(cchja) + system stop callCacheReadProbe.ref + parent.expectMsgClass(classOf[InitialHashingResult]) + parent.expectTerminated(cchja) + } + + it should "keep going if the read actor dies and writeToCache is on" in { + val callCacheReadProbe = TestProbe() + val parent = TestProbe() + val fileHasher = TestProbe() + val cchja = makeCCHJA(Option(callCacheReadProbe.ref), fileHasher.ref, parent.ref) + parent.expectMsgClass(classOf[InitialHashingResult]) + + val hashKey = HashKey("file") + val fileHashRequest: SingleFileHashRequest = SingleFileHashRequest(null, hashKey, null, null) + val data = CallCacheHashingJobActorData(List(List(fileHashRequest)), List.empty, Option(callCacheReadProbe.ref)) + + cchja.setState(WaitingForHashFileRequest, data) + + system stop callCacheReadProbe.ref + fileHasher.expectMsg(fileHashRequest) + val result: HashResult = HashResult(hashKey, HashValue("fileHash")) + fileHasher.reply(FileHashResponse(result)) + + parent.expectMsg(CompleteFileHashingResult(Set(result), "45F27DD26834DBACBB05BBB1D651F5D1")) + } + + it should "propagate HashingFailedMessage errors and die" in { + val callCacheReadProbe = TestProbe() + val parent = TestProbe() + val cchja = makeCCHJA(Option(callCacheReadProbe.ref), TestProbe().ref, parent.ref) + parent.watch(cchja) + cchja.setState(WaitingForHashFileRequest) + parent.expectMsgClass(classOf[InitialHashingResult]) + callCacheReadProbe.expectMsgClass(classOf[InitialHashingResult]) + + val hashFailed = HashingFailedMessage("fileName", new Exception("Hashing failed ! - part of test flow")) + cchja ! 
hashFailed + parent.expectMsg(hashFailed) + callCacheReadProbe.expectMsg(hashFailed) + parent.expectTerminated(cchja) + } + + it should "run properly when writeToCache is ON and there is no CCRead actor" in { + val parent = TestProbe() + val cchja = makeCCHJA(None, TestProbe().ref, parent.ref) + parent.watch(cchja) + + parent.expectMsgClass(classOf[InitialHashingResult]) + parent.expectMsg(NoFileHashesResult) + parent.expectTerminated(cchja) + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheReadingJobActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheReadingJobActorSpec.scala new file mode 100644 index 000000000..195e8a802 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheReadingJobActorSpec.scala @@ -0,0 +1,219 @@ +package cromwell.engine.workflow.lifecycle.execution.callcaching + +import akka.testkit.{TestFSMRef, TestProbe} +import cats.data.NonEmptyList +import cromwell.core.TestKitSuite +import cromwell.core.callcaching.{HashKey, HashResult, HashValue, HashingFailedMessage} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheHashingJobActor.{CompleteFileHashingResult, InitialHashingResult, NextBatchOfFileHashesRequest, NoFileHashesResult, PartialFileHashingResult} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadingJobActor.{CCRJAWithData, WaitingForCacheHitOrMiss, _} +import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CacheMiss, HashError} +import org.scalatest.concurrent.Eventually +import org.scalatest.{FlatSpecLike, Matchers} + +class CallCacheReadingJobActorSpec extends TestKitSuite with FlatSpecLike with Matchers with Eventually { + behavior of "CallCacheReadingJobActor" + + it should "try to match initial 
hashes against DB" in { + val callCacheReadProbe = TestProbe() + val callCacheHasingActor = TestProbe() + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref)) + actorUnderTest.stateName shouldBe WaitingForInitialHash + + // The actual hashes don't matter here, we only care about the aggregated hash + val aggregatedInitialhash: String = "AggregatedInitialHash" + callCacheHasingActor.send(actorUnderTest, InitialHashingResult(Set.empty, aggregatedInitialhash)) + callCacheReadProbe.expectMsg(HasMatchingInitialHashLookup(aggregatedInitialhash)) + eventually { + actorUnderTest.stateName shouldBe WaitingForHashCheck + actorUnderTest.stateData shouldBe CCRJAWithData(callCacheHasingActor.ref, aggregatedInitialhash, None, 1) + } + } + + it should "ask for file hashes if it found matching entries for initial aggregated hash" in { + val callCacheReadProbe = TestProbe() + val callCacheHashingActor = TestProbe() + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref)) + actorUnderTest.setState(WaitingForHashCheck, CCRJAWithData(callCacheHashingActor.ref, "AggregatedInitialHash", None, 1)) + + callCacheReadProbe.send(actorUnderTest, HasMatchingEntries) + callCacheHashingActor.expectMsg(NextBatchOfFileHashesRequest) + eventually { + actorUnderTest.stateName shouldBe WaitingForFileHashes + } + } + + it should "cache miss and die if it didn't find any matching entries for initial aggregated hash" in { + val callCacheReadProbe = TestProbe() + val callCacheHashingActor = TestProbe() + val parent = TestProbe() + + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref), parent.ref) + parent.watch(actorUnderTest) + + actorUnderTest.setState(WaitingForHashCheck, CCRJAWithData(callCacheHashingActor.ref, "AggregatedInitialHash", None, 1)) + + callCacheReadProbe.send(actorUnderTest, NoMatchingEntries) + parent.expectMsg(CacheMiss) + parent.expectTerminated(actorUnderTest) + } + + it should "try to 
match partial file hashes against DB" in { + val callCacheReadProbe = TestProbe() + val callCacheHashingActor = TestProbe() + + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref), TestProbe().ref) + + actorUnderTest.setState(WaitingForFileHashes) + + val fileHashes = NonEmptyList.of(HashResult(HashKey("f1"), HashValue("h1")), HashResult(HashKey("f2"), HashValue("h2"))) + callCacheHashingActor.send(actorUnderTest, PartialFileHashingResult(fileHashes)) + callCacheReadProbe.expectMsg(HasMatchingInputFilesHashLookup(fileHashes)) + + eventually { + actorUnderTest.stateName shouldBe WaitingForHashCheck + } + } + + it should "ask for matching cache entries for both aggregated hashes when got both" in { + val callCacheReadProbe = TestProbe() + val callCacheHashingActor = TestProbe() + + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref), TestProbe().ref) + + val aggregatedInitialHash: String = "AggregatedInitialHash" + val aggregatedFileHash: String = "AggregatedFileHash" + actorUnderTest.setState(WaitingForFileHashes, CCRJAWithData(callCacheHashingActor.ref, aggregatedInitialHash, None, 1)) + + val fileHashes = Set(HashResult(HashKey("f1"), HashValue("h1")), HashResult(HashKey("f2"), HashValue("h2"))) + callCacheHashingActor.send(actorUnderTest, CompleteFileHashingResult(fileHashes, aggregatedFileHash)) + callCacheReadProbe.expectMsg(CacheLookupRequest(AggregatedCallHashes(aggregatedInitialHash, aggregatedFileHash), 1)) + + eventually { + actorUnderTest.stateName shouldBe WaitingForCacheHitOrMiss + actorUnderTest.stateData shouldBe CCRJAWithData(callCacheHashingActor.ref, aggregatedInitialHash, Some(aggregatedFileHash), 1) + } + } + + it should "ask for matching cache entries for initial hashes when there is no file input" in { + val callCacheReadProbe = TestProbe() + val callCacheHashingActor = TestProbe() + + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref), 
TestProbe().ref) + + val aggregatedInitialHash: String = "AggregatedInitialHash" + actorUnderTest.setState(WaitingForFileHashes, CCRJAWithData(callCacheHashingActor.ref, aggregatedInitialHash, None, 1)) + + callCacheHashingActor.send(actorUnderTest, NoFileHashesResult) + callCacheReadProbe.expectMsg(CacheLookupRequest(AggregatedCallHashes(aggregatedInitialHash, None), 1)) + + eventually { + actorUnderTest.stateName shouldBe WaitingForCacheHitOrMiss + actorUnderTest.stateData shouldBe CCRJAWithData(callCacheHashingActor.ref, aggregatedInitialHash, None, 1) + } + } + + it should "reply with next hit when cache hit is successful" in { + val callCacheReadProbe = TestProbe() + val callCacheHashingActor = TestProbe() + val parent = TestProbe() + + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref), parent.ref) + + val aggregatedInitialHash: String = "AggregatedInitialHash" + actorUnderTest.setState(WaitingForCacheHitOrMiss, CCRJAWithData(callCacheHashingActor.ref, aggregatedInitialHash, None, 1)) + + val id: CallCachingEntryId = CallCachingEntryId(8) + callCacheReadProbe.send(actorUnderTest, CacheLookupNextHit(id)) + parent.expectMsg(CacheHit(id)) + + eventually { + actorUnderTest.stateName shouldBe WaitingForCacheHitOrMiss + actorUnderTest.stateData shouldBe CCRJAWithData(callCacheHashingActor.ref, aggregatedInitialHash, None, 2) + } + } + + it should "reply with cache miss if there's no hit" in { + val callCacheReadProbe = TestProbe() + val callCacheHashingActor = TestProbe() + val parent = TestProbe() + + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref), parent.ref) + parent.watch(actorUnderTest) + + val aggregatedInitialHash: String = "AggregatedInitialHash" + actorUnderTest.setState(WaitingForCacheHitOrMiss, CCRJAWithData(callCacheHashingActor.ref, aggregatedInitialHash, None, 1)) + + callCacheReadProbe.send(actorUnderTest, CacheLookupNoHit) + parent.expectMsg(CacheMiss) + + 
parent.expectTerminated(actorUnderTest) + } + + it should "ask callCacheReadActor for next hit when requested (initial hash only)" in { + val callCacheReadProbe = TestProbe() + val callCacheHashingActor = TestProbe() + + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref), TestProbe().ref) + + val aggregatedInitialHash: String = "AggregatedInitialHash" + actorUnderTest.setState(WaitingForCacheHitOrMiss, CCRJAWithData(callCacheHashingActor.ref, aggregatedInitialHash, None, 2)) + + actorUnderTest ! NextHit + callCacheReadProbe.expectMsg(CacheLookupRequest(AggregatedCallHashes(aggregatedInitialHash, None), 2)) + + actorUnderTest.stateName shouldBe WaitingForCacheHitOrMiss + } + + it should "ask callCacheReadActor for next hit when requested (with file hash)" in { + val callCacheReadProbe = TestProbe() + val callCacheHashingActor = TestProbe() + + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref), TestProbe().ref) + + val aggregatedInitialHash: String = "AggregatedInitialHash" + val aggregatedFileHash: String = "AggregatedFileHash" + actorUnderTest.setState(WaitingForCacheHitOrMiss, CCRJAWithData(callCacheHashingActor.ref, aggregatedInitialHash, Option(aggregatedFileHash), 2)) + + actorUnderTest ! 
NextHit + callCacheReadProbe.expectMsg(CacheLookupRequest(AggregatedCallHashes(aggregatedInitialHash, Option(aggregatedFileHash)), 2)) + + actorUnderTest.stateName shouldBe WaitingForCacheHitOrMiss + } + + it should "reply with cache miss if there's a hash failure" in { + val callCacheReadProbe = TestProbe() + val callCacheHashingActor = TestProbe() + val parent = TestProbe() + + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref), parent.ref) + parent.watch(actorUnderTest) + + val aggregatedInitialHash: String = "AggregatedInitialHash" + actorUnderTest.setState(WaitingForCacheHitOrMiss, CCRJAWithData(callCacheHashingActor.ref, aggregatedInitialHash, None, 1)) + + callCacheHashingActor.send(actorUnderTest, HashingFailedMessage("file", new Exception("Hashing failed"))) + parent.expectMsg(CacheMiss) + + parent.expectTerminated(actorUnderTest) + } + + it should "reply with cache miss if there's a lookup failure" in { + val callCacheReadProbe = TestProbe() + val callCacheHashingActor = TestProbe() + val parent = TestProbe() + + val actorUnderTest = TestFSMRef(new CallCacheReadingJobActor(callCacheReadProbe.ref), parent.ref) + parent.watch(actorUnderTest) + + val aggregatedInitialHash: String = "AggregatedInitialHash" + actorUnderTest.setState(WaitingForCacheHitOrMiss, CCRJAWithData(callCacheHashingActor.ref, aggregatedInitialHash, None, 1)) + + val reason: Exception = new Exception("Lookup failed") + callCacheHashingActor.send(actorUnderTest, CacheResultLookupFailure(reason)) + parent.expectMsg(HashError(reason)) + parent.expectMsg(CacheMiss) + + parent.expectTerminated(actorUnderTest) + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCachingSlickDatabaseSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCachingSlickDatabaseSpec.scala new file mode 100644 index 000000000..72ea41700 --- /dev/null +++ 
b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCachingSlickDatabaseSpec.scala @@ -0,0 +1,82 @@ +package cromwell.engine.workflow.lifecycle.execution.callcaching + +import cats.data.NonEmptyList +import com.typesafe.config.ConfigFactory +import cromwell.core.Tags.DbmsTest +import cromwell.core.WorkflowId +import cromwell.database.slick.SlickDatabase +import cromwell.database.sql.joins.CallCachingJoin +import cromwell.database.sql.tables.{CallCachingAggregationEntry, CallCachingEntry, CallCachingHashEntry} +import cromwell.services.ServicesStore +import org.scalatest.concurrent.ScalaFutures +import org.scalatest.time.{Millis, Seconds, Span} +import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} +import org.specs2.mock.Mockito + +import scala.concurrent.ExecutionContext + +class CallCachingSlickDatabaseSpec extends FlatSpec with Matchers with ScalaFutures with BeforeAndAfterAll with Mockito { + + implicit val ec = ExecutionContext.global + implicit val defaultPatience = PatienceConfig(scaled(Span(5, Seconds)), scaled(Span(100, Millis))) + + "SlickDatabase (hsqldb)" should behave like testWith("database") + + "SlickDatabase (mysql)" should behave like testWith("database-test-mysql") + + def testWith(configPath: String): Unit = { + import ServicesStore.EnhancedSqlDatabase + + lazy val databaseConfig = ConfigFactory.load.getConfig(configPath) + lazy val dataAccess = new SlickDatabase(databaseConfig).initialized + + val callCachingEntryA = CallCachingEntry( + WorkflowId.randomId().toString, + "AwesomeWorkflow.GoodJob", + 1, + None, + None, + allowResultReuse = false + ) + + val callCachingHashEntriesA = Seq( + CallCachingHashEntry( + hashKey = "input: String s1", + hashValue = "HASH_S1" + ), + CallCachingHashEntry( + hashKey = "input: String s2", + hashValue = "HASH_S2" + ), + CallCachingHashEntry( + hashKey = "input: String s4", + hashValue = "HASH_S4" + ) + ) + + val aggregation = 
Option(CallCachingAggregationEntry("BASE_AGGREGATION", Option("FILE_AGGREGATION"))) + + it should "honor allowResultReuse" taggedAs DbmsTest in { + (for { + _ <- dataAccess.addCallCaching(Seq( + CallCachingJoin( + callCachingEntryA, + callCachingHashEntriesA, + aggregation, + Seq.empty, Seq.empty + ) + ), + 100 + ) + hasBaseAggregation <- dataAccess.hasMatchingCallCachingEntriesForBaseAggregation("BASE_AGGREGATION") + _ = hasBaseAggregation shouldBe false + hasHashPairMatch <- dataAccess.hasMatchingCallCachingEntriesForHashKeyValues( + NonEmptyList.of("input: String s1" -> "HASH_S1") + ) + _ = hasHashPairMatch shouldBe false + hit <- dataAccess.findCacheHitForAggregation("BASE_AGGREGATION", Option("FILE_AGGREGATION"), 1) + _ = hit shouldBe empty + } yield ()).futureValue + } + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EJHADataSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EJHADataSpec.scala deleted file mode 100644 index 45b996018..000000000 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EJHADataSpec.scala +++ /dev/null @@ -1,96 +0,0 @@ -package cromwell.engine.workflow.lifecycle.execution.callcaching - -import cromwell.core.callcaching._ -import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor.CacheResultMatchesForHashes -import org.scalatest.{FlatSpec, Matchers} - -class EJHADataSpec extends FlatSpec with Matchers { - - behavior of "EJHA data" - - val hashKey1 = HashKey("Peter Piper picked a peck of pickled peppers") - val hashKey2 = HashKey("I saw Susie sitting in a shoe shine shop: Where she sits she shines, and where she shines she sits") - val hashKey3 = HashKey("How many boards could the Mongols hoard if the Mongol hordes got bored?") - val hashKey4 = HashKey("The sixth sick Sikh's sixth sheep is sick.") - val hashKey5 = HashKey("The Doge did what a Doge does, when a Doge does his duty to a Duke, 
that is. When the Doge did his duty and the Duke didn't, that's when the Duchess did the dirt to the Duke with the Doge. There they were in the dark: The Duke with his dagger, the Doge with his dart and the Duchess with her dirk. The Duchess dug at the Duke just when the Duke dove at the Doge. Now the Duke ducked, the Doge dodged, and the Duchess didn't. So the Duke got the Duchess, the Duchess got the Doge, and the Doge got the Duke.") - - val allHashKeys = Set(hashKey1, hashKey2, hashKey3, hashKey4, hashKey5) - - it should "create lists appropriately in the apply method" in { - val readWriteData = EJHAData(allHashKeys, CallCachingActivity(ReadAndWriteCache)) - readWriteData.remainingCacheChecks should be(allHashKeys) - readWriteData.remainingHashesNeeded should be(allHashKeys) - - val readOnlyData = EJHAData(allHashKeys, CallCachingActivity(ReadCache)) - readOnlyData.remainingCacheChecks should be(allHashKeys) - readOnlyData.remainingHashesNeeded should be(Set.empty[HashKey]) - - val writeOnlyData = EJHAData(allHashKeys, CallCachingActivity(WriteCache)) - writeOnlyData.remainingCacheChecks should be(Set.empty[HashKey]) - writeOnlyData.remainingHashesNeeded should be(allHashKeys) - } - - - it should "accumulate new hashes" in { - val data = EJHAData(allHashKeys, CallCachingActivity(WriteCache)) - data.hashesKnown should be(Set.empty) - data.remainingHashesNeeded should be(allHashKeys) - data.allHashesKnown should be(false) - - val mostHashKeys = Set(hashKey1, hashKey2, hashKey4, hashKey5) - val hashResults = mostHashKeys map { x => Set(makeHashResult(x)) } - val newData = hashResults.foldLeft(data)( (d, h) => d.withNewKnownHashes(h) ) - newData.hashesKnown.map(_.hashKey) should be(mostHashKeys) - newData.remainingHashesNeeded should be(Set(hashKey3)) - newData.allHashesKnown should be(false) - - val newerData = newData.withNewKnownHashes(Set(makeHashResult(hashKey3))) - newerData.hashesKnown.map(_.hashKey) should be(allHashKeys) - newerData.remainingHashesNeeded 
should be(Set.empty) - newerData.allHashesKnown should be(true) - } - - it should "intersect new cache meta info result IDs for cache hits" in { - val data = EJHAData(allHashKeys, CallCachingActivity(ReadCache)) - data.possibleCacheResults should be(None) - data.allCacheResultsIntersected should be(false) - data.isDefinitelyCacheHit should be(false) - data.isDefinitelyCacheMiss should be(false) - - // To save you time I'll just tell you: the intersection of all these sets is Set(5) - val cacheLookupResults: List[CacheResultMatchesForHashes] = List( - CacheResultMatchesForHashes(Set(makeHashResult(hashKey1)), Set(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) map MetaInfoId), - CacheResultMatchesForHashes(Set(makeHashResult(hashKey2)), Set(1, 2, 3, 4, 5, 6) map MetaInfoId), - CacheResultMatchesForHashes(Set(makeHashResult(hashKey3)), Set(1, 2, 3, 5, 7, 8, 9, 10) map MetaInfoId), - CacheResultMatchesForHashes(Set(makeHashResult(hashKey4)), Set(4, 5, 6, 7, 8, 9, 10) map MetaInfoId), - CacheResultMatchesForHashes(Set(makeHashResult(hashKey5)), Set(1, 2, 5, 6, 7, 10) map MetaInfoId)) - val newData = cacheLookupResults.foldLeft(data)( (d, c) => d.intersectCacheResults(c) ) - newData.possibleCacheResults match{ - case Some(set) => set should be(Set(MetaInfoId(5))) - case None => fail("There should be a cache result set") - } - newData.allCacheResultsIntersected should be(true) - newData.isDefinitelyCacheHit should be(true) - newData.isDefinitelyCacheMiss should be(false) - } - - it should "intersect new cache meta info result IDs for cache misses" in { - val data = EJHAData(allHashKeys, CallCachingActivity(ReadCache)) - - // To save you time I'll just tell you: the intersection of all these sets is empty Set() - val cacheLookupResults: List[CacheResultMatchesForHashes] = List( - CacheResultMatchesForHashes(Set(makeHashResult(hashKey1)), Set(1, 2, 3, 4, 5, 6) map MetaInfoId), - CacheResultMatchesForHashes(Set(makeHashResult(hashKey2)), Set(1, 2, 3, 7, 8, 9) map MetaInfoId), - 
CacheResultMatchesForHashes(Set(makeHashResult(hashKey3)), Set(5, 7, 8, 9, 10) map MetaInfoId)) - val newData = cacheLookupResults.foldLeft(data)( (d, c) => d.intersectCacheResults(c) ) - newData.possibleCacheResults match{ - case Some(set) => set should be(Set.empty) - case None => fail("There should be a cache result set") - } - newData.allCacheResultsIntersected should be(false) - newData.isDefinitelyCacheHit should be(false) - newData.isDefinitelyCacheMiss should be(true) - } - - private def makeHashResult(hashKey: HashKey): HashResult = HashResult(hashKey, HashValue("whatever")) -} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala index 25ceffe3c..1b744532c 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala @@ -1,223 +1,222 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching -import akka.actor.{ActorRef, ActorSystem, Props} -import akka.testkit.{ImplicitSender, TestKit, TestProbe} -import cromwell.CromwellTestkitSpec -import cromwell.backend.callcaching.FileHashingActor.{FileHashResponse, SingleFileHashRequest} -import cromwell.backend.{BackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey, BackendWorkflowDescriptor, RuntimeAttributeDefinition} +import akka.actor.{Actor, ActorRef, Props} +import akka.testkit.{TestActorRef, TestProbe} +import cromwell.backend._ +import cromwell.core._ import cromwell.core.callcaching._ -import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CacheMiss, CallCacheHashes} -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} -import 
wdl4s._ -import wdl4s.values.{WdlFile, WdlValue} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheHashingJobActor.{CompleteFileHashingResult, InitialHashingResult, NoFileHashesResult} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadingJobActor.NextHit +import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor._ +import cromwell.services.metadata.MetadataService.PutMetadataAction +import org.scalatest.concurrent.Eventually +import org.scalatest.prop.TableDrivenPropertyChecks +import org.scalatest.{FlatSpecLike, Matchers} +import wdl4s.wdl.values.WdlValue +import wdl4s.wdl.{WdlTask, WdlTaskCall} + +class EngineJobHashingActorSpec extends TestKitSuite with FlatSpecLike with Matchers with BackendSpec with TableDrivenPropertyChecks with Eventually { + behavior of "EngineJobHashingActor" -import scala.concurrent.duration._ -import scala.language.postfixOps - -class EngineJobHashingActorSpec extends TestKit(new CromwellTestkitSpec.TestWorkflowManagerSystem().actorSystem) - with ImplicitSender with WordSpecLike with Matchers with MockitoSugar with BeforeAndAfterAll { - - import EngineJobHashingActorSpec._ + def templateJobDescriptor(inputs: Map[LocallyQualifiedName, WdlValue] = Map.empty) = { + val task = mock[WdlTask] + val call = mock[WdlTaskCall] + task.commandTemplateString returns "Do the stuff... now!!" 
+ task.outputs returns List.empty + task.fullyQualifiedName returns "workflow.hello" + call.task returns task + val workflowDescriptor = mock[BackendWorkflowDescriptor] + workflowDescriptor.id returns WorkflowId.randomId() + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, None, 1), Map.empty, fqnMapToDeclarationMap(inputs), NoDocker, Map.empty) + jobDescriptor + } + + val serviceRegistryActorProbe = TestProbe() + + def makeEJHA(receiver: ActorRef, activity: CallCachingActivity, ccReaderProps: Props = Props.empty) = { + TestActorRef[EngineJobHashingActor]( + EngineJobHashingActorTest.props( + receiver, + serviceRegistryActorProbe.ref, + templateJobDescriptor(), + None, + Props.empty, + ccReaderProps, + Set.empty, + "backend", + activity, + DockerWithHash("ubuntu@sha256:blablabla") + ) + ) + } - implicit val actorSystem: ActorSystem = system + it should "record initial hashes" in { + val receiver = TestProbe() + val activity = CallCachingActivity(ReadAndWriteCache) + val actorUnderTest = makeEJHA(receiver.ref, activity) - val readModes = List(CallCachingActivity(ReadCache), CallCachingActivity(ReadAndWriteCache)) - val writeModes = List(CallCachingActivity(WriteCache), CallCachingActivity(ReadAndWriteCache)) - val allModes = List(CallCachingActivity(ReadCache), CallCachingActivity(WriteCache), CallCachingActivity(ReadAndWriteCache)) + val initialResult: InitialHashingResult = mock[InitialHashingResult] + actorUnderTest ! 
initialResult + eventually { + actorUnderTest.underlyingActor.initialHash shouldBe Some(initialResult) + } + } - "Engine job hashing actor" must { - allModes foreach { activity => - val expectation = activity.readWriteMode match { - case ReadCache => "cache hit" - case WriteCache => "hashes" - case ReadAndWriteCache => "cache hit and hashes" - } + it should "create a CCReader actor or not depending on CC activity" in { + val activities = Table( + ("readWriteMode", "hasCCReadActor"), + (ReadCache, true), + (WriteCache, false), + (ReadAndWriteCache, true) + ) + forAll(activities) { case ((readWriteMode, hasCCReadActor)) => + val receiver = TestProbe() + val actorUnderTest = makeEJHA(receiver.ref, CallCachingActivity(readWriteMode)) + actorUnderTest.underlyingActor.callCacheReadingJobActor.isDefined shouldBe hasCCReadActor + } + } - s"Respect the CallCachingMode and report back $expectation for the ${activity.readWriteMode} activity" in { - val singleMetaInfoIdSet = Set(MetaInfoId(1)) - val replyTo = TestProbe() - val deathWatch = TestProbe() + it should "send hashes to receiver when receiving a NoFileHashesResult" in { + val receiver = TestProbe() + val actorUnderTest = makeEJHA(receiver.ref, CallCachingActivity(ReadAndWriteCache)) + val initialHashes = Set(HashResult(HashKey("key"), HashValue("value"))) + val initialAggregatedHash = "aggregatedHash" + val initialResult = InitialHashingResult(initialHashes, initialAggregatedHash) + actorUnderTest ! initialResult + actorUnderTest ! 
NoFileHashesResult + receiver.expectMsg(CallCacheHashes(initialHashes, initialAggregatedHash, None)) + } - val cacheLookupResponses: Map[String, Set[MetaInfoId]] = if (activity.readFromCache) standardCacheLookupResponses(singleMetaInfoIdSet, singleMetaInfoIdSet, singleMetaInfoIdSet, singleMetaInfoIdSet) else Map.empty - val ejha = createEngineJobHashingActor( - replyTo = replyTo.ref, - activity = activity, - cacheLookupResponses = cacheLookupResponses) + it should "send hashes to receiver when receiving a CompleteFileHashingResult" in { + val receiver = TestProbe() + val actorUnderTest = makeEJHA(receiver.ref, CallCachingActivity(ReadAndWriteCache)) + + val initialHashes = Set(HashResult(HashKey("key"), HashValue("value"))) + val initialAggregatedHash = "aggregatedHash" + val initialResult = InitialHashingResult(initialHashes, initialAggregatedHash) + val fileHashes = Set(HashResult(HashKey("file key"), HashValue("value"))) + val fileAggregatedHash = "aggregatedFileHash" + val fileResult = CompleteFileHashingResult(fileHashes, fileAggregatedHash) + + actorUnderTest ! initialResult + actorUnderTest ! fileResult + receiver.expectMsg(CallCacheHashes(initialHashes, initialAggregatedHash, Option(FileHashes(fileHashes, fileAggregatedHash)))) + } - deathWatch watch ejha + it should "forward CacheMiss to receiver" in { + val receiver = TestProbe() + val activity = CallCachingActivity(ReadAndWriteCache) + val actorUnderTest = makeEJHA(receiver.ref, activity) - if (activity.readFromCache) replyTo.expectMsg(CacheHit(MetaInfoId(1))) - if (activity.writeToCache) replyTo.expectMsgPF(max = 5 seconds, hint = "awaiting cache hit message") { - case CallCacheHashes(hashes) => hashes.size should be(4) - case x => fail(s"Cache hit anticipated! Instead got a ${x.getClass.getSimpleName}") - } + actorUnderTest ! 
CacheMiss + receiver.expectMsg(CacheMiss) + } - deathWatch.expectTerminated(ejha, 5 seconds) - } + it should "forward CacheHit to receiver" in { + val receiver = TestProbe() + val activity = CallCachingActivity(ReadAndWriteCache) + val actorUnderTest = makeEJHA(receiver.ref, activity) - s"Wait for requests to the FileHashingActor for the ${activity.readWriteMode} activity" in { - val singleMetaInfoIdSet = Set(MetaInfoId(1)) - val replyTo = TestProbe() - val fileHashingActor = TestProbe() - val deathWatch = TestProbe() - - val initialCacheLookupResponses: Map[String, Set[MetaInfoId]] = if (activity.readFromCache) standardCacheLookupResponses(singleMetaInfoIdSet, singleMetaInfoIdSet, singleMetaInfoIdSet, singleMetaInfoIdSet) else Map.empty - val fileCacheLookupResponses = Map("input: File inputFile1" -> singleMetaInfoIdSet, "input: File inputFile2" -> singleMetaInfoIdSet) - - val jobDescriptor = templateJobDescriptor(inputs = Map( - "inputFile1" -> WdlFile("path"), - "inputFile2" -> WdlFile("path"))) - val ejha = createEngineJobHashingActor( - replyTo = replyTo.ref, - activity = activity, - jobDescriptor = jobDescriptor, - fileHashingActor = Option(fileHashingActor.ref), - cacheLookupResponses = initialCacheLookupResponses ++ fileCacheLookupResponses) - - deathWatch watch ejha - - twice { iteration => - fileHashingActor.expectMsgPF(max = 5 seconds, hint = s"awaiting file hash request #$iteration") { - case SingleFileHashRequest(jobKey, hashKey, file, initializationData) => - file should be(WdlFile("path")) - fileHashingActor.send(ejha, FileHashResponse(HashResult(hashKey, HashValue("blah di blah")))) - case x => fail(s"SingleFileHashRequest anticipated! 
Instead got a ${x.getClass.getSimpleName}") - } - } - - if (activity.readFromCache) replyTo.expectMsg(CacheHit(MetaInfoId(1))) - if (activity.writeToCache) replyTo.expectMsgPF(max = 5 seconds, hint = "awaiting cache hit message") { - case CallCacheHashes(hashes) => hashes.size should be(6) - case x => fail(s"Cache hit anticipated! Instead got a ${x.getClass.getSimpleName}") - } - - deathWatch.expectTerminated(ejha, 5 seconds) - } + val cacheHit = mock[CacheHit] + actorUnderTest ! cacheHit + receiver.expectMsg(cacheHit) + } - s"Cache miss for bad FileHashingActor results but still return hashes in the ${activity.readWriteMode} activity" in { - val singleMetaInfoIdSet = Set(MetaInfoId(1)) - val replyTo = TestProbe() - val fileHashingActor = TestProbe() - val deathWatch = TestProbe() - - val initialCacheLookupResponses: Map[String, Set[MetaInfoId]] = if (activity.readFromCache) standardCacheLookupResponses(singleMetaInfoIdSet, singleMetaInfoIdSet, singleMetaInfoIdSet, singleMetaInfoIdSet) else Map.empty - val fileCacheLookupResponses = Map("input: File inputFile1" -> Set(MetaInfoId(2)), "input: File inputFile2" -> singleMetaInfoIdSet) - - val jobDescriptor = templateJobDescriptor(inputs = Map( - "inputFile1" -> WdlFile("path"), - "inputFile2" -> WdlFile("path"))) - val ejha = createEngineJobHashingActor( - replyTo = replyTo.ref, - activity = activity, - jobDescriptor = jobDescriptor, - fileHashingActor = Option(fileHashingActor.ref), - cacheLookupResponses = initialCacheLookupResponses ++ fileCacheLookupResponses) - - deathWatch watch ejha - - // Hello, future Cromwellian! I imagine you're reading this because you've just introduced file hash short-circuiting on cache miss and, - // depending on timings, you may not get the second file hash request in read-only mode. You might want to refactor this test to reply - // only to the "cache miss" file and then check that the test probe receives the appropriate "cancellation" message. - // ... or not! 
Don't just blindly allow a ghost of the past to tell you what to do! Live your own life and excel! - twice { iteration => - fileHashingActor.expectMsgPF(max = 5 seconds, hint = s"awaiting file hash request #$iteration") { - case SingleFileHashRequest(jobKey, hashKey, file, initializationData) => - file should be(WdlFile("path")) - fileHashingActor.send(ejha, FileHashResponse(HashResult(hashKey, HashValue("blah di blah")))) - case x => fail(s"SingleFileHashRequest anticipated! Instead got a ${x.getClass.getSimpleName}") - } - } - - if (activity.readFromCache) replyTo.expectMsg(CacheMiss) - if (activity.writeToCache) replyTo.expectMsgPF(max = 5 seconds, hint = "awaiting cache hit message") { - case CallCacheHashes(hashes) => hashes.size should be(6) - case x => fail(s"Cache hit anticipated! Instead got a ${x.getClass.getSimpleName}") - } - - deathWatch.expectTerminated(ejha, 5 seconds) + it should "forward NextHit to CCRead actor" in { + val receiver = TestProbe() + val activity = CallCachingActivity(ReadAndWriteCache) + val monitorProbe = TestProbe() + val ccReadActorProps = Props(new Actor { + override def receive: Receive = { + case NextHit => monitorProbe.ref forward NextHit } + }) + + val actorUnderTest = makeEJHA(receiver.ref, activity, ccReadActorProps) - s"Detect call cache misses for the ${activity.readWriteMode} activity" in { - val singleMetaInfoIdSet = Set(MetaInfoId(1)) - val replyTo = TestProbe() - val deathWatch = TestProbe() - - val cacheLookupResponses: Map[String, Set[MetaInfoId]] = if (activity.readFromCache) standardCacheLookupResponses(singleMetaInfoIdSet, singleMetaInfoIdSet, Set(MetaInfoId(2)), singleMetaInfoIdSet) else Map.empty - val ejha = createEngineJobHashingActor( - replyTo = replyTo.ref, - activity = activity, - cacheLookupResponses = cacheLookupResponses) - - deathWatch watch ejha - - if (activity.readFromCache) replyTo.expectMsg(CacheMiss) - if (activity.writeToCache) replyTo.expectMsgPF(max = 5 seconds, hint = "awaiting cache hit 
message") { - case CallCacheHashes(hashes) => hashes.size should be(4) - case x => fail(s"Cache hit anticipated! Instead got a ${x.getClass.getSimpleName}") - } + actorUnderTest ! NextHit + monitorProbe.expectMsg(NextHit) + } - deathWatch.expectTerminated(ejha, 5 seconds) - } - } + it should "fail if it receives NextHit and doesn't have a CCRead actor" in { + val receiver = TestProbe() + val activity = CallCachingActivity(WriteCache) + val actorUnderTest = makeEJHA(receiver.ref, activity) + receiver.watch(actorUnderTest) + actorUnderTest ! NextHit + receiver.expectMsgClass(classOf[HashError]) + serviceRegistryActorProbe.expectMsgClass(classOf[PutMetadataAction]) + receiver.expectTerminated(actorUnderTest) } - override def afterAll() = { - TestKit.shutdownActorSystem(system) + it should "fail if it receives a HashingFailedMessage" in { + val receiver = TestProbe() + val activity = CallCachingActivity(ReadAndWriteCache) + val actorUnderTest = makeEJHA(receiver.ref, activity) + receiver.watch(actorUnderTest) + actorUnderTest ! 
HashingFailedMessage("someFile", new Exception("[TEST] Some exception")) + receiver.expectMsgClass(classOf[HashError]) + serviceRegistryActorProbe.expectMsgClass(classOf[PutMetadataAction]) + receiver.expectTerminated(actorUnderTest) } -} -object EngineJobHashingActorSpec extends MockitoSugar { - import org.mockito.Mockito._ - - def createEngineJobHashingActor - ( - replyTo: ActorRef, - activity: CallCachingActivity, - jobDescriptor: BackendJobDescriptor = templateJobDescriptor(), - initializationData: Option[BackendInitializationData] = None, - fileHashingActor: Option[ActorRef] = None, - cacheLookupResponses: Map[String, Set[MetaInfoId]] = Map.empty, - runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition] = Set.empty, - backendName: String = "whatever" - )(implicit system: ActorSystem) = { - val callCacheReadActor = system.actorOf(Props(new PredictableCallCacheReadActor(cacheLookupResponses))) - system.actorOf(EngineJobHashingActor.props( - receiver = replyTo, + it should "fail if it receives a FinalFileHashingResult but has no InitialHashingResult" in { + val receiver = TestProbe() + val activity = CallCachingActivity(ReadAndWriteCache) + val actorUnderTest = makeEJHA(receiver.ref, activity) + receiver.watch(actorUnderTest) + actorUnderTest ! 
NoFileHashesResult + receiver.expectMsgClass(classOf[HashError]) + serviceRegistryActorProbe.expectMsgClass(classOf[PutMetadataAction]) + receiver.expectTerminated(actorUnderTest) + } + + object EngineJobHashingActorTest { + def props(receiver: ActorRef, + serviceRegistryActor: ActorRef, + jobDescriptor: BackendJobDescriptor, + initializationData: Option[BackendInitializationData], + fileHashingActorProps: Props, + callCacheReadingJobActorProps: Props, + runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition], + backendName: String, + activity: CallCachingActivity, + callCachingEligible: CallCachingEligible): Props = Props(new EngineJobHashingActorTest( + receiver = receiver, + serviceRegistryActor = serviceRegistryActor, jobDescriptor = jobDescriptor, initializationData = initializationData, - fileHashingActor = fileHashingActor.getOrElse(emptyActor), - callCacheReadActor = callCacheReadActor, + fileHashingActorProps = fileHashingActorProps, + callCacheReadingJobActorProps = callCacheReadingJobActorProps, runtimeAttributeDefinitions = runtimeAttributeDefinitions, backendName = backendName, - activity = activity)) + activity = activity, + callCachingEligible = callCachingEligible)) } - - def emptyActor(implicit actorSystem: ActorSystem) = actorSystem.actorOf(Props.empty) - - def templateJobDescriptor(inputs: Map[LocallyQualifiedName, WdlValue] = Map.empty) = { - val task = mock[Task] - val call = mock[Call] - when(task.commandTemplateString).thenReturn("Do the stuff... 
now!!") - when(task.outputs).thenReturn(List.empty) - when(call.task).thenReturn(task) - val workflowDescriptor = mock[BackendWorkflowDescriptor] - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, None, 1), Map.empty, inputs) - jobDescriptor + + class EngineJobHashingActorTest(receiver: ActorRef, + serviceRegistryActor: ActorRef, + jobDescriptor: BackendJobDescriptor, + initializationData: Option[BackendInitializationData], + fileHashingActorProps: Props, + callCacheReadingJobActorProps: Props, + runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition], + backendName: String, + activity: CallCachingActivity, + callCachingEligible: CallCachingEligible) extends EngineJobHashingActor( + receiver = receiver, + serviceRegistryActor = serviceRegistryActor, + jobDescriptor = jobDescriptor, + initializationData = initializationData, + fileHashingActorProps = fileHashingActorProps, + callCacheReadingJobActorProps = callCacheReadingJobActorProps, + runtimeAttributeDefinitions = runtimeAttributeDefinitions, + backendName = backendName, + activity = activity, + callCachingEligible = callCachingEligible) { + // override preStart to nothing to prevent the creation of the CCHJA. 
+ // This way it doesn't interfere with the tests and we can manually inject the messages we want + override def preStart() = () } - def standardCacheLookupResponses(commandTemplate: Set[MetaInfoId], - inputCount: Set[MetaInfoId], - backendName: Set[MetaInfoId], - outputCount: Set[MetaInfoId]) = Map( - "command template" -> commandTemplate, - "input count" -> inputCount, - "backend name" -> backendName, - "output count" -> outputCount - ) - - def twice[A](block: Int => A) = { - block(1) - block(2) - } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/PredictableCallCacheReadActor.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/PredictableCallCacheReadActor.scala deleted file mode 100644 index 4182095c6..000000000 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/PredictableCallCacheReadActor.scala +++ /dev/null @@ -1,42 +0,0 @@ -package cromwell.engine.workflow.lifecycle.execution.callcaching - -import akka.actor.{Actor, ActorLogging, ActorRef} -import cromwell.core.callcaching.HashResult -import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor.{CacheLookupRequest, CacheResultLookupFailure, CacheResultMatchesForHashes} -import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CallCacheHashes - -import scala.util.{Failure, Success, Try} - -/** - * Has a set of responses which it will respond with. If it gets a request for anything that it's not expecting to respond to will generate a failure. 
- */ -class PredictableCallCacheReadActor(responses: Map[String, Set[MetaInfoId]]) extends Actor with ActorLogging { - - var responsesRemaining = responses - - override def receive = { - case CacheLookupRequest(callCacheHashes: CallCacheHashes) => - callCacheHashes.hashes.toList match { - case Nil => - throw new Exception("Should never be looking up 0 hash keys!?") - case head :: tail => - val startSet = toTry(head.hashKey.key, responses.get(head.hashKey.key)) - respond(sender, callCacheHashes.hashes, tail.foldLeft(startSet)(resultLookupFolder)) - } - } - - private def respond(sndr: ActorRef, hashes: Set[HashResult], result: Try[Set[MetaInfoId]]) = result match { - case Success(cacheMatches) => sndr ! CacheResultMatchesForHashes(hashes, cacheMatches) - case Failure(t) => sndr ! CacheResultLookupFailure(t) - } - - private def toTry[A](name: String, option: Option[A]): Try[A] = option match { - case Some(x) => Success(x) - case None => Failure(new Exception(s"Error looking up response $name!")) - } - - private def resultLookupFolder(current: Try[Set[MetaInfoId]], next: HashResult): Try[Set[MetaInfoId]] = current flatMap { c => - val lookedUp = toTry(next.hashKey.key, responses.get(next.hashKey.key)) - lookedUp map { l => c.intersect(l) } - } -} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala index 51de21e6b..b58485f3f 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala @@ -1,23 +1,26 @@ package cromwell.engine.workflow.lifecycle.execution.ejea +import cromwell.core.callcaching._ import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ -import 
EngineJobExecutionActorSpec._ -import cromwell.core.callcaching.CallCachingMode -import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CallCacheHashes, EJHAResponse, HashError} -import scala.util.{Failure, Success, Try} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadingJobActor.NextHit +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCachingEntryId +import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CallCacheHashes, EJHAResponse, HashError} +import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec._ import cromwell.engine.workflow.lifecycle.execution.ejea.HasJobSuccessResponse.SuccessfulCallCacheHashes -class EjeaBackendIsCopyingCachedOutputsSpec extends EngineJobExecutionActorSpec with HasJobSuccessResponse with HasJobFailureResponses with CanExpectJobStoreWrites with CanExpectCacheWrites { +import scala.util.{Failure, Success, Try} + +class EjeaBackendIsCopyingCachedOutputsSpec extends EngineJobExecutionActorSpec with HasJobSuccessResponse with HasJobFailureResponses with CanExpectJobStoreWrites with CanExpectCacheWrites with CanExpectCacheInvalidation { override implicit val stateUnderTest = BackendIsCopyingCachedOutputs - "An EJEA in FetchingCachedOutputsFromDatabase state" should { + "An EJEA in BackendIsCopyingCachedOutputs state" should { val hashErrorCause = new Exception("blah") val hashResultsDataValue = Some(Success(SuccessfulCallCacheHashes)) val hashErrorDataValue = Some(Failure(hashErrorCause)) - val hashResultsEjhaResponse = Some(SuccessfulCallCacheHashes) + val hashResultsEjhaResponse: Option[CallCacheHashes] = Some(SuccessfulCallCacheHashes) val hashErrorEjhaResponse = Some(HashError(hashErrorCause)) case class InitialHashDataAndEjhaResponseCombination(name: String, @@ -50,7 +53,6 @@ class EjeaBackendIsCopyingCachedOutputsSpec extends EngineJobExecutionActorSpec // Nothing should happen here: 
helper.jobStoreProbe.expectNoMsg(awaitAlmostNothing) - helper.callCacheWriteActorCreations should be(NothingYet) // Rely on the await timeout from the previous step to allow time to pass // Send the response from the copying actor ejea ! successResponse @@ -76,7 +78,6 @@ class EjeaBackendIsCopyingCachedOutputsSpec extends EngineJobExecutionActorSpec ejhaResponse foreach { resp => // Nothing should have happened yet: helper.jobStoreProbe.expectNoMsg(awaitAlmostNothing) - helper.callCacheWriteActorCreations should be(NothingYet) // Rely on the await timeout from the previous step to allow time to pass // Ok, now send the response from the EJHA (if there was one!): ejea ! resp @@ -95,38 +96,65 @@ class EjeaBackendIsCopyingCachedOutputsSpec extends EngineJobExecutionActorSpec } } - RestartOrExecuteCommandTuples foreach { case RestartOrExecuteCommandTuple(operationName, restarting, expectedMessage) => - s"$operationName the job immediately when it gets a failure result, and it was going to receive $hashComboName, if call caching is $mode" in { - ejea = ejeaInBackendIsCopyingCachedOutputsState(initialHashData, mode, restarting = restarting) + if (mode.readFromCache) { + s"invalidate a call for caching if backend coping failed when it was going to receive $hashComboName, if call caching is $mode" in { + ejea = ejeaInBackendIsCopyingCachedOutputsState(initialHashData, mode) + // Send the response from the copying actor + ejea ! 
failureNonRetryableResponse + + expectInvalidateCallCacheActor(cacheId) + eventually { + ejea.stateName should be(InvalidatingCacheEntry) + } + ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, initialHashData, Option(helper.ejhaProbe.ref), cacheHit)) + } + + s"not invalidate a call for caching if backend coping failed when invalidation is disabled, when it was going to receive $hashComboName, if call caching is $mode" in { + val invalidationDisabledOptions = CallCachingOptions(invalidateBadCacheResults = false) + val cacheInvalidationDisabledMode = mode match { + case CallCachingActivity(rw, _) => CallCachingActivity(rw, invalidationDisabledOptions) + case _ => fail(s"Mode $mode not appropriate for cache invalidation tests") + } + ejea = ejeaInBackendIsCopyingCachedOutputsState(initialHashData, cacheInvalidationDisabledMode) // Send the response from the copying actor ejea ! failureNonRetryableResponse - helper.bjeaProbe.expectMsg(awaitTimeout, expectedMessage) - ejea.stateName should be(RunningJob) - ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper. 
bjeaProps, initialHashData)) + helper.ejhaProbe.expectMsg(NextHit) + + eventually { + ejea.stateName should be(CheckingCallCache) + } + // Make sure we didn't start invalidating anything: + helper.invalidateCacheActorCreations.hasExactlyOne should be(false) + ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, initialHashData, Option(helper.ejhaProbe.ref), cacheHit)) } - s"$operationName the job (preserving and received hashes) when call caching is $mode, the EJEA has $hashComboName and then gets a success result" in { - ejea = ejeaInBackendIsCopyingCachedOutputsState(initialHashData, mode, restarting = restarting) + s"invalidate a call for caching if backend copying failed (preserving and received hashes) when call caching is $mode, the EJEA has $hashComboName and then gets a success result" in { + ejea = ejeaInBackendIsCopyingCachedOutputsState(initialHashData, mode) // Send the response from the EJHA (if there was one!): - ejhaResponse foreach { ejea ! _ } + ejhaResponse foreach { + ejea ! _ + } // Nothing should happen here: helper.jobStoreProbe.expectNoMsg(awaitAlmostNothing) - helper.callCacheWriteActorCreations should be(NothingYet) // Rely on the await timeout from the previous step to allow time to pass // Send the response from the copying actor ejea ! failureNonRetryableResponse - helper.bjeaProbe.expectMsg(awaitTimeout, expectedMessage) - ejea.stateName should be(RunningJob) - ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper. 
bjeaProps, finalHashData)) + expectInvalidateCallCacheActor(cacheId) + eventually { + ejea.stateName should be(InvalidatingCacheEntry) + } + ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, finalHashData, Option(helper.ejhaProbe.ref), cacheHit)) } } } } } - def standardResponsePendingData(hashes: Option[Try[CallCacheHashes]]) = ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, hashes) + private val cacheId: CallCachingEntryId = CallCachingEntryId(74) + private val cacheHit = Option(EJEACacheHit(CacheHit(cacheId), 0, None)) + def standardResponsePendingData(hashes: Option[Try[CallCacheHashes]]) = ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, hashes, Option(helper.ejhaProbe.ref), cacheHit) def ejeaInBackendIsCopyingCachedOutputsState(initialHashes: Option[Try[CallCacheHashes]], callCachingMode: CallCachingMode, restarting: Boolean = false) = helper.buildEJEA(restarting = restarting, callCachingMode = callCachingMode).setStateInline(data = standardResponsePendingData(initialHashes)) } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingCacheEntryExistenceSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingCacheEntryExistenceSpec.scala new file mode 100644 index 000000000..683a551e2 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingCacheEntryExistenceSpec.scala @@ -0,0 +1,44 @@ +package cromwell.engine.workflow.lifecycle.execution.ejea + +import cromwell.core.callcaching.CallCachingOff +import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{CheckingCacheEntryExistence, CheckingJobStore, NoData, PreparingJob} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor._ +import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec.EnhancedTestEJEA +import 
cromwell.engine.workflow.lifecycle.execution.preparation.CallPreparation + +class EjeaCheckingCacheEntryExistenceSpec extends EngineJobExecutionActorSpec { + + override implicit val stateUnderTest = CheckingJobStore + + "An EJEA in EjeaCheckingCacheEntryExistence state should" should { + "disable call caching and prepare job if a cache entry already exists for this job" in { + createCheckingCacheEntryExistenceEjea() + + ejea ! HasCallCacheEntry(CallCacheEntryForCall(helper.workflowId, helper.jobDescriptorKey)) + helper.jobPreparationProbe.expectMsg(awaitTimeout, "expecting CallPreparation Start", CallPreparation.Start) + ejea.stateName should be(PreparingJob) + + ejea.underlyingActor.effectiveCallCachingMode shouldBe CallCachingOff + } + + "prepare job if no cache entry already exists" in { + createCheckingCacheEntryExistenceEjea() + + ejea ! NoCallCacheEntry(CallCacheEntryForCall(helper.workflowId, helper.jobDescriptorKey)) + + helper.jobPreparationProbe.expectMsg(awaitTimeout, "expecting CallPreparation Start", CallPreparation.Start) + ejea.stateName should be(PreparingJob) + } + + "prepare job if cache entry existence lookup fails" in { + createCheckingCacheEntryExistenceEjea() + + ejea ! 
CacheResultLookupFailure(new Exception("[TEST] Failed to lookup cache entry existence")) + + helper.jobPreparationProbe.expectMsg(awaitTimeout, "expecting CallPreparation Start", CallPreparation.Start) + ejea.stateName should be(PreparingJob) + } + } + + private def createCheckingCacheEntryExistenceEjea(): Unit = { ejea = helper.buildEJEA(restarting = true).setStateInline(state = CheckingCacheEntryExistence, data = NoData) } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingCallCacheSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingCallCacheSpec.scala index 498180780..74f379964 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingCallCacheSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingCallCacheSpec.scala @@ -1,25 +1,21 @@ package cromwell.engine.workflow.lifecycle.execution.ejea -import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{CheckingCallCache, FetchingCachedOutputsFromDatabase, ResponsePendingData, RunningJob} -import EngineJobExecutionActorSpec.EnhancedTestEJEA import cromwell.core.callcaching.{CallCachingActivity, CallCachingOff, ReadCache} +import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{CheckingCallCache, FetchingCachedOutputsFromDatabase, ResponsePendingData, RunningJob} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCachingEntryId import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CacheMiss, HashError} -import cromwell.engine.workflow.lifecycle.execution.callcaching.MetaInfoId +import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec.EnhancedTestEJEA import org.scalatest.concurrent.Eventually -class EjeaCheckingCallCacheSpec extends EngineJobExecutionActorSpec with Eventually { +class EjeaCheckingCallCacheSpec 
extends EngineJobExecutionActorSpec with Eventually with CanExpectFetchCachedResults { override implicit val stateUnderTest = CheckingCallCache "An EJEA in CheckingCallCache mode" should { "Try to fetch the call cache outputs if it gets a CacheHit" in { createCheckingCallCacheEjea() - ejea ! CacheHit(MetaInfoId(75)) - eventually { helper.fetchCachedResultsActorCreations.hasExactlyOne should be(true) } - helper.fetchCachedResultsActorCreations checkIt { - case (CacheHit(metainfoId), _) => metainfoId should be(MetaInfoId(75)) - case _ => fail("Incorrect creation of the fetchCachedResultsActor") - } + ejea ! CacheHit(CallCachingEntryId(75)) + expectFetchCachedResultsActor(CallCachingEntryId(75)) ejea.stateName should be(FetchingCachedOutputsFromDatabase) } @@ -46,5 +42,6 @@ class EjeaCheckingCallCacheSpec extends EngineJobExecutionActorSpec with Eventua private def createCheckingCallCacheEjea(restarting: Boolean = false): Unit = { ejea = helper.buildEJEA(restarting = restarting, callCachingMode = CallCachingActivity(ReadCache)) ejea.setStateInline(state = CheckingCallCache, data = ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, None)) + () } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala index e5a8ff36e..859dbaafe 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala @@ -1,15 +1,12 @@ package cromwell.engine.workflow.lifecycle.execution.ejea -import akka.testkit.TestProbe -import cromwell.backend.BackendJobDescriptorKey -import cromwell.backend.BackendJobExecutionActor.{FailedNonRetryableResponse, FailedRetryableResponse, RecoverJobCommand, SucceededResponse} +import 
cromwell.backend.BackendJobExecutionActor.{JobFailedNonRetryableResponse, JobFailedRetryableResponse, JobSucceededResponse} import cromwell.core._ -import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{CheckingJobStore, JobRunning, NoData, PreparingJob} -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor.BackendJobPreparationFailed -import cromwell.jobstore.{JobResultFailure, JobResultSuccess} +import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{CheckingCacheEntryExistence, CheckingJobStore, NoData} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor.CallCacheEntryForCall +import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec.EnhancedTestEJEA import cromwell.jobstore.JobStoreActor.{JobComplete, JobNotComplete} -import EngineJobExecutionActorSpec.EnhancedTestEJEA -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor +import cromwell.jobstore.{JobResultFailure, JobResultSuccess} class EjeaCheckingJobStoreSpec extends EngineJobExecutionActorSpec { @@ -20,12 +17,12 @@ class EjeaCheckingJobStoreSpec extends EngineJobExecutionActorSpec { createCheckingJobStoreEjea() ejea.setState(CheckingJobStore) val returnCode: Option[Int] = Option(0) - val jobOutputs: JobOutputs = Map.empty + val jobOutputs: CallOutputs = Map.empty ejea ! JobComplete(JobResultSuccess(returnCode, jobOutputs)) helper.replyToProbe.expectMsgPF(awaitTimeout) { - case response: SucceededResponse => + case response: JobSucceededResponse => response.returnCode shouldBe returnCode response.jobOutputs shouldBe jobOutputs } @@ -43,11 +40,11 @@ class EjeaCheckingJobStoreSpec extends EngineJobExecutionActorSpec { ejea ! 
JobComplete(JobResultFailure(returnCode, reason, retryable)) helper.replyToProbe.expectMsgPF(awaitTimeout) { - case response: FailedNonRetryableResponse => + case response: JobFailedNonRetryableResponse => false should be(retryable) response.returnCode shouldBe returnCode response.throwable shouldBe reason - case response: FailedRetryableResponse => + case response: JobFailedRetryableResponse => true should be(retryable) response.returnCode shouldBe returnCode response.throwable shouldBe reason @@ -57,13 +54,13 @@ class EjeaCheckingJobStoreSpec extends EngineJobExecutionActorSpec { } } - "begin preparing the job if it's not already complete" in { + "check for cache entry existence if it's not already complete" in { createCheckingJobStoreEjea() ejea.setState(CheckingJobStore) ejea ! JobNotComplete - helper.jobPreparationProbe.expectMsg(awaitTimeout, "expecting RecoverJobCommand", JobPreparationActor.Start) - ejea.stateName should be(PreparingJob) + helper.callCacheReadActorProbe.expectMsg(awaitTimeout, "expecting CallCacheEntryForCall", CallCacheEntryForCall(helper.workflowId, helper.jobDescriptorKey)) + ejea.stateName should be(CheckingCacheEntryExistence) ejea.stop() } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaFetchingCachedOutputsFromDatabaseSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaFetchingCachedOutputsFromDatabaseSpec.scala index cc6c83b41..ba8a84b91 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaFetchingCachedOutputsFromDatabaseSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaFetchingCachedOutputsFromDatabaseSpec.scala @@ -1,16 +1,16 @@ package cromwell.engine.workflow.lifecycle.execution.ejea -import cromwell.core.WorkflowId -import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ -import EngineJobExecutionActorSpec._ import 
cromwell.backend.BackendCacheHitCopyingActor.CopyOutputsCommand +import cromwell.core.WorkflowId import cromwell.core.callcaching.{CallCachingActivity, ReadAndWriteCache} import cromwell.core.simpleton.WdlValueSimpleton -import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, HashError} +import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.HashError import cromwell.engine.workflow.lifecycle.execution.callcaching.FetchCachedResultsActor.{CachedOutputLookupFailed, CachedOutputLookupSucceeded} -import cromwell.engine.workflow.lifecycle.execution.callcaching.MetaInfoId +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCachingEntryId +import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec._ import cromwell.engine.workflow.lifecycle.execution.ejea.HasJobSuccessResponse.SuccessfulCallCacheHashes -import wdl4s.values.WdlString +import wdl4s.wdl.values.WdlString import scala.util.{Failure, Success} @@ -36,7 +36,7 @@ class EjeaFetchingCachedOutputsFromDatabaseSpec extends EngineJobExecutionActorS val detritusMap = Map("stdout" -> "//somePath") val cachedReturnCode = Some(17) val sourceCacheDetails = s"${WorkflowId.randomId}:call-someTask:1" - ejea ! CachedOutputLookupSucceeded(cachedSimpletons, detritusMap, cachedReturnCode, CacheHit(MetaInfoId(75)), sourceCacheDetails) + ejea ! CachedOutputLookupSucceeded(cachedSimpletons, detritusMap, cachedReturnCode, CallCachingEntryId(75), sourceCacheDetails) helper.callCacheHitCopyingProbe.expectMsg(CopyOutputsCommand(cachedSimpletons, detritusMap, cachedReturnCode)) // Check we end up in the right state: @@ -61,7 +61,7 @@ class EjeaFetchingCachedOutputsFromDatabaseSpec extends EngineJobExecutionActorS // Send the response from the "Fetch" actor val failureReason = new Exception("You can't handle the truth!") - ejea ! 
CachedOutputLookupFailed(MetaInfoId(90210), failureReason) + ejea ! CachedOutputLookupFailed(CallCachingEntryId(90210), failureReason) helper.bjeaProbe.expectMsg(awaitTimeout, expectedMessage) // Check we end up in the right state: diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaInvalidatingCacheEntrySpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaInvalidatingCacheEntrySpec.scala new file mode 100644 index 000000000..165e030c8 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaInvalidatingCacheEntrySpec.scala @@ -0,0 +1,48 @@ +package cromwell.engine.workflow.lifecycle.execution.ejea + +import akka.actor.ActorRef +import cromwell.core.callcaching.{CallCachingActivity, ReadCache} +import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadingJobActor.NextHit +import cromwell.engine.workflow.lifecycle.execution.callcaching.{CallCacheInvalidatedFailure, CallCacheInvalidatedSuccess} +import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec._ + +class EjeaInvalidatingCacheEntrySpec extends EngineJobExecutionActorSpec { + + override implicit val stateUnderTest = InvalidatingCacheEntry + + "An EJEA in InvalidatingCacheEntry state" should { + + val invalidationErrorCause = new Exception("blah") + val invalidateSuccess = CallCacheInvalidatedSuccess(None) + val invalidateFailure = CallCacheInvalidatedFailure(invalidationErrorCause) + + List(invalidateSuccess, invalidateFailure) foreach { invalidateActorResponse => + s"ask the ejha for the next hit when response is $invalidateActorResponse" in { + ejea = ejeaInvalidatingCacheEntryState(Option(helper.ejhaProbe.ref)) + // Send the response from the invalidate actor + ejea ! 
invalidateActorResponse + + helper.bjeaProbe.expectNoMsg(awaitAlmostNothing) + helper.ejhaProbe.expectMsg(NextHit) + eventually { ejea.stateName should be(CheckingCallCache) } + ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, None, Option(helper.ejhaProbe.ref), None)) + } + + RestartOrExecuteCommandTuples foreach { case RestartOrExecuteCommandTuple(operationName, restarting, expectedMessage) => + s"$operationName a job if there is no ejha when invalidate response is $invalidateActorResponse" in { + ejea = ejeaInvalidatingCacheEntryState(None, restarting = restarting) + // Send the response from the invalidate actor + ejea ! invalidateActorResponse + + helper.bjeaProbe.expectMsg(awaitTimeout, expectedMessage) + eventually { ejea.stateName should be(RunningJob) } + ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper. bjeaProps, None, None)) + } + } + } + } + + def standardResponsePendingData(ejha: Option[ActorRef]) = ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, None, ejha) + def ejeaInvalidatingCacheEntryState(ejha: Option[ActorRef], restarting: Boolean = false) = helper.buildEJEA(restarting = restarting, callCachingMode = CallCachingActivity(ReadCache)).setStateInline(state = InvalidatingCacheEntry, data = standardResponsePendingData(ejha)) +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPendingSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPendingSpec.scala index 9fc78f1f7..2810753cc 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPendingSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPendingSpec.scala @@ -1,8 +1,7 @@ package cromwell.engine.workflow.lifecycle.execution.ejea -import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{CheckingJobStore, EngineJobExecutionActorState, 
Execute, Pending, PreparingJob} -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor -import cromwell.jobstore.JobStoreActor.QueryJobCompletion +import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ +import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor.JobExecutionTokenRequest import org.scalatest.concurrent.Eventually class EjeaPendingSpec extends EngineJobExecutionActorSpec with CanValidateJobStoreKey with Eventually { @@ -11,29 +10,16 @@ class EjeaPendingSpec extends EngineJobExecutionActorSpec with CanValidateJobSto "An EJEA in the Pending state" should { - CallCachingModes foreach { mode => - s"check against the Job Store if restarting is true ($mode)" in { - ejea = helper.buildEJEA(restarting = true) + List(false, true) foreach { restarting => + s"wait for the Execute signal then request an execution token (with restarting=$restarting)" in { + ejea = helper.buildEJEA(restarting = restarting) ejea ! Execute - helper.jobStoreProbe.expectMsgPF(max = awaitTimeout, hint = "Awaiting job store lookup") { - case QueryJobCompletion(jobKey, taskOutputs) => - validateJobStoreKey(jobKey) - taskOutputs should be(helper.task.outputs) - } - helper.bjeaProbe.expectNoMsg(awaitAlmostNothing) - helper.jobHashingInitializations shouldBe NothingYet - ejea.stateName should be(CheckingJobStore) - } - - - s"bypass the Job Store and start preparing the job for running or call caching ($mode)" in { - ejea = helper.buildEJEA(restarting = false) - ejea ! 
Execute + helper.jobTokenDispenserProbe.expectMsgClass(max = awaitTimeout, classOf[JobExecutionTokenRequest]) - helper.jobPreparationProbe.expectMsg(max = awaitTimeout, hint = "Awaiting job preparation", JobPreparationActor.Start) - helper.jobStoreProbe.expectNoMsg(awaitAlmostNothing) - ejea.stateName should be(PreparingJob) + helper.jobPreparationProbe.msgAvailable should be(false) + helper.jobStoreProbe.msgAvailable should be(false) + ejea.stateName should be(RequestingExecutionToken) } } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala index 1d6f2ccbe..8deb32b2e 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala @@ -1,9 +1,11 @@ package cromwell.engine.workflow.lifecycle.execution.ejea +import cromwell.backend.BackendJobDescriptor +import cromwell.backend.BackendJobExecutionActor.JobFailedNonRetryableResponse +import cromwell.core.callcaching.{CallCachingMode, DockerWithHash} import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ -import EngineJobExecutionActorSpec._ -import cromwell.core.callcaching.CallCachingMode -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor.{BackendJobPreparationFailed, BackendJobPreparationSucceeded} +import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec._ +import cromwell.engine.workflow.lifecycle.execution.preparation.CallPreparation.{BackendJobPreparationSucceeded, CallPreparationFailed} import org.scalatest.concurrent.Eventually class EjeaPreparingJobSpec extends EngineJobExecutionActorSpec with CanExpectHashingInitialization with Eventually { @@ -14,18 +16,26 @@ class EjeaPreparingJobSpec extends EngineJobExecutionActorSpec with 
CanExpectHas CallCachingModes foreach { mode => if (mode.readFromCache) { - s"Start checking for a cache hit when job preparation succeeds ($mode)" in { + s"Start checking for a cache hit when job preparation succeeds and a docker hash is available ($mode)" in { + val jobDescriptor = helper.backendJobDescriptor.copy(maybeCallCachingEligible = DockerWithHash("hello")) ejea = ejeaInPreparingState(mode) - ejea ! jobPrepSuccessResponse - expectHashingActorInitialization(mode) + ejea ! jobPrepSuccessResponse(jobDescriptor) + expectHashingActorInitialization(mode, jobDescriptor) ejea.stateName should be(CheckingCallCache) + ejea.stateData should be(ResponsePendingData(jobDescriptor, helper.bjeaProps, None, Option(helper.ejhaProbe.ref))) + } + + s"Not check for a cache hit when job preparation succeeds and no docker hash is available ($mode)" in { + ejea = ejeaInPreparingState(mode) + ejea ! jobPrepSuccessResponse(helper.backendJobDescriptor) + ejea.stateName should be(RunningJob) ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, None)) } } else { RestartOrExecuteCommandTuples foreach { case RestartOrExecuteCommandTuple(operationName, restarting, expectedMessage) => s"Send BJEA '$operationName' when job preparation succeeds ($mode)" in { ejea = ejeaInPreparingState(mode = mode, restarting = restarting) - ejea ! jobPrepSuccessResponse + ejea ! jobPrepSuccessResponse(helper.backendJobDescriptor) helper.bjeaProbe.expectMsg(awaitTimeout, "job preparation", expectedMessage) ejea.stateName should be(RunningJob) ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, None)) @@ -34,16 +44,17 @@ class EjeaPreparingJobSpec extends EngineJobExecutionActorSpec with CanExpectHas } s"Not proceed if Job Preparation fails ($mode)" in { - val prepFailedResponse = BackendJobPreparationFailed(helper.jobDescriptorKey, new Exception("The goggles! 
They do nothing!")) + val prepActorResponse = CallPreparationFailed(helper.jobDescriptorKey, new Exception("The goggles! They do nothing!")) + val prepFailedEjeaResponse = JobFailedNonRetryableResponse(helper.jobDescriptorKey, prepActorResponse.throwable, None) ejea = ejeaInPreparingState(mode) - ejea ! prepFailedResponse - helper.replyToProbe.expectMsg(prepFailedResponse) + ejea ! prepActorResponse + helper.replyToProbe.expectMsg(prepFailedEjeaResponse) helper.deathwatch.expectTerminated(ejea, awaitTimeout) } } } - def jobPrepSuccessResponse = BackendJobPreparationSucceeded(helper.backendJobDescriptor, helper.bjeaProps) + def jobPrepSuccessResponse(jobDescriptor: BackendJobDescriptor) = BackendJobPreparationSucceeded(jobDescriptor, helper.bjeaProps) def ejeaInPreparingState(mode: CallCachingMode, restarting: Boolean = false) = helper.buildEJEA(restarting = restarting, callCachingMode = mode).setStateInline(state = PreparingJob, data = NoData) diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRequestingExecutionTokenSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRequestingExecutionTokenSpec.scala new file mode 100644 index 000000000..543d54550 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRequestingExecutionTokenSpec.scala @@ -0,0 +1,54 @@ +package cromwell.engine.workflow.lifecycle.execution.ejea + +import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.preparation.CallPreparation +import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor.{JobExecutionTokenDenied, JobExecutionTokenDispensed} +import cromwell.jobstore.JobStoreActor.QueryJobCompletion +import org.scalatest.concurrent.Eventually + +class EjeaRequestingExecutionTokenSpec extends EngineJobExecutionActorSpec with CanValidateJobStoreKey with Eventually { + + override implicit val 
stateUnderTest: EngineJobExecutionActorState = RequestingExecutionToken + + "An EJEA in the RequestingExecutionToken state" should { + + List(true, false) foreach { restarting => + s"do nothing when denied a token (with restarting=$restarting)" in { + ejea = helper.buildEJEA(restarting = restarting) + ejea ! JobExecutionTokenDenied(1) // 1 is arbitrary. Doesn't matter what position in the queue we are. + + helper.jobTokenDispenserProbe.expectNoMsg(max = awaitAlmostNothing) + helper.jobPreparationProbe.msgAvailable should be(false) + helper.jobStoreProbe.msgAvailable should be(false) + + ejea.stateName should be(RequestingExecutionToken) + } + } + + CallCachingModes foreach { mode => + s"check against the Job Store if restarting is true ($mode)" in { + ejea = helper.buildEJEA(restarting = true) + ejea ! JobExecutionTokenDispensed(helper.executionToken) + + helper.jobStoreProbe.expectMsgPF(max = awaitTimeout, hint = "Awaiting job store lookup") { + case QueryJobCompletion(jobKey, taskOutputs) => + validateJobStoreKey(jobKey) + taskOutputs should be(helper.task.outputs) + } + helper.bjeaProbe.expectNoMsg(awaitAlmostNothing) + helper.jobHashingInitializations shouldBe NothingYet + ejea.stateName should be(CheckingJobStore) + } + + + s"bypass the Job Store and start preparing the job for running or call caching ($mode)" in { + ejea = helper.buildEJEA(restarting = false) + ejea ! 
JobExecutionTokenDispensed(helper.executionToken) + + helper.jobPreparationProbe.expectMsg(max = awaitTimeout, hint = "Awaiting job preparation", CallPreparation.Start) + helper.jobStoreProbe.expectNoMsg(awaitAlmostNothing) + ejea.stateName should be(PreparingJob) + } + } + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRunningJobSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRunningJobSpec.scala index 5d2ccd4ce..2239a2843 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRunningJobSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRunningJobSpec.scala @@ -18,7 +18,7 @@ class EjeaRunningJobSpec extends EngineJobExecutionActorSpec with Eventually wit val hashError = HashError(new Exception("ARGH!!!")) "A 'RunningJob' EJEA" should { - CallCachingModes foreach { case mode => + CallCachingModes foreach { mode => val andMaybeCallCacheHashes = if (mode.writeToCache) "then CallCacheHashes " else "" s"Handle receiving a SucceededResponse ${andMaybeCallCacheHashes}correctly in $mode mode" in { ejea = ejeaInRunningState(mode) @@ -53,16 +53,16 @@ class EjeaRunningJobSpec extends EngineJobExecutionActorSpec with Eventually wit eventually { ejea.stateData should be(SucceededResponseData(successResponse, None)) } ejea.stateName should be(RunningJob) ejea ! hashError - expectJobStoreWrite(SucceededResponseData(successResponse, Some(Failure(hashError.t)))) + expectJobStoreWrite(SucceededResponseData(successResponse, Some(Failure(hashError.reason)))) } s"Handle receiving HashError then SuccessResponse correctly in $mode mode" in { ejea = ejeaInRunningState(mode) ejea ! 
hashError - eventually { ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, Some(Failure(hashError.t)))) } + eventually { ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, Some(Failure(hashError.reason)))) } ejea.stateName should be(RunningJob) ejea ! successResponse - expectJobStoreWrite(SucceededResponseData(successResponse, Some(Failure(hashError.t)))) + expectJobStoreWrite(SucceededResponseData(successResponse, Some(Failure(hashError.reason)))) } } } @@ -94,7 +94,6 @@ class EjeaRunningJobSpec extends EngineJobExecutionActorSpec with Eventually wit ejea ! abortedResponse helper.replyToProbe.expectMsg(max = awaitTimeout, hint = "parent wants the response", abortedResponse) - helper.callCacheWriteActorCreations should be(NothingYet) helper.deathwatch.expectTerminated(ejea, awaitTimeout) // Make sure nothing was sent to the JobStore or CacheResultSaver in the meanwhile: diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala index 1b783a69e..41e3f548e 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala @@ -1,7 +1,7 @@ package cromwell.engine.workflow.lifecycle.execution.ejea import EngineJobExecutionActorSpec._ -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, FailedNonRetryableResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse} import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ import cromwell.jobstore.JobStoreActor.{JobStoreWriteFailure, JobStoreWriteSuccess} import 
cromwell.engine.workflow.lifecycle.execution.ejea.HasJobSuccessResponse.SuccessfulCallCacheHashes @@ -17,7 +17,7 @@ class EjeaUpdatingJobStoreSpec extends EngineJobExecutionActorSpec with HasJobSu ("SucceededResponse", successResponse _, true), ("FailedRetryableResponse", failureRetryableResponse _, true), ("FailedNonRetryableResponse", failureNonRetryableResponse _, false) - ) foreach { case (name, responseMaker, retryable) => + ) foreach { case (name, responseMaker, retryable @ _) => s"Forward a saved $name response on and shut down, if the JobStore write is successful" in { val response = responseMaker.apply ejea = ejeaInUpdatingJobStoreState(response) @@ -33,7 +33,7 @@ class EjeaUpdatingJobStoreSpec extends EngineJobExecutionActorSpec with HasJobSu val exception = new Exception("I loved Ophelia: forty thousand brothers\\ Could not, with all their quantity of love,\\ Make up my sum. What wilt thou do for her?") ejea ! JobStoreWriteFailure(exception) helper.replyToProbe.expectMsgPF(awaitTimeout) { - case FailedNonRetryableResponse(jobDescriptorKey, reason, None) => + case JobFailedNonRetryableResponse(jobDescriptorKey, reason, None) => jobDescriptorKey should be(helper.jobDescriptorKey) reason.getCause should be(exception) } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala index 87f2c5131..ac2a1143b 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala @@ -3,8 +3,7 @@ package cromwell.engine.workflow.lifecycle.execution.ejea import akka.actor.Actor import akka.testkit.TestFSMRef import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ -import cromwell.jobstore.{Pending => _} -import 
cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitWordSpec import cromwell.backend.BackendJobExecutionActor import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionActorCommand import cromwell.core.callcaching._ @@ -15,7 +14,7 @@ import scala.concurrent.duration._ import scala.language.postfixOps -trait EngineJobExecutionActorSpec extends CromwellTestkitSpec +trait EngineJobExecutionActorSpec extends CromwellTestKitWordSpec with Matchers with Mockito with BeforeAndAfterAll with BeforeAndAfter { // If we WANT something to happen, make sure it happens within this window: @@ -29,8 +28,8 @@ trait EngineJobExecutionActorSpec extends CromwellTestkitSpec // The default values for these are "null". The helper is created in "before", the ejea is up to the test cases - var helper: PerTestHelper = _ - var ejea: TestFSMRef[EngineJobExecutionActorState, EJEAData, MockEjea] = _ + private[ejea] var helper: PerTestHelper = _ + private[ejea] var ejea: TestFSMRef[EngineJobExecutionActorState, EJEAData, MockEjea] = _ implicit def stateUnderTest: EngineJobExecutionActorState before { @@ -40,7 +39,7 @@ trait EngineJobExecutionActorSpec extends CromwellTestkitSpec List( ("FetchCachedResultsActor", helper.fetchCachedResultsActorCreations), ("JobHashingActor", helper.jobHashingInitializations), - ("CallCacheWriteActor", helper.callCacheWriteActorCreations)) foreach { + ("CallCacheInvalidateActor", helper.invalidateCacheActorCreations)) foreach { case (name, GotTooMany(list)) => fail(s"Too many $name creations (${list.size})") case _ => // Fine. 
} @@ -55,11 +54,6 @@ trait EngineJobExecutionActorSpec extends CromwellTestkitSpec * var callCacheWriteActorCreations: ExpectOne[(CallCacheHashes, SucceededResponse)] = NothingYet */ - override def afterAll(): Unit = { - system.terminate() - super.afterAll() - } - // Some helper lists val CallCachingModes = List(CallCachingOff, CallCachingActivity(ReadCache), CallCachingActivity(WriteCache), CallCachingActivity(ReadAndWriteCache)) case class RestartOrExecuteCommandTuple(operationName: String, restarting: Boolean, expectedMessageToBjea: BackendJobExecutionActorCommand) diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala index 5a2e7bff1..88163d352 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala @@ -1,14 +1,16 @@ package cromwell.engine.workflow.lifecycle.execution.ejea -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, FailedNonRetryableResponse, FailedRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobDescriptor +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, JobFailedNonRetryableResponse, JobFailedRetryableResponse, JobSucceededResponse} import cromwell.core.JobOutput import cromwell.core.callcaching._ import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{EJEAData, SucceededResponseData, UpdatingCallCache, UpdatingJobStore} -import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CallCacheHashes +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCachingEntryId +import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CallCacheHashes, 
FileHashes} import cromwell.jobstore.JobStoreActor.RegisterJobCompleted import cromwell.jobstore.{JobResultSuccess, JobStoreKey} import org.scalatest.concurrent.Eventually -import wdl4s.values.{WdlInteger, WdlString} +import wdl4s.wdl.values.{WdlInteger, WdlString} import scala.util.Success @@ -22,16 +24,10 @@ private[ejea] trait CanValidateJobStoreKey { self: EngineJobExecutionActorSpec = } private[ejea] trait CanExpectCacheWrites extends Eventually { self: EngineJobExecutionActorSpec => - def expectCacheWrite(expectedResponse: SucceededResponse, expectedCallCacheHashes: CallCacheHashes): Unit = { + def expectCacheWrite(expectedResponse: JobSucceededResponse, expectedCallCacheHashes: CallCacheHashes): Unit = { eventually { ejea.stateName should be(UpdatingCallCache) } ejea.stateData should be(SucceededResponseData(expectedResponse, Some(Success(expectedCallCacheHashes)))) - helper.callCacheWriteActorCreations match { - case GotOne(creation) => - creation._1 should be(expectedCallCacheHashes) - creation._2 should be(expectedResponse) - case _ => fail("Expected exactly one cache write actor creation.") - } - + () } } @@ -46,33 +42,60 @@ private[ejea] trait CanExpectJobStoreWrites extends CanValidateJobStoreKey { sel ejea.stateName should be(UpdatingJobStore) ejea.stateData should be(expectedData) } + () } } private[ejea] trait CanExpectHashingInitialization extends Eventually { self: EngineJobExecutionActorSpec => - def expectHashingActorInitialization(mode: CallCachingMode): Unit = { + def expectHashingActorInitialization(mode: CallCachingMode, jobDescriptor: BackendJobDescriptor): Unit = { eventually { helper.jobHashingInitializations.hasExactlyOne should be(true) } helper.jobHashingInitializations.checkIt { initialization => - initialization._1 should be(helper.backendJobDescriptor) + initialization._1 should be(jobDescriptor) initialization._2 should be(mode) } } } +private[ejea] trait CanExpectFetchCachedResults extends Eventually { self: 
EngineJobExecutionActorSpec => + def expectFetchCachedResultsActor(expectedCallCachingEntryId: CallCachingEntryId): Unit = { + eventually { helper.fetchCachedResultsActorCreations.hasExactlyOne should be(true) } + helper.fetchCachedResultsActorCreations.checkIt { + case (callCachingEntryId, _) => callCachingEntryId should be(expectedCallCachingEntryId) + case _ => fail("Incorrect creation of the fetchCachedResultsActor") + } + } +} + +private[ejea] trait CanExpectCacheInvalidation extends Eventually { self: EngineJobExecutionActorSpec => + def expectInvalidateCallCacheActor(expectedCacheId: CallCachingEntryId): Unit = { + eventually { helper.invalidateCacheActorCreations.hasExactlyOne should be(true) } + helper.invalidateCacheActorCreations.checkIt { cacheId => + cacheId shouldBe expectedCacheId + } + } +} + private[ejea] trait HasJobSuccessResponse { self: EngineJobExecutionActorSpec => val successRc = Option(171) val successOutputs = Map("a" -> JobOutput(WdlInteger(3)), "b" -> JobOutput(WdlString("bee"))) - def successResponse = SucceededResponse(helper.jobDescriptorKey, successRc, successOutputs, None, Seq.empty) + def successResponse = JobSucceededResponse(helper.jobDescriptorKey, successRc, successOutputs, None, Seq.empty, None) } private[ejea] object HasJobSuccessResponse { - val SuccessfulCallCacheHashes = CallCacheHashes(Set(HashResult(HashKey("whatever you want"), HashValue("whatever you need")))) + val SuccessfulCallCacheHashes = CallCacheHashes( + Set(HashResult(HashKey("whatever you want"), HashValue("whatever you need"))), + "initialHash", + Option(FileHashes( + Set(HashResult(HashKey("whatever file you want"), HashValue("whatever file you need"))), + "fileHash" + )) + ) } private[ejea] trait HasJobFailureResponses { self: EngineJobExecutionActorSpec => val failedRc = Option(12) val failureReason = new Exception("The sixth sheik's sheep is sick!") // Need to delay making the response because job descriptors come from the per-test "helper", which is 
null outside tests! - def failureRetryableResponse = FailedRetryableResponse(helper.jobDescriptorKey, failureReason, failedRc) - def failureNonRetryableResponse = FailedNonRetryableResponse(helper.jobDescriptorKey, failureReason, Option(12)) + def failureRetryableResponse = JobFailedRetryableResponse(helper.jobDescriptorKey, failureReason, failedRc) + def failureNonRetryableResponse = JobFailedNonRetryableResponse(helper.jobDescriptorKey, failureReason, Option(12)) def abortedResponse = AbortedResponse(helper.jobDescriptorKey) } \ No newline at end of file diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/ExpectOne.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/ExpectOne.scala index bf8049098..7ddfd340f 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/ExpectOne.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/ExpectOne.scala @@ -1,10 +1,10 @@ package cromwell.engine.workflow.lifecycle.execution.ejea private[ejea] sealed trait ExpectOne[+A] { - def checkIt(block: A => Unit): Unit = throw new IllegalStateException("An ExpectOne must have exactly one element for checkIt to work") + def checkIt(block: A => Any): Unit = throw new IllegalStateException("An ExpectOne must have exactly one element for checkIt to work") def hasExactlyOne: Boolean def foundOne[B >: A](theFoundOne: B) = this match { - case NothingYet => new GotOne(theFoundOne) + case NothingYet => GotOne(theFoundOne) case GotOne(theOriginalOne) => GotTooMany(List(theOriginalOne, theFoundOne)) case GotTooMany(theOnes) => GotTooMany(theOnes :+ theFoundOne) } @@ -15,7 +15,7 @@ private[ejea] case object NothingYet extends ExpectOne[scala.Nothing] { } private[ejea] case class GotOne[+A](theOne: A) extends ExpectOne[A] { - override def checkIt(block: A => Unit): Unit = block(theOne) + override def checkIt(block: A => Any): Unit = { block(theOne); () } override def hasExactlyOne 
= true } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala index 6fd15b90a..29278c556 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala @@ -1,28 +1,31 @@ package cromwell.engine.workflow.lifecycle.execution.ejea +import java.util.UUID + import akka.actor.{ActorRef, ActorSystem, Props} import akka.testkit.{TestFSMRef, TestProbe} -import cromwell.backend.BackendJobExecutionActor.SucceededResponse -import cromwell.backend.{BackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey, BackendLifecycleActorFactory, BackendWorkflowDescriptor} -import cromwell.core.callcaching.{CallCachingActivity, CallCachingMode, CallCachingOff} -import cromwell.core.{ExecutionStore, OutputStore, WorkflowId} +import cromwell.backend._ +import cromwell.backend.standard.callcaching._ +import cromwell.core.JobExecutionToken.JobExecutionTokenType +import cromwell.core.callcaching._ +import cromwell.core.{CallOutputs, JobExecutionToken, WorkflowId} import cromwell.engine.EngineWorkflowDescriptor -import cromwell.engine.workflow.lifecycle.execution.{EngineJobExecutionActor, WorkflowExecutionActorData} import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{EJEAData, EngineJobExecutionActorState} -import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CallCacheHashes} -import org.specs2.mock.Mockito -import wdl4s.WdlExpression.ScopedLookupFunction -import wdl4s.expression.{NoFunctions, WdlFunctions, WdlStandardLibraryFunctions} -import wdl4s.types.{WdlIntegerType, WdlStringType} -import wdl4s.values.{WdlInteger, WdlString, WdlValue} -import wdl4s._ -import cromwell.util.AkkaTestUtil._ +import 
cromwell.engine.workflow.lifecycle.execution.callcaching.CallCachingEntryId import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec._ +import cromwell.engine.workflow.lifecycle.execution.{EngineJobExecutionActor, WorkflowExecutionActorData} +import cromwell.engine.workflow.mocks.{DeclarationMock, TaskMock, WdlExpressionMock} +import cromwell.util.AkkaTestUtil._ +import org.specs2.mock.Mockito +import wdl4s.wdl._ +import wdl4s.wdl.expression.{NoFunctions, WdlStandardLibraryFunctions} +import wdl4s.parser.WdlParser.Ast +import wdl4s.wdl.types.{WdlIntegerType, WdlStringType} import scala.util.Success -private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mockito { +private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mockito with TaskMock with WdlExpressionMock with DeclarationMock { val workflowId = WorkflowId.randomId() val workflowName = "wf" @@ -31,34 +34,31 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mock val jobIndex = Some(1) val jobAttempt = 1 - val task = mock[Task] - task.declarations returns Seq.empty - task.runtimeAttributes returns RuntimeAttributes(Map.empty) - task.commandTemplateString returns "!!shazam!!" 
- val stringOutputExpression = mock[WdlExpression] - stringOutputExpression.valueString returns "hello" - stringOutputExpression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlString("hello")) - task.outputs returns Seq(TaskOutput("outString", WdlStringType, stringOutputExpression)) - - val intInputExpression = mock[WdlExpression] - intInputExpression.valueString returns "543" - intInputExpression.evaluate(any[ScopedLookupFunction], any[WdlFunctions[WdlValue]]) returns Success(WdlInteger(543)) - - val intInputDeclaration = mock[Declaration] - intInputDeclaration.name returns "inInt" - intInputDeclaration.expression returns Option(intInputExpression) - intInputDeclaration.wdlType returns WdlIntegerType - task.declarations returns Seq(intInputDeclaration) - - val call: Call = Call(None, jobFqn, task, Set.empty, Map.empty, None) + val executionToken = JobExecutionToken(JobExecutionTokenType("test", None), UUID.randomUUID()) + + val task = mockTask( + taskName, + declarations = Seq(mockDeclaration("inInt", WdlIntegerType, mockIntExpression(543))), + outputs = Seq(("outString", WdlStringType, mockStringExpression("hello"))) + ) + + val workflow = new WdlWorkflow( + unqualifiedName = workflowName, + workflowOutputWildcards = Seq.empty, + wdlSyntaxErrorFormatter = mock[WdlSyntaxErrorFormatter], + meta = Map.empty, + parameterMeta = Map.empty, + ast = mock[Ast]) + val call: WdlTaskCall = WdlTaskCall(None, task, Map.empty, mock[Ast]) + call.parent_=(workflow) val jobDescriptorKey = BackendJobDescriptorKey(call, jobIndex, jobAttempt) - val backendWorkflowDescriptor = BackendWorkflowDescriptor(workflowId, null, null, null) - val backendJobDescriptor = BackendJobDescriptor(backendWorkflowDescriptor, jobDescriptorKey, runtimeAttributes = Map.empty, inputs = Map.empty) + val backendWorkflowDescriptor = BackendWorkflowDescriptor(workflowId, null, null, null, null) + val backendJobDescriptor = BackendJobDescriptor(backendWorkflowDescriptor, 
jobDescriptorKey, runtimeAttributes = Map.empty, inputDeclarations = Map.empty, FloatingDockerTagWithoutHash("ubuntu:latest"), Map.empty) - var fetchCachedResultsActorCreations: ExpectOne[(CacheHit, Seq[TaskOutput])] = NothingYet + var fetchCachedResultsActorCreations: ExpectOne[(CallCachingEntryId, Seq[TaskOutput])] = NothingYet var jobHashingInitializations: ExpectOne[(BackendJobDescriptor, CallCachingActivity)] = NothingYet - var callCacheWriteActorCreations: ExpectOne[(CallCacheHashes, SucceededResponse)] = NothingYet + var invalidateCacheActorCreations: ExpectOne[CallCachingEntryId] = NothingYet val deathwatch = TestProbe() val bjeaProbe = TestProbe() @@ -66,32 +66,55 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mock val replyToProbe = TestProbe() val parentProbe = TestProbe() val serviceRegistryProbe = TestProbe() + val ioActorProbe = TestProbe() val jobStoreProbe = TestProbe() val callCacheReadActorProbe = TestProbe() + val callCacheWriteActorProbe = TestProbe() + val dockerHashActorProbe = TestProbe() val callCacheHitCopyingProbe = TestProbe() val jobPreparationProbe = TestProbe() + val jobTokenDispenserProbe = TestProbe() + val ejhaProbe = TestProbe() def buildFactory() = new BackendLifecycleActorFactory { override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef): Props = bjeaProps + serviceRegistryActor: ActorRef, + ioActor: ActorRef, + backendSingletonActor: Option[ActorRef]): Props = bjeaProps - override def cacheHitCopyingActorProps: Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef) => Props] = Option((_, _, _) => callCacheHitCopyingProbe.props) + override def cacheHitCopyingActorProps: Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef, ActorRef) => Props] = Option((_, _, _, _) => callCacheHitCopyingProbe.props) override def 
expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = { NoFunctions } + override def fileHashingActorProps: + Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef, ActorRef) => Props] = { + Option(fileHashingActorInner(classOf[DefaultStandardFileHashingActor])) + } + + def fileHashingActorInner(standardFileHashingActor: Class[_ <: StandardFileHashingActor]) + (jobDescriptor: BackendJobDescriptor, + initializationDataOption: Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + ioActor: ActorRef): Props = { + Props.empty + } + // These two factory methods should never be called from EJEA or any of its descendants: override def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - executionStore: ExecutionStore, - outputStore: OutputStore, + ioActor: ActorRef, + calls: Set[WdlTaskCall], + jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: Option[BackendInitializationData]): Option[Props] = throw new UnsupportedOperationException("Unexpected finalization actor creation!") override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - serviceRegistryActor: ActorRef): Option[Props] = throw new UnsupportedOperationException("Unexpected finalization actor creation!") + ioActor: ActorRef, + calls: Set[WdlTaskCall], + serviceRegistryActor: ActorRef, + restarting: Boolean): Option[Props] = throw new UnsupportedOperationException("Unexpected finalization actor creation!") } def buildEJEA(restarting: Boolean = true, @@ -99,20 +122,24 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mock (implicit startingState: EngineJobExecutionActorState): TestFSMRef[EngineJobExecutionActorState, EJEAData, MockEjea] = { val factory: 
BackendLifecycleActorFactory = buildFactory() - val descriptor = EngineWorkflowDescriptor(backendWorkflowDescriptor, Map.empty, null, null, null, callCachingMode) + val descriptor = EngineWorkflowDescriptor(mock[WdlNamespaceWithWorkflow], backendWorkflowDescriptor, null, null, null, callCachingMode) val myBrandNewEjea = new TestFSMRef[EngineJobExecutionActorState, EJEAData, MockEjea](system, Props(new MockEjea( helper = this, jobPreparationProbe = jobPreparationProbe, replyTo = replyToProbe.ref, jobDescriptorKey = jobDescriptorKey, - executionData = WorkflowExecutionActorData(descriptor, ExecutionStore(Map.empty), Map.empty, OutputStore(Map.empty)), + executionData = WorkflowExecutionActorData.empty(descriptor), factory = factory, initializationData = None, restarting = restarting, serviceRegistryActor = serviceRegistryProbe.ref, + ioActor = ioActorProbe.ref, jobStoreActor = jobStoreProbe.ref, callCacheReadActor = callCacheReadActorProbe.ref, + callCacheWriteActor = callCacheWriteActorProbe.ref, + dockerHashActor = dockerHashActorProbe.ref, + jobTokenDispenserActor = jobTokenDispenserProbe.ref, backendName = "NOT USED", callCachingMode = callCachingMode )), parentProbe.ref, s"EngineJobExecutionActorSpec-$workflowId") @@ -131,14 +158,21 @@ private[ejea] class MockEjea(helper: PerTestHelper, initializationData: Option[BackendInitializationData], restarting: Boolean, serviceRegistryActor: ActorRef, + ioActor: ActorRef, jobStoreActor: ActorRef, callCacheReadActor: ActorRef, + callCacheWriteActor: ActorRef, + dockerHashActor: ActorRef, + jobTokenDispenserActor: ActorRef, backendName: String, - callCachingMode: CallCachingMode) extends EngineJobExecutionActor(replyTo, jobDescriptorKey, executionData, factory, initializationData, restarting, serviceRegistryActor, jobStoreActor, callCacheReadActor, backendName, callCachingMode) { - - override def makeFetchCachedResultsActor(cacheHit: CacheHit, taskOutputs: Seq[TaskOutput]) = helper.fetchCachedResultsActorCreations = 
helper.fetchCachedResultsActorCreations.foundOne((cacheHit, taskOutputs)) - override def initializeJobHashing(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity) = helper.jobHashingInitializations = helper.jobHashingInitializations.foundOne((jobDescriptor, activity)) - override def createSaveCacheResultsActor(hashes: CallCacheHashes, success: SucceededResponse) = helper.callCacheWriteActorCreations = helper.callCacheWriteActorCreations.foundOne((hashes, success)) + callCachingMode: CallCachingMode) extends EngineJobExecutionActor(replyTo, jobDescriptorKey, executionData, factory, initializationData, restarting, serviceRegistryActor, ioActor, jobStoreActor, callCacheReadActor, callCacheWriteActor, dockerHashActor, jobTokenDispenserActor, None, backendName, callCachingMode) { + implicit val system = context.system + override def makeFetchCachedResultsActor(cacheId: CallCachingEntryId, taskOutputs: Seq[TaskOutput]) = helper.fetchCachedResultsActorCreations = helper.fetchCachedResultsActorCreations.foundOne((cacheId, taskOutputs)) + override def initializeJobHashing(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity, callCachingEligible: CallCachingEligible) = { + helper.jobHashingInitializations = helper.jobHashingInitializations.foundOne((jobDescriptor, activity)) + Success(helper.ejhaProbe.ref) + } + override def invalidateCacheHit(cacheId: CallCachingEntryId): Unit = { helper.invalidateCacheActorCreations = helper.invalidateCacheActorCreations.foundOne(cacheId) } override def createJobPreparationActor(jobPrepProps: Props, name: String) = jobPreparationProbe.ref } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationActorSpec.scala new file mode 100644 index 000000000..cea38cd81 --- /dev/null +++ 
b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationActorSpec.scala @@ -0,0 +1,142 @@ +package cromwell.engine.workflow.lifecycle.execution.preparation + +import akka.testkit.{ImplicitSender, TestActorRef} +import cromwell.core.callcaching.{DockerWithHash, FloatingDockerTagWithoutHash} +import cromwell.core.{LocallyQualifiedName, TestKitSuite} +import cromwell.docker.DockerHashActor.DockerHashSuccessResponse +import cromwell.docker.{DockerHashRequest, DockerHashResult, DockerImageIdentifier, DockerImageIdentifierWithoutHash} +import cromwell.engine.workflow.WorkflowDockerLookupActor.WorkflowDockerLookupFailure +import cromwell.engine.workflow.lifecycle.execution.preparation.CallPreparation.{BackendJobPreparationSucceeded, CallPreparationFailed, Start} +import cromwell.services.keyvalue.KeyValueServiceActor.{KvGet, KvKeyLookupFailed, KvPair} +import org.scalatest.{BeforeAndAfter, FlatSpecLike, Matchers} +import org.specs2.mock.Mockito +import wdl4s.wdl.Declaration +import wdl4s.wdl.values.{WdlString, WdlValue} + +import scala.concurrent.duration._ +import scala.language.postfixOps +import scala.util.{Failure, Success} + +class JobPreparationActorSpec extends TestKitSuite("JobPrepActorSpecSystem") with FlatSpecLike with Matchers with ImplicitSender with BeforeAndAfter with Mockito { + + behavior of "JobPreparationActor" + + // Generated fresh for each test case in 'before' + var helper: JobPreparationTestHelper = _ + var inputs: Map[Declaration, WdlValue] = _ + + before { + helper = new JobPreparationTestHelper() + inputs = Map.empty[Declaration, WdlValue] + } + + it should "fail preparation if it can't evaluate inputs or runtime attributes" in { + val exception = new Exception("Failed to prepare inputs/attributes - part of test flow") + val failure = Failure(exception) + val expectedResponse = CallPreparationFailed(helper.jobKey, exception) + val actor = TestActorRef(helper.buildTestJobPreparationActor(null, null, null, 
failure, List.empty), self) + actor ! Start + expectMsg(expectedResponse) + helper.workflowDockerLookupActor.expectNoMsg(100 millis) + } + + it should "prepare successfully a job without docker attribute" in { + val attributes = Map.empty[LocallyQualifiedName, WdlValue] + val inputsAndAttributes = Success((inputs, attributes)) + val actor = TestActorRef(helper.buildTestJobPreparationActor(null, null, null, inputsAndAttributes, List.empty), self) + actor ! Start + expectMsgPF(5 seconds) { + case success: BackendJobPreparationSucceeded => + success.jobDescriptor.maybeCallCachingEligible.dockerHash shouldBe None + } + helper.workflowDockerLookupActor.expectNoMsg(1 second) + } + + it should "not ask for the docker hash if the attribute already contains a hash" in { + val dockerValue = "ubuntu@sha256:71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950" + val attributes = Map( + "docker" -> WdlString(dockerValue) + ) + val inputsAndAttributes = Success((inputs, attributes)) + val actor = TestActorRef(helper.buildTestJobPreparationActor(null, null, null, inputsAndAttributes, List.empty), self) + actor ! 
Start + expectMsgPF(5 seconds) { + case success: BackendJobPreparationSucceeded => + success.jobDescriptor.runtimeAttributes("docker").valueString shouldBe dockerValue + success.jobDescriptor.maybeCallCachingEligible shouldBe DockerWithHash("ubuntu@sha256:71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950") + } + helper.workflowDockerLookupActor.expectNoMsg(1 second) + } + + it should "lookup any requested key/value prefetches before (not) performing a docker hash lookup" in { + val dockerValue = "ubuntu:latest" + val attributes = Map ( + "docker" -> WdlString(dockerValue) + ) + val hashResult = DockerHashResult("sha256", "71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950") + val inputsAndAttributes = Success((inputs, attributes)) + val prefetchedKey1 = "hello" + val prefetchedVal1 = KvPair(helper.scopedKeyMaker(prefetchedKey1), Some("world")) + val prefetchedKey2 = "bonjour" + val prefetchedVal2 = KvKeyLookupFailed(KvGet(helper.scopedKeyMaker(prefetchedKey2))) + val prefetchedValues = Map(prefetchedKey1 -> prefetchedVal1, prefetchedKey2 -> prefetchedVal2) + var keysToPrefetch = List(prefetchedKey1, prefetchedKey2) + val actor = TestActorRef(helper.buildTestJobPreparationActor(1 minute, 1 minutes, List.empty, inputsAndAttributes, List(prefetchedKey1, prefetchedKey2)), self) + actor ! 
Start + + def respondFromKv() = { + helper.serviceRegistryProbe.expectMsgPF(max = 100 milliseconds) { + case KvGet(k) if keysToPrefetch.contains(k.key) => + actor.tell(msg = prefetchedValues(k.key), sender = helper.serviceRegistryProbe.ref) + keysToPrefetch = keysToPrefetch diff List(k.key) + } + } + respondFromKv() + helper.workflowDockerLookupActor.expectNoMsg(max = 100 milliseconds) + respondFromKv() + + val req = helper.workflowDockerLookupActor.expectMsgClass(classOf[DockerHashRequest]) + helper.workflowDockerLookupActor.reply(DockerHashSuccessResponse(hashResult, req)) + expectMsgPF(5 seconds) { + case success: BackendJobPreparationSucceeded => + success.jobDescriptor.prefetchedKvStoreEntries should be(Map(prefetchedKey1 -> prefetchedVal1, prefetchedKey2 -> prefetchedVal2)) + } + } + + it should "leave the docker attribute as is and provide a DockerWithHash value" in { + val dockerValue = "ubuntu:latest" + val attributes = Map ( + "docker" -> WdlString(dockerValue) + ) + val hashResult = DockerHashResult("sha256", "71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950") + val inputsAndAttributes = Success((inputs, attributes)) + val finalValue = "ubuntu@sha256:71cd81252a3563a03ad8daee81047b62ab5d892ebbfbf71cf53415f29c130950" + val actor = TestActorRef(helper.buildTestJobPreparationActor(1 minute, 1 minutes, List.empty, inputsAndAttributes, List.empty), self) + actor ! 
Start + helper.workflowDockerLookupActor.expectMsgClass(classOf[DockerHashRequest]) + helper.workflowDockerLookupActor.reply(DockerHashSuccessResponse(hashResult, mock[DockerHashRequest])) + expectMsgPF(5 seconds) { + case success: BackendJobPreparationSucceeded => + success.jobDescriptor.runtimeAttributes("docker").valueString shouldBe dockerValue + success.jobDescriptor.maybeCallCachingEligible shouldBe DockerWithHash(finalValue) + } + } + + it should "not provide a DockerWithHash value if it can't get the docker hash" in { + val dockerValue = "ubuntu:latest" + val request = DockerHashRequest(DockerImageIdentifier.fromString(dockerValue).get.asInstanceOf[DockerImageIdentifierWithoutHash]) + val attributes = Map ( + "docker" -> WdlString(dockerValue) + ) + val inputsAndAttributes = Success((inputs, attributes)) + val actor = TestActorRef(helper.buildTestJobPreparationActor(1 minute, 1 minutes, List.empty, inputsAndAttributes, List.empty), self) + actor ! Start + helper.workflowDockerLookupActor.expectMsgClass(classOf[DockerHashRequest]) + helper.workflowDockerLookupActor.reply(WorkflowDockerLookupFailure(new Exception("Failed to get docker hash - part of test flow"), request)) + expectMsgPF(5 seconds) { + case success: BackendJobPreparationSucceeded => + success.jobDescriptor.runtimeAttributes("docker").valueString shouldBe dockerValue + success.jobDescriptor.maybeCallCachingEligible shouldBe FloatingDockerTagWithoutHash("ubuntu:latest") + } + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationTestHelper.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationTestHelper.scala new file mode 100644 index 000000000..4d129ba00 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationTestHelper.scala @@ -0,0 +1,93 @@ +package cromwell.engine.workflow.lifecycle.execution.preparation + +import 
akka.actor.{ActorRef, ActorSystem, Props} +import akka.testkit.TestProbe +import cromwell.backend._ +import cromwell.core.WorkflowId +import cromwell.engine.EngineWorkflowDescriptor +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActorData +import cromwell.services.keyvalue.KeyValueServiceActor.{KvJobKey, ScopedKey} +import org.specs2.mock.Mockito +import wdl4s.wdl._ +import wdl4s.wdl.values.WdlValue + +import scala.concurrent.duration.FiniteDuration +import scala.util.Try +import JobPreparationTestHelper._ +import wdl4s.wdl.expression.NoFunctions + +class JobPreparationTestHelper(implicit val system: ActorSystem) extends Mockito { + val executionData = mock[WorkflowExecutionActorData] + val workflowDescriptor = mock[EngineWorkflowDescriptor] + val backendDescriptor = mock[BackendWorkflowDescriptor] + workflowDescriptor.backendDescriptor returns backendDescriptor + workflowDescriptor.id returns WorkflowId.randomId() + executionData.workflowDescriptor returns workflowDescriptor + val jobKey = mock[BackendJobDescriptorKey] + val serviceRegistryProbe = TestProbe() + val ioActor = TestProbe() + val workflowDockerLookupActor = TestProbe() + + val workflowId = WorkflowId.randomId() + val scopedKeyMaker: ScopedKeyMaker = key => ScopedKey(workflowId, KvJobKey("correct.horse.battery.staple", None, 1), key) + + def buildTestJobPreparationActor(backpressureTimeout: FiniteDuration, + noResponseTimeout: FiniteDuration, + dockerHashCredentials: List[Any], + inputsAndAttributes: Try[(Map[Declaration, WdlValue], Map[wdl4s.wdl.LocallyQualifiedName, WdlValue])], + kvStoreKeysForPrefetch: List[String]) = { + + Props(new TestJobPreparationActor( + kvStoreKeysForPrefetch = kvStoreKeysForPrefetch, + dockerHashCredentialsInput = dockerHashCredentials, + backpressureWaitTimeInput = backpressureTimeout, + dockerNoResponseTimeoutInput = noResponseTimeout, + inputsAndAttributes = inputsAndAttributes, + executionData = executionData, + jobKey = jobKey, + 
workflowDockerLookupActor = workflowDockerLookupActor.ref, + serviceRegistryActor = serviceRegistryProbe.ref, + ioActor = ioActor.ref, + scopedKeyMaker)) + } +} + +private[preparation] class TestJobPreparationActor(kvStoreKeysForPrefetch: List[String], + dockerHashCredentialsInput: List[Any], + backpressureWaitTimeInput: FiniteDuration, + dockerNoResponseTimeoutInput: FiniteDuration, + inputsAndAttributes: Try[(Map[Declaration, WdlValue], Map[wdl4s.wdl.LocallyQualifiedName, WdlValue])], + executionData: WorkflowExecutionActorData, + jobKey: BackendJobDescriptorKey, + workflowDockerLookupActor: ActorRef, + serviceRegistryActor: ActorRef, + ioActor: ActorRef, + scopedKeyMaker: ScopedKeyMaker) extends JobPreparationActor(executionData = executionData, + jobKey = jobKey, + factory = null, + workflowDockerLookupActor = workflowDockerLookupActor, + initializationData = None, + serviceRegistryActor = serviceRegistryActor, + ioActor = ioActor, + backendSingletonActor = None) { + + override lazy val kvStoreKeysToPrefetch = kvStoreKeysForPrefetch + + override private[preparation] lazy val expressionLanguageFunctions = NoFunctions + override private[preparation] lazy val dockerHashCredentials = dockerHashCredentialsInput + override private[preparation] lazy val noResponseTimeout = dockerNoResponseTimeoutInput + override private[preparation] lazy val hasDockerDefinition = true + + override def scopedKey(key: String) = scopedKeyMaker.apply(key) + override def evaluateInputsAndAttributes = inputsAndAttributes + + override private[preparation] def jobExecutionProps(jobDescriptor: BackendJobDescriptor, + initializationData: Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + ioActor: ActorRef, + backendSingletonActor: Option[ActorRef]) = Props.empty +} + +object JobPreparationTestHelper { + type ScopedKeyMaker = String => ScopedKey +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/mocks/DeclarationMock.scala 
b/engine/src/test/scala/cromwell/engine/workflow/mocks/DeclarationMock.scala new file mode 100644 index 000000000..337bb0820 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/mocks/DeclarationMock.scala @@ -0,0 +1,21 @@ +package cromwell.engine.workflow.mocks + +import org.specs2.mock.Mockito +import wdl4s.wdl.{Declaration, WdlExpression} +import wdl4s.wdl.types.WdlType + +object DeclarationMock { + type DeclarationMockType = (String, WdlType, WdlExpression) +} + +trait DeclarationMock extends Mockito { + def mockDeclaration(name: String, + wdlType: WdlType, + expression: WdlExpression) = { + val declaration = mock[Declaration] + declaration.unqualifiedName returns name + declaration.expression returns Option(expression) + declaration.wdlType returns wdlType + declaration + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/mocks/TaskMock.scala b/engine/src/test/scala/cromwell/engine/workflow/mocks/TaskMock.scala new file mode 100644 index 000000000..45d5e076c --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/mocks/TaskMock.scala @@ -0,0 +1,27 @@ +package cromwell.engine.workflow.mocks + +import cromwell.engine.workflow.mocks.DeclarationMock.DeclarationMockType +import org.specs2.mock.Mockito +import wdl4s.wdl._ +import wdl4s.parser.WdlParser.Ast + +trait TaskMock extends Mockito { + + def mockTask(name: String, + declarations: Seq[Declaration] = Seq.empty, + runtimeAttributes: RuntimeAttributes = new RuntimeAttributes(Map.empty), + commandTemplateString: String = "!!shazam!!", + outputs: Seq[DeclarationMockType] = Seq.empty + ) = { + val task = mock[WdlTask] + task.declarations returns declarations + task.runtimeAttributes returns runtimeAttributes + task.commandTemplateString returns commandTemplateString + task.name returns name + task.unqualifiedName returns name + task.outputs returns (outputs map { + case (outputName, wdlType, expression) => TaskOutput(outputName, wdlType, expression, mock[Ast], Option(task)) + 
}) + task + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/mocks/WdlExpressionMock.scala b/engine/src/test/scala/cromwell/engine/workflow/mocks/WdlExpressionMock.scala new file mode 100644 index 000000000..735a8b852 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/mocks/WdlExpressionMock.scala @@ -0,0 +1,32 @@ +package cromwell.engine.workflow.mocks + +import org.specs2.mock.Mockito +import wdl4s.wdl.WdlExpression +import wdl4s.wdl.WdlExpression._ +import wdl4s.wdl.expression.WdlFunctions +import wdl4s.wdl.values.{WdlInteger, WdlString, WdlValue} + +import scala.util.Success + +trait WdlExpressionMock extends Mockito { + val helloStringExpression = { + val expression = mock[WdlExpression] + expression.valueString returns "hello" + expression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlString("hello")) + expression + } + + def mockStringExpression(value: String) = { + val expression = mock[WdlExpression] + expression.valueString returns value + expression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlString(value)) + expression + } + + def mockIntExpression(value: Int) = { + val expression = mock[WdlExpression] + expression.valueString returns value.toString + expression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlInteger(value)) + expression + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActorSpec.scala new file mode 100644 index 000000000..444948c0e --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActorSpec.scala @@ -0,0 +1,304 @@ +package cromwell.engine.workflow.tokens + +import java.util.UUID + +import akka.actor.{ActorSystem, PoisonPill} +import akka.testkit.{ImplicitSender, TestActorRef, TestKit, TestProbe} 
+import cromwell.core.JobExecutionToken +import cromwell.core.JobExecutionToken.JobExecutionTokenType +import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor.{JobExecutionTokenDenied, JobExecutionTokenDispensed, JobExecutionTokenRequest, JobExecutionTokenReturn} +import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActorSpec._ +import cromwell.engine.workflow.tokens.TestTokenGrabbingActor.StoppingSupervisor +import cromwell.util.AkkaTestUtil +import org.scalatest._ +import org.scalatest.concurrent.Eventually + +import scala.concurrent.duration._ + +class JobExecutionTokenDispenserActorSpec extends TestKit(ActorSystem("JETDASpec")) with ImplicitSender with FlatSpecLike with Matchers with BeforeAndAfter with BeforeAndAfterAll with Eventually { + + val MaxWaitTime = 100.milliseconds + implicit val pc: PatienceConfig = PatienceConfig(MaxWaitTime) + + behavior of "JobExecutionTokenDispenserActor" + + it should "dispense an infinite token correctly" in { + actorRefUnderTest ! JobExecutionTokenRequest(TestInfiniteTokenType) + expectMsgPF(max = MaxWaitTime, hint = "token dispensed message") { + case JobExecutionTokenDispensed(token) => + token.jobExecutionTokenType should be(TestInfiniteTokenType) + } + } + + it should "accept return of an infinite token correctly" in { + actorRefUnderTest ! JobExecutionTokenRequest(TestInfiniteTokenType) + expectMsgPF(max = MaxWaitTime, hint = "token dispensed message") { + case JobExecutionTokenDispensed(token) => + actorRefUnderTest ! 
JobExecutionTokenReturn(token) + } + } + + it should "dispense indefinitely for an infinite token type" in { + var currentSet: Set[JobExecutionToken] = Set.empty + 100 indexedTimes { i => + val sender = TestProbe() + actorRefUnderTest.tell(msg = JobExecutionTokenRequest(TestInfiniteTokenType), sender = sender.ref) + sender.expectMsgPF(max = MaxWaitTime, hint = "token dispensed message") { + case JobExecutionTokenDispensed(token) => + token.jobExecutionTokenType should be(TestInfiniteTokenType) + currentSet.contains(token) should be(false) + currentSet += token + } + } + } + + it should "dispense a limited token correctly" in { + + actorRefUnderTest ! JobExecutionTokenRequest(LimitedTo5Tokens) + expectMsgPF(max = MaxWaitTime, hint = "token dispensed message") { + case JobExecutionTokenDispensed(token) => token.jobExecutionTokenType should be(LimitedTo5Tokens) + } + } + + it should "accept return of a limited token type correctly" in { + actorRefUnderTest ! JobExecutionTokenRequest(LimitedTo5Tokens) + expectMsgPF(max = MaxWaitTime, hint = "token dispensed message") { + case JobExecutionTokenDispensed(token) => actorRefUnderTest ! 
JobExecutionTokenReturn(token) + } + } + + it should "limit the dispensing of a limited token type" in { + + var currentTokens: Map[TestProbe, JobExecutionToken] = Map.empty + val dummyActors = (0 until 100 map { i => i -> TestProbe("dummy_" + i) }).toMap + + // Dispense the first 5: + 5 indexedTimes { i => + val sndr = dummyActors(i) + actorRefUnderTest.tell(msg = JobExecutionTokenRequest(LimitedTo5Tokens), sender = sndr.ref) + sndr.expectMsgPF(max = MaxWaitTime, hint = "token dispensed message") { + case JobExecutionTokenDispensed(token) => + token.jobExecutionTokenType should be(LimitedTo5Tokens) + currentTokens.values.toList.contains(token) should be(false) // Check we didn't already get this token + currentTokens += sndr -> token + } + } + + // Queue the next 95: + 95 indexedTimes { i => + val sndr = dummyActors(5 + i) + actorRefUnderTest.tell(msg = JobExecutionTokenRequest(LimitedTo5Tokens), sender = sndr.ref) + sndr.expectMsgPF(max = MaxWaitTime, hint = "token denied message") { + case JobExecutionTokenDenied(positionInQueue) => + positionInQueue should be(i) + } + } + + // It should allow queued actors to check their position in the queue: + 95 indexedTimes { i => + val sndr = dummyActors(5 + i) + actorRefUnderTest.tell(msg = JobExecutionTokenRequest(LimitedTo5Tokens), sender = sndr.ref) + sndr.expectMsgPF(max = MaxWaitTime, hint = "token denied message") { + case JobExecutionTokenDenied(positionInQueue) => + positionInQueue should be(i) + } + } + + // It should release tokens as soon as they're available (while there's still a queue...): + 95 indexedTimes { i => + val returner = dummyActors(i) + val nextInLine = dummyActors(i + 5) + val tokenBeingReturned = currentTokens(returner) + actorRefUnderTest.tell(msg = JobExecutionTokenReturn(tokenBeingReturned), sender = returner.ref) + currentTokens -= returner + nextInLine.expectMsgPF(max = MaxWaitTime, hint = s"token dispensed message to the next in line actor (#${i + 5})") { + case 
JobExecutionTokenDispensed(token) => + token should be(tokenBeingReturned) // It just gets immediately passed out again! + currentTokens += nextInLine -> token + } + } + + // Double-check the queue state: when we request a token now, we should still be denied: + actorRefUnderTest ! JobExecutionTokenRequest(LimitedTo5Tokens) + expectMsgClass(classOf[JobExecutionTokenDenied]) + + //And finally, silently release the remaining tokens: + 5 indexedTimes { i => + val returner = dummyActors(i + 95) + val tokenBeingReturned = currentTokens(returner) + actorRefUnderTest.tell(msg = JobExecutionTokenReturn(tokenBeingReturned), sender = returner.ref) + currentTokens -= returner + } + + // And we should have gotten our own token by now: + expectMsgClass(classOf[JobExecutionTokenDispensed]) + + // Check we didn't get anything else in the meanwhile: + msgAvailable should be(false) + dummyActors.values foreach { testProbe => testProbe.msgAvailable should be(false) } + } + + it should "resend the same token to an actor which already has one" in { + actorRefUnderTest ! JobExecutionTokenRequest(LimitedTo5Tokens) + val firstResponse = expectMsgClass(classOf[JobExecutionTokenDispensed]) + + 5 indexedTimes { i => + actorRefUnderTest ! 
JobExecutionTokenRequest(LimitedTo5Tokens) + expectMsg(MaxWaitTime, s"same token again (attempt ${i + 1})", firstResponse) // Always the same + } + } + + + // Incidentally, also covers: it should "not be fooled if the wrong actor returns a token" + it should "not be fooled by a doubly-returned token" in { + var currentTokens: Map[TestProbe, JobExecutionToken] = Map.empty + val dummyActors = (0 until 7 map { i => i -> TestProbe("dummy_" + i) }).toMap + + // Set up by taking all 5 tokens out, and then adding 2 to the queue: + 5 indexedTimes { i => + val sndr = dummyActors(i) + actorRefUnderTest.tell(msg = JobExecutionTokenRequest(LimitedTo5Tokens), sender = sndr.ref) + currentTokens += dummyActors(i) -> sndr.expectMsgClass(classOf[JobExecutionTokenDispensed]).jobExecutionToken + } + 2 indexedTimes { i => + val sndr = dummyActors(5 + i) + actorRefUnderTest.tell(msg = JobExecutionTokenRequest(LimitedTo5Tokens), sender = sndr.ref) + sndr.expectMsgClass(classOf[JobExecutionTokenDenied]) + } + + // The first time we return a token, the next in line should be given it: + val returningActor = dummyActors(0) + val nextInLine1 = dummyActors(5) + val nextInLine2 = dummyActors(6) + val tokenBeingReturned = currentTokens(returningActor) + currentTokens -= returningActor + actorRefUnderTest.tell(msg = JobExecutionTokenReturn(tokenBeingReturned), sender = returningActor.ref) + val tokenPassedOn = nextInLine1.expectMsgClass(classOf[JobExecutionTokenDispensed]).jobExecutionToken + tokenPassedOn should be(tokenBeingReturned) + currentTokens += nextInLine1 -> tokenPassedOn + + // But the next time, nothing should happen because the wrong actor is returning the token: + actorRefUnderTest.tell(msg = JobExecutionTokenReturn(tokenBeingReturned), sender = returningActor.ref) + nextInLine2.expectNoMsg(MaxWaitTime) + } + + it should "not be fooled if an actor returns a token which doesn't exist" in { + var currentTokens: Map[TestProbe, JobExecutionToken] = Map.empty + val dummyActors = (0 
until 6 map { i => i -> TestProbe("dummy_" + i) }).toMap + + // Set up by taking all 5 tokens out, and then adding 2 to the queue: + 5 indexedTimes { i => + val sndr = dummyActors(i) + actorRefUnderTest.tell(msg = JobExecutionTokenRequest(LimitedTo5Tokens), sender = sndr.ref) + currentTokens += dummyActors(i) -> sndr.expectMsgClass(classOf[JobExecutionTokenDispensed]).jobExecutionToken + } + 1 indexedTimes { i => + val sndr = dummyActors(5 + i) + actorRefUnderTest.tell(msg = JobExecutionTokenRequest(LimitedTo5Tokens), sender = sndr.ref) + sndr.expectMsgClass(classOf[JobExecutionTokenDenied]) + } + + actorRefUnderTest.tell(msg = JobExecutionTokenReturn(JobExecutionToken(LimitedTo5Tokens, UUID.randomUUID())), sender = dummyActors(0).ref) + dummyActors(5).expectNoMsg(MaxWaitTime) + } + + AkkaTestUtil.actorDeathMethods(system) foreach { case (name, stopMethod) => + it should s"recover tokens lost to actors which are $name before they hand back their token" in { + var currentTokens: Map[TestActorRef[TestTokenGrabbingActor], JobExecutionToken] = Map.empty + var tokenGrabbingActors: Map[Int, TestActorRef[TestTokenGrabbingActor]] = Map.empty + val grabberSupervisor = TestActorRef(new StoppingSupervisor()) + + // Set up by taking all 5 tokens out, and then adding 2 to the queue: + 5 indexedTimes { i => + val newGrabbingActor = TestActorRef[TestTokenGrabbingActor](TestTokenGrabbingActor.props(actorRefUnderTest, LimitedTo5Tokens), grabberSupervisor, s"grabber_${name}_" + i) + tokenGrabbingActors += i -> newGrabbingActor + eventually { + newGrabbingActor.underlyingActor.token.isDefined should be(true) + } + currentTokens += newGrabbingActor -> newGrabbingActor.underlyingActor.token.get + } + + val unassignedActorIndex = 5 + val newGrabbingActor = TestActorRef(new TestTokenGrabbingActor(actorRefUnderTest, LimitedTo5Tokens), s"grabber_${name}_" + unassignedActorIndex) + tokenGrabbingActors += unassignedActorIndex -> newGrabbingActor + eventually { + 
newGrabbingActor.underlyingActor.rejections should be(1) + } + + val actorToStop = tokenGrabbingActors(0) + val actorToStopsToken = currentTokens(actorToStop) + val nextInLine = tokenGrabbingActors(unassignedActorIndex) + + val deathwatch = TestProbe() + deathwatch watch actorToStop + stopMethod(actorToStop) + deathwatch.expectTerminated(actorToStop) + eventually { nextInLine.underlyingActor.token should be(Some(actorToStopsToken)) } + } + } + + it should "skip over dead actors when assigning tokens to the actor queue" in { + var currentTokens: Map[TestActorRef[TestTokenGrabbingActor], JobExecutionToken] = Map.empty + var tokenGrabbingActors: Map[Int, TestActorRef[TestTokenGrabbingActor]] = Map.empty + val grabberSupervisor = TestActorRef(new StoppingSupervisor()) + + // Set up by taking all 5 tokens out, and then adding 2 to the queue: + 5 indexedTimes { i => + val newGrabbingActor = TestActorRef[TestTokenGrabbingActor](TestTokenGrabbingActor.props(actorRefUnderTest, LimitedTo5Tokens), grabberSupervisor, s"skip_test_" + i) + tokenGrabbingActors += i -> newGrabbingActor + eventually { + newGrabbingActor.underlyingActor.token.isDefined should be(true) + } + currentTokens += newGrabbingActor -> newGrabbingActor.underlyingActor.token.get + } + 2 indexedTimes { i => + val index = i + 5 + val newGrabbingActor = TestActorRef[TestTokenGrabbingActor](TestTokenGrabbingActor.props(actorRefUnderTest, LimitedTo5Tokens), grabberSupervisor, s"skip_test_" + index) + tokenGrabbingActors += index -> newGrabbingActor + eventually { + newGrabbingActor.underlyingActor.rejections should be(1) + } + } + + val returningActor = tokenGrabbingActors(0) + val returnedToken = currentTokens(returningActor) + val nextInLine1 = tokenGrabbingActors(5) + val nextInLine2 = tokenGrabbingActors(6) + + // First, kill off the actor which would otherwise be first in line: + val deathwatch = TestProbe() + deathwatch watch nextInLine1 + nextInLine1 ! 
PoisonPill + deathwatch.expectTerminated(nextInLine1) + + // Now, stop one of the workers unexpectedly and check that the released token goes to the right place: + actorRefUnderTest.tell(msg = JobExecutionTokenReturn(returnedToken), sender = returningActor) + eventually { nextInLine2.underlyingActor.token should be(Some(returnedToken)) } // Some is OK. This is the **expected** value! + } + + var actorRefUnderTest: TestActorRef[JobExecutionTokenDispenserActor] = _ + + before { + actorRefUnderTest = TestActorRef(new JobExecutionTokenDispenserActor()) + + } + after { + actorRefUnderTest = null + } + + override def afterAll = { + TestKit.shutdownActorSystem(system) + } +} + +object JobExecutionTokenDispenserActorSpec { + + implicit class intWithTimes(n: Int) { + def times(f: => Unit) = 1 to n foreach { _ => f } + def indexedTimes(f: Int => Any) = 0 until n foreach { i => f(i) } + } + + val TestInfiniteTokenType = JobExecutionTokenType("infinite", maxPoolSize = None) + def limitedTokenType(limit: Int) = JobExecutionTokenType(s"$limit-limit", maxPoolSize = Option(limit)) + val LimitedTo5Tokens = limitedTokenType(5) +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/tokens/TestTokenGrabbingActor.scala b/engine/src/test/scala/cromwell/engine/workflow/tokens/TestTokenGrabbingActor.scala new file mode 100644 index 000000000..5677a3eb6 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/tokens/TestTokenGrabbingActor.scala @@ -0,0 +1,35 @@ +package cromwell.engine.workflow.tokens + +import akka.actor.{Actor, ActorRef, Props, SupervisorStrategy} +import cromwell.core.JobExecutionToken +import cromwell.core.JobExecutionToken.JobExecutionTokenType +import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor.{JobExecutionTokenDenied, JobExecutionTokenDispensed, JobExecutionTokenRequest} +import cromwell.util.AkkaTestUtil + +/** + * Grabs a token and doesn't let it go! 
+ */ +class TestTokenGrabbingActor(tokenDispenser: ActorRef, tokenType: JobExecutionTokenType) extends Actor { + + var token: Option[JobExecutionToken] = None + var rejections = 0 + + def receive = { + case JobExecutionTokenDispensed(dispensedToken) => token = Option(dispensedToken) + case JobExecutionTokenDenied(_) => rejections += 1 + case AkkaTestUtil.ThrowException => throw new RuntimeException("Test exception (don't be scared by the stack trace, it's deliberate!)") + case AkkaTestUtil.InternalStop => context.stop(self) + } + + tokenDispenser ! JobExecutionTokenRequest(tokenType) +} + +object TestTokenGrabbingActor { + + def props(tokenDispenserActor: ActorRef, tokenType: JobExecutionTokenType) = Props(new TestTokenGrabbingActor(tokenDispenserActor, tokenType)) + + class StoppingSupervisor extends Actor { + override val supervisorStrategy = SupervisorStrategy.stoppingStrategy + override def receive = Actor.emptyBehavior + } +} diff --git a/engine/src/test/scala/cromwell/jobstore/JobResultSpec.scala b/engine/src/test/scala/cromwell/jobstore/JobResultSpec.scala index d997fb56c..d84c1d5df 100644 --- a/engine/src/test/scala/cromwell/jobstore/JobResultSpec.scala +++ b/engine/src/test/scala/cromwell/jobstore/JobResultSpec.scala @@ -3,10 +3,10 @@ package cromwell.jobstore import cromwell.core.JobOutput import cromwell.jobstore.JobResultJsonFormatter._ import org.scalatest.{FlatSpec, Matchers} -import wdl4s.values.WdlString +import wdl4s.wdl.values.WdlString import spray.json._ -import wdl4s.types.{WdlIntegerType, WdlMapType, WdlStringType} -import wdl4s.values._ +import wdl4s.wdl.types.{WdlIntegerType, WdlMapType, WdlStringType} +import wdl4s.wdl.values._ class JobResultSpec extends FlatSpec with Matchers { diff --git a/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala b/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala index e1853e7e1..37e04579e 100644 --- a/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala +++ 
b/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala @@ -1,7 +1,6 @@ package cromwell.jobstore -import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitWordSpec import cromwell.backend.BackendJobDescriptorKey import cromwell.core.{JobOutput, WorkflowId} import cromwell.jobstore.JobStoreActor._ @@ -9,9 +8,10 @@ import cromwell.jobstore.JobStoreServiceSpec._ import cromwell.services.SingletonServicesStore import org.scalatest.Matchers import org.specs2.mock.Mockito -import wdl4s.types.WdlStringType -import wdl4s.values.WdlString -import wdl4s.{Call, Task, TaskOutput, WdlExpression} +import wdl4s.parser.WdlParser.Ast +import wdl4s.wdl.types.WdlStringType +import wdl4s.wdl.values.WdlString +import wdl4s.wdl._ import scala.concurrent.duration._ import scala.language.postfixOps @@ -21,19 +21,18 @@ object JobStoreServiceSpec { val EmptyExpression = WdlExpression.fromString(""" "" """) } -class JobStoreServiceSpec extends CromwellTestkitSpec with Matchers with Mockito { +class JobStoreServiceSpec extends CromwellTestKitWordSpec with Matchers with Mockito { "JobStoreService" should { "work" in { - val config = ConfigFactory.parseString("{}") lazy val jobStore: JobStore = new SqlJobStore(SingletonServicesStore.databaseInterface) val jobStoreService = system.actorOf(JobStoreActor.props(jobStore)) val workflowId = WorkflowId.randomId() - val successCall = mock[Call] + val successCall = mock[WdlTaskCall] successCall.fullyQualifiedName returns "foo.bar" - val mockTask = mock[Task] - mockTask.outputs returns Seq(TaskOutput("baz", WdlStringType, EmptyExpression)) + val mockTask = mock[WdlTask] + mockTask.outputs returns Seq(TaskOutput("baz", WdlStringType, EmptyExpression, mock[Ast], Option(mockTask))) successCall.task returns mockTask val successKey = BackendJobDescriptorKey(successCall, None, 1).toJobStoreKey(workflowId) @@ -51,7 +50,7 @@ class JobStoreServiceSpec extends CromwellTestkitSpec with Matchers with 
Mockito case JobComplete(JobResultSuccess(Some(0), os)) if os == outputs => } - val failureCall = mock[Call] + val failureCall = mock[WdlTaskCall] failureCall.fullyQualifiedName returns "baz.qux" val failureKey = BackendJobDescriptorKey(failureCall, None, 1).toJobStoreKey(workflowId) diff --git a/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala b/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala index 5699f61ed..3ffd3cfec 100644 --- a/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala +++ b/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala @@ -1,31 +1,36 @@ package cromwell.jobstore import akka.testkit.TestFSMRef -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitWordSpec import cromwell.core.WorkflowId +import cromwell.core.actor.BatchingDbWriter +import cromwell.core.actor.BatchingDbWriter.{BatchingDbWriterState, WritingToDb} +import cromwell.jobstore.JobStore.{JobCompletion, WorkflowCompletion} import cromwell.jobstore.JobStoreActor.{JobStoreWriteSuccess, RegisterJobCompleted, RegisterWorkflowCompleted} import org.scalatest.{BeforeAndAfter, Matchers} -import wdl4s.TaskOutput +import wdl4s.wdl.TaskOutput import scala.concurrent.duration._ import scala.concurrent.{ExecutionContext, Future, Promise} import scala.language.postfixOps -class JobStoreWriterSpec extends CromwellTestkitSpec with Matchers with BeforeAndAfter { +class JobStoreWriterSpec extends CromwellTestKitWordSpec with Matchers with BeforeAndAfter { var database: WriteCountingJobStore = _ - var jobStoreWriter: TestFSMRef[JobStoreWriterState, JobStoreWriterData, JobStoreWriterActor] = _ + var jobStoreWriter: TestFSMRef[BatchingDbWriterState, BatchingDbWriter.BatchingDbWriterData, JobStoreWriterActor] = _ var workflowId: WorkflowId = _ val successResult: JobResult = JobResultSuccess(Some(0), Map.empty) + val flushFrequency = 0.5 second + val sleepMillis = flushFrequency.toMillis * 5 before { database = 
WriteCountingJobStore.makeNew - jobStoreWriter = TestFSMRef(new JobStoreWriterActor(database)) + jobStoreWriter = TestFSMRef(new JobStoreWriterActor(database, 5, flushFrequency)) workflowId = WorkflowId.randomId() } - private def sendRegisterCompletions(attempts: Int): Unit = { - 0 until attempts foreach { a => jobStoreWriter ! RegisterJobCompleted(jobKey(attempt = a), successResult) } + private def sendRegisterCompletion(attempt: Int): Unit = { + jobStoreWriter ! RegisterJobCompleted(jobKey(attempt), successResult) } private def jobKey(attempt: Int): JobStoreKey = JobStoreKey(workflowId, s"call.fqn", None, attempt) @@ -35,12 +40,14 @@ class JobStoreWriterSpec extends CromwellTestkitSpec with Matchers with BeforeAn key.callFqn shouldBe "call.fqn" key.index shouldBe None result shouldBe successResult + () } private def assertDb(totalWritesCalled: Int, jobCompletionsRecorded: Int, workflowCompletionsRecorded: Int): Unit = { database.totalWritesCalled shouldBe totalWritesCalled database.jobCompletionsRecorded shouldBe jobCompletionsRecorded database.workflowCompletionsRecorded shouldBe workflowCompletionsRecorded + () } private def assertReceived(expectedJobStoreWriteAcks: Int): Unit = { @@ -50,25 +57,26 @@ class JobStoreWriterSpec extends CromwellTestkitSpec with Matchers with BeforeAn case JobStoreWriteSuccess(RegisterWorkflowCompleted(id)) => id shouldBe workflowId case message => fail(s"Unexpected response message: $message") } - jobStoreWriter.underlyingActor.stateName shouldBe Pending + () } "JobStoreWriter" should { "be able to collapse writes together if they arrive while a database access is ongoing" in { - sendRegisterCompletions(attempts = 3) - + // Send a job completion. The database will hang. 
+ sendRegisterCompletion(1) val writer = jobStoreWriter.underlyingActor - writer.stateName shouldBe WritingToDatabase - writer.stateData.currentOperation should have size 1 - writer.stateData.nextOperation should have size 2 + eventually { + writer.stateName shouldBe WritingToDb + } - // The testing DB intentionally blocks after the first write until `continue` is called. + // Send some more completions. These should pile up in the state data. + List(2, 3) foreach sendRegisterCompletion + + writer.stateData.length should equal(2) database.continue() assertReceived(expectedJobStoreWriteAcks = 3) - writer.stateData.currentOperation shouldBe empty - writer.stateData.nextOperation shouldBe empty assertDb( totalWritesCalled = 2, @@ -79,20 +87,22 @@ class JobStoreWriterSpec extends CromwellTestkitSpec with Matchers with BeforeAn "be able to skip job-completion writes if the workflow completes, but still respond appropriately" in { - sendRegisterCompletions(attempts = 2) - jobStoreWriter ! RegisterWorkflowCompleted(workflowId) - + // Send a job completion. The database will hang. + sendRegisterCompletion(1) val writer = jobStoreWriter.underlyingActor - writer.stateName shouldBe WritingToDatabase - writer.stateData.currentOperation should have size 1 - writer.stateData.nextOperation should have size 2 + eventually { + writer.stateName shouldBe WritingToDb + } + + // Send some more completions. These should pile up in the state data. + List(2, 3) foreach sendRegisterCompletion + jobStoreWriter ! RegisterWorkflowCompleted(workflowId) + writer.stateData.length should equal(3) // The testing DB intentionally blocks after the first write until `continue` is called. 
database.continue() assertReceived(expectedJobStoreWriteAcks = 3) - writer.stateData.currentOperation shouldBe empty - writer.stateData.nextOperation shouldBe empty assertDb( totalWritesCalled = 2, @@ -111,8 +121,9 @@ class WriteCountingJobStore(var totalWritesCalled: Int, var jobCompletionsRecord def continue() = writePromise.trySuccess(()) - override def writeToDatabase(jobCompletions: Map[JobStoreKey, JobResult], workflowCompletions: List[WorkflowId]) + override def writeToDatabase(workflowCompletions: Seq[WorkflowCompletion], jobCompletions: Seq[JobCompletion], batchSize: Int) (implicit ec: ExecutionContext): Future[Unit] = { + totalWritesCalled += 1 jobCompletionsRecorded += jobCompletions.size workflowCompletionsRecorded += workflowCompletions.size diff --git a/engine/src/test/scala/cromwell/subworkflowstore/SubWorkflowStoreSpec.scala b/engine/src/test/scala/cromwell/subworkflowstore/SubWorkflowStoreSpec.scala new file mode 100644 index 000000000..27c8b61b2 --- /dev/null +++ b/engine/src/test/scala/cromwell/subworkflowstore/SubWorkflowStoreSpec.scala @@ -0,0 +1,100 @@ +package cromwell.subworkflowstore + +import cromwell.CromwellTestKitWordSpec +import cromwell.core.{JobKey, WorkflowId, WorkflowSourceFilesWithoutImports} +import cromwell.services.SingletonServicesStore +import cromwell.subworkflowstore.SubWorkflowStoreActor._ +import org.scalatest.Matchers +import org.specs2.mock.Mockito +import wdl4s.wdl.{WdlTaskCall, WdlExpression} +import cromwell.core.ExecutionIndex._ + +import scala.concurrent.duration._ +import SubWorkflowStoreSpec._ +import akka.testkit.TestProbe +import cromwell.database.sql.SqlDatabase +import cromwell.database.sql.tables.SubWorkflowStoreEntry +import cromwell.engine.workflow.workflowstore.WorkflowStoreActor.SubmitWorkflow +import cromwell.engine.workflow.workflowstore.WorkflowStoreSubmitActor.WorkflowSubmittedToStore +import cromwell.engine.workflow.workflowstore.{SqlWorkflowStore, WorkflowStoreActor} + +import 
scala.language.postfixOps + +object SubWorkflowStoreSpec { + val MaxWait = 5 seconds + val EmptyExpression = WdlExpression.fromString(""" "" """) +} + +class SubWorkflowStoreSpec extends CromwellTestKitWordSpec with Matchers with Mockito { + "SubWorkflowStore" should { + "work" in { + lazy val subWorkflowStore = new SqlSubWorkflowStore(SingletonServicesStore.databaseInterface) + val subWorkflowStoreService = system.actorOf(SubWorkflowStoreActor.props(subWorkflowStore)) + + lazy val workflowStore = SqlWorkflowStore(SingletonServicesStore.databaseInterface) + val workflowStoreService = system.actorOf(WorkflowStoreActor.props(workflowStore, TestProbe().ref, mock[SqlDatabase])) + + val parentWorkflowId = WorkflowId.randomId() + val subWorkflowId = WorkflowId.randomId() + val subSubWorkflowId = WorkflowId.randomId() + val call = mock[WdlTaskCall] + call.fullyQualifiedName returns "foo.bar" + val jobKey = new JobKey { + override def scope = call + override def index: Option[Int] = None + override def attempt: Int = 0 + override def tag: String = "foobar" + } + + workflowStoreService ! SubmitWorkflow(WorkflowSourceFilesWithoutImports( + workflowSource = "", + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = "{}", + workflowOptionsJson = "{}", + labelsJson = "{}")) + val rootWorkflowId = expectMsgType[WorkflowSubmittedToStore](10 seconds).workflowId + + // Query for non existing sub workflow + subWorkflowStoreService ! QuerySubWorkflow(parentWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + + // Register sub workflow + subWorkflowStoreService ! RegisterSubWorkflow(rootWorkflowId, parentWorkflowId, jobKey, subWorkflowId) + expectMsgType[SubWorkflowStoreRegisterSuccess](MaxWait) + + // Query for sub workflow + subWorkflowStoreService ! 
QuerySubWorkflow(parentWorkflowId, jobKey) + val subWorkflowEntry = SubWorkflowStoreEntry(Option(0), parentWorkflowId.toString, jobKey.scope.fullyQualifiedName, jobKey.index.fromIndex, jobKey.attempt, subWorkflowId.toString, Some(0)) + expectMsg[SubWorkflowFound](SubWorkflowFound(subWorkflowEntry)) + + // Register sub sub workflow + subWorkflowStoreService ! RegisterSubWorkflow(rootWorkflowId, subWorkflowId, jobKey, subSubWorkflowId) + expectMsgType[SubWorkflowStoreRegisterSuccess](MaxWait) + + // Query for sub sub workflow + subWorkflowStoreService ! QuerySubWorkflow(subWorkflowId, jobKey) + val subSubWorkflowEntry = SubWorkflowStoreEntry(Option(0), subWorkflowId.toString, jobKey.scope.fullyQualifiedName, jobKey.index.fromIndex, jobKey.attempt, subSubWorkflowId.toString, Some(1)) + expectMsg[SubWorkflowFound](SubWorkflowFound(subSubWorkflowEntry)) + + // Delete root workflow + subWorkflowStoreService ! WorkflowComplete(rootWorkflowId) + + // Verify that everything is gone (eventually!) + eventually { + subWorkflowStoreService ! QuerySubWorkflow(rootWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + } + + eventually { + subWorkflowStoreService ! QuerySubWorkflow(parentWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + } + + eventually { + subWorkflowStoreService ! 
QuerySubWorkflow(subWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + } + } + } +} diff --git a/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala b/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala index d6dc8919d..558e3657a 100644 --- a/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala @@ -1,612 +1,585 @@ package cromwell.webservice -import java.time.OffsetDateTime -import java.util.UUID - -import akka.actor.{Actor, Props} -import akka.pattern.ask -import akka.util.Timeout -import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec -import cromwell.core._ -import cromwell.engine.workflow.workflowstore.WorkflowStoreActor -import cromwell.engine.workflow.workflowstore.WorkflowStoreActor.{WorkflowAborted => _, _} -import cromwell.server.{CromwellServerActor, CromwellSystem} +import akka.actor.{Actor, ActorLogging, ActorSystem, Props} +import cromwell.core.{WorkflowId, WorkflowMetadataKeys, WorkflowSubmitted, WorkflowSucceeded} +import akka.http.scaladsl.coding.{Decoder, Gzip} +import akka.http.scaladsl.server.Route +import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ +import spray.json.DefaultJsonProtocol._ +import cromwell.engine.workflow.workflowstore.WorkflowStoreActor.{AbortWorkflow, BatchSubmitWorkflows, SubmitWorkflow} +import cromwell.engine.workflow.workflowstore.WorkflowStoreEngineActor +import cromwell.engine.workflow.workflowstore.WorkflowStoreEngineActor.WorkflowAbortFailed +import cromwell.engine.workflow.workflowstore.WorkflowStoreSubmitActor.{WorkflowSubmittedToStore, WorkflowsBatchSubmittedToStore} import cromwell.services.metadata.MetadataService._ +import akka.http.scaladsl.model._ +import akka.http.scaladsl.model.headers.{HttpEncodings, `Accept-Encoding`} +import akka.http.scaladsl.testkit.{RouteTestTimeout, ScalatestRouteTest} +import 
akka.http.scaladsl.unmarshalling.Unmarshal +import akka.stream.ActorMaterializer +import cromwell.engine.workflow.WorkflowManagerActor import cromwell.services.metadata._ -import cromwell.services.metadata.impl.MetadataSummaryRefreshActor.MetadataSummarySuccess import cromwell.util.SampleWdl.HelloWorld -import org.scalatest.concurrent.{PatienceConfiguration, ScalaFutures} -import org.scalatest.{FlatSpec, Matchers} -import org.specs2.mock.Mockito -import spray.http.{DateTime => _, _} -import spray.json.DefaultJsonProtocol._ +import org.scalatest.{AsyncFlatSpec, Matchers} import spray.json._ -import spray.testkit.ScalatestRouteTest - -object MockWorkflowStoreActor { - val submittedWorkflowId = WorkflowId(UUID.randomUUID()) - val runningWorkflowId = WorkflowId.randomId() - val unknownId = WorkflowId.randomId() - val submittedScatterWorkflowId = WorkflowId.randomId() - val abortedWorkflowId = WorkflowId.randomId() -} - -class MockWorkflowStoreActor extends Actor { - import MockWorkflowStoreActor.submittedWorkflowId - - override def receive = { - case SubmitWorkflow(source) => sender ! WorkflowSubmittedToStore(submittedWorkflowId) - case BatchSubmitWorkflows(sources) => - val response = WorkflowsBatchSubmittedToStore(sources map { _ => submittedWorkflowId }) - sender ! response - case AbortWorkflow(id, manager) => - val message = id match { - case MockWorkflowStoreActor.runningWorkflowId => - WorkflowStoreActor.WorkflowAborted(id) - case MockWorkflowStoreActor.abortedWorkflowId => - WorkflowAbortFailed(id, new IllegalStateException(s"Workflow ID '$id' is in terminal state 'Aborted' and cannot be aborted.")) - } - sender ! message - } -} - -class CromwellApiServiceSpec extends FlatSpec with CromwellApiService with ScalatestRouteTest with Matchers - with ScalaFutures with Mockito { - import spray.httpx.SprayJsonSupport._ - // BUG: Must be called once to statically initialize the backends, otherwise this Spec won't run if run alone. 
- new CromwellSystem {} +import scala.concurrent.duration._ - import akka.testkit._ - - import scala.concurrent.duration._ - - // The submit route takes a bit longer than the default 1 s while things initialize, when this spec is run by itself - implicit val defaultTimeout = RouteTestTimeout(30.seconds.dilated) - - override def actorRefFactory = system - override val serviceRegistryActor = CromwellTestkitSpec.ServiceRegistryActorInstance - - override val workflowStoreActor = actorRefFactory.actorOf(Props(new MockWorkflowStoreActor())) - override val workflowManagerActor = actorRefFactory.actorOf(Props.empty) +class CromwellApiServiceSpec extends AsyncFlatSpec with ScalatestRouteTest with Matchers { + import CromwellApiServiceSpec._ + val akkaHttpService = new MockApiService() val version = "v1" - def publishMetadata(events: Seq[MetadataEvent]): Unit = { - val timeout: Timeout = 5.seconds.dilated - - import akka.pattern.ask - val putResult = serviceRegistryActor.ask(PutMetadataAction(events))(timeout) - putResult.futureValue(PatienceConfiguration.Timeout(timeout.duration)) shouldBe a[MetadataPutAcknowledgement] - } - - def forceSummary(): Unit = { - val timeout: Timeout = 5.seconds.dilated - val summaryResult = serviceRegistryActor.ask(RefreshSummary)(timeout) - - val askResult = summaryResult.futureValue(PatienceConfiguration.Timeout(timeout.duration)) - askResult match { - case MetadataSummarySuccess => - case _ => - fail() - } - - } - - behavior of "REST API /status endpoint" - - it should "return 500 errors as Json" in { - val apiActor = TestActorRef(new CromwellServerActor(ConfigFactory.empty())) - val probe = TestProbe() - probe.send(apiActor, Timedout(mock[HttpRequest])) - probe.expectMsgPF(defaultTimeout.duration) { - case response: HttpResponse => - response.entity.toOption shouldBe defined - response.entity.toOption.get.contentType.toString() shouldBe ContentTypes.`application/json`.mediaType.value.toString - } - - system.stop(apiActor) - 
system.stop(probe.ref) - } + implicit def default = RouteTestTimeout(5.seconds) - it should "return 404 for get of unknown workflow" in { - val workflowId = WorkflowId.randomId() - - Get(s"/workflows/$version/$workflowId/status") ~> - statusRoute ~> + "REST ENGINE /stats endpoint" should "return 200 for stats" in { + Get(s"/engine/$version/stats") ~> + akkaHttpService.engineRoutes ~> check { - assertResult(StatusCodes.NotFound) { - status - } + status should be(StatusCodes.OK) + val resp = responseAs[JsObject] + val workflows = resp.fields("workflows").asInstanceOf[JsNumber].value.toInt + val jobs = resp.fields("jobs").asInstanceOf[JsNumber].value.toInt + workflows should be(1) + jobs should be(23) } } - it should "return 400 for get of a malformed workflow id's status" in { - Get(s"/workflows/$version/foobar/status") ~> - statusRoute ~> + "REST ENGINE /version endpoint" should "return 200 for version" in { + Get(s"/engine/$version/version") ~> + akkaHttpService.engineRoutes ~> check { - assertResult(StatusCodes.BadRequest) { - status - } - assertResult( - """{ - | "status": "fail", - | "message": "Invalid workflow ID: 'foobar'." 
- |}""".stripMargin - ) { - responseAs[String] - } + status should be(StatusCodes.OK) + val resp = responseAs[JsObject] + val cromwellVersion = resp.fields("cromwell").asInstanceOf[JsString].value + cromwellVersion should fullyMatch regex """\d+-([0-9a-f]){7}(-SNAP)?""" } } - private def publishStatusAndSubmission(workflowId: WorkflowId, state: WorkflowState): Unit = { - val events = Seq( - MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.SubmissionTime), MetadataValue(OffsetDateTime.now())), - MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Status), MetadataValue(state)) - ) - publishMetadata(events) - } + behavior of "REST API /status endpoint" + it should "return 200 for get of a known workflow id" in { + val workflowId = CromwellApiServiceSpec.ExistingWorkflowId - it should "return 200 for get of a known workflow id" in { - val workflowId = WorkflowId.randomId() - publishStatusAndSubmission(workflowId, WorkflowSubmitted) + Get(s"/workflows/$version/$workflowId/status") ~> + akkaHttpService.workflowRoutes ~> + check { + status should be(StatusCodes.OK) + // Along w/ checking value, ensure it is valid JSON despite the requested content type + responseAs[JsObject].fields(WorkflowMetadataKeys.Status) should be(JsString("Submitted")) + } + } - Get(s"/workflows/$version/$workflowId/status") ~> - statusRoute ~> - check { - status should be(StatusCodes.OK) - val result = responseAs[JsObject] - result.fields(WorkflowMetadataKeys.Status) should be(JsString("Submitted")) - } - } + it should "return 404 for get of unknown workflow" in { + val workflowId = CromwellApiServiceSpec.UnrecognizedWorkflowId + Get(s"/workflows/$version/$workflowId/status") ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult(StatusCodes.NotFound) { + status + } + } + } - behavior of "REST API /abort endpoint" + it should "return 400 for get of a malformed workflow id's status" in { + Get(s"/workflows/$version/foobar/status") ~> + 
akkaHttpService.workflowRoutes ~> + check { + assertResult(StatusCodes.BadRequest) { + status + } + assertResult( + """{ + | "status": "fail", + | "message": "Invalid workflow ID: 'foobar'." + |}""".stripMargin + ) { + responseAs[String] + } + } + } - it should "return 404 for abort of unknown workflow" in { - Post(s"/workflows/$version/${MockWorkflowStoreActor.unknownId}/abort") ~> - abortRoute ~> - check { - assertResult(StatusCodes.NotFound) { - status + behavior of "REST API /abort endpoint" + it should "return 404 for abort of unknown workflow" in { + val workflowId = CromwellApiServiceSpec.UnrecognizedWorkflowId + + Post(s"/workflows/$version/$workflowId/abort") ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult(StatusCodes.NotFound) { + status + } } - assertResult( - s"""{ + } + + it should "return 400 for abort of a malformed workflow id" in { + Post(s"/workflows/$version/foobar/abort") ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult(StatusCodes.BadRequest) { + status + } + assertResult( + """{ | "status": "fail", - | "message": "Unrecognized workflow ID: ${MockWorkflowStoreActor.unknownId}" + | "message": "Invalid workflow ID: 'foobar'." |}""".stripMargin - ) { - responseAs[String] + ) { + responseAs[String] + } } - } - } + } - it should "return 400 for abort of a malformed workflow id" in { - Post(s"/workflows/$version/foobar/abort") ~> - abortRoute ~> + it should "return 403 for abort of a workflow in a terminal state" in { + Post(s"/workflows/$version/${CromwellApiServiceSpec.AbortedWorkflowId}/abort") ~> + akkaHttpService.workflowRoutes ~> check { - assertResult(StatusCodes.BadRequest) { + assertResult(StatusCodes.Forbidden) { status } assertResult( - """{ - | "status": "fail", - | "message": "Invalid workflow ID: 'foobar'." + s"""{ + | "status": "error", + | "message": "Workflow ID '${CromwellApiServiceSpec.AbortedWorkflowId}' is in terminal state 'Aborted' and cannot be aborted." 
|}""".stripMargin ) { responseAs[String] } } - } - - it should "return 403 for abort of a workflow in a terminal state" in { - publishStatusAndSubmission(MockWorkflowStoreActor.abortedWorkflowId, WorkflowAborted) - - Post(s"/workflows/$version/${MockWorkflowStoreActor.abortedWorkflowId}/abort") ~> - abortRoute ~> - check { - assertResult(StatusCodes.Forbidden) { - status - } - assertResult( - s"""{ - | "status": "error", - | "message": "Workflow ID '${MockWorkflowStoreActor.abortedWorkflowId}' is in terminal state 'Aborted' and cannot be aborted." - |}""".stripMargin - ) { - responseAs[String] - } } - } - - it should "return 200 for abort of a known workflow id" in { - publishStatusAndSubmission(MockWorkflowStoreActor.runningWorkflowId, WorkflowRunning) - - Post(s"/workflows/$version/${MockWorkflowStoreActor.runningWorkflowId}/abort") ~> - abortRoute ~> - check { - assertResult( - s"""{ - | "id": "${MockWorkflowStoreActor.runningWorkflowId.toString}", - | "status": "Aborted" - |}""" - .stripMargin) { - responseAs[String] - } - assertResult(StatusCodes.OK) { - status + it should "return 200 for abort of a known workflow id" in { + Post(s"/workflows/$version/${CromwellApiServiceSpec.ExistingWorkflowId}/abort") ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult( + s"""{"id":"${CromwellApiServiceSpec.ExistingWorkflowId.toString}","status":"Aborted"}""") { + responseAs[String] + } + assertResult(StatusCodes.OK) { + status + } } - } - } + } - behavior of "REST API submission endpoint" - it should "return 201 for a successful workflow submission " in { - Post("/workflows/$version", FormData(Seq("wdlSource" -> HelloWorld.wdlSource(), "workflowInputs" -> HelloWorld.rawInputs.toJson.toString()))) ~> - submitRoute ~> - check { - assertResult( - s"""{ - | "id": "${MockWorkflowStoreActor.submittedWorkflowId.toString}", - | "status": "Submitted" - |}""".stripMargin) { - responseAs[String] - } - assertResult(StatusCodes.Created) { - status + behavior of "REST API 
submission endpoint" + it should "return 201 for a successful workflow submission " in { + val workflowSource = Multipart.FormData.BodyPart("workflowSource", HttpEntity(MediaTypes.`application/json`, HelloWorld.workflowSource())) + val workflowInputs = Multipart.FormData.BodyPart("workflowInputs", HttpEntity(MediaTypes.`application/json`, HelloWorld.rawInputs.toJson.toString())) + val formData = Multipart.FormData(workflowSource, workflowInputs).toEntity() + Post(s"/workflows/$version", formData) ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult( + s"""{ + | "id": "${CromwellApiServiceSpec.ExistingWorkflowId.toString}", + | "status": "Submitted" + |}""".stripMargin) { + responseAs[String].parseJson.prettyPrint + } + assertResult(StatusCodes.Created) { + status + } } - } - } - - behavior of "REST API batch submission endpoint" - it should "return 200 for a successful workflow submission " in { - val inputs = HelloWorld.rawInputs.toJson + } - Post("/workflows/$version/batch", - FormData(Seq("wdlSource" -> HelloWorld.wdlSource(), "workflowInputs" -> s"[$inputs, $inputs]"))) ~> - submitBatchRoute ~> - check { - assertResult( - s"""[{ - | "id": "${MockWorkflowStoreActor.submittedWorkflowId.toString}", - | "status": "Submitted" - |}, { - | "id": "${MockWorkflowStoreActor.submittedWorkflowId.toString}", - | "status": "Submitted" - |}]""".stripMargin) { - responseAs[String] + it should "return 400 for an unrecognized form data request parameter " in { + val formData = Multipart.FormData(Multipart.FormData.BodyPart("incorrectParameter", HttpEntity(MediaTypes.`application/json`, HelloWorld.workflowSource()))).toEntity() + Post(s"/workflows/$version", formData) ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult( + s"""{ + | "status": "fail", + | "message": "Error(s): Unexpected body part name: incorrectParameter" + |}""".stripMargin) { + responseAs[String] + } + assertResult(StatusCodes.BadRequest) { + status + } } - assertResult(StatusCodes.OK) { - 
status - } - } - } - - // TODO: Test tha batch submission returns expected workflow ids in order - // TODO: Also (assuming we still validate on submit) test a batch of mixed inputs that return submitted and failed - - behavior of "REST API /outputs endpoint" - - it should "return 200 with GET of outputs on successful execution of workflow" in { - val workflowId = WorkflowId.randomId() - val events = Seq( - MetadataEvent(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Outputs}:myfirst"), MetadataValue("myOutput")) - ) - - publishMetadata(events) - - Get(s"/workflows/$version/$workflowId/outputs") ~> - workflowOutputsRoute ~> - check { - status should be(StatusCodes.OK) - val result = responseAs[JsObject] - result.fields.keys should contain allOf(WorkflowMetadataKeys.Id, WorkflowMetadataKeys.Outputs) - } - } - - it should "return 404 with outputs on unknown workflow" in { - Get(s"/workflows/$version/${MockWorkflowStoreActor.unknownId}/outputs") ~> - workflowOutputsRoute ~> - check { - assertResult(StatusCodes.NotFound) { - status - } } - } - it should "return 405 with POST of outputs on successful execution of workflow" in { - Post(s"/workflows/$version/${MockWorkflowStoreActor.submittedWorkflowId.toString}/outputs") ~> - sealRoute(workflowOutputsRoute) ~> - check { - assertResult(StatusCodes.MethodNotAllowed) { - status + it should "return 400 for a workflow submission with unsupported workflow option keys" in { + val options = """ + |{ + | "defaultRuntimeOptions": { + | "cpu":1 + | } + |} + |""".stripMargin + + val workflowSource = Multipart.FormData.BodyPart("workflowSource", HttpEntity(MediaTypes.`application/json`, HelloWorld.workflowSource())) + val workflowInputs = Multipart.FormData.BodyPart("workflowOptions", HttpEntity(MediaTypes.`application/json`, options)) + val formData = Multipart.FormData(workflowSource, workflowInputs).toEntity() + + Post(s"/workflows/$version", formData) ~> + akkaHttpService.workflowRoutes ~> + check { + 
assertResult(StatusCodes.BadRequest) { + status + } } - } - } - - behavior of "REST API /logs endpoint" - - it should "return 200 with paths to stdout/stderr/backend log" in { + } - val workflowId = WorkflowId.randomId() - val jobKey = Option(MetadataJobKey("mycall", None, 1)) - val events = Seq( - MetadataEvent(MetadataKey(workflowId, jobKey, CallMetadataKeys.Stdout), MetadataValue("stdout.txt")), - MetadataEvent(MetadataKey(workflowId, jobKey, CallMetadataKeys.Stderr), MetadataValue("stderr.txt")), - MetadataEvent(MetadataKey(workflowId, jobKey, s"${CallMetadataKeys.BackendLogsPrefix}:log"), MetadataValue("backend.log")) - ) + it should "return 400 for a workflow submission with malformed workflow options json" in { + val options = s""" + |{"read_from_cache": "true" + |""".stripMargin + + val workflowSource = Multipart.FormData.BodyPart("workflowSource", HttpEntity(MediaTypes.`application/json`, HelloWorld.workflowSource())) + val workflowInputs = Multipart.FormData.BodyPart("workflowOptions", HttpEntity(MediaTypes.`application/json`, options)) + val formData = Multipart.FormData(workflowSource, workflowInputs).toEntity() + + Post(s"/workflows/$version", formData) ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult(StatusCodes.BadRequest) { + status + } + } + } - publishMetadata(events) + behavior of "REST API batch submission endpoint" + it should "return 200 for a successful workflow submission " in { + val inputs = HelloWorld.rawInputs.toJson + val workflowSource = Multipart.FormData.BodyPart("workflowSource", HttpEntity(MediaTypes.`application/json`, HelloWorld.workflowSource())) + val workflowInputs = Multipart.FormData.BodyPart("workflowInputs", HttpEntity(MediaTypes.`application/json`, s"[$inputs, $inputs]")) + val formData = Multipart.FormData(workflowSource, workflowInputs).toEntity() + + Post(s"/workflows/$version/batch", formData) ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult( + s"""[{ + | "id": 
"${CromwellApiServiceSpec.ExistingWorkflowId.toString}", + | "status": "Submitted" + |}, { + | "id": "${CromwellApiServiceSpec.ExistingWorkflowId.toString}", + | "status": "Submitted" + |}]""".stripMargin) { + responseAs[String].parseJson.prettyPrint + } + assertResult(StatusCodes.Created) { + status + } + } + } - Get(s"/workflows/$version/$workflowId/logs") ~> - workflowLogsRoute ~> - check { - status should be(StatusCodes.OK) - val result = responseAs[JsObject] + it should "return 400 for an submission with no inputs" in { + val formData = Multipart.FormData(Multipart.FormData.BodyPart("workflowSource", HttpEntity(MediaTypes.`application/json`, HelloWorld.workflowSource()))).toEntity() + + Post(s"/workflows/$version/batch", formData) ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult( + s"""{ + | "status": "fail", + | "message": "Error(s): No inputs were provided" + |}""".stripMargin) { + responseAs[String] + } + assertResult(StatusCodes.BadRequest) { + status + } + } + } - val call = result.fields("calls").convertTo[JsObject].fields("mycall").convertTo[Seq[JsObject]].head - call.fields("stdout") should be(JsString("stdout.txt")) - call.fields("stderr") should be(JsString("stderr.txt")) - call.fields("stdout") should be(JsString("stdout.txt")) - call.fields("backendLogs").convertTo[JsObject].fields("log") should be (JsString("backend.log")) - } - } + behavior of "REST API /outputs endpoint" + it should "return 200 with GET of outputs on successful execution of workflow" in { + Get(s"/workflows/$version/${CromwellApiServiceSpec.ExistingWorkflowId}/outputs") ~> + akkaHttpService.workflowRoutes ~> + check { + status should be(StatusCodes.OK) + responseAs[JsObject].fields.keys should contain allOf(WorkflowMetadataKeys.Id, WorkflowMetadataKeys.Outputs) + } + } - it should "return 404 with logs on unknown workflow" in { - Get(s"/workflows/$version/${MockWorkflowStoreActor.unknownId}/logs") ~> - workflowLogsRoute ~> + it should "return 404 with outputs on 
unknown workflow" in { + Get(s"/workflows/$version/${CromwellApiServiceSpec.UnrecognizedWorkflowId}/outputs") ~> + akkaHttpService.workflowRoutes ~> check { assertResult(StatusCodes.NotFound) { status } } - } - - - behavior of "REST API /metadata endpoint" - - it should "return with full metadata from the metadata route" in { - val workflowId = WorkflowId.randomId() - val events = Seq( - MetadataEvent(MetadataKey(workflowId, None, "testKey1"), MetadataValue("myValue1")), - MetadataEvent(MetadataKey(workflowId, None, "testKey2"), MetadataValue("myValue2")) - ) - - publishMetadata(events) - - Get(s"/workflows/$version/$workflowId/metadata") ~> - metadataRoute ~> - check { - status should be(StatusCodes.OK) - val result = responseAs[JsObject] - result.fields.keys should contain allOf("testKey1", "testKey2") - result.fields.keys shouldNot contain("testKey3") - result.fields("testKey1") should be(JsString("myValue1")) - result.fields("testKey2") should be(JsString("myValue2")) - } - } - - it should "return with included metadata from the metadata route" in { - val workflowId = WorkflowId.randomId() - - val events = Seq( - MetadataEvent(MetadataKey(workflowId, None, "testKey1a"), MetadataValue("myValue1a")), - MetadataEvent(MetadataKey(workflowId, None, "testKey1b"), MetadataValue("myValue1b")), - MetadataEvent(MetadataKey(workflowId, None, "testKey2a"), MetadataValue("myValue2a")), - MetadataEvent(MetadataKey(workflowId, None, "testKey2b"), MetadataValue("myValue2b")) - ) - - publishMetadata(events) - - Get(s"/workflows/$version/$workflowId/metadata?includeKey=testKey1&includeKey=testKey2a") ~> - metadataRoute ~> - check { - status should be(StatusCodes.OK) - val result = responseAs[JsObject] - result.fields.keys should contain allOf("testKey1a", "testKey1b", "testKey2a") - result.fields.keys should contain noneOf("testKey2b", "testKey3") - result.fields("testKey1a") should be(JsString("myValue1a")) - result.fields("testKey1b") should be(JsString("myValue1b")) - 
result.fields("testKey2a") should be(JsString("myValue2a")) - } - } - - it should "return with excluded metadata from the metadata route" in { - val workflowId = WorkflowId.randomId() - - val events = Seq( - MetadataEvent(MetadataKey(workflowId, None, "testKey1a"), MetadataValue("myValue1a")), - MetadataEvent(MetadataKey(workflowId, None, "testKey1b"), MetadataValue("myValue1b")), - MetadataEvent(MetadataKey(workflowId, None, "testKey2a"), MetadataValue("myValue2a")), - MetadataEvent(MetadataKey(workflowId, None, "testKey2b"), MetadataValue("myValue2b")) - ) - - publishMetadata(events) - - Get(s"/workflows/$version/$workflowId/metadata?excludeKey=testKey2b&excludeKey=testKey3") ~> - metadataRoute ~> - check { - status should be(StatusCodes.OK) - val result = responseAs[JsObject] - result.fields.keys should contain allOf("testKey1a", "testKey1b", "testKey2a") - result.fields.keys should contain noneOf("testKey2b", "testKey3") - result.fields("testKey1a") should be(JsString("myValue1a")) - result.fields("testKey1b") should be(JsString("myValue1b")) - result.fields("testKey2a") should be(JsString("myValue2a")) - } - } - - it should "return an error when included and excluded metadata requested from the metadata route" in { - val workflowId = WorkflowId.randomId() - publishStatusAndSubmission(workflowId, WorkflowSucceeded) + } - Get(s"/workflows/$version/$workflowId/metadata?includeKey=testKey1&excludeKey=testKey2") ~> - metadataRoute ~> - check { - assertResult(StatusCodes.BadRequest) { - status - } - assertResult( - s"""{ - | "status": "fail", - | "message": "includeKey and excludeKey may not be specified together" - |}""".stripMargin - ) { - responseAs[String] + it should "return 405 with POST of outputs on successful execution of workflow" in { + Post(s"/workflows/$version/${CromwellApiServiceSpec.UnrecognizedWorkflowId}/outputs") ~> + Route.seal(akkaHttpService.workflowRoutes) ~> + check { + assertResult(StatusCodes.MethodNotAllowed) { + status + } } - } - } + } - 
behavior of "REST API /timing endpoint" + behavior of "REST API /logs endpoint" + it should "return 200 with paths to stdout/stderr/backend log" in { + Get(s"/workflows/$version/${CromwellApiServiceSpec.ExistingWorkflowId}/logs") ~> + akkaHttpService.workflowRoutes ~> + check { + status should be(StatusCodes.OK) + + val call = responseAs[JsObject].fields("calls").convertTo[JsObject].fields("mycall").convertTo[Seq[JsObject]].head + call.fields("stdout") should be(JsString("stdout.txt")) + call.fields("stderr") should be(JsString("stderr.txt")) + call.fields("stdout") should be(JsString("stdout.txt")) + call.fields("backendLogs").convertTo[JsObject].fields("log") should be (JsString("backend.log")) + } + } - it should "return 200 with an HTML document for the timings route" in { - publishStatusAndSubmission(MockWorkflowStoreActor.submittedWorkflowId, WorkflowSucceeded) + it should "return 404 with logs on unknown workflow" in { + Get(s"/workflows/$version/${CromwellApiServiceSpec.UnrecognizedWorkflowId}/logs") ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult(StatusCodes.NotFound) { + status + } + } + } - Get(s"/workflows/$version/${MockWorkflowStoreActor.submittedWorkflowId}/timing") ~> - timingRoute ~> - check { - assertResult(StatusCodes.OK) { status } - assertResult("") { - responseAs[String].substring(0, 6) + behavior of "REST API /metadata endpoint" + it should "return with full metadata from the metadata route" in { + Get(s"/workflows/$version/${CromwellApiServiceSpec.ExistingWorkflowId}/metadata") ~> + akkaHttpService.workflowRoutes ~> + check { + status should be(StatusCodes.OK) + val result = responseAs[JsObject] + result.fields.keys should contain allOf("testKey1", "testKey2") + result.fields.keys shouldNot contain("testKey3") + result.fields("testKey1") should be(JsString("myValue1")) + result.fields("testKey2") should be(JsString("myValue2")) } - } - } + } - behavior of "REST API /query GET endpoint" + it should "return with gzip encoding 
when requested" in { + Get(s"/workflows/$version/${CromwellApiServiceSpec.ExistingWorkflowId}/metadata").addHeader(`Accept-Encoding`(HttpEncodings.gzip)) ~> + akkaHttpService.workflowRoutes ~> + check { + response.headers.find(_.name == "Content-Encoding").get.value should be("gzip") + } + } - it should "return 400 for a bad query" in { - Get(s"/workflows/$version/query?BadKey=foo") ~> - queryRoute ~> - check { - assertResult(StatusCodes.BadRequest) { - status + it should "not return with gzip encoding when not requested" in { + Get(s"/workflows/$version/${CromwellApiServiceSpec.ExistingWorkflowId}/metadata") ~> + akkaHttpService.workflowRoutes ~> + check { + response.headers.find(_.name == "Content-Encoding") shouldBe None } - assertResult( - s"""{ - | "status": "fail", - | "message": "Unrecognized query keys: BadKey" - |}""".stripMargin - ) { - responseAs[String] + } + + it should "return with included metadata from the metadata route" in { + Get(s"/workflows/$version/${CromwellApiServiceSpec.ExistingWorkflowId}/metadata?includeKey=testKey1&includeKey=testKey2a") ~> + akkaHttpService.workflowRoutes ~> + check { + status should be(StatusCodes.OK) + val result = responseAs[JsObject] + result.fields.keys should contain allOf("testKey1a", "testKey1b", "testKey2a") + result.fields.keys should contain noneOf("testKey2b", "testKey3") + result.fields("testKey1a") should be(JsString("myValue1a")) + result.fields("testKey1b") should be(JsString("myValue1b")) + result.fields("testKey2a") should be(JsString("myValue2a")) } - } - } + } - it should "return good results for a good query" in { - val workflowId = WorkflowId.randomId() - val runningId = WorkflowId.randomId() - val events = Seq( - MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Status), MetadataValue("Succeeded")), - MetadataEvent(MetadataKey(runningId, None, WorkflowMetadataKeys.Status), MetadataValue("Running")) - ) + it should "return with excluded metadata from the metadata route" in { + 
Get(s"/workflows/$version/${CromwellApiServiceSpec.ExistingWorkflowId}/metadata?excludeKey=testKey2b&excludeKey=testKey3") ~> + akkaHttpService.workflowRoutes ~> + check { + status should be(StatusCodes.OK) + val result = responseAs[JsObject] + result.fields.keys should contain allOf("testKey1a", "testKey1b", "testKey2a") + result.fields.keys should contain noneOf("testKey2b", "testKey3") + result.fields("testKey1a") should be(JsString("myValue1a")) + result.fields("testKey1b") should be(JsString("myValue1b")) + result.fields("testKey2a") should be(JsString("myValue2a")) + } + } - publishMetadata(events) - forceSummary() + it should "return an error when included and excluded metadata requested from the metadata route" in { + Get(s"/workflows/$version/${CromwellApiServiceSpec.ExistingWorkflowId}/metadata?includeKey=testKey1&excludeKey=testKey2") ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult(StatusCodes.BadRequest) { + status + } + + val decoder: Decoder = Gzip + Unmarshal(decoder.decodeMessage(response)).to[String] map { r => + assertResult( + s"""{ + | "status": "fail", + | "message": "includeKey and excludeKey may not be specified together" + |}""".stripMargin + ) { r } + } + } + } - Get(s"/workflows/$version/query?status=Succeeded&id=$workflowId") ~> - queryRoute ~> - check { - status should be(StatusCodes.OK) - val results = responseAs[JsObject].fields("results").convertTo[Seq[JsObject]] + behavior of "REST API /timing endpoint" + it should "return 200 with an HTML document for the timings route" in { + Get(s"/workflows/$version/${CromwellApiServiceSpec.ExistingWorkflowId}/timing") ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult(StatusCodes.OK) { status } + assertResult("") { + responseAs[String].substring(0, 6) + } + } + } - results.head.fields("id") should be(JsString(workflowId.toString)) - results.head.fields("status") should be(JsString("Succeeded")) - } - } + behavior of "REST API /query GET endpoint" + it should "return 
good results for a good query" in { + Get(s"/workflows/$version/query?status=Succeeded&id=${CromwellApiServiceSpec.ExistingWorkflowId}") ~> + akkaHttpService.workflowRoutes ~> + check { + status should be(StatusCodes.OK) + contentType should be(ContentTypes.`application/json`) + val results = responseAs[JsObject].fields("results").convertTo[Seq[JsObject]] + results.head.fields("id") should be(JsString(CromwellApiServiceSpec.ExistingWorkflowId.toString)) + results.head.fields("status") should be(JsString("Succeeded")) + } + } - it should "return link headers for pagination when page and pagesize are set for a good query" in { - val workflowId = WorkflowId.randomId() - val events = Seq( - MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Status), MetadataValue("Succeeded")) - ) + behavior of "REST API /query POST endpoint" + it should "return good results for a good query map body" in { + Post(s"/workflows/$version/query", HttpEntity(ContentTypes.`application/json`, """[{"status":"Succeeded"}]""")) ~> + akkaHttpService.workflowRoutes ~> + check { + assertResult(StatusCodes.OK) { + status + } + assertResult(true) { + entityAs[String].contains("\"status\":\"Succeeded\"") + } + } + } - publishMetadata(events) - forceSummary() + behavior of "REST API /labels PATCH endpoint" + it should "return successful status response when assigning valid labels to an existing workflow ID" in { + + val validLabelsJson = + """ + |{ + | "label-key-1":"label-value-1", + | "label-key-2":"label-value-2" + |} + """.stripMargin + + val workflowId = CromwellApiServiceSpec.ExistingWorkflowId + + Patch(s"/workflows/$version/$workflowId/labels", HttpEntity(ContentTypes.`application/json`, validLabelsJson)) ~> + akkaHttpService.workflowRoutes ~> + check { + status shouldBe StatusCodes.OK + val actualResult = responseAs[JsObject] + val expectedResults = + s""" + |{ + | "id": "$workflowId", + | "labels": { + | "label-key-1":"label-value-1", + | "label-key-2":"label-value-2" + | } + |} 
+ """.stripMargin.parseJson + + actualResult shouldBe expectedResults + } + } +} - Get(s"/workflows/$version/query?status=Succeeded&id=$workflowId&page=1&pagesize=5") ~> - queryRoute ~> - check { - status should be(StatusCodes.OK) - val results = responseAs[JsObject].fields("results").convertTo[Seq[JsObject]] +object CromwellApiServiceSpec { + val ExistingWorkflowId = WorkflowId.fromString("c4c6339c-8cc9-47fb-acc5-b5cb8d2809f5") + val AbortedWorkflowId = WorkflowId.fromString("0574111c-c7d3-4145-8190-7a7ed8e8324a") + val UnrecognizedWorkflowId = WorkflowId.fromString("2bdd06cc-e794-46c8-a897-4c86cedb6a06") + val RecognizedWorkflowIds = Set(ExistingWorkflowId, AbortedWorkflowId) - results.head.fields("id") should be(JsString(workflowId.toString)) - results.head.fields("status") should be(JsString("Succeeded")) + class MockApiService()(implicit val system: ActorSystem) extends CromwellApiService { + override def actorRefFactory = system - assertResult(4) { - headers count { header => header.is("link") } - } - } + override val materializer = ActorMaterializer() + override val ec = system.dispatcher + override val workflowStoreActor = actorRefFactory.actorOf(Props(new MockWorkflowStoreActor())) + override val serviceRegistryActor = actorRefFactory.actorOf(Props(new MockServiceRegistryActor())) + override val workflowManagerActor = actorRefFactory.actorOf(Props(new MockWorkflowManagerActor())) } - behavior of "REST API /query POST endpoint" + object MockServiceRegistryActor { + def fullMetadataResponse(workflowId: WorkflowId) = { + List(MetadataEvent(MetadataKey(workflowId, None, "testKey1"), MetadataValue("myValue1", MetadataString)), + MetadataEvent(MetadataKey(workflowId, None, "testKey2"), MetadataValue("myValue2", MetadataString))) + } + def filteredMetadataResponse(workflowId: WorkflowId) = { + List(MetadataEvent(MetadataKey(workflowId, None, "testKey1a"), MetadataValue("myValue1a", MetadataString)), + MetadataEvent(MetadataKey(workflowId, None, "testKey1b"), 
MetadataValue("myValue1b", MetadataString)), + MetadataEvent(MetadataKey(workflowId, None, "testKey2a"), MetadataValue("myValue2a", MetadataString))) + } - it should "return 400 for a bad query map body" in { - Post(s"/workflows/$version/query", HttpEntity(ContentTypes.`application/json`, """[{"BadKey":"foo"}]""")) ~> - queryPostRoute ~> - check { - assertResult(StatusCodes.BadRequest) { - status - } - assertResult( - s"""{ - | "status": "fail", - | "message": "Unrecognized query keys: BadKey" - |}""".stripMargin - ) { - responseAs[String] - } - } - } + def metadataQuery(workflowId: WorkflowId) = MetadataQuery(workflowId, None, None, None, None, false) - it should "return good results for a good query map body" in { - Post(s"/workflows/$version/query", HttpEntity(ContentTypes.`application/json`, """[{"status":"Succeeded"}]""")) ~> - queryPostRoute ~> - check { - assertResult(StatusCodes.OK) { - status - } - assertResult(true) { - body.asString.contains("\"status\": \"Succeeded\"") - } - } + def logsEvents(id: WorkflowId) = { + val stdout = MetadataEvent(MetadataKey(id, Some(MetadataJobKey("mycall", None, 1)), CallMetadataKeys.Stdout), MetadataValue("stdout.txt", MetadataString)) + val stderr = MetadataEvent(MetadataKey(id, Some(MetadataJobKey("mycall", None, 1)), CallMetadataKeys.Stderr), MetadataValue("stderr.txt", MetadataString)) + val backend = MetadataEvent(MetadataKey(id, Some(MetadataJobKey("mycall", None, 1)), s"${CallMetadataKeys.BackendLogsPrefix}:log"), MetadataValue("backend.log", MetadataString)) + Vector(stdout, stderr, backend) + } } - it should "return link headers for pagination when page and pagesize are set for a good query map body" in { - Post(s"/workflows/$version/query", HttpEntity(ContentTypes.`application/json`, """[{"status":"Succeeded"}, {"page": "1"}, {"pagesize": "5"}]""")) ~> - queryPostRoute ~> - check { - assertResult(StatusCodes.OK) { - status + class MockServiceRegistryActor extends Actor { + import MockServiceRegistryActor._ + 
override def receive = { + case WorkflowQuery(_) => + val response = WorkflowQuerySuccess(WorkflowQueryResponse(List(WorkflowQueryResult(ExistingWorkflowId.toString, + None, Some(WorkflowSucceeded.toString), None, None))), None) + sender ! response + case ValidateWorkflowId(id) => + if (RecognizedWorkflowIds.contains(id)) sender ! MetadataService.RecognizedWorkflowId + else sender ! MetadataService.UnrecognizedWorkflowId + case GetStatus(id) => sender ! StatusLookupResponse(id, WorkflowSubmitted) + case WorkflowOutputs(id) => + val event = Vector(MetadataEvent(MetadataKey(id, None, "outputs:test.hello.salutation"), MetadataValue("Hello foo!", MetadataString))) + sender ! WorkflowOutputsResponse(id, event) + case GetLogs(id) => sender ! LogsResponse(id, logsEvents(id)) + case GetSingleWorkflowMetadataAction(id, None, None, _) => sender ! MetadataLookupResponse(metadataQuery(id), fullMetadataResponse(id)) + case GetSingleWorkflowMetadataAction(id, Some(_), None, _) => sender ! MetadataLookupResponse(metadataQuery(id), filteredMetadataResponse(id)) + case GetSingleWorkflowMetadataAction(id, None, Some(_), _) => sender ! MetadataLookupResponse(metadataQuery(id), filteredMetadataResponse(id)) + case PutMetadataActionAndRespond(events, _) => + events.head.key.workflowId match { + case CromwellApiServiceSpec.ExistingWorkflowId => sender ! MetadataWriteSuccess(events) + case CromwellApiServiceSpec.AbortedWorkflowId => sender ! 
MetadataWriteFailure(new Exception("mock exception of db failure"), events) + case WorkflowId(_) => throw new Exception("Something untoward happened, this situation is not believed to be possible at this time") } - assertResult(true) { - body.asString.contains("\"status\": \"Succeeded\"") - (headers count { header => header.is("link") }) == 4 - } - } + } } - - it should "return good results for a multiple query map body" in { - Post(s"/workflows/$version/query", HttpEntity(ContentTypes.`application/json`, - """[{"status":"Succeeded"}, {"status":"Failed"}]""")) ~> - queryPostRoute ~> - check { - assertResult(StatusCodes.OK) { - status + class MockWorkflowStoreActor extends Actor { + override def receive = { + case SubmitWorkflow(_) => sender ! WorkflowSubmittedToStore(ExistingWorkflowId) + case BatchSubmitWorkflows(sources) => + val response = WorkflowsBatchSubmittedToStore(sources map { _ => ExistingWorkflowId }) + sender ! response + case AbortWorkflow(id, manager @ _) => + val message = id match { + case ExistingWorkflowId => WorkflowStoreEngineActor.WorkflowAborted(id) + case AbortedWorkflowId => + WorkflowAbortFailed(id, new IllegalStateException(s"Workflow ID '$id' is in terminal state 'Aborted' and cannot be aborted.")) + case WorkflowId(_) => throw new Exception("Something untoward happened") } - assertResult(true) { - body.asString.contains("\"status\": \"Succeeded\"") - } - } + sender ! message + } } - it should "return 400 bad request for a bad query format body" in { - Post(s"/workflows/$version/query", HttpEntity(ContentTypes.`application/json`, """[{"status":["Succeeded"]}]""")) ~> - sealRoute(queryPostRoute) ~> - check { - assertResult(StatusCodes.BadRequest) { - status - } - } + class MockWorkflowManagerActor extends Actor with ActorLogging { + override def receive: Receive = { + case WorkflowManagerActor.EngineStatsCommand => + val response = EngineStatsActor.EngineStats(1, 23) + sender ! 
response + case unexpected => + val sndr = sender() + log.error(s"Unexpected message {} from {}", unexpected, sndr) + sender ! s"Unexpected message received: $unexpected" + } } } - diff --git a/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala b/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala index 021e4a43c..b32ceb1c9 100644 --- a/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala @@ -14,22 +14,22 @@ class EngineStatsActorSpec extends TestKitSuite("EngineStatsActor") with FlatSpe behavior of "EngineStatsActor" val replyTo = TestProbe() - val defaultTimeout = 100 millis + val defaultTimeout = 500 millis it should "return double zeros with no WorkflowActors" in { - TestActorRef(EngineStatsActor.props(List.empty[ActorRef], replyTo.ref)) + TestActorRef(EngineStatsActor.props(List.empty[ActorRef], replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(0, 0)) } it should "return snakeyes with a single workflow with one job" in { val workflowActors = List(Props(FakeWorkflowActor(1))) map { TestActorRef(_) } - TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref)) + TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(1, 1)) } it should "return an unemployed workflow when that's the world it lives in" in { val workflowActors = List(Props(FakeWorkflowActor(0))) map { TestActorRef(_) } - TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref)) + TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(1, 0)) } @@ -41,7 +41,7 @@ class EngineStatsActorSpec extends TestKitSuite("EngineStatsActor") with FlatSpe it should "return the summation of jobs for all WorkflowActors" in { val workflowActors = List(Props(FakeWorkflowActor(1)), 
Props(FakeWorkflowActor(2))) map { TestActorRef(_) } - TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref)) + TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(2, 3)) } } diff --git a/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala b/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala index 08c1e5b16..9fb608dcf 100644 --- a/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala @@ -4,44 +4,42 @@ import java.time.OffsetDateTime import java.util.UUID import akka.testkit._ +import akka.pattern.ask +import akka.util.Timeout import cromwell.core.{TestKitSuite, WorkflowId} +import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ -import MetadataService._ -import cromwell.services._ -import cromwell.webservice.PerRequest.RequestComplete import cromwell.webservice.metadata.MetadataBuilderActor +import cromwell.webservice.metadata.MetadataBuilderActor.{BuiltMetadataResponse, MetadataBuilderActorResponse} import org.scalatest.prop.TableDrivenPropertyChecks -import org.scalatest.{FlatSpecLike, Matchers} -import spray.http.{StatusCode, StatusCodes} +import org.scalatest.{Assertion, AsyncFlatSpecLike, Matchers, Succeeded} +import org.specs2.mock.Mockito import spray.json._ +import scala.concurrent.Future import scala.concurrent.duration._ import scala.language.postfixOps -class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLike with Matchers +class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with AsyncFlatSpecLike with Matchers with Mockito with TableDrivenPropertyChecks with ImplicitSender { behavior of "MetadataParser" - val defaultTimeout = 100 millis - val mockServiceRegistry = TestProbe() - val parentProbe = TestProbe() + val defaultTimeout = 200 millis + implicit val 
timeout: Timeout = defaultTimeout + val mockServiceRegistry = TestProbe() def assertMetadataResponse(action: MetadataServiceAction, queryReply: MetadataQuery, events: Seq[MetadataEvent], - expectedRes: String) = { - val metadataBuilder = TestActorRef(MetadataBuilderActor.props(mockServiceRegistry.ref), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") - metadataBuilder ! action // Ask for everything - mockServiceRegistry.expectMsg(defaultTimeout, action) // TestActor runs on CallingThreadDispatcher + expectedRes: String): Future[Assertion] = { + val mba = system.actorOf(MetadataBuilderActor.props(mockServiceRegistry.ref)) + val response = mba.ask(action).mapTo[MetadataBuilderActorResponse] + mockServiceRegistry.expectMsg(defaultTimeout, action) mockServiceRegistry.reply(MetadataLookupResponse(queryReply, events)) - - parentProbe.expectMsgPF(defaultTimeout) { - case response: RequestComplete[(StatusCode, JsObject)] @unchecked => - response.response._1 shouldBe StatusCodes.OK - response.response._2 shouldBe expectedRes.parseJson - } + response map { r => r shouldBe a [BuiltMetadataResponse] } + response.mapTo[BuiltMetadataResponse] map { b => b.response shouldBe expectedRes.parseJson} } it should "build workflow scope tree from metadata events" in { @@ -52,12 +50,12 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik val workflowA = WorkflowId.randomId() val workflowACalls = List( - Option(new MetadataJobKey("callB", Some(1), 3)), - Option(new MetadataJobKey("callB", None, 1)), - Option(new MetadataJobKey("callB", Some(1), 2)), - Option(new MetadataJobKey("callA", None, 1)), - Option(new MetadataJobKey("callB", Some(1), 1)), - Option(new MetadataJobKey("callB", Some(0), 1)), + Option(MetadataJobKey("callB", Some(1), 3)), + Option(MetadataJobKey("callB", None, 1)), + Option(MetadataJobKey("callB", Some(1), 2)), + Option(MetadataJobKey("callA", None, 1)), + Option(MetadataJobKey("callB", Some(1), 1)), + 
Option(MetadataJobKey("callB", Some(0), 1)), None ) val workflowAEvents = workflowACalls map { makeEvent(workflowA, _) } @@ -97,40 +95,47 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik | "id": "$workflowA" |}""".stripMargin - val mdQuery = MetadataQuery(workflowA, None, None, None, None) + val mdQuery = MetadataQuery(workflowA, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, workflowAEvents, expectedRes) } type EventBuilder = (String, String, OffsetDateTime) - def makeEvent(workflow: WorkflowId)(key: String, value: MetadataValue, offsetDateTime: OffsetDateTime) = { + def makeEvent(workflow: WorkflowId)(key: String, value: MetadataValue, offsetDateTime: OffsetDateTime): MetadataEvent = { MetadataEvent(MetadataKey(workflow, None, key), Option(value), offsetDateTime) } - def assertMetadataKeyStructure(eventList: List[EventBuilder], expectedJson: String) = { - val workflow = WorkflowId.randomId() + def makeCallEvent(workflow: WorkflowId)(key: String, value: MetadataValue, offsetDateTime: OffsetDateTime) = { + val jobKey = MetadataJobKey("fqn", None, 1) + MetadataEvent(MetadataKey(workflow, Option(jobKey), key), Option(value), offsetDateTime) + } + + def assertMetadataKeyStructure(eventList: List[EventBuilder], + expectedJson: String, + workflow: WorkflowId = WorkflowId.randomId(), + eventMaker: WorkflowId => (String, MetadataValue, OffsetDateTime) => MetadataEvent = makeEvent) = { - val events = eventList map { e => (e._1, MetadataValue(e._2), e._3) } map Function.tupled(makeEvent(workflow)) + val events = eventList map { e => (e._1, MetadataValue(e._2), e._3) } map Function.tupled(eventMaker(workflow)) val expectedRes = s"""{ "calls": {}, $expectedJson, "id":"$workflow" }""" - val mdQuery = MetadataQuery(workflow, None, None, None, None) - val queryAction = GetSingleWorkflowMetadataAction(workflow, None, None) + val mdQuery = 
MetadataQuery(workflow, None, None, None, None, expandSubWorkflows = false) + val queryAction = GetSingleWorkflowMetadataAction(workflow, None, None, expandSubWorkflows = false) assertMetadataResponse(queryAction, mdQuery, events, expectedRes) } - it should "keep event with later timestamp for the same key in metadata" in { + it should "assume the event list is ordered and keep last event if 2 events have same key" in { val eventBuilderList = List( ("a", "aLater", OffsetDateTime.parse("2000-01-02T12:00:00Z")), ("a", "a", OffsetDateTime.parse("2000-01-01T12:00:00Z")) ) val expectedRes = - """"a": "aLater"""".stripMargin + """"a": "a"""".stripMargin assertMetadataKeyStructure(eventBuilderList, expectedRes) } - it should "use CRDT ordering instead of timestamp for status" in { + it should "use CRDT ordering instead of timestamp for workflow state" in { val eventBuilderList = List( ("status", "Succeeded", OffsetDateTime.now), ("status", "Running", OffsetDateTime.now.plusSeconds(1)) @@ -141,6 +146,25 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik assertMetadataKeyStructure(eventBuilderList, expectedRes) } + it should "use CRDT ordering instead of timestamp for call execution status" in { + val eventBuilderList = List( + ("executionStatus", "Done", OffsetDateTime.now), + ("executionStatus", "Running", OffsetDateTime.now.plusSeconds(1)) + ) + val workflowId = WorkflowId.randomId() + val expectedRes = + s""""calls": { + | "fqn": [{ + | "attempt": 1, + | "executionStatus": "Done", + | "shardIndex": -1 + | }] + | }, + | "id": "$workflowId"""".stripMargin + + assertMetadataKeyStructure(eventBuilderList, expectedRes, workflowId, makeCallEvent) + } + it should "build JSON object structure from dotted key syntax" in { val eventBuilderList = List( ("a:b:c", "abc", OffsetDateTime.now), @@ -267,15 +291,16 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik val kisv3 = ("key[0]:subkey", "value3", 
OffsetDateTime.now.plusSeconds(2)) val kiv4 = ("key[0]", "value4", OffsetDateTime.now.plusSeconds(3)) - val t = Table( - ("list", "res"), + val t = List( (List(kv), """"key": "value""""), (List(kv, ksv2), """"key": { "subkey": "value2" }"""), (List(kv, ksv2, kisv3), """"key": [ { "subkey": "value3" } ]"""), (List(kv, ksv2, kisv3, kiv4), """"key": [ "value4" ]""") ) - forAll(t) { (l, r) => assertMetadataKeyStructure(l, r) } + Future.sequence(t map { case (l, r) => assertMetadataKeyStructure(l, r) }) map { assertions => + assertions should contain only Succeeded + } } it should "coerce values to supported types" in { @@ -306,7 +331,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik | } """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } @@ -327,7 +352,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik |} """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } @@ -347,14 +372,14 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik |} """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } it should "render empty Json" in { val workflowId = WorkflowId.randomId() - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = 
MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) val expectedEmptyResponse = """{}""" assertMetadataResponse(queryAction, mdQuery, List.empty, expectedEmptyResponse) @@ -384,7 +409,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik |} """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, emptyEvents, expectedEmptyResponse) @@ -399,4 +424,95 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik assertMetadataResponse(queryAction, mdQuery, valueEvents, expectedNonEmptyResponse) } + + it should "expand sub workflow metadata when asked for" in { + val mainWorkflowId = WorkflowId.randomId() + val subWorkflowId = WorkflowId.randomId() + + val mainEvents = List( + MetadataEvent(MetadataKey(mainWorkflowId, Option(MetadataJobKey("callA", None, 1)), "subWorkflowId"), MetadataValue(subWorkflowId)) + ) + + val subEvents = List( + MetadataEvent(MetadataKey(mainWorkflowId, None, "some"), MetadataValue("sub workflow info")) + ) + + val mainQuery = MetadataQuery(mainWorkflowId, None, None, None, None, expandSubWorkflows = true) + val mainQueryAction = GetMetadataQueryAction(mainQuery) + + val subQuery = MetadataQuery(subWorkflowId, None, None, None, None, expandSubWorkflows = true) + val subQueryAction = GetMetadataQueryAction(subQuery) + + val parentProbe = TestProbe() + val metadataBuilder = TestActorRef(MetadataBuilderActor.props(mockServiceRegistry.ref), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") + val response = metadataBuilder.ask(mainQueryAction).mapTo[MetadataBuilderActorResponse] + mockServiceRegistry.expectMsg(defaultTimeout, mainQueryAction) + mockServiceRegistry.reply(MetadataLookupResponse(mainQuery, 
mainEvents)) + mockServiceRegistry.expectMsg(defaultTimeout, subQueryAction) + mockServiceRegistry.reply(MetadataLookupResponse(subQuery, subEvents)) + + val expandedRes = + s""" + |{ + | "calls": { + | "callA": [ + | { + | "subWorkflowMetadata": { + | "some": "sub workflow info", + | "calls": {}, + | "id": "$subWorkflowId" + | }, + | "attempt": 1, + | "shardIndex": -1 + | } + | ] + | }, + | "id": "$mainWorkflowId" + |} + """.stripMargin + + response map { r => r shouldBe a [BuiltMetadataResponse] } + val bmr = response.mapTo[BuiltMetadataResponse] + bmr map { b => b.response shouldBe expandedRes.parseJson} + } + + it should "NOT expand sub workflow metadata when NOT asked for" in { + val mainWorkflowId = WorkflowId.randomId() + val subWorkflowId = WorkflowId.randomId() + + val mainEvents = List( + MetadataEvent(MetadataKey(mainWorkflowId, Option(MetadataJobKey("callA", None, 1)), "subWorkflowId"), MetadataValue(subWorkflowId)) + ) + + val queryNoExpand = MetadataQuery(mainWorkflowId, None, None, None, None, expandSubWorkflows = false) + val queryNoExpandAction = GetMetadataQueryAction(queryNoExpand) + + val parentProbe = TestProbe() + val metadataBuilder = TestActorRef(MetadataBuilderActor.props(mockServiceRegistry.ref), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") + val response = metadataBuilder.ask(queryNoExpandAction).mapTo[MetadataBuilderActorResponse] + mockServiceRegistry.expectMsg(defaultTimeout, queryNoExpandAction) + mockServiceRegistry.reply(MetadataLookupResponse(queryNoExpand, mainEvents)) + + + val nonExpandedRes = + s""" + |{ + | "calls": { + | "callA": [ + | { + | "subWorkflowId": "$subWorkflowId", + | "attempt": 1, + | "shardIndex": -1 + | } + | ] + | }, + | "id": "$mainWorkflowId" + |} + """.stripMargin + + response map { r => r shouldBe a [BuiltMetadataResponse] } + val bmr = response.mapTo[BuiltMetadataResponse] + bmr map { b => b.response shouldBe nonExpandedRes.parseJson} + + } } diff --git 
a/engine/src/test/scala/cromwell/webservice/PartialWorkflowSourcesSpec.scala b/engine/src/test/scala/cromwell/webservice/PartialWorkflowSourcesSpec.scala new file mode 100644 index 000000000..cd7422ffc --- /dev/null +++ b/engine/src/test/scala/cromwell/webservice/PartialWorkflowSourcesSpec.scala @@ -0,0 +1,17 @@ +package cromwell.webservice + +import org.scalatest.{FlatSpec, Matchers} +import spray.json._ +import spray.json.DefaultJsonProtocol._ + +class PartialWorkflowSourcesSpec extends FlatSpec with Matchers { + it should "successfully merge and override multiple input files" in { + val input1 = Map("wf.a1" -> "hello", "wf.a2" -> "world").toJson.toString + val input2 = Map.empty[String, String].toJson.toString + val overrideInput1 = Map("wf.a2" -> "universe").toJson.toString + val allInputs = PartialWorkflowSources.mergeMaps(Seq(Option(input1), Option(input2), Option(overrideInput1))) + + allInputs.fields.keys should contain allOf("wf.a1", "wf.a2") + allInputs.fields("wf.a2") should be(JsString("universe")) + } +} diff --git a/engine/src/test/scala/cromwell/webservice/SwaggerServiceSpec.scala b/engine/src/test/scala/cromwell/webservice/SwaggerServiceSpec.scala index 18a8f81af..20c7c3e6d 100644 --- a/engine/src/test/scala/cromwell/webservice/SwaggerServiceSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/SwaggerServiceSpec.scala @@ -1,5 +1,7 @@ package cromwell.webservice +import akka.http.scaladsl.model.StatusCodes +import akka.http.scaladsl.testkit.ScalatestRouteTest import io.swagger.models.properties.RefProperty import io.swagger.parser.SwaggerParser import org.scalatest.prop.TableDrivenPropertyChecks @@ -8,8 +10,6 @@ import org.yaml.snakeyaml.constructor.Constructor import org.yaml.snakeyaml.error.YAMLException import org.yaml.snakeyaml.nodes.MappingNode import org.yaml.snakeyaml.{Yaml => SnakeYaml} -import spray.http._ -import spray.testkit.ScalatestRouteTest import scala.collection.JavaConverters._ diff --git 
a/engine/src/test/scala/cromwell/webservice/SwaggerUiHttpServiceSpec.scala b/engine/src/test/scala/cromwell/webservice/SwaggerUiHttpServiceSpec.scala new file mode 100644 index 000000000..292d5ad73 --- /dev/null +++ b/engine/src/test/scala/cromwell/webservice/SwaggerUiHttpServiceSpec.scala @@ -0,0 +1,320 @@ +package cromwell.webservice + +import akka.http.scaladsl.model.{StatusCodes, Uri} +import akka.http.scaladsl.model.headers.Location +import akka.http.scaladsl.server.Route +import akka.http.scaladsl.testkit.ScalatestRouteTest +import com.typesafe.config.ConfigFactory +import cromwell.webservice.SwaggerUiHttpServiceSpec._ +import org.scalatest.prop.TableDrivenPropertyChecks +import org.scalatest.{FlatSpec, Matchers} + + +trait SwaggerUiHttpServiceSpec extends FlatSpec with Matchers with ScalatestRouteTest with SwaggerUiHttpService { + override def swaggerUiVersion = TestSwaggerUiVersion +} + +trait SwaggerResourceHttpServiceSpec extends FlatSpec with Matchers with ScalatestRouteTest with +TableDrivenPropertyChecks with SwaggerResourceHttpService { + val testPathsForOptions = Table("endpoint", "/", "/swagger", "/swagger/index.html", "/api", "/api/example", + "/api/example?with=param", "/api/example/path") +} + +trait SwaggerUiResourceHttpServiceSpec extends SwaggerUiHttpServiceSpec with SwaggerResourceHttpServiceSpec with +SwaggerUiResourceHttpService + +object SwaggerUiHttpServiceSpec { + val TestSwaggerUiVersion = "2.1.1" + val SwaggerIndexPreamble = + """ + | + | + | + | + | Swagger UI""".stripMargin.trim // workaround IDEA's weird formatting of interpolated strings +} + +class BasicSwaggerUiHttpServiceSpec extends SwaggerUiHttpServiceSpec { + behavior of "SwaggerUiHttpService" + + it should "redirect / to /swagger" in { + Get() ~> swaggerUiRoute ~> check { + status should be(StatusCodes.TemporaryRedirect) + header("Location") should be(Option(Location(Uri("/swagger")))) + } + } + + it should "not return options for /" in { + Options() ~> 
Route.seal(swaggerUiRoute) ~> check { + status should be(StatusCodes.MethodNotAllowed) + } + } + + it should "redirect /swagger to the index.html" in { + Get("/swagger") ~> swaggerUiRoute ~> check { + status should be(StatusCodes.TemporaryRedirect) + header("Location") should be(Option(Location(Uri("/swagger/index.html?url=/api-docs")))) + } + } + + it should "return index.html from the swagger-ui jar" in { + Get("/swagger/index.html") ~> swaggerUiRoute ~> check { + status should be(StatusCodes.OK) + } + } +} + +class NoRedirectRootSwaggerUiHttpServiceSpec extends SwaggerUiHttpServiceSpec { + override def swaggerUiFromRoot = false + + behavior of "SwaggerUiHttpService" + + it should "not redirect / to /swagger" in { + Get() ~> Route.seal(swaggerUiRoute) ~> check { + status should be(StatusCodes.NotFound) + } + } + + it should "not return options for /" in { + Options() ~> Route.seal(swaggerUiRoute) ~> check { + status should be(StatusCodes.MethodNotAllowed) + } + } + + it should "redirect /swagger to the index.html" in { + Get("/swagger") ~> swaggerUiRoute ~> check { + status should be(StatusCodes.TemporaryRedirect) + header("Location") should be(Option(Location(Uri("/swagger/index.html?url=/api-docs")))) + } + } + + it should "return index.html from the swagger-ui jar" in { + Get("/swagger/index.html") ~> swaggerUiRoute ~> check { + status should be(StatusCodes.OK) + } + } +} + +class DefaultSwaggerUiConfigHttpServiceSpec extends SwaggerUiHttpServiceSpec with SwaggerUiConfigHttpService { + override def swaggerUiConfig = ConfigFactory.parseString(s"uiVersion = $TestSwaggerUiVersion") + + behavior of "SwaggerUiConfigHttpService" + + it should "redirect /swagger to the index.html" in { + Get("/swagger") ~> swaggerUiRoute ~> check { + status should be(StatusCodes.TemporaryRedirect) + header("Location") should be(Option(Location(Uri("/swagger/index.html?url=/api-docs")))) + } + } + + it should "return index.html from the swagger-ui jar" in { + 
Get("/swagger/index.html") ~> swaggerUiRoute ~> check { + status should be(StatusCodes.OK) + responseAs[String].take(SwaggerIndexPreamble.length) should be(SwaggerIndexPreamble) + } + } +} + +class OverriddenSwaggerUiConfigHttpServiceSpec extends SwaggerUiHttpServiceSpec with SwaggerUiConfigHttpService { + override def swaggerUiConfig = ConfigFactory.parseString( + s""" + |baseUrl = /base + |docsPath = swagger/lenthall.yaml + |uiPath = ui/path + |uiVersion = $TestSwaggerUiVersion + """.stripMargin) + + behavior of "SwaggerUiConfigHttpService" + + it should "redirect /ui/path to the index.html under /base" in { + Get("/ui/path") ~> swaggerUiRoute ~> check { + status should be(StatusCodes.TemporaryRedirect) + header("Location") should be(Option(Location(Uri("/base/ui/path/index.html?url=/base/swagger/lenthall.yaml")))) + } + } + + it should "return index.html from the swagger-ui jar" in { + Get("/ui/path/index.html") ~> swaggerUiRoute ~> check { + status should be(StatusCodes.OK) + responseAs[String].take(SwaggerIndexPreamble.length) should be(SwaggerIndexPreamble) + } + } +} + +class YamlSwaggerResourceHttpServiceSpec extends SwaggerResourceHttpServiceSpec { + override def swaggerServiceName = "testservice" + + behavior of "SwaggerResourceHttpService" + + it should "service swagger yaml" in { + Get("/swagger/testservice.yaml") ~> swaggerResourceRoute ~> check { + status should be(StatusCodes.OK) + responseAs[String] should startWith("swagger: '2.0'\n") + } + } + + it should "not service swagger json" in { + Get("/swagger/testservice.json") ~> Route.seal(swaggerResourceRoute) ~> check { + status should be(StatusCodes.NotFound) + } + } + + it should "not service /swagger" in { + Get("/swagger") ~> Route.seal(swaggerResourceRoute) ~> check { + status should be(StatusCodes.NotFound) + } + } + + it should "return options for all routes" in { + forAll(testPathsForOptions) { path => + Options(path) ~> swaggerResourceRoute ~> check { + status should be(StatusCodes.OK) + 
responseAs[String] should be("OK") + } + } + } +} + +class JsonSwaggerResourceHttpServiceSpec extends SwaggerResourceHttpServiceSpec { + override def swaggerServiceName = "testservice" + + override def swaggerResourceType = "json" + + behavior of "SwaggerResourceHttpService" + + it should "service swagger json" in { + Get("/swagger/testservice.json") ~> swaggerResourceRoute ~> check { + status should be(StatusCodes.OK) + responseAs[String] should startWith("{\n \"swagger\": \"2.0\",\n") + } + } + + it should "not service swagger yaml" in { + Get("/swagger/testservice.yaml") ~> Route.seal(swaggerResourceRoute) ~> check { + status should be(StatusCodes.NotFound) + } + } + + it should "not service /swagger" in { + Get("/swagger") ~> Route.seal(swaggerResourceRoute) ~> check { + status should be(StatusCodes.NotFound) + } + } + + it should "return options for all routes" in { + forAll(testPathsForOptions) { path => + Options(path) ~> swaggerResourceRoute ~> check { + status should be(StatusCodes.OK) + responseAs[String] should be("OK") + } + } + } +} + +class NoOptionsSwaggerResourceHttpServiceSpec extends SwaggerResourceHttpServiceSpec { + override def swaggerServiceName = "testservice" + + override def swaggerAllOptionsOk = false + + behavior of "SwaggerResourceHttpService" + + it should "service swagger yaml" in { + Get("/swagger/testservice.yaml") ~> swaggerResourceRoute ~> check { + status should be(StatusCodes.OK) + responseAs[String] should startWith("swagger: '2.0'\n") + } + } + + it should "not service swagger json" in { + Get("/swagger/testservice.json") ~> Route.seal(swaggerResourceRoute) ~> check { + status should be(StatusCodes.NotFound) + } + } + + it should "not service /swagger" in { + Get("/swagger") ~> Route.seal(swaggerResourceRoute) ~> check { + status should be(StatusCodes.NotFound) + } + } + + it should "not return options for all routes" in { + forAll(testPathsForOptions) { path => + Options(path) ~> Route.seal(swaggerResourceRoute) ~> check { + 
status should be(StatusCodes.MethodNotAllowed) + } + } + } +} + +class YamlSwaggerUiResourceHttpServiceSpec extends SwaggerUiResourceHttpServiceSpec { + override def swaggerServiceName = "testservice" + + behavior of "SwaggerUiResourceHttpService" + + it should "redirect /swagger to /swagger/index.html with yaml" in { + Get("/swagger") ~> swaggerUiResourceRoute ~> check { + status should be(StatusCodes.TemporaryRedirect) + header("Location") should be(Option(Location(Uri("/swagger/index.html?url=/swagger/testservice.yaml")))) + } + } + + it should "service swagger yaml" in { + Get("/swagger/testservice.yaml") ~> swaggerUiResourceRoute ~> check { + status should be(StatusCodes.OK) + responseAs[String] should startWith("swagger: '2.0'\n") + } + } + + it should "not service swagger json" in { + Get("/swagger/testservice.json") ~> Route.seal(swaggerUiResourceRoute) ~> check { + status should be(StatusCodes.NotFound) + } + } + + it should "return options for all routes" in { + forAll(testPathsForOptions) { path => + Options(path) ~> swaggerUiResourceRoute ~> check { + status should be(StatusCodes.OK) + responseAs[String] should be("OK") + } + } + } +} + + +class JsonSwaggerUiResourceHttpServiceSpec extends SwaggerUiResourceHttpServiceSpec { + override def swaggerServiceName = "testservice" + + override def swaggerResourceType = "json" + + behavior of "SwaggerUiResourceHttpService" + + it should "redirect /swagger to /swagger/index.html with yaml with json" in { + Get("/swagger") ~> swaggerUiResourceRoute ~> check { + status should be(StatusCodes.TemporaryRedirect) + header("Location") should be(Option(Location(Uri("/swagger/index.html?url=/swagger/testservice.json")))) + } + } + + it should "service swagger json" in { + Get("/swagger/testservice.json") ~> swaggerUiResourceRoute ~> check { + status should be(StatusCodes.OK) + responseAs[String] should startWith("{\n \"swagger\": \"2.0\",\n") + } + } + + it should "not service swagger yaml" in { + 
Get("/swagger/testservice.yaml") ~> Route.seal(swaggerUiResourceRoute) ~> check { + status should be(StatusCodes.NotFound) + } + } + + it should "return options for all routes" in { + forAll(testPathsForOptions) { path => + Options(path) ~> swaggerUiResourceRoute ~> check { + status should be(StatusCodes.OK) + responseAs[String] should be("OK") + } + } + } +} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/ContentTypeOption.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/ContentTypeOption.scala deleted file mode 100644 index e6f83b0e4..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/ContentTypeOption.scala +++ /dev/null @@ -1,15 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.OpenOption - -object ContentTypeOption { - sealed trait ContentType - case object PlainText extends ContentType with OpenOption { - override def toString = "plain/text" - } - case object Json extends ContentType with OpenOption { - override def toString = "application/json" - } -} - - diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileAttributes.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileAttributes.scala deleted file mode 100644 index 5d45641de..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileAttributes.scala +++ /dev/null @@ -1,23 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.attribute.{BasicFileAttributes, FileTime} - -import com.google.api.services.storage.Storage -import com.google.api.services.storage.model.StorageObject -import org.apache.commons.codec.digest.DigestUtils - -class GcsFileAttributes(path: NioGcsPath, storageClient: Storage) extends BasicFileAttributes { - override def fileKey(): AnyRef = DigestUtils.md5Hex(path.toString) - override def isRegularFile: Boolean = throw new NotImplementedError("To be implemented when/if needed") - override def isOther: Boolean = throw new NotImplementedError("To 
be implemented when/if needed") - override def lastModifiedTime(): FileTime = throw new NotImplementedError("To be implemented when/if needed") - override def size(): Long = { - val getObject = storageClient.objects.get(path.bucket, path.objectName) - val storageObject: StorageObject = getObject.execute() - storageObject.getSize.longValue() - } - override def isDirectory: Boolean = path.isDirectory - override def isSymbolicLink: Boolean = false - override def creationTime(): FileTime = throw new NotImplementedError("To be implemented when/if needed") - override def lastAccessTime(): FileTime = throw new NotImplementedError("To be implemented when/if needed") -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystem.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystem.scala deleted file mode 100644 index 2aad4b57b..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystem.scala +++ /dev/null @@ -1,69 +0,0 @@ -package cromwell.filesystems.gcs - -import java.lang.Iterable -import java.nio.file._ -import java.nio.file.attribute.UserPrincipalLookupService -import java.nio.file.spi.FileSystemProvider -import java.util.{Collections, Set => JSet} - -import scala.language.postfixOps - -case class NotAGcsPathException(path: String) extends IllegalArgumentException(s"$path is not a valid GCS path.") - -object GcsFileSystem { - val Separator = "/" - private[gcs] val Scheme = "gs" - private[gcs] val Protocol = s"$Scheme://" - private val GsUriRegex = s"""$Protocol(.*)""".r - private val AttributeViews = Collections.singleton("basic") - - def isAbsoluteGcsPath(str: String) = str match { - case GsUriRegex(chunks) => true - case _ => false - } - - def apply(provider: GcsFileSystemProvider) = new GcsFileSystem(provider) -} - -/** - * Implements the java.nio.FileSystem interface for GoogleCloudStorage. 
- */ -class GcsFileSystem private(val gcsFileSystemProvider: GcsFileSystemProvider) extends FileSystem { - - import GcsFileSystem._ - - override def supportedFileAttributeViews(): JSet[String] = AttributeViews - - override def getSeparator: String = Separator - - override def getRootDirectories: Iterable[Path] = Collections.emptyList[Path] - - override def newWatchService(): WatchService = throw new NotImplementedError("GCS FS does not support Watch Service at this time") - - override def getFileStores: Iterable[FileStore] = Collections.emptyList() - - override def isReadOnly: Boolean = false - - override def provider(): FileSystemProvider = gcsFileSystemProvider - - override def isOpen: Boolean = true - - override def close(): Unit = throw new UnsupportedOperationException("GCS FS cannot be closed") - - override def getPathMatcher(syntaxAndPattern: String): PathMatcher = FileSystems.getDefault.getPathMatcher(syntaxAndPattern) - - override def getUserPrincipalLookupService: UserPrincipalLookupService = throw new UnsupportedOperationException() - - private def buildPath(first: String, more: Seq[String], forceDirectory: Boolean) = { - val directory = forceDirectory || (more.isEmpty && first.endsWith(Separator)) || more.lastOption.exists(_.endsWith(Separator)) - first match { - case GsUriRegex(chunks) => new NioGcsPath(chunks.split(Separator) ++ more.toArray[String], true, directory)(this) - case empty if empty.isEmpty => new NioGcsPath(Array.empty[String] ++ more.toArray[String], false, false)(this) - case _ => throw NotAGcsPathException(s"$first is not a gcs path") - } - } - - override def getPath(first: String, more: String*): Path = buildPath(first, more, forceDirectory = false) - - def getPathAsDirectory(first: String, more: String*): Path = buildPath(first, more, forceDirectory = true) -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystemProvider.scala 
b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystemProvider.scala deleted file mode 100644 index 7199d58ec..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystemProvider.scala +++ /dev/null @@ -1,279 +0,0 @@ -package cromwell.filesystems.gcs - -import java.io.{FileNotFoundException, OutputStream} -import java.net.URI -import java.nio.channels.{Channels, SeekableByteChannel} -import java.nio.file.DirectoryStream.Filter -import java.nio.file._ -import java.nio.file.attribute.{BasicFileAttributes, FileAttribute, FileAttributeView} -import java.nio.file.spi.FileSystemProvider -import java.util -import java.util.Collections -import java.util.concurrent.{AbstractExecutorService, TimeUnit} - -import com.google.api.client.googleapis.json.GoogleJsonResponseException -import com.google.api.client.googleapis.media.MediaHttpUploader -import com.google.api.services.storage.Storage -import com.google.api.services.storage.model.StorageObject -import com.google.cloud.hadoop.gcsio.{GoogleCloudStorageReadChannel, GoogleCloudStorageWriteChannel, ObjectWriteConditions} -import com.google.cloud.hadoop.util.{ApiErrorExtractor, AsyncWriteChannelOptions, ClientRequestHelper} -import com.typesafe.config.ConfigFactory -import lenthall.config.ScalaConfig.EnhancedScalaConfig - -import scala.annotation.tailrec -import scala.collection.JavaConverters._ -import scala.concurrent.duration._ -import scala.concurrent.{ExecutionContext, ExecutionContextExecutorService} -import scala.language.postfixOps -import scala.util.{Failure, Success, Try} - -object GcsFileSystemProvider { - def apply(storageClient: Storage)(implicit ec: ExecutionContext) = { - new GcsFileSystemProvider(Success(storageClient), ec) - } - - object AcceptAllFilter extends DirectoryStream.Filter[Path] { - override def accept(entry: Path): Boolean = true - } - - // To choose these numbers I first entered a prolonged period of personal consideration and deep thought. 
- // Then, at the end of this time, I decided to just pick some numbers arbitrarily. - private val retryInterval = 500 milliseconds - private val retryCount = 3 - - def withRetry[A](f: => A, retries: Int = retryCount): A = Try(f) match { - case Success(res) => res - case Failure(ex: GoogleJsonResponseException) - if retries > 0 && - (ex.getStatusCode == 404 || ex.getStatusCode == 500) => - // FIXME remove this sleep - Thread.sleep(retryInterval.toMillis.toInt) - withRetry(f, retries - 1) - case Failure(ex) => throw ex - } -} - -/** - * Converts a Scala ExecutionContext to a Java ExecutorService. - * https://groups.google.com/forum/#!topic/scala-user/ZyHrfzD7eX8 - */ -object ExecutionContextExecutorServiceBridge { - def apply(ec: ExecutionContext): ExecutionContextExecutorService = ec match { - case null => throw new RuntimeException("Execution context cannot be null") - case eces: ExecutionContextExecutorService => eces - case executionContext => new AbstractExecutorService with ExecutionContextExecutorService { - override def prepare(): ExecutionContext = executionContext - override def isShutdown = false - override def isTerminated = false - override def shutdown() = () - override def shutdownNow() = Collections.emptyList[Runnable] - override def execute(runnable: Runnable): Unit = executionContext execute runnable - override def reportFailure(t: Throwable): Unit = executionContext reportFailure t - override def awaitTermination(length: Long,unit: TimeUnit): Boolean = false - } - } -} - -/** - * Implements java.nio.FileSystemProvider for GoogleCloudStorage - * This implementation is not complete and mostly a proof of concept that it's possible to *copy* around files from/to local/gcs. - * Copying is the only functionality that has been successfully tested (same and cross filesystems). 
- * - * If/when switching to Google's GCS NIO implementation, callers may need to implement various utilities built into - * this implementation, including: - * - * - Minimizing the upload buffer size, assuming the default is also on the order of megabytes of memory per upload - * - Automatically retrying transient errors - * - etc. - * - * @param storageClient Google API Storage object - * @param executionContext executionContext, will be used to perform async writes to GCS after being converted to a Java execution service - */ -class GcsFileSystemProvider private[gcs](storageClient: Try[Storage], val executionContext: ExecutionContext) extends FileSystemProvider { - import GcsFileSystemProvider._ - - private[this] lazy val config = ConfigFactory.load() - - // We want to throw an exception here if we try to use this class with a failed gcs interface - lazy val client = storageClient.get - private val executionService = ExecutionContextExecutorServiceBridge(executionContext) - private val errorExtractor = new ApiErrorExtractor() - def notAGcsPath(path: Path) = throw new IllegalArgumentException(s"$path is not a GCS path.") - - lazy val defaultFileSystem: GcsFileSystem = GcsFileSystem(this) - - private def exists(path: Path) = path match { - case gcsPath: NioGcsPath => - Try(withRetry(client.objects.get(gcsPath.bucket, gcsPath.objectName).execute)) recover { - case ex: GoogleJsonResponseException - if ex.getStatusCode == 404 => if (!gcsPath.isDirectory) throw new FileNotFoundException(path.toString) - } get - case _ => throw new FileNotFoundException(path.toString) - } - - /** - * Note: options and attributes are not honored. 
- */ - override def newByteChannel(path: Path, options: util.Set[_ <: OpenOption], attrs: FileAttribute[_]*): SeekableByteChannel = { - def createReadChannel(gcsPath: NioGcsPath) = new GoogleCloudStorageReadChannel(client, - gcsPath.bucket, - gcsPath.objectName, - errorExtractor, - new ClientRequestHelper[StorageObject]() - ) - - path match { - case gcsPath: NioGcsPath => withRetry(createReadChannel(gcsPath)) - case _ => notAGcsPath(path) - } - } - - /* - For now, default all upload buffers as small as possible, 256K per upload. Without this default the buffers are 64M. - In the future, we may possibly be able to pass information to the NioGcsPath with the expected... or Google's GCS NIO - implementation will be finished we'll need to revisit this issue again. - - See also: - - com.google.cloud.hadoop.util.AbstractGoogleAsyncWriteChannel.setUploadBufferSize - - com.google.api.client.googleapis.media.MediaHttpUploader.setContentAndHeadersOnCurrentRequest - */ - private[this] lazy val uploadBufferBytes = config.getBytesOr("google.upload-buffer-bytes", - MediaHttpUploader.MINIMUM_CHUNK_SIZE).toInt - - /** - * Overrides the default implementation to provide a writable channel (which newByteChannel doesn't). - * NOTE: options are not honored. 
- */ - override def newOutputStream(path: Path, options: OpenOption*): OutputStream = { - val contentType = options collectFirst { - case e: ContentTypeOption.ContentType => e.toString - } getOrElse ContentTypeOption.PlainText.toString - - def initializeOutputStream(gcsPath: NioGcsPath) = { - val channel = new GoogleCloudStorageWriteChannel( - executionService, - client, - new ClientRequestHelper[StorageObject](), - gcsPath.bucket, - gcsPath.objectName, - AsyncWriteChannelOptions.newBuilder().setUploadBufferSize(uploadBufferBytes).build(), - new ObjectWriteConditions(), - Map.empty[String, String].asJava, - contentType) - channel.initialize() - Channels.newOutputStream(channel) - } - - path match { - case gcsPath: NioGcsPath => withRetry(initializeOutputStream(gcsPath)) - case _ => notAGcsPath(path) - } - } - - override def copy(source: Path, target: Path, options: CopyOption*): Unit = { - (source, target) match { - case (s: NioGcsPath, d: NioGcsPath) => - def innerCopy = { - val storageObject = client.objects.get(s.bucket, s.objectName).execute - client.objects.copy(s.bucket, s.objectName, d.bucket, d.objectName, storageObject).execute - } - - withRetry(innerCopy) - case _ => throw new UnsupportedOperationException(s"Can only copy from GCS to GCS: $source or $target is not a GCS path") - } - } - - override def delete(path: Path): Unit = { - path match { - case gcs: NioGcsPath => try { - withRetry(client.objects.delete(gcs.bucket, gcs.objectName).execute()) - } catch { - case ex: GoogleJsonResponseException if ex.getStatusCode == 404 => throw new NoSuchFileException(path.toString) - } - case _ => notAGcsPath(path) - } - } - - override def readAttributes[A <: BasicFileAttributes](path: Path, `type`: Class[A], options: LinkOption*): A = path match { - case gcsPath: NioGcsPath => - exists(path) - new GcsFileAttributes(gcsPath, client).asInstanceOf[A] - case _ => notAGcsPath(path) - } - - override def move(source: Path, target: Path, options: CopyOption*): Unit = { - 
(source, target) match { - case (s: NioGcsPath, d: NioGcsPath) => - def moveInner = { - val storageObject = client.objects.get(s.bucket, s.objectName).execute - client.objects.rewrite(s.bucket, s.objectName, d.bucket, d.objectName, storageObject).execute - } - - withRetry(moveInner) - case _ => throw new UnsupportedOperationException(s"Can only move from GCS to GCS: $source or $target is not a GCS path") - } - } - - def crc32cHash(path: Path) = path match { - case gcsDir: NioGcsPath => withRetry(client.objects().get(gcsDir.bucket, gcsDir.objectName).execute().getCrc32c) - case _ => notAGcsPath(path) - } - - override def checkAccess(path: Path, modes: AccessMode*): Unit = exists(path) - override def createDirectory(dir: Path, attrs: FileAttribute[_]*): Unit = {} - - override def getFileSystem(uri: URI): FileSystem = defaultFileSystem - - override def isHidden(path: Path): Boolean = throw new NotImplementedError() - - private[this] lazy val maxResults = config.getIntOr("google.list-max-results", 1000).toLong - - private def list(gcsDir: NioGcsPath) = { - val listRequest = client.objects().list(gcsDir.bucket).setMaxResults(maxResults) - listRequest.setPrefix(gcsDir.objectName) - - def objectToPath(storageObject: StorageObject): Path = { - NioGcsPath(s"$getScheme${storageObject.getBucket}${GcsFileSystem.Separator}${storageObject.getName}")(gcsDir.getFileSystem.asInstanceOf[GcsFileSystem]) - } - - // Contains a Seq corresponding to the current page of objects, plus a token for the next page of objects, if any. 
- case class ListPageResult(objects: Seq[StorageObject], nextPageToken: Option[String]) - - def requestListPage(pageToken: Option[String] = None): ListPageResult = { - val objects = withRetry(listRequest.setPageToken(pageToken.orNull).execute()) - ListPageResult(objects.getItems.asScala, Option(objects.getNextPageToken)) - } - - @tailrec - def remainingObjects(pageToken: Option[String], acc: Seq[StorageObject]): Seq[StorageObject] = { - if (pageToken.isEmpty) acc - else { - val page = requestListPage(pageToken) - remainingObjects(page.nextPageToken, acc ++ page.objects) - } - } - - val firstPage = requestListPage(pageToken = None) - val allObjects = remainingObjects(firstPage.nextPageToken, firstPage.objects) - - new DirectoryStream[Path] { - override def iterator(): util.Iterator[Path] = (allObjects map objectToPath).toIterator.asJava - override def close(): Unit = {} - } - } - - override def newDirectoryStream(dir: Path, filter: Filter[_ >: Path]): DirectoryStream[Path] = dir match { - case gcsDir: NioGcsPath => list(gcsDir) - case _ => notAGcsPath(dir) - } - override def setAttribute(path: Path, attribute: String, value: scala.Any, options: LinkOption*): Unit = throw new NotImplementedError() - override def getPath(uri: URI): Path = throw new NotImplementedError() - override def newFileSystem(uri: URI, env: util.Map[String, _]): FileSystem = { - throw new UnsupportedOperationException("GcsFileSystem provider doesn't support creation of new FileSystems at this time. 
Use getFileSystem instead.") - } - override def readAttributes(path: Path, attributes: String, options: LinkOption*): util.Map[String, AnyRef] = throw new NotImplementedError() - override def isSameFile(path: Path, path2: Path): Boolean = throw new NotImplementedError() - override def getFileAttributeView[V <: FileAttributeView](path: Path, `type`: Class[V], options: LinkOption*): V = throw new NotImplementedError() - override def getFileStore(path: Path): FileStore = throw new NotImplementedError() - override def getScheme: String = GcsFileSystem.Protocol -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala new file mode 100644 index 000000000..08b702029 --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala @@ -0,0 +1,176 @@ +package cromwell.filesystems.gcs + +import java.net.URI + +import akka.actor.ActorSystem +import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport +import com.google.api.client.json.jackson2.JacksonFactory +import com.google.api.gax.retrying.RetrySettings +import com.google.auth.Credentials +import com.google.cloud.http.HttpTransportOptions +import com.google.cloud.storage.contrib.nio.{CloudStorageConfiguration, CloudStorageFileSystem, CloudStorageFileSystemProvider, CloudStoragePath} +import com.google.cloud.storage.{BlobId, StorageOptions} +import com.google.common.base.Preconditions._ +import com.google.common.net.UrlEscapers +import cromwell.core.WorkflowOptions +import cromwell.core.path.{NioPath, Path, PathBuilder} +import cromwell.filesystems.gcs.GcsPathBuilder._ +import cromwell.filesystems.gcs.auth.GoogleAuthMode + +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.duration._ +import scala.language.postfixOps +import scala.util.Try + +object GcsPathBuilder { + + val JsonFactory = JacksonFactory.getDefaultInstance + val HttpTransport = 
GoogleNetHttpTransport.newTrustedTransport + + def checkValid(uri: URI) = { + checkNotNull(uri.getScheme, s"%s does not have a gcs scheme", uri) + checkArgument( + uri.getScheme.equalsIgnoreCase(CloudStorageFileSystem.URI_SCHEME), + "Cloud Storage URIs must have '%s' scheme: %s", + CloudStorageFileSystem.URI_SCHEME: Any, + uri: Any + ) + checkNotNull(uri.getHost, s"%s does not have a host", uri) + } + + sealed trait GcsPathValidation + case object ValidFullGcsPath extends GcsPathValidation + case object PossiblyValidRelativeGcsPath extends GcsPathValidation + sealed trait InvalidGcsPath extends GcsPathValidation { + def pathString: String + def errorMessage: String + } + final case class InvalidFullGcsPath(pathString: String) extends InvalidGcsPath { + override def errorMessage = { + val prefix = s""" + |The bucket name in GCS path '$pathString' is not compatible with URI host name standards. + |URI host name compatibility is a requirement for Cromwell's GCS filesystem support. + |Google also generally advises against the use of underscores in GCS bucket names, as well as against + |the use of periods or dashes in certain patterns as described here: + |https://cloud.google.com/storage/docs/naming. + """.stripMargin.replaceAll("\n", " ").trim + val underscoreWarning = if (pathString.contains("_")) s"In particular, the bucket name in '$pathString' may contain an underscore which is not a valid character in a URI host." 
else "" + List(prefix, underscoreWarning).mkString(" ") + } + } + final case class UnparseableGcsPath(pathString: String, throwable: Throwable) extends InvalidGcsPath { + override def errorMessage: String = + List(s"The specified GCS path '$pathString' does not parse as a URI.", throwable.getMessage).mkString("\n") + } + + def validateGcsPath(string: String): GcsPathValidation = { + Try { + val uri = getUri(string) + if (uri.getScheme == null) PossiblyValidRelativeGcsPath + else if (uri.getScheme == "gs") { + if (uri.getHost == null) InvalidFullGcsPath(string) else ValidFullGcsPath + } else InvalidFullGcsPath(string) + } recover { case t => UnparseableGcsPath(string, t) } get + } + + def isGcsPath(nioPath: NioPath): Boolean = { + nioPath.getFileSystem.provider().getScheme == CloudStorageFileSystem.URI_SCHEME + } + + def getUri(string: String) = URI.create(UrlEscapers.urlFragmentEscaper().escape(string)) + + def fromAuthMode(authMode: GoogleAuthMode, + applicationName: String, + retrySettings: Option[RetrySettings], + cloudStorageConfiguration: CloudStorageConfiguration, + options: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext): Future[GcsPathBuilder] = { + authMode.credential(options) map { credentials => + fromCredentials(credentials, + applicationName, + retrySettings, + cloudStorageConfiguration, + options + ) + } + } + + def fromCredentials(credentials: Credentials, + applicationName: String, + retrySettings: Option[RetrySettings], + cloudStorageConfiguration: CloudStorageConfiguration, + options: WorkflowOptions): GcsPathBuilder = { + val transportOptions = HttpTransportOptions.newBuilder() + .setReadTimeout(3.minutes.toMillis.toInt) + .build() + + val storageOptionsBuilder = StorageOptions.newBuilder() + .setTransportOptions(transportOptions) + .setCredentials(credentials) + + retrySettings foreach storageOptionsBuilder.setRetrySettings + + // Grab the google project from Workflow Options if specified and set + // that to be the project used 
by the StorageOptions Builder + options.get("google_project") map storageOptionsBuilder.setProjectId + + + val storageOptions = storageOptionsBuilder.build() + + // Create a com.google.api.services.storage.Storage + // This is the underlying api used by com.google.cloud.storage + // By bypassing com.google.cloud.storage, we can create low level requests that can be batched + val apiStorage: com.google.api.services.storage.Storage = { + new com.google.api.services.storage.Storage + .Builder(HttpTransport, JsonFactory, GoogleConfiguration.withCustomTimeouts(transportOptions.getHttpRequestInitializer(storageOptions))) + .setApplicationName(applicationName) + .build() + } + + // Create a com.google.cloud.storage.Storage + // This is the "relatively" high level API, and recommended one. The nio implementation sits on top of this. + val cloudStorage: com.google.cloud.storage.Storage = storageOptions.getService + + // The CloudStorageFileSystemProvider constructor is not public. Currently the only way to obtain one is through a CloudStorageFileSystem + // Moreover at this point we can use the same provider for all operations as we have usable credentials + // In order to avoid recreating a provider with every getPath call, create a dummy FileSystem just to get its provider + val provider: CloudStorageFileSystemProvider = CloudStorageFileSystem.forBucket("dummy", cloudStorageConfiguration, storageOptions).provider() + + new GcsPathBuilder(apiStorage, cloudStorage, provider, storageOptions.getProjectId) + } +} + +class GcsPathBuilder(val apiStorage: com.google.api.services.storage.Storage, + val cloudStorage: com.google.cloud.storage.Storage, + provider: CloudStorageFileSystemProvider, + val projectId: String) extends PathBuilder { + def build(string: String): Try[GcsPath] = { + Try { + val uri = getUri(string) + GcsPathBuilder.checkValid(uri) + GcsPath(provider.getPath(uri), apiStorage, cloudStorage) + } + } + + override def name: String = "Gcs" +} + +case class GcsPath 
private[gcs](nioPath: NioPath, + apiStorage: com.google.api.services.storage.Storage, + cloudStorage: com.google.cloud.storage.Storage + ) extends Path { + lazy val blob = BlobId.of(cloudStoragePath.bucket, cloudStoragePath.toRealPath().toString) + + override protected def newPath(nioPath: NioPath): GcsPath = GcsPath(nioPath, apiStorage, cloudStorage) + + override def pathAsString: String = java.net.URLDecoder.decode(nioPath.toUri.toString, "UTF-8") + + override def pathWithoutScheme: String = { + val gcsPath = cloudStoragePath + gcsPath.bucket + gcsPath.toAbsolutePath.toString + } + + def cloudStoragePath: CloudStoragePath = nioPath match { + case gcsPath: CloudStoragePath => gcsPath + case _ => throw new RuntimeException(s"Internal path was not a cloud storage path: $nioPath") + } +} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala new file mode 100644 index 000000000..01659e42f --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala @@ -0,0 +1,50 @@ +package cromwell.filesystems.gcs + +import akka.actor.ActorSystem +import com.google.api.client.googleapis.media.MediaHttpUploader +import com.google.api.gax.retrying.RetrySettings +import com.google.auth.Credentials +import com.google.cloud.storage.contrib.nio.CloudStorageConfiguration +import com.typesafe.config.ConfigFactory +import cromwell.core.WorkflowOptions +import cromwell.core.path.PathBuilderFactory +import cromwell.filesystems.gcs.auth.GoogleAuthMode +import net.ceedubs.ficus.Ficus._ + +import scala.concurrent.ExecutionContext + +object GcsPathBuilderFactory { + + private[this] lazy val UploadBufferBytes = { + ConfigFactory.load().as[Option[Int]]("google.upload-buffer-bytes").getOrElse(MediaHttpUploader.MINIMUM_CHUNK_SIZE) + } + + val DefaultCloudStorageConfiguration = { + CloudStorageConfiguration.builder() + 
.blockSize(UploadBufferBytes) + .permitEmptyPathComponents(true) + .stripPrefixSlash(true) + .usePseudoDirectories(true) + .build() + } +} + +case class GcsPathBuilderFactory(authMode: GoogleAuthMode, + applicationName: String, + retrySettings: Option[RetrySettings] = None, + cloudStorageConfiguration: CloudStorageConfiguration = GcsPathBuilderFactory.DefaultCloudStorageConfiguration) + + extends PathBuilderFactory { + + def withOptions(options: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext) = { + GcsPathBuilder.fromAuthMode(authMode, applicationName, retrySettings, cloudStorageConfiguration, options) + } + + /** + * Ignores the authMode and creates a GcsPathBuilder using the passed credentials directly. + * Can be used when the Credentials are already available. + */ + def fromCredentials(options: WorkflowOptions, credentials: Credentials) = { + GcsPathBuilder.fromCredentials(credentials, applicationName, retrySettings, cloudStorageConfiguration, options) + } +} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleAuthMode.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleAuthMode.scala deleted file mode 100644 index c18a85a25..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleAuthMode.scala +++ /dev/null @@ -1,186 +0,0 @@ -package cromwell.filesystems.gcs - -import java.io.{FileNotFoundException, IOException, InputStreamReader} -import java.nio.file.{Files, Paths} - -import com.google.api.client.auth.oauth2.Credential -import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp -import com.google.api.client.googleapis.auth.oauth2.{GoogleAuthorizationCodeFlow, GoogleClientSecrets, GoogleCredential} -import com.google.api.client.googleapis.extensions.java6.auth.oauth2.GooglePromptReceiver -import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport -import com.google.api.client.json.JsonFactory -import 
com.google.api.client.json.jackson2.JacksonFactory -import com.google.api.client.util.store.FileDataStoreFactory -import com.google.api.services.storage.{Storage, StorageScopes} -import cromwell.filesystems.gcs.GoogleAuthMode.{GcsScopes, GoogleAuthOptions} -import org.slf4j.LoggerFactory - -import scala.collection.JavaConverters._ -import scala.util.{Failure, Success, Try} - -object GoogleAuthMode { - - lazy val jsonFactory = JacksonFactory.getDefaultInstance - lazy val httpTransport = GoogleNetHttpTransport.newTrustedTransport - val RefreshTokenOptionKey = "refresh_token" - - /** - * Before it returns the raw credential, checks if the token will expire within 60 seconds. - * - * TODO: Needs more design / testing around thread safety. - * For example, the credential returned is mutable, and may be modified by another thread. - * - * Most Google clients have the ability to refresh tokens automatically, as they use the standard Google - * HttpTransport that automatically triggers credential refreshing via Credential.handleResponse. Since Cromwell - * contacts https://gcr.io directly via HTTP requests using spray-client, we need to keep the token fresh ourselves. 
- * - * @see Credential#handleResponse(HttpRequest, HttpResponse, boolean) - */ - implicit class EnhancedCredentials(val credential: Credential) extends AnyVal { - def freshCredential: Try[Credential] = { - val stillValid = Option(credential.getExpiresInSeconds).exists(_ > 60) - if (stillValid || credential.refreshToken()) { - Success(credential) - } else { - Failure(new Exception("Unable to refresh token")) - } - } - } - - def buildStorage(credential: Credential, applicationName: String) = { - new Storage.Builder( - httpTransport, - jsonFactory, - credential).setApplicationName(applicationName).build() - } - - trait GoogleAuthOptions { - def get(key: String): Try[String] - } - - val GcsScopes = List( - StorageScopes.DEVSTORAGE_FULL_CONTROL, - StorageScopes.DEVSTORAGE_READ_WRITE - ) -} - - -sealed trait GoogleAuthMode { - def credential(options: GoogleAuthOptions): Credential - - def assertWorkflowOptions(options: GoogleAuthOptions): Unit = () - - def name: String - - def requiresAuthFile: Boolean = false - - protected lazy val log = LoggerFactory.getLogger(getClass.getSimpleName) - - protected def validateCredentials(credential: Credential) = { - Try(credential.refreshToken()) match { - case Failure(ex) => throw new RuntimeException(s"Google credentials are invalid: ${ex.getMessage}") - case Success(_) => credential - } - } - - def buildStorage(options: GoogleAuthOptions, applicationName: String): Storage = { - GoogleAuthMode.buildStorage(credential(options), applicationName) - } -} - -final case class ServiceAccountMode(override val name: String, accountId: String, pemPath: String, scopes: List[String] = GcsScopes) extends GoogleAuthMode { - import GoogleAuthMode._ - - private lazy val credentials: Credential = { - val pemFile = Paths.get(pemPath).toAbsolutePath - if (!Files.exists(pemFile)) { - throw new FileNotFoundException(s"PEM file $pemFile does not exist") - } - validateCredentials( - new GoogleCredential.Builder().setTransport(httpTransport) - 
.setJsonFactory(jsonFactory) - .setServiceAccountId(accountId) - .setServiceAccountScopes(scopes.asJava) - .setServiceAccountPrivateKeyFromPemFile(pemFile.toFile) - .build() - ) - } - - override def credential(options: GoogleAuthOptions) = credentials -} - -final case class UserMode(override val name: String, user: String, secretsFile: String, datastoreDir: String, scopes: List[String] = GcsScopes) extends GoogleAuthMode { - import GoogleAuthMode._ - - private def filePathToSecrets(secrets: String, jsonFactory: JsonFactory) = { - val secretsPath = Paths.get(secrets).toAbsolutePath - if(!Files.isReadable(secretsPath)) { - log.warn("Secrets file does not exist or is not readable.") - } - val secretStream = new InputStreamReader(Files.newInputStream(secretsPath)) - - GoogleClientSecrets.load(jsonFactory, secretStream) - } - - private lazy val credentials: Credential = { - val clientSecrets = filePathToSecrets(secretsFile, jsonFactory) - val dataStore = Paths.get(datastoreDir).toAbsolutePath - val dataStoreFactory = new FileDataStoreFactory(dataStore.toFile) - val flow = new GoogleAuthorizationCodeFlow.Builder(httpTransport, - jsonFactory, - clientSecrets, - scopes.asJava).setDataStoreFactory(dataStoreFactory).build - validateCredentials(new AuthorizationCodeInstalledApp(flow, new GooglePromptReceiver).authorize(user)) - } - - override def credential(options: GoogleAuthOptions) = credentials -} - -// It would be goofy to have multiple auths that are application_default, but Cromwell won't prevent it. 
-final case class ApplicationDefaultMode(override val name: String, scopes: List[String] = GcsScopes) extends GoogleAuthMode { - import GoogleAuthMode._ - - private lazy val credentials: Credential = { - try { - validateCredentials(GoogleCredential.getApplicationDefault().createScoped(scopes.asJava)) - } catch { - case e: IOException => - log.warn("Failed to get application default credentials", e) - throw e - } - } - - override def credential(options: GoogleAuthOptions) = credentials -} - -final case class RefreshTokenMode(name: String, clientId: String, clientSecret: String) extends GoogleAuthMode with ClientSecrets { - import GoogleAuthMode._ - - override def requiresAuthFile = true - - /** - * Throws if the refresh token is not specified. - */ - override def assertWorkflowOptions(options: GoogleAuthOptions) = getToken(options) - - private def getToken(options: GoogleAuthOptions): String = { - options.get(RefreshTokenOptionKey).getOrElse(throw new IllegalArgumentException(s"Missing parameters in workflow options: $RefreshTokenOptionKey")) - } - - override def credential(options: GoogleAuthOptions): Credential = { - validateCredentials( - new GoogleCredential.Builder().setTransport(httpTransport) - .setJsonFactory(jsonFactory) - .setClientSecrets(clientId, clientSecret) - .build() - .setRefreshToken(getToken(options)) - ) - } -} - -trait ClientSecrets { - val clientId: String - val clientSecret: String -} - -final case class SimpleClientSecrets(clientId: String, clientSecret: String) extends ClientSecrets diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala index 8c4e559ae..34c1bff53 100644 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala @@ -1,62 +1,99 @@ package cromwell.filesystems.gcs +import 
java.io.IOException + +import cats.data.Validated._ +import cats.instances.list._ +import cats.syntax.cartesian._ +import cats.syntax.traverse._ +import cats.syntax.validated._ +import com.google.api.client.googleapis.auth.oauth2.GoogleCredential +import com.google.api.client.http.{HttpRequest, HttpRequestInitializer} import com.google.api.services.storage.StorageScopes -import com.typesafe.config.Config -import lenthall.config.ConfigValidationException -import lenthall.config.ValidatedConfig._ +import com.typesafe.config.{Config, ConfigException} +import cromwell.filesystems.gcs.auth.ServiceAccountMode.{JsonFileFormat, PemFileFormat} +import cromwell.filesystems.gcs.auth._ +import lenthall.exception.MessageAggregation +import lenthall.validation.ErrorOr._ +import lenthall.validation.Validation._ +import net.ceedubs.ficus.Ficus._ import org.slf4j.LoggerFactory -import scala.collection.JavaConverters._ -import scala.language.postfixOps -import scalaz.Scalaz._ -import scalaz.Validation.FlatMap._ -import scalaz._ - - final case class GoogleConfiguration private (applicationName: String, authsByName: Map[String, GoogleAuthMode]) { def auth(name: String): ErrorOr[GoogleAuthMode] = { authsByName.get(name) match { case None => val knownAuthNames = authsByName.keys.mkString(", ") - s"`google` configuration stanza does not contain an auth named '$name'. Known auth names: $knownAuthNames".failureNel - case Some(a) => a.successNel + s"`google` configuration stanza does not contain an auth named '$name'. 
Known auth names: $knownAuthNames".invalidNel + case Some(a) => a.validNel } } } object GoogleConfiguration { + import scala.collection.JavaConverters._ + import scala.concurrent.duration._ + import scala.language.postfixOps + + lazy val DefaultConnectionTimeout = 3 minutes + lazy val DefaultReadTimeout = 3 minutes + + lazy val DefaultRequestInitializer = GoogleConfiguration.withCustomTimeouts(new GoogleCredential.Builder().build()) + + def withCustomTimeouts(httpRequestInitializer: HttpRequestInitializer, + connectionTimeout: FiniteDuration = DefaultConnectionTimeout, + readTimeout: FiniteDuration = DefaultReadTimeout) = { + new HttpRequestInitializer() { + @throws[IOException] + override def initialize(httpRequest: HttpRequest) = { + httpRequestInitializer.initialize(httpRequest) + httpRequest.setConnectTimeout(connectionTimeout.toMillis.toInt) + httpRequest.setReadTimeout(readTimeout.toMillis.toInt) + () + } + } + } private val log = LoggerFactory.getLogger("GoogleConfiguration") - private val GoogleScopes = List( + final case class GoogleConfigurationException(errorMessages: List[String]) extends MessageAggregation { + override val exceptionContext = "Google configuration" + } + + val GoogleScopes = List( StorageScopes.DEVSTORAGE_FULL_CONTROL, StorageScopes.DEVSTORAGE_READ_WRITE, "https://www.googleapis.com/auth/genomics", "https://www.googleapis.com/auth/compute" - ) + ).asJava def apply(config: Config): GoogleConfiguration = { val googleConfig = config.getConfig("google") - val appName = googleConfig.validateString("application-name") + val appName = validate { googleConfig.as[String]("application-name") } def buildAuth(authConfig: Config): ErrorOr[GoogleAuthMode] = { - def serviceAccountAuth(authConfig: Config, name: String) = authConfig validateAny { - cfg => ServiceAccountMode(name, cfg.getString("service-account-id"), cfg.getString("pem-file"), GoogleScopes) + def serviceAccountAuth(authConfig: Config, name: String): ErrorOr[GoogleAuthMode] = validate { + 
(authConfig.getAs[String]("pem-file"), authConfig.getAs[String]("json-file")) match { + case (Some(pem), None) => ServiceAccountMode(name, PemFileFormat(authConfig.as[String]("service-account-id"), pem), GoogleScopes) + case (None, Some(json)) => ServiceAccountMode(name, JsonFileFormat(json), GoogleScopes) + case (None, None) => throw new ConfigException.Generic(s"""No credential configuration was found for service account "$name". See reference.conf under the google.auth, service-account section for supported credential formats.""") + case (Some(_), Some(_)) => throw new ConfigException.Generic(s"""Both a pem file and a json file were supplied for service account "$name" in the configuration file. Only one credential file can be supplied for the same service account. Please choose between the two.""") + } } - def userAccountAuth(authConfig: Config, name: String) = authConfig validateAny { - cfg => UserMode(name, cfg.getString("user"), cfg.getString("secrets-file"), cfg.getString("data-store-dir"), GoogleScopes) + def userAccountAuth(authConfig: Config, name: String): ErrorOr[GoogleAuthMode] = validate { + UserMode(name, authConfig.as[String]("user"), authConfig.as[String]("secrets-file"), authConfig.as[String]("data-store-dir"), GoogleScopes) } - def refreshTokenAuth(authConfig: Config, name: String) = authConfig validateAny { - cfg => RefreshTokenMode(name, cfg.getString("client-id"), cfg.getString("client-secret")) + def refreshTokenAuth(authConfig: Config, name: String): ErrorOr[GoogleAuthMode] = validate { + RefreshTokenMode(name, authConfig.as[String]("client-id"), authConfig.as[String]("client-secret"), GoogleScopes) } - def applicationDefaultAuth(name: String) = ApplicationDefaultMode(name, GoogleScopes).successNel[String] + def applicationDefaultAuth(name: String): ErrorOr[GoogleAuthMode] = ApplicationDefaultMode(name).validNel val name = authConfig.getString("name") val scheme = authConfig.getString("scheme") @@ -65,32 +102,32 @@ object 
GoogleConfiguration { case "user_account" => userAccountAuth(authConfig, name) case "refresh_token" => refreshTokenAuth(authConfig, name) case "application_default" => applicationDefaultAuth(name) - case wut => s"Unsupported authentication scheme: $wut".failureNel + case wut => s"Unsupported authentication scheme: $wut".invalidNel } } - val listOfErrorOrAuths: List[ErrorOr[GoogleAuthMode]] = googleConfig.getConfigList("auths").asScala.toList map buildAuth + val listOfErrorOrAuths: List[ErrorOr[GoogleAuthMode]] = googleConfig.as[List[Config]]("auths") map buildAuth val errorOrAuthList: ErrorOr[List[GoogleAuthMode]] = listOfErrorOrAuths.sequence[ErrorOr, GoogleAuthMode] def uniqueAuthNames(list: List[GoogleAuthMode]): ErrorOr[Unit] = { val duplicateAuthNames = list.groupBy(_.name) collect { case (n, as) if as.size > 1 => n } if (duplicateAuthNames.nonEmpty) { - ("Duplicate auth names: " + duplicateAuthNames.mkString(", ")).failureNel + ("Duplicate auth names: " + duplicateAuthNames.mkString(", ")).invalidNel } else { - ().successNel + ().validNel } } - (appName |@| errorOrAuthList) { (_, _) } flatMap { case (name, list) => + (appName |@| errorOrAuthList) map { (_, _) } flatMap { case (name, list) => uniqueAuthNames(list) map { _ => GoogleConfiguration(name, list map { a => a.name -> a } toMap) } } match { - case Success(r) => r - case Failure(f) => - val errorMessages = f.list.toList.mkString(", ") + case Valid(r) => r + case Invalid(f) => + val errorMessages = f.toList.mkString(", ") log.error(errorMessages) - throw new ConfigValidationException("Google", errorMessages) + throw new GoogleConfigurationException(f.toList) } } } diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/NioGcsPath.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/NioGcsPath.scala deleted file mode 100644 index 672b64cf0..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/NioGcsPath.scala +++ /dev/null @@ -1,191 +0,0 @@ -package 
cromwell.filesystems.gcs - -import java.io.File -import java.net.URI -import java.nio.file.WatchEvent.{Kind, Modifier} -import java.nio.file._ -import java.util - -import scala.collection.JavaConverters._ -import scala.language.{implicitConversions, postfixOps} -import scala.util.Try - -object NioGcsPath { - def apply(path: String)(implicit gcsFileSystem: GcsFileSystem) = gcsFileSystem.getPath(path) - - implicit class PathEnhanced(val path: Path) extends AnyVal { - def asGcsPath(implicit gcsFileSystem: GcsFileSystem) = path match { - case gcsPath: NioGcsPath => gcsPath - case otherPath: Path => getSoftPath(otherPath.toString).asInstanceOf[NioGcsPath] - case _ => throw new IllegalArgumentException("Only GcsPaths are supported.") - } - } - - /** Allow instantiation of a relative gcs path. - * Relative GCS paths can only be created via NioGcsPath methods (eg: subpath, getName...) but not through the GcsFileSystem.getPath method - * in order to avoid floating paths without root. It also ensures that a relative local path cannot mistakenly be parsed as a GCS path. - * */ - private def getSoftPath(first: String, more: String*)(implicit gcsFs: GcsFileSystem): Path = Try(gcsFs.getPath(first, more: _*)) recover { - case e: NotAGcsPathException => new NioGcsPath(first.split(GcsFileSystem.Separator) ++ more.toArray[String], false, first.endsWith(GcsFileSystem.Separator))(gcsFs) - } get - - val Protocol = GcsFileSystem.Protocol -} - -/** - * NOTE: Currently called NioGcsPath so it can exist alongside the current GcsPath class. - * If this approach was to be validated the current GcsPath class would be replaced by this one. - * This class proposes an implementation of the java.nio.Path interface for GoogleCloudStorage. 
- * The following methods are yet to be implemented: - * relativize - * compareTo - * @param chunks array containing all parts of the path in between separators - except the protocol (gs://) - * eg: gs://path/to/resource.txt -> chunks = [path, to, resource.txt] - * @param absolute true if this path is to be considered absolute. - * Only absolute GCS paths can be used to actually locate resources. - * Calling methods on an absolute path can return a relative paths (eg subpath). - * @param gcsFileSystem the gcsFileSystem to be used when performing operations on this path - */ -class NioGcsPath(private val chunks: Array[String], absolute: Boolean, val isDirectory: Boolean)(implicit gcsFileSystem: GcsFileSystem) extends Path { - import NioGcsPath._ - - private val separator = GcsFileSystem.Separator - - private val objectChunks = chunks match { - case values if isAbsolute && values.nonEmpty => values.tail - case _ => chunks - } - - private val fullPath = chunksToString(chunks) - - lazy val bucket: String = chunks match { - case values if values.isEmpty && isAbsolute => throw new IllegalStateException("An absolute gcs path cannot be empty") - case _ => if(isAbsolute) chunks.head else { - throw new UnsupportedOperationException("Attached gcs filesystem has no root and is not Absolute. 
The corresponding bucket is unknown.") - } - } - - val objectName = chunksToString(objectChunks) - - private def chunksToString(chunksArray: Array[String]): String = chunksArray.mkString(separator) - - override def subpath(beginIndex: Int, endIndex: Int): Path = { - val directory = if (endIndex == chunks.length - 1) isDirectory else true - new NioGcsPath(chunks.slice(beginIndex, endIndex), isAbsolute && beginIndex == 0, directory) - } - - override def toFile: File = throw new UnsupportedOperationException("A GCS path cannot be converted to a File.") - - override def resolveSibling(other: Path): Path = { - val otherPath = other.asGcsPath - new NioGcsPath(getParent.asGcsPath.chunks ++ otherPath.chunks, isAbsolute, otherPath.isDirectory) - } - - override def resolveSibling(other: String): Path = { - val otherPath = getSoftPath(other).asGcsPath - new NioGcsPath(getParent.asGcsPath.chunks ++ getSoftPath(other).asGcsPath.chunks, isAbsolute, otherPath.isDirectory) - } - - override def getFileSystem: FileSystem = gcsFileSystem - - override def getName(index: Int): Path = { - val directory = if (index == chunks.length - 1) isDirectory else true - new NioGcsPath(Array(chunks(index)), isAbsolute && index == 0, directory) - } - - override def getParent: Path = chunks match { - case values if values.isEmpty || values.length == 1 => null - case values => new NioGcsPath(values.init, isAbsolute, true) - } - - override def toAbsolutePath: Path = if (isAbsolute) this else { - throw new UnsupportedOperationException(s"Attached gcs filesystem has no root. 
path $toString can't be converted to an absolute path.") - } - - override def relativize(other: Path): Path = other match { - case gcs: NioGcsPath => new NioGcsPath(gcs.chunks.diff(this.chunks), false, gcs.isDirectory) - case _ => throw new IllegalArgumentException(s"$other is not a GCS path.") - } - - override def getNameCount: Int = chunks.length - - override def toUri: URI = new URI(GcsFileSystem.Scheme, bucket, s"/$objectName", null) - - override def compareTo(other: Path): Int = throw new NotImplementedError() - - override def register(watcher: WatchService, events: Array[Kind[_]], modifiers: Modifier*): WatchKey = throw new UnsupportedOperationException() - - override def register(watcher: WatchService, events: Kind[_]*): WatchKey = throw new UnsupportedOperationException() - - override def getFileName: Path = chunks match { - case values if values.isEmpty => null - case _ => new NioGcsPath(Array(chunks.last), isAbsolute && chunks.length == 1, isDirectory) - } - - override def getRoot: Path = new NioGcsPath(Array(bucket), true, true) - - override def iterator(): util.Iterator[Path] = { - if (chunks.isEmpty) chunks.map(_.asInstanceOf[Path]).iterator.asJava else { - val init = chunks.init map { elt => new NioGcsPath(Array(elt), false, true).asInstanceOf[Path] } - val fullIterator = init :+ new NioGcsPath(Array(chunks.last), false, isDirectory).asInstanceOf[Path] - fullIterator.iterator.asJava - } - } - - override def normalize(): Path = if (isAbsolute) this else throw new UnsupportedOperationException("Cannot normalize a relative GCS path.") - - override def endsWith(other: Path): Boolean = { - other match { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case _: NioGcsPath => chunks.endsWith(other.asGcsPath.chunks) - case _ => false - } - } - - override def endsWith(other: String): Boolean = { - Try(getSoftPath(other)) map { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case path@(_: NioGcsPath) => 
chunks.endsWith(path.asGcsPath.chunks) - case _ => false - } getOrElse false - } - - override def resolve(other: Path): Path = { - if (other.isAbsolute) other - else { - val otherGcs = other.asGcsPath - new NioGcsPath(chunks ++ otherGcs.chunks, isAbsolute, otherGcs.isDirectory) - } - } - - override def resolve(other: String): Path = { - val otherPath = getSoftPath(other).asGcsPath - if (otherPath.isAbsolute) otherPath - else new NioGcsPath(chunks ++ otherPath.asGcsPath.chunks, isAbsolute, otherPath.isDirectory) - } - - override def toRealPath(options: LinkOption*): Path = this - - override def startsWith(other: Path): Boolean = { - other match { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case _: NioGcsPath => chunks.startsWith(other.asGcsPath.chunks) - case _ => false - } - } - - override def startsWith(other: String): Boolean = { - Try(getSoftPath(other)) map { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case path@(_: NioGcsPath) => chunks.startsWith(path.asGcsPath.chunks) - case _ => false - } getOrElse false - } - - override def toString: String = { - if (absolute) s"$Protocol$fullPath" - else fullPath - } - - override def isAbsolute: Boolean = absolute -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala new file mode 100644 index 000000000..e9ba4eb8a --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala @@ -0,0 +1,185 @@ +package cromwell.filesystems.gcs.auth + +import java.io.FileNotFoundException + +import akka.actor.ActorSystem +import akka.http.scaladsl.model.StatusCodes +import better.files._ +import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport +import com.google.api.client.http.HttpResponseException +import com.google.api.client.json.jackson2.JacksonFactory +import com.google.api.services.storage.StorageScopes 
+import com.google.auth.Credentials +import com.google.auth.http.HttpTransportFactory +import com.google.auth.oauth2.{GoogleCredentials, ServiceAccountCredentials, UserCredentials} +import com.google.cloud.NoCredentials +import cromwell.core.WorkflowOptions +import cromwell.core.retry.Retry +import cromwell.filesystems.gcs.auth.GoogleAuthMode._ +import cromwell.filesystems.gcs.auth.ServiceAccountMode.{CredentialFileFormat, JsonFileFormat, PemFileFormat} +import org.slf4j.LoggerFactory + +import scala.collection.JavaConverters._ +import scala.concurrent.{ExecutionContext, Future} +import scala.util.{Failure, Success, Try} + +object GoogleAuthMode { + + lazy val jsonFactory = JacksonFactory.getDefaultInstance + lazy val httpTransport = GoogleNetHttpTransport.newTrustedTransport + lazy val HttpTransportFactory = new HttpTransportFactory { + override def create() = { + httpTransport + } + } + + val RefreshTokenOptionKey = "refresh_token" + val GcsScopes = List( + StorageScopes.DEVSTORAGE_FULL_CONTROL, + StorageScopes.DEVSTORAGE_READ_WRITE + ).asJava + + def checkReadable(file: File) = { + if (!file.isReadable) throw new FileNotFoundException(s"File $file does not exist or is not readable") + } + + case object MockAuthMode extends GoogleAuthMode { + override def name = "no_auth" + override def credential(options: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext): Future[Credentials] = { + Future.successful(NoCredentials.getInstance()) + } + } + + def isFatal(ex: Throwable) = { + // We wrap the actual exception in a RuntimeException so get the cause + ex.getCause match { + case http: HttpResponseException => + http.getStatusCode == StatusCodes.Unauthorized.intValue || + http.getStatusCode == StatusCodes.Forbidden.intValue || + http.getStatusCode == StatusCodes.BadRequest.intValue + case _ => false + } + } +} + + +sealed trait GoogleAuthMode { + protected lazy val log = LoggerFactory.getLogger(getClass.getSimpleName) + + /** + * Validate the auth mode 
against provided options + */ + def validate(options: WorkflowOptions): Unit = {()} + + def name: String + // Create a Credential object from the google.api.client.auth library (https://github.com/google/google-api-java-client) + def credential(options: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext): Future[Credentials] + + def requiresAuthFile: Boolean = false + + protected def validateCredential(credential: Credentials) = { + Try(credential.refresh()) match { + case Failure(ex) => throw new RuntimeException(s"Google credentials are invalid: ${ex.getMessage}", ex) + case Success(_) => credential + } + } +} + +object ServiceAccountMode { + sealed trait CredentialFileFormat { + def file: String + } + case class PemFileFormat(accountId: String, file: String) extends CredentialFileFormat + case class JsonFileFormat(file: String) extends CredentialFileFormat +} + +final case class ServiceAccountMode(override val name: String, + fileFormat: CredentialFileFormat, + scopes: java.util.List[String]) extends GoogleAuthMode { + private val credentialsFile = File(fileFormat.file) + checkReadable(credentialsFile) + + private lazy val _credential: Credentials = { + val serviceAccount = fileFormat match { + case PemFileFormat(accountId, _) => + log.warn("The PEM file format will be deprecated in the upcoming Cromwell version. Please use JSON instead.") + ServiceAccountCredentials.fromPkcs8(accountId, accountId, credentialsFile.contentAsString, null, scopes) + case _: JsonFileFormat => ServiceAccountCredentials.fromStream(credentialsFile.newInputStream).createScoped(scopes) + } + + // Validate credentials synchronously here, without retry. + // It's very unlikely to fail as it should not happen more than a few times + // (one for the engine and for each backend using it) per Cromwell instance. 
+ validateCredential(serviceAccount) + } + + override def credential(options: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext): Future[Credentials] = { + Future.successful(_credential) + } +} + +final case class UserMode(override val name: String, + user: String, + secretsPath: String, + datastoreDir: String, + scopes: java.util.List[String]) extends GoogleAuthMode { + + private lazy val secretsStream = { + val secretsFile = File(secretsPath) + checkReadable(secretsFile) + secretsFile.newInputStream + } + + private lazy val _credential: Credentials = { + validateCredential(UserCredentials.fromStream(secretsStream)) + } + + override def credential(options: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext): Future[Credentials] = Future.successful(_credential) +} + +private object ApplicationDefault { + private [auth] lazy val _Credential: Credentials = GoogleCredentials.getApplicationDefault +} + +final case class ApplicationDefaultMode(name: String) extends GoogleAuthMode { + override def credential(options: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext): Future[Credentials] = { + Future.successful(ApplicationDefault._Credential) + } +} + +final case class RefreshTokenMode(name: String, + clientId: String, + clientSecret: String, + scopes: java.util.List[String]) extends GoogleAuthMode with ClientSecrets { + import GoogleAuthMode._ + override def requiresAuthFile = true + + private def extractRefreshToken(options: WorkflowOptions): String = { + options.get(RefreshTokenOptionKey) getOrElse { + throw new IllegalArgumentException(s"Missing parameters in workflow options: $RefreshTokenOptionKey") + } + } + + override def validate(options: WorkflowOptions) = { + extractRefreshToken(options) + () + } + + override def credential(options: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext): Future[Credentials] = { + val refreshToken = extractRefreshToken(options) + Retry.withRetry( + () => 
Future(validateCredential( + new UserCredentials(clientId, clientSecret, refreshToken, null, GoogleAuthMode.HttpTransportFactory, null) + )), + isFatal = isFatal, + maxRetries = Option(3) + ) + } +} + +trait ClientSecrets { + val clientId: String + val clientSecret: String +} + +final case class SimpleClientSecrets(clientId: String, clientSecret: String) extends ClientSecrets diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchCommandBuilder.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchCommandBuilder.scala new file mode 100644 index 000000000..ce09725fa --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchCommandBuilder.scala @@ -0,0 +1,32 @@ +package cromwell.filesystems.gcs.batch + +import cromwell.core.io._ +import cromwell.core.path.Path +import cromwell.filesystems.gcs.GcsPath + +trait GcsBatchCommandBuilder extends DefaultIoCommandBuilder { + override def sizeCommand(path: Path) = path match { + case gcsPath: GcsPath => GcsBatchSizeCommand(gcsPath) + case _ => super.sizeCommand(path) + } + + override def deleteCommand(path: Path, swallowIoExceptions: Boolean = false) = path match { + case gcsPath: GcsPath => GcsBatchDeleteCommand(gcsPath, swallowIoExceptions) + case _ => super.deleteCommand(path, swallowIoExceptions) + } + + override def copyCommand(src: Path, dest: Path, overwrite: Boolean = true) = (src, dest) match { + case (gcsSrc: GcsPath, gcsDest: GcsPath) => GcsBatchCopyCommand(gcsSrc, gcsDest, overwrite) + case _ => super.copyCommand(src, dest, overwrite) + } + + override def hashCommand(path: Path) = path match { + case gcsPath: GcsPath => GcsBatchCrc32Command(gcsPath) + case _ => super.hashCommand(path) + } + + override def touchCommand(path: Path) = path match { + case gcsPath: GcsPath => GcsBatchTouchCommand(gcsPath) + case _ => super.touchCommand(path) + } +} diff --git 
a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchIoCommand.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchIoCommand.scala new file mode 100644 index 000000000..02c91c169 --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchIoCommand.scala @@ -0,0 +1,96 @@ +package cromwell.filesystems.gcs.batch + +import com.google.api.client.http.HttpHeaders +import com.google.api.services.storage.StorageRequest +import com.google.api.services.storage.model.{RewriteResponse, StorageObject} +import cromwell.core.io._ +import cromwell.filesystems.gcs._ + +/** + * Io commands with GCS paths and some logic enabling batching of request. + * @tparam T Return type of the IoCommand + * @tparam U Return type of the Google response + */ +sealed trait GcsBatchIoCommand[T, U] extends IoCommand[T] { + /** + * StorageRequest operation to be executed by this command + */ + def operation: StorageRequest[U] + + /** + * Maps the google response of type U to the Cromwell Io response of type T + */ + protected def mapGoogleResponse(response: U): T + + /** + * Method called in the success callback of a batched request to decide what to do next. + * Returns an Either[T, GcsBatchIoCommand[T, U]] + * Left(value) means the command is complete, and the result can be sent back to the sender. + * Right(newCommand) means the command is not complete and needs another request to be executed. + * Most commands will reply with Left(value). 
+ */ + def onSuccess(response: U, httpHeaders: HttpHeaders): Either[T, GcsBatchIoCommand[T, U]] = { + Left(mapGoogleResponse(response)) + } +} + +case class GcsBatchCopyCommand( + override val source: GcsPath, + override val destination: GcsPath, + override val overwrite: Boolean, + rewriteToken: Option[String] = None + ) extends IoCopyCommand(source, destination, overwrite) with GcsBatchIoCommand[Unit, RewriteResponse] { + val sourceBlob = source.blob + val destinationBlob = destination.blob + + override def operation: StorageRequest[RewriteResponse] = { + val rewriteOperation = source.apiStorage.objects().rewrite(sourceBlob.getBucket, sourceBlob.getName, destinationBlob.getBucket, destinationBlob.getName, null) + // Set the rewrite token if present + rewriteToken foreach rewriteOperation.setRewriteToken + rewriteOperation + } + + /** + * Clone this command with the give rewrite token + */ + def withRewriteToken(rewriteToken: String) = copy(rewriteToken = Option(rewriteToken)) + + override def onSuccess(response: RewriteResponse, httpHeaders: HttpHeaders) = { + if (response.getDone) super.onSuccess(response, httpHeaders) + else { + Right(withRewriteToken(response.getRewriteToken)) + } + } + + override def mapGoogleResponse(response: RewriteResponse): Unit = () +} + +case class GcsBatchDeleteCommand( + override val file: GcsPath, + override val swallowIOExceptions: Boolean + ) extends IoDeleteCommand(file, swallowIOExceptions) with GcsBatchIoCommand[Unit, Void] { + private val blob = file.blob + def operation = file.apiStorage.objects().delete(blob.getBucket, blob.getName) + override protected def mapGoogleResponse(response: Void): Unit = () +} + +/** + * Base trait for commands that use the objects.get() operation. (e.g: size, crc32, ...) 
+ */ +sealed trait GcsBatchGetCommand[T] extends GcsBatchIoCommand[T, StorageObject] { + def file: GcsPath + private val blob = file.blob + override def operation: StorageRequest[StorageObject] = file.apiStorage.objects().get(blob.getBucket, blob.getName) +} + +case class GcsBatchSizeCommand(override val file: GcsPath) extends IoSizeCommand(file) with GcsBatchGetCommand[Long] { + override def mapGoogleResponse(response: StorageObject): Long = response.getSize.longValue() +} + +case class GcsBatchCrc32Command(override val file: GcsPath) extends IoHashCommand(file) with GcsBatchGetCommand[String] { + override def mapGoogleResponse(response: StorageObject): String = response.getCrc32c +} + +case class GcsBatchTouchCommand(override val file: GcsPath) extends IoTouchCommand(file) with GcsBatchGetCommand[Unit] { + override def mapGoogleResponse(response: StorageObject): Unit = () +} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/package.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/package.scala deleted file mode 100644 index 19140a069..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/package.scala +++ /dev/null @@ -1,8 +0,0 @@ -package cromwell.filesystems - -import scalaz.ValidationNel - -package object gcs { - type ErrorOr[+A] = ValidationNel[String, A] - type RefreshToken = String -} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsIntegrationTest.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsIntegrationTest.scala deleted file mode 100644 index 3c6a28734..000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsIntegrationTest.scala +++ /dev/null @@ -1,5 +0,0 @@ -package cromwell.filesystems.gcs - -import org.scalatest.Tag - -object GcsIntegrationTest extends Tag("GcsIntegrationTest") diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala 
b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala new file mode 100644 index 000000000..fbfbf10ef --- /dev/null +++ b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala @@ -0,0 +1,369 @@ +package cromwell.filesystems.gcs + +import com.google.cloud.NoCredentials +import com.google.cloud.storage.contrib.nio.CloudStorageConfiguration +import cromwell.core.path._ +import cromwell.core.{TestKitSuite, WorkflowOptions} +import cromwell.filesystems.gcs.auth.GoogleAuthModeSpec +import org.scalatest.prop.Tables.Table +import org.scalatest.{FlatSpecLike, Matchers} + +class GcsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers with PathBuilderSpecUtils { + + behavior of "GcsPathBuilder" + + it should "use google project credentials when provided in the workflow options" in { + GoogleAuthModeSpec.assumeHasApplicationDefaultCredentials() + + val wfOptionsWithProject = WorkflowOptions.fromMap(Map("google_project" -> "my_project")).get + + val gcsPathBuilderWithProjectInfo = GcsPathBuilder.fromCredentials( + NoCredentials.getInstance(), + "cromwell-test", + None, + CloudStorageConfiguration.DEFAULT, + wfOptionsWithProject + ) + + gcsPathBuilderWithProjectInfo.projectId shouldBe "my_project" + } + + it should behave like truncateCommonRoots(pathBuilder, pathsToTruncate) + + goodPaths foreach { goodPath => + it should behave like buildGoodPath(pathBuilder, goodPath) + } + + badPaths foreach { badPath => + it should behave like buildBadPath(pathBuilder, badPath) + } + + private def pathsToTruncate = Table( + ("context", "file", "relative"), + ("gs://bucket", "gs://bucket/path/to/file", "path/to/file"), + ("gs://bucket/path/to/my/dir", "gs://bucket/path/to/my/dir/file", "file"), + ("gs://bucket/path/to/my/dir", "gs://bucket/path/to/my/dir//file", "file"), + // NOTE: Next two are different from the DefaultPathBuilder. 
"//" doesn't build to "/" in the GcsPathBuilder + ("gs://bucket/path/to/my//dir", "gs://bucket/path/to/my/dir/file", "dir/file"), + ("gs://bucket/path/to/my//dir", "gs://bucket/path/to/my/dir//file", "dir//file"), + ("gs://bucket/path/to/my/dir", "gs://bucket/path/./to/my/dir/file", "./to/my/dir/file"), + ("gs://bucket/path/to/my/dir/with/file", "gs://bucket/path/to/other/dir/with/file", "other/dir/with/file") + ) + + private def bucket = "mymadeupbucket" + + private def goodPaths = Seq( + GoodPath( + description = "a path with spaces", + path = s"gs://$bucket/hello/world/with spaces", + normalize = false, + pathAsString = s"gs://$bucket/hello/world/with spaces", + pathWithoutScheme = s"$bucket/hello/world/with spaces", + parent = s"gs://$bucket/hello/world/", + getParent = s"gs://$bucket/hello/world/", + root = s"gs://$bucket/", + name = "with spaces", + getFileName = s"gs://$bucket/with spaces", + toUriHost = bucket, + toUriPath = "/hello/world/with%20spaces", + toUriStartsWith = s"gs://$bucket/hello/world/with%20spaces", + toUriEndsWith = s"gs://$bucket/hello/world/with%20spaces", + getNameCount = 3, + isAbsolute = true, + isDirectory = false), + + GoodPath( + description = "a path with non-ascii", + path = s"gs://$bucket/hello/world/with non ascii £€", + normalize = false, + pathAsString = s"gs://$bucket/hello/world/with non ascii £€", + pathWithoutScheme = s"$bucket/hello/world/with non ascii £€", + parent = s"gs://$bucket/hello/world/", + getParent = s"gs://$bucket/hello/world/", + root = s"gs://$bucket/", + name = "with non ascii £€", + getFileName = s"gs://$bucket/with non ascii £€", + toUriHost = bucket, + toUriPath = "/hello/world/with%20non%20ascii%20£€", + toUriStartsWith = s"gs://$bucket/hello/world/with%20non%20ascii%20£€", + toUriEndsWith = s"gs://$bucket/hello/world/with%20non%20ascii%20£€", + getNameCount = 3, + isAbsolute = true, + isDirectory = false), + + GoodPath( + description = "a gs uri path with encoded characters", + path = 
s"gs://$bucket/hello/world/encoded%20spaces", + normalize = false, + pathAsString = s"gs://$bucket/hello/world/encoded%20spaces", + pathWithoutScheme = s"$bucket/hello/world/encoded%20spaces", + parent = s"gs://$bucket/hello/world/", + getParent = s"gs://$bucket/hello/world/", + root = s"gs://$bucket/", + name = "encoded%20spaces", + getFileName = s"gs://$bucket/encoded%20spaces", + toUriHost = bucket, + toUriPath = "/hello/world/encoded%2520spaces", + toUriStartsWith = s"gs://$bucket/hello/world/encoded%2520spaces", + toUriEndsWith = s"gs://$bucket/hello/world/encoded%2520spaces", + getNameCount = 3, + isAbsolute = true, + isDirectory = false), + + GoodPath( + description = "a bucket only path", + path = s"gs://$bucket", + normalize = false, + pathAsString = s"gs://$bucket/", + pathWithoutScheme = s"$bucket/", + parent = null, + getParent = null, + root = s"gs://$bucket/", + name = "", + getFileName = s"gs://$bucket/", + toUriHost = bucket, + toUriPath = "/", + toUriStartsWith = s"gs://$bucket/", + toUriEndsWith = s"gs://$bucket/", + getNameCount = 1, + isAbsolute = false, + isDirectory = true), + + GoodPath( + description = "a bucket only path ending in a /", + path = s"gs://$bucket/", + normalize = false, + pathAsString = s"gs://$bucket/", + pathWithoutScheme = s"$bucket/", + parent = null, + getParent = null, + root = s"gs://$bucket/", + name = "", + getFileName = null, + toUriHost = bucket, + toUriPath = "/", + toUriStartsWith = s"gs://$bucket/", + toUriEndsWith = s"gs://$bucket/", + getNameCount = 0, + isAbsolute = true, + isDirectory = true), + + GoodPath( + description = "a file at the top of the bucket", + path = s"gs://$bucket/hello", + normalize = false, + pathAsString = s"gs://$bucket/hello", + pathWithoutScheme = s"$bucket/hello", + parent = s"gs://$bucket/", + getParent = s"gs://$bucket/", + root = s"gs://$bucket/", + name = "hello", + getFileName = s"gs://$bucket/hello", + toUriHost = bucket, + toUriPath = "/hello", + toUriStartsWith = 
s"gs://$bucket/hello", + toUriEndsWith = s"gs://$bucket/hello", + getNameCount = 1, + isAbsolute = true, + isDirectory = false), + + GoodPath( + description = "a path ending in /", + path = s"gs://$bucket/hello/world/", + normalize = false, + pathAsString = s"gs://$bucket/hello/world/", + pathWithoutScheme = s"$bucket/hello/world/", + parent = s"gs://$bucket/hello/", + getParent = s"gs://$bucket/hello/", + root = s"gs://$bucket/", + name = "world", + getFileName = s"gs://$bucket/world", + toUriHost = bucket, + toUriPath = "/hello/world/", + toUriStartsWith = s"gs://$bucket/hello/world/", + toUriEndsWith = s"gs://$bucket/hello/world/", + getNameCount = 2, + isAbsolute = true, + isDirectory = true), + + // Special paths + + GoodPath( + description = "a bucket with a path .", + path = s"gs://$bucket/.", + normalize = false, + pathAsString = s"gs://$bucket/.", + pathWithoutScheme = s"$bucket/.", + parent = null, + getParent = s"gs://$bucket/", + root = s"gs://$bucket/", + name = "", + getFileName = s"gs://$bucket/.", + toUriHost = bucket, + toUriPath = "/.", + toUriStartsWith = s"gs://$bucket/.", + toUriEndsWith = s"gs://$bucket/.", + getNameCount = 1, + isAbsolute = true, + isDirectory = true), + + GoodPath( + description = "a bucket with a path ..", + path = s"gs://$bucket/..", + normalize = false, + pathAsString = s"gs://$bucket/..", + pathWithoutScheme = s"$bucket/..", + parent = null, + getParent = s"gs://$bucket/", + root = null, + name = "", + getFileName = s"gs://$bucket/..", + toUriHost = bucket, + toUriPath = "/..", + toUriStartsWith = s"gs://$bucket/..", + toUriEndsWith = s"gs://$bucket/..", + getNameCount = 1, + isAbsolute = true, + isDirectory = true), + + GoodPath( + description = "a bucket including . 
in the path", + path = s"gs://$bucket/hello/./world", + normalize = false, + pathAsString = s"gs://$bucket/hello/./world", + pathWithoutScheme = s"$bucket/hello/./world", + parent = s"gs://$bucket/hello/", + getParent = s"gs://$bucket/hello/./", + root = s"gs://$bucket/", + name = "world", + getFileName = s"gs://$bucket/world", + toUriHost = bucket, + toUriPath = "/hello/./world", + toUriStartsWith = s"gs://$bucket/hello/./world", + toUriEndsWith = s"gs://$bucket/hello/./world", + getNameCount = 3, + isAbsolute = true, + isDirectory = false), + + GoodPath( + description = "a bucket including .. in the path", + path = s"gs://$bucket/hello/../world", + normalize = false, + pathAsString = s"gs://$bucket/hello/../world", + pathWithoutScheme = s"$bucket/hello/../world", + parent = s"gs://$bucket/", + getParent = s"gs://$bucket/hello/../", + root = s"gs://$bucket/", + name = "world", + getFileName = s"gs://$bucket/world", + toUriHost = bucket, + toUriPath = "/hello/../world", + toUriStartsWith = s"gs://$bucket/hello/../world", + toUriEndsWith = s"gs://$bucket/hello/../world", + getNameCount = 3, + isAbsolute = true, + isDirectory = false), + + // Normalized + + GoodPath( + description = "a bucket with a normalized path .", + path = s"gs://$bucket/.", + normalize = true, + pathAsString = s"gs://$bucket/", + pathWithoutScheme = s"$bucket/", + parent = null, + getParent = null, + root = s"gs://$bucket/", + name = "", + getFileName = null, + toUriHost = bucket, + toUriPath = "/", + toUriStartsWith = s"gs://$bucket/", + toUriEndsWith = s"gs://$bucket/", + getNameCount = 0, + isAbsolute = true, + isDirectory = true), + + GoodPath( + description = "a bucket with a normalized path ..", + path = s"gs://$bucket/..", + normalize = true, + pathAsString = s"gs://$bucket/", + pathWithoutScheme = s"$bucket/", + parent = null, + getParent = null, + root = s"gs://$bucket/", + name = "", + getFileName = s"gs://$bucket/", + toUriHost = bucket, + toUriPath = "/", + toUriStartsWith = 
s"gs://$bucket/", + toUriEndsWith = s"gs://$bucket/", + getNameCount = 1, + isAbsolute = false, + isDirectory = true), + + GoodPath( + description = "a bucket including . in the normalized path", + path = s"gs://$bucket/hello/./world", + normalize = true, + pathAsString = s"gs://$bucket/hello/world", + pathWithoutScheme = s"$bucket/hello/world", + parent = s"gs://$bucket/hello/", + getParent = s"gs://$bucket/hello/", + root = s"gs://$bucket/", + name = "world", + getFileName = s"gs://$bucket/world", + toUriHost = bucket, + toUriPath = "/hello/world", + toUriStartsWith = s"gs://$bucket/hello/world", + toUriEndsWith = s"gs://$bucket/hello/world", + getNameCount = 2, + isAbsolute = true, + isDirectory = false), + + GoodPath( + description = "a bucket including .. in the normalized path", + path = s"gs://$bucket/hello/../world", + normalize = true, + pathAsString = s"gs://$bucket/world", + pathWithoutScheme = s"$bucket/world", + parent = s"gs://$bucket/", + getParent = s"gs://$bucket/", + root = s"gs://$bucket/", + name = "world", + getFileName = s"gs://$bucket/world", + toUriHost = bucket, + toUriPath = "/world", + toUriStartsWith = s"gs://$bucket/world", + toUriEndsWith = s"gs://$bucket/world", + getNameCount = 1, + isAbsolute = true, + isDirectory = false) + ) + + private def badPaths = Seq( + BadPath("an empty path", "", " does not have a gcs scheme"), + BadPath("an bucketless path", "gs://", "Expected authority at index 5: gs://"), + BadPath("a bucket named .", "gs://./hello/world", "gs://./hello/world does not have a host"), + BadPath("a non ascii bucket name", "gs://nonasciibucket£€/hello/world", + "gs://nonasciibucket%C2%A3%E2%82%AC/hello/world does not have a host"), + BadPath("a https path", "https://hello/world", "Cloud Storage URIs must have 'gs' scheme: https://hello/world"), + BadPath("a file uri path", "file:///hello/world", "Cloud Storage URIs must have 'gs' scheme: file:///hello/world"), + BadPath("a relative file path", "hello/world", "hello/world 
does not have a gcs scheme"), + BadPath("an absolute file path", "/hello/world", "/hello/world does not have a gcs scheme") + ) + + private lazy val pathBuilder = { + GoogleAuthModeSpec.assumeHasApplicationDefaultCredentials() + + GcsPathBuilder.fromCredentials( + NoCredentials.getInstance(), + "cromwell-test", + None, + CloudStorageConfiguration.DEFAULT, + WorkflowOptions.empty + ) + } +} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala index b1d44feaa..ab868fbcc 100644 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala +++ b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala @@ -1,18 +1,23 @@ package cromwell.filesystems.gcs +import better.files.File import com.typesafe.config.{ConfigException, ConfigFactory} -import lenthall.config.ConfigValidationException +import cromwell.filesystems.gcs.GoogleConfiguration.GoogleConfigurationException +import cromwell.filesystems.gcs.auth.ServiceAccountMode.{JsonFileFormat, PemFileFormat} +import cromwell.filesystems.gcs.auth.{ApplicationDefaultMode, RefreshTokenMode, ServiceAccountMode, UserMode} import org.scalatest.{FlatSpec, Matchers} -import scala.language.postfixOps class GoogleConfigurationSpec extends FlatSpec with Matchers { behavior of "GoogleConfiguration" it should "parse all manner of well-formed auths" in { + val pemMockFile = File.newTemporaryFile() + val jsonMockFile = File.newTemporaryFile() + val righteousGoogleConfig = - """ + s""" |google { | application-name = "cromwell" | @@ -31,14 +36,19 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { | name = "name-user" | scheme = "user_account" | user = "me" - | secrets-file = "/very/secret/file.txt" + | secrets-file = "${pemMockFile.pathAsString}" | data-store-dir = "/where/the/data/at" | }, | { - | name = "name-service" + | name = 
"name-pem-service" | scheme = "service_account" | service-account-id = "my-google-account" - | pem-file = "/yonder/file.pem" + | pem-file = "${pemMockFile.pathAsString}" + | }, + | { + | name = "name-json-service" + | scheme = "service_account" + | json-file = "${jsonMockFile.pathAsString}" | } | ] |} @@ -48,7 +58,7 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { val gconf = GoogleConfiguration(ConfigFactory.parseString(righteousGoogleConfig)) gconf.applicationName shouldBe "cromwell" - gconf.authsByName should have size 4 + gconf.authsByName should have size 5 val auths = gconf.authsByName.values @@ -62,13 +72,21 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { val user = (auths collectFirst { case a: UserMode => a }).get user.name shouldBe "name-user" - user.secretsFile shouldBe "/very/secret/file.txt" + user.secretsPath shouldBe pemMockFile.pathAsString user.datastoreDir shouldBe "/where/the/data/at" - val service = (auths collectFirst { case a: ServiceAccountMode => a }).get - service.name shouldBe "name-service" - service.accountId shouldBe "my-google-account" - service.pemPath shouldBe "/yonder/file.pem" + val servicePem = (auths collectFirst { case a: ServiceAccountMode if a.name == "name-pem-service" => a }).get + servicePem.name shouldBe "name-pem-service" + servicePem.fileFormat.asInstanceOf[PemFileFormat].accountId shouldBe "my-google-account" + servicePem.fileFormat.file shouldBe pemMockFile.pathAsString + + val serviceJson = (auths collectFirst { case a: ServiceAccountMode if a.name == "name-json-service" => a }).get + serviceJson.name shouldBe "name-json-service" + serviceJson.fileFormat.isInstanceOf[JsonFileFormat] shouldBe true + serviceJson.fileFormat.file shouldBe jsonMockFile.pathAsString + + pemMockFile.delete(true) + jsonMockFile.delete(true) } @@ -85,12 +103,52 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { |} """.stripMargin - a[ConfigValidationException] shouldBe thrownBy { + 
a[GoogleConfigurationException] shouldBe thrownBy { GoogleConfiguration(ConfigFactory.parseString(applessGoogleConfig)) } } it should "not parse a configuration stanza with wrong cromwell auth" in { + val doubleServiceAccountCredentials = + """ + |google { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "service-account" + | scheme = "service-account" + | service-account-id = "my-google-account" + | pem-file = "path/to/file.pem" + | json-file = "path/to/json.pem" + | } + | ] + |} + """.stripMargin + + a[GoogleConfigurationException] shouldBe thrownBy { + GoogleConfiguration(ConfigFactory.parseString(doubleServiceAccountCredentials)) + } + + val noServiceAccountCredentials = + """ + |google { + | application-name = "cromwell" + | + | auths = [ + | { + | name = "service-account" + | scheme = "service-account" + | service-account-id = "my-google-account" + | } + | ] + |} + """.stripMargin + + a[GoogleConfigurationException] shouldBe thrownBy { + GoogleConfiguration(ConfigFactory.parseString(noServiceAccountCredentials)) + } + val unsupported = """ |google { @@ -105,7 +163,7 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { |} """.stripMargin - a[ConfigValidationException] shouldBe thrownBy { + a[GoogleConfigurationException] shouldBe thrownBy { GoogleConfiguration(ConfigFactory.parseString(unsupported)) } @@ -162,7 +220,7 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { |} """.stripMargin - a[ConfigException.Missing] shouldBe thrownBy { + a[GoogleConfigurationException] shouldBe thrownBy { GoogleConfiguration(ConfigFactory.parseString(badKeyInRefreshTokenMode)) } @@ -183,7 +241,7 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { |} """.stripMargin - a[ConfigException.Missing] shouldBe thrownBy { + a[GoogleConfigurationException] shouldBe thrownBy { GoogleConfiguration(ConfigFactory.parseString(badKeyInUserMode)) } @@ -203,7 +261,7 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { |} 
""".stripMargin - a[ConfigException.Missing] shouldBe thrownBy { + a[GoogleConfigurationException] shouldBe thrownBy { GoogleConfiguration(ConfigFactory.parseString(badKeyInServiceAccountMode)) } } diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleCredentialFactorySpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleCredentialFactorySpec.scala deleted file mode 100644 index 541dc75eb..000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleCredentialFactorySpec.scala +++ /dev/null @@ -1,158 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.Paths - -import com.google.api.client.auth.oauth2.Credential -import com.google.api.client.googleapis.auth.oauth2.GoogleCredential -import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport -import com.google.api.client.json.jackson2.JacksonFactory -import com.typesafe.config.ConfigFactory -import cromwell.filesystems.gcs.GoogleAuthMode.EnhancedCredentials -import org.scalatest.{FlatSpec, Matchers} - -import scala.util.Try - -class GoogleCredentialFactorySpec extends FlatSpec with Matchers { - import GoogleCredentialFactorySpec._ - - behavior of "GoogleCredentialFactory" - - it should "refresh a token using user credentials" taggedAs GcsIntegrationTest in { - val credential = UserMode( - name = "user", - user = secretConf("user"), - secretsFile = secretConf("secrets-file"), - datastoreDir = secretConf("data-store-dir")).credential(emptyOptions) - - val firstCredentialTry: Try[Credential] = credential.freshCredential - assert(firstCredentialTry.isSuccess) - val firstCredential = firstCredentialTry.get - firstCredential.getAccessToken shouldNot be(empty) - - firstCredential.setExpiresInSeconds(59L) - - val secondCredentialTry: Try[Credential] = firstCredential.freshCredential - assert(secondCredentialTry.isSuccess) - - val secondCredential = secondCredentialTry.get - secondCredential.getAccessToken shouldNot be(empty) - 
secondCredential.getExpiresInSeconds shouldNot be(null) - secondCredential.getExpiresInSeconds.longValue should be > 60L - } - - it should "refresh a token using a service account" taggedAs GcsIntegrationTest in { - val credential = ServiceAccountMode( - name = "service", - accountId = secretConf("service-account-id"), - pemPath = secretConf("pem-file")).credential(emptyOptions) - - val firstCredentialTry: Try[Credential] = credential.freshCredential - assert(firstCredentialTry.isSuccess) - val firstCredential = firstCredentialTry.get - firstCredential.getAccessToken shouldNot be(empty) - - firstCredential.setExpiresInSeconds(59L) - - val secondCredentialTry: Try[Credential] = firstCredential.freshCredential - assert(secondCredentialTry.isSuccess) - - val secondCredential = secondCredentialTry.get - secondCredential.getAccessToken shouldNot be(empty) - secondCredential.getExpiresInSeconds shouldNot be(null) - secondCredential.getExpiresInSeconds.longValue should be > 60L - } - - it should "refresh a token using a refresh token" taggedAs GcsIntegrationTest in { - val opts = GoogleOptionsMap(Map("refresh_token" -> secretConf("refresh_token"))) - - val credential = RefreshTokenMode(name = "refresh", - clientId = secretConf("client-id"), - clientSecret = secretConf("client-secret")).credential(opts) - - val firstUserCredentialsTry = credential.freshCredential - - assert(firstUserCredentialsTry.isSuccess) - val firstUserCredentials = firstUserCredentialsTry.get - - val firstRefreshedUserCredentialsTry: Try[Credential] = firstUserCredentials.freshCredential - assert(firstRefreshedUserCredentialsTry.isSuccess) - val firstRefreshedUserCredentials = firstRefreshedUserCredentialsTry.get - firstRefreshedUserCredentials.getAccessToken shouldNot be(empty) - - firstRefreshedUserCredentials.setExpiresInSeconds(59L) - - val secondRefreshedUserCredentialsTry: Try[Credential] = firstRefreshedUserCredentials.freshCredential - assert(secondRefreshedUserCredentialsTry.isSuccess) - - 
val secondRefreshedUserCredentials = secondRefreshedUserCredentialsTry.get - secondRefreshedUserCredentials.getAccessToken shouldNot be(empty) - secondRefreshedUserCredentials.getExpiresInSeconds shouldNot be(null) - secondRefreshedUserCredentials.getExpiresInSeconds.longValue should be > 60L - } - - it should "not refresh an empty token" in { - - val wrongCredentials = new GoogleCredential.Builder() - .setTransport(GoogleNetHttpTransport.newTrustedTransport) - .setJsonFactory(JacksonFactory.getDefaultInstance) - .setClientSecrets("fakeId", "fakeSecret") - .build() - - val exception = wrongCredentials.freshCredential.failed.get - - exception.getMessage should be("Unable to refresh token") - } - - it should "refresh a token using application default credentials" taggedAs GcsIntegrationTest in { - val credential = applicationDefaultCredential - - val firstCredentialTry: Try[Credential] = credential.freshCredential - assert(firstCredentialTry.isSuccess) - val firstCredential = firstCredentialTry.get - firstCredential.getAccessToken shouldNot be(empty) - - firstCredential.setExpiresInSeconds(59L) - - val secondCredentialTry: Try[Credential] = firstCredential.freshCredential - assert(secondCredentialTry.isSuccess) - - val secondCredential = secondCredentialTry.get - secondCredential.getAccessToken shouldNot be(empty) - secondCredential.getExpiresInSeconds shouldNot be(null) - secondCredential.getExpiresInSeconds.longValue should be > 60L - } -} - -object GoogleCredentialFactorySpec { - /* - - To run this integration spec, your cromwell-credentials.conf file should have the following keys for the listed tests: - - // For testing UserMode - user = "" - secrets-file = "" - data-store-dir = "" - - // For testing ServiceAccountMode - service-account-id = "" - pem-file = "" - - // For testing RefreshTokenMode - client-id = "" - client-secret = "" - refresh_token = "" - - */ - - private lazy val credentialsConfig = 
ConfigFactory.parseFile(Paths.get("cromwell-credentials.conf").toFile) - - private def secretConf(path: String) = credentialsConfig.getString(path) - - private val emptyOptions = GoogleOptionsMap(Map.empty) - - def applicationDefaultCredential = ApplicationDefaultMode(name = "default").credential(emptyOptions) -} - -case class GoogleOptionsMap(map: Map[String, String]) extends GoogleAuthMode.GoogleAuthOptions { - override def get(key: String): Try[String] = Try { map(key) } -} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/MockGcsFileSystemBuilder.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/MockGcsFileSystemBuilder.scala deleted file mode 100644 index af569d7f0..000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/MockGcsFileSystemBuilder.scala +++ /dev/null @@ -1,9 +0,0 @@ -package cromwell.filesystems.gcs - -import scala.util.Failure - -object MockGcsFileSystemBuilder { - val mockGcsFileSystem = new GcsFileSystemProvider( - Failure(new Exception("No Storage object available")), - scala.concurrent.ExecutionContext.global).defaultFileSystem -} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/NioGcsPathSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/NioGcsPathSpec.scala deleted file mode 100644 index ccb35efa6..000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/NioGcsPathSpec.scala +++ /dev/null @@ -1,291 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.Path - -import org.scalatest.mockito.MockitoSugar -import org.scalatest.prop.TableDrivenPropertyChecks._ -import org.scalatest.prop.Tables.Table -import org.scalatest.{FlatSpec, Matchers} - -class NioGcsPathSpec extends FlatSpec with Matchers with MockitoSugar { - - behavior of "NioGcsPath" - - implicit val GCSFs = MockGcsFileSystemBuilder.mockGcsFileSystem - - it should "implement toString" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, 
true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - absPath1.toString shouldBe "gs://absolute/path/to/somewhere" - relPath1.toString shouldBe "some/relative/path" - } - - it should "implement subpath" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val absSub1 = absPath1.subpath(0, 2) - absSub1.isAbsolute shouldBe true - absSub1.toString shouldBe "gs://absolute/path" - - val absSub2 = absPath1.subpath(1, 2) - absSub2.isAbsolute shouldBe false - absSub2.toString shouldBe "path" - - val relSub1 = relPath1.subpath(0, 2) - relSub1.isAbsolute shouldBe false - relSub1.toString shouldBe "some/relative" - } - - it should "implement resolveSibling" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - val relPath2 = new NioGcsPath(Array("another", "relative", "resource", "path"), false, true) - - val absSibling = absPath1.resolveSibling("somewhere else") - absSibling.isAbsolute shouldBe true - absSibling.toString shouldBe "gs://absolute/path/to/somewhere else" - - val absSiblingPath = absPath1.resolveSibling(relPath1) - absSiblingPath.isAbsolute shouldBe true - absSiblingPath.toString shouldBe "gs://absolute/path/to/some/relative/path" - - val absRel = relPath1.resolveSibling("other path") - absRel.isAbsolute shouldBe false - absRel.toString shouldBe "some/relative/other path" - - val absRelPath = relPath1.resolveSibling(relPath2) - absRelPath.isAbsolute shouldBe false - absRelPath.toString shouldBe "some/relative/another/relative/resource/path" - } - - it should "implement resolve" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val absPath2 = new NioGcsPath(Array("absolute", "location"), true, true) - val relPath1 = new 
NioGcsPath(Array("some", "relative", "path"), false, true) - val relPath2 = new NioGcsPath(Array("another", "relative", "resource", "path"), false, true) - - val absToRel = absPath1.resolve(relPath1) - absToRel.isAbsolute shouldBe true - absToRel.toString shouldBe "gs://absolute/path/to/somewhere/some/relative/path" - - val absToAbs = absPath1.resolve(absPath2) - absToAbs.isAbsolute shouldBe true - absToAbs.toString shouldBe "gs://absolute/location" - - val relToAbs = relPath1.resolve(absPath1) - relToAbs.isAbsolute shouldBe true - relToAbs.toString shouldBe "gs://absolute/path/to/somewhere" - - val relToRel = relPath1.resolve(relPath2) - relToRel.isAbsolute shouldBe false - relToRel.toString shouldBe "some/relative/path/another/relative/resource/path" - } - - it should "implement getName" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val nameAbs1 = absPath1.getName(0) - nameAbs1.isAbsolute shouldBe true - nameAbs1.toString shouldBe "gs://absolute" - - val nameAbs2 = absPath1.getName(1) - nameAbs2.isAbsolute shouldBe false - nameAbs2.toString shouldBe "path" - - val nameRel1 = relPath1.getName(0) - nameRel1.isAbsolute shouldBe false - nameRel1.toString shouldBe "some" - - val nameRel2 = relPath1.getName(1) - nameRel2.isAbsolute shouldBe false - nameRel2.toString shouldBe "relative" - } - - it should "implement getParent" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val singleton = new NioGcsPath(Array("singleton"), true, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val parentAbs1 = absPath1.getParent - parentAbs1.isAbsolute shouldBe true - parentAbs1.toString shouldBe "gs://absolute/path/to" - - empty.getParent shouldBe null - singleton.getParent shouldBe null - - val nameRel1 = 
relPath1.getParent - nameRel1.isAbsolute shouldBe false - nameRel1.toString shouldBe "some/relative" - } - - it should "implement toAbsolutePath" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val abs = absPath1.toAbsolutePath - abs.isAbsolute shouldBe true - abs.toString shouldBe "gs://absolute/path/to/somewhere" - - an[Exception] shouldBe thrownBy(relPath1.toAbsolutePath) - } - - it should "implement getNameCount" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val singleton = new NioGcsPath(Array("singleton"), true, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - absPath1.getNameCount shouldBe 4 - relPath1.getNameCount shouldBe 3 - empty.getNameCount shouldBe 0 - singleton.getNameCount shouldBe 1 - } - - it should "implement getFileName" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val singletonAbs = new NioGcsPath(Array("singleton"), true, true) - val singletonRel = new NioGcsPath(Array("singleton"), false, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val emptyFileName = empty.getFileName - emptyFileName shouldBe null - - val singletonAbsFileName = singletonAbs.getFileName - singletonAbsFileName.isAbsolute shouldBe true - singletonAbsFileName.toString shouldBe "gs://singleton" - - val singletonRelFileName = singletonRel.getFileName - singletonRelFileName.isAbsolute shouldBe false - singletonRelFileName.toString shouldBe "singleton" - - val relFileName = relPath1.getFileName - relFileName.isAbsolute shouldBe false - relFileName.toString shouldBe "path" - - val absFileName = absPath1.getFileName - absFileName.isAbsolute 
shouldBe false - absFileName.toString shouldBe "somewhere" - } - - it should "implement getIterator" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val singletonAbs = new NioGcsPath(Array("singleton"), true, true) - val singletonRel = new NioGcsPath(Array("singleton"), false, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - empty.iterator().hasNext shouldBe false - - val singletonAbsIterator = singletonAbs.iterator() - val nextAbsSingleton: Path = singletonAbsIterator.next() - nextAbsSingleton.isAbsolute shouldBe false - nextAbsSingleton.toString shouldBe "singleton" - singletonAbsIterator.hasNext shouldBe false - - val singletonRelIterator = singletonRel.iterator() - val nextRelSingleton: Path = singletonRelIterator.next() - nextRelSingleton.isAbsolute shouldBe false - nextRelSingleton.toString shouldBe "singleton" - singletonRelIterator.hasNext shouldBe false - - val relIterator = relPath1.iterator() - val nextRel: Path = relIterator.next() - nextRel.isAbsolute shouldBe false - nextRel.toString shouldBe "some" - relIterator.next().toString shouldBe "relative" - relIterator.next().toString shouldBe "path" - relIterator.hasNext shouldBe false - - val absIterator = absPath1.iterator() - val absRel: Path = absIterator.next() - absRel.isAbsolute shouldBe false - absRel.toString shouldBe "absolute" - absIterator.next().toString shouldBe "path" - absIterator.next().toString shouldBe "to" - absIterator.next().toString shouldBe "somewhere" - absIterator.hasNext shouldBe false - } - - it should "implement startsWith" in { - val empty = new NioGcsPath(Array.empty[String], false, true) - val singletonAbs = new NioGcsPath(Array("absolute"), true, true) - - val absPath = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val startsWithAbsPath = new NioGcsPath(Array("absolute", "path", "to"), true, true) 
- val doesntStartsWithAbsPath = new NioGcsPath(Array("absolute", "path", "to", "another", "place"), true, true) - val absPathStartingLikeRel = new NioGcsPath(Array("some", "relative", "path"), true, true) - - val relPath = new NioGcsPath(Array("some", "relative", "path"), false, true) - val startsWithRelPath = new NioGcsPath(Array("some", "relative"), false, true) - val doesntStartsWithRelPath = new NioGcsPath(Array("some", "relative", "other", "path"), false, true) - val relPathStartingLikeAbs = new NioGcsPath(Array("absolute", "path", "to"), false, true) - - val paths = Table( - ("path1", "path2", "result"), - (empty, empty, true), - (empty, absPath, false), - (singletonAbs, singletonAbs, true), - (absPath, startsWithAbsPath, true), - (absPath, doesntStartsWithAbsPath, false), - (absPath, relPathStartingLikeAbs, true), - (absPath, relPath, false), - (relPath, startsWithRelPath, true), - (relPath, doesntStartsWithRelPath, false), - (relPath, absPathStartingLikeRel, false), - (relPath, absPath, false) - ) - - forAll(paths) { (p1, p2, res) => - val startsWith: Boolean = p1.startsWith(p2) - startsWith shouldBe res - val startsWith1: Boolean = p1.startsWith(p2.toString) - startsWith1 shouldBe res - } - } - - it should "implement endsWith" in { - val empty = new NioGcsPath(Array.empty[String], false, true) - val singletonAbs = new NioGcsPath(Array("absolute"), true, true) - - val absPath = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val doesntEndWithAbsPath = new NioGcsPath(Array("absolute", "path", "to", "another", "place"), true, true) - val absPathEndingLikeRel = new NioGcsPath(Array("relative", "path"), true, true) - - val relPath = new NioGcsPath(Array("some", "relative", "path"), false, true) - val endsWithRelPath = new NioGcsPath(Array("relative", "path"), false, true) - val doesntStartsWithRelPath = new NioGcsPath(Array("relative", "other", "path"), false, true) - val relPathEndingLikeAbs = new NioGcsPath(Array("path", "to", 
"somewhere"), false, true) - - val paths = Table( - ("path1", "path2", "result"), - (empty, empty, true), - (empty, absPath, false), - (singletonAbs, singletonAbs, true), - (absPath, absPath, true), - (absPath, doesntEndWithAbsPath, false), - (absPath, relPathEndingLikeAbs, true), - (absPath, relPath, false), - (relPath, endsWithRelPath, true), - (relPath, doesntStartsWithRelPath, false), - (relPath, absPathEndingLikeRel, false), - (relPath, absPath, false) - ) - - forAll(paths) { (p1, p2, res) => - p1.endsWith(p2) shouldBe res - p1.endsWith(p2.toString) shouldBe res - } - } - - it should "implement toUri" in { - val file = new NioGcsPath(Array("some", "file"), true, false) - val uri = file.toUri - uri.toString shouldBe "gs://some/file" - } - -} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/RefreshTokenModeSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/RefreshTokenModeSpec.scala deleted file mode 100644 index f959dcad5..000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/RefreshTokenModeSpec.scala +++ /dev/null @@ -1,26 +0,0 @@ -package cromwell.filesystems.gcs - -import org.scalatest.{FlatSpec, Matchers} - -class RefreshTokenModeSpec extends FlatSpec with Matchers { - - val refreshToken = RefreshTokenMode(name = "bar", clientId = "secret-id", clientSecret = "secret-secret") - - behavior of "RefreshTokenMode" - - it should "assert good workflow options" in { - val goodOptions = GoogleOptionsMap(Map("refresh_token" -> "token")) - refreshToken.assertWorkflowOptions(goodOptions) - } - - it should "fail to assert bad workflow options" in { - val badOptions = GoogleOptionsMap(Map("fresh_tokin" -> "broken")) - val noOptions = GoogleOptionsMap(Map.empty[String, String]) - - List(badOptions, noOptions).foreach { option => - the[IllegalArgumentException] thrownBy { - refreshToken.assertWorkflowOptions(option) - } should have message s"Missing parameters in workflow options: refresh_token" - } - } -} diff --git 
a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/auth/GoogleAuthModeSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/auth/GoogleAuthModeSpec.scala new file mode 100644 index 000000000..1ce3d5986 --- /dev/null +++ b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/auth/GoogleAuthModeSpec.scala @@ -0,0 +1,21 @@ +package cromwell.filesystems.gcs.auth + +import com.google.auth.oauth2.GoogleCredentials +import org.scalatest.Assertions._ + +import scala.util.{Failure, Try} + +object GoogleAuthModeSpec { + def assumeHasApplicationDefaultCredentials(): Unit = { + tryApplicationDefaultCredentials match { + case Failure(exception) => cancel(exception.getMessage) + case _ => + } + () + } + + private lazy val tryApplicationDefaultCredentials: Try[Unit] = Try { + GoogleCredentials.getApplicationDefault + () + } +} diff --git a/project/Dependencies.scala b/project/Dependencies.scala index cf6b04238..628a0ea5d 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -1,37 +1,55 @@ import sbt._ object Dependencies { - lazy val lenthallV = "0.18" - lazy val wdl4sV = "0.5" - lazy val sprayV = "1.3.3" - /* - spray-json is an independent project from the "spray suite" - - https://github.com/spray/spray - - https://github.com/spray/spray-json - - http://spray.io/documentation/1.2.2/spray-httpx/spray-json-support/ - - http://doc.akka.io/docs/akka/2.4/scala/http/common/json-support.html#akka-http-spray-json - */ - lazy val sprayJsonV = "1.3.2" - lazy val akkaV = "2.4.9" - lazy val slickV = "3.1.1" - lazy val googleClientApiV = "1.20.0" - lazy val betterFilesV = "2.16.0" - lazy val scalazCoreV = "7.2.5" + lazy val lenthallV = "0.25" + lazy val wdl4sV = "0.14-7c693a3-SNAP" + + lazy val akkaV = "2.5.3" + lazy val akkaHttpV = "10.0.9" + + lazy val slickV = "3.2.0" + + lazy val googleClientApiV = "1.22.0" + lazy val googleGenomicsServicesApiV = "1.22.0" + lazy val betterFilesV = "2.17.1" + lazy val catsV = "0.9.0" + lazy val fs2V = 
"0.9.7" + + lazy val pegdownV = "1.6.0" + lazy val scalatestV = "3.0.2" // Internal collections of dependencies + private val fs2Test = "co.fs2" %% "fs2-io" % fs2V % "test" + + private val catsDependencies = List( + "org.typelevel" %% "cats" % catsV, + "com.github.benhutchison" %% "mouse" % "0.9" + ) map (_ + /* + Exclude test framework cats-laws and its transitive dependency scalacheck. + If sbt detects scalacheck, it tries to run it. + Explicitly excluding the two problematic artifacts instead of including the three (or four?). + https://github.com/typelevel/cats/tree/v0.7.2#getting-started + Re "_2.12", see also: https://github.com/sbt/sbt/issues/1518 + */ + exclude("org.typelevel", "cats-laws_2.12") + exclude("org.typelevel", "cats-kernel-laws_2.12") + ) + private val baseDependencies = List( "org.broadinstitute" %% "lenthall" % lenthallV, - "org.scalaz" %% "scalaz-core" % scalazCoreV, - "org.scalatest" %% "scalatest" % "3.0.0" % Test, - "org.specs2" %% "specs2" % "3.7" % Test - ) + "com.iheart" %% "ficus" % "1.4.1", + "org.scalatest" %% "scalatest" % scalatestV % Test, + "org.pegdown" % "pegdown" % pegdownV % Test, + "org.specs2" %% "specs2-mock" % "3.8.9" % Test // 3.9.X doesn't enjoy the spark backend or refined + ) ++ catsDependencies :+ fs2Test private val slf4jBindingDependencies = List( // http://logback.qos.ch/dependencies.html - "ch.qos.logback" % "logback-classic" % "1.1.7", - "ch.qos.logback" % "logback-access" % "1.1.7", - "org.codehaus.janino" % "janino" % "3.0.1" + "ch.qos.logback" % "logback-classic" % "1.2.3", + "ch.qos.logback" % "logback-access" % "1.2.3", + "org.codehaus.janino" % "janino" % "3.0.7" ) private val slickDependencies = List( @@ -46,27 +64,29 @@ object Dependencies { "com.mattbertolini" % "liquibase-slf4j" % "2.0.0" ) - private val sprayServerDependencies = List( - "io.spray" %% "spray-can" % sprayV, - "io.spray" %% "spray-routing-shapeless2" % sprayV, - "io.spray" %% "spray-http" % sprayV, - "io.spray" %% "spray-testkit" % 
sprayV % Test + val akkaHttpDependencies = List( + "com.typesafe.akka" %% "akka-http" % akkaHttpV, + "com.typesafe.akka" %% "akka-http-testkit" % akkaHttpV % Test ) + val akkaHttpServerDependencies = akkaHttpDependencies :+ "org.webjars" % "swagger-ui" % "2.1.1" + private val googleApiClientDependencies = List( // Used by swagger, but only in tests. This overrides an older 2.1.3 version of jackson-core brought in by // these Google dependencies, but which isn't properly evicted by IntelliJ's sbt integration. - "com.fasterxml.jackson.core" % "jackson-core" % "2.8.2", + "com.fasterxml.jackson.core" % "jackson-core" % "2.8.9", // The exclusions prevent guava 13 from colliding at assembly time with guava 18 brought in elsewhere. "com.google.api-client" % "google-api-client-java6" % googleClientApiV exclude("com.google.guava", "guava-jdk5"), "com.google.api-client" % "google-api-client-jackson2" % googleClientApiV exclude("com.google.guava", "guava-jdk5") ) private val googleCloudDependencies = List( - "com.google.gcloud" % "gcloud-java" % "0.0.9", - "com.google.oauth-client" % "google-oauth-client" % googleClientApiV, - "com.google.cloud.bigdataoss" % "gcsio" % "1.4.4", - "com.google.apis" % "google-api-services-genomics" % ("v1alpha2-rev14-" + googleClientApiV) + "com.google.apis" % "google-api-services-genomics" % ("v1alpha2-rev64-" + googleGenomicsServicesApiV), + "com.google.cloud" % "google-cloud-nio" % "0.20.1-alpha" + exclude("com.google.api.grpc", "grpc-google-common-protos") + exclude("com.google.cloud.datastore", "datastore-v1-protos") + exclude("org.apache.httpcomponents", "httpclient"), + "org.apache.httpcomponents" % "httpclient" % "4.5.3" ) private val dbmsDependencies = List( @@ -79,52 +99,68 @@ object Dependencies { - serverTimezone=UTC via http://stackoverflow.com/a/36793896/3320205 - nullNamePatternMatchesAll=true via https://liquibase.jira.com/browse/CORE-2723 */ - "mysql" % "mysql-connector-java" % "5.1.39" + "mysql" % "mysql-connector-java" % 
"5.1.42" ) - // Sub-project dependencies, added in addition to any dependencies inherited from .dependsOn(). + private val refinedTypeDependenciesList = List( + "org.scala-lang" % "scala-compiler" % Settings.ScalaVersion, + "eu.timepit" %% "refined" % "0.8.2" + ) - val gcsFileSystemDependencies = baseDependencies ++ googleApiClientDependencies ++ googleCloudDependencies + // Sub-project dependencies, added in addition to any dependencies inherited from .dependsOn(). - val databaseSqlDependencies = baseDependencies ++ slickDependencies ++ dbmsDependencies + val gcsFileSystemDependencies = baseDependencies ++ googleApiClientDependencies ++ googleCloudDependencies ++ List ( + "com.github.pathikrit" %% "better-files" % betterFilesV + ) - val databaseMigrationDependencies = List( - "org.broadinstitute" %% "wdl4s" % wdl4sV, // Used in migration scripts - "com.github.pathikrit" %% "better-files" % betterFilesV % Test - ) ++ baseDependencies ++ liquibaseDependencies ++ dbmsDependencies + val databaseSqlDependencies = baseDependencies ++ slickDependencies ++ dbmsDependencies ++ refinedTypeDependenciesList val coreDependencies = List( - "com.typesafe.scala-logging" %% "scala-logging" % "3.4.0", - "org.broadinstitute" %% "wdl4s" % wdl4sV, - "org.apache.commons" % "commons-lang3" % "3.4", - "io.spray" %% "spray-json" % sprayJsonV, - "com.typesafe" % "config" % "1.3.0", + "com.typesafe.scala-logging" %% "scala-logging" % "3.6.0", + "org.broadinstitute" %% "wdl4s-wdl" % wdl4sV, + "org.apache.commons" % "commons-lang3" % "3.6", + "com.typesafe.akka" %% "akka-http-spray-json" % akkaHttpV, + "com.typesafe" % "config" % "1.3.1", "com.typesafe.akka" %% "akka-actor" % akkaV, "com.typesafe.akka" %% "akka-slf4j" % akkaV, - "com.typesafe.akka" %% "akka-testkit" % akkaV % Test + "com.typesafe.akka" %% "akka-testkit" % akkaV % Test, + "com.google.guava" % "guava" % "22.0", + "com.google.auth" % "google-auth-library-oauth2-http" % "0.7.0", + "com.typesafe.akka" %% "akka-stream-testkit" % 
akkaV, + "com.chuusai" %% "shapeless" % "2.3.2", + "com.github.scopt" %% "scopt" % "3.6.0" ) ++ baseDependencies ++ googleApiClientDependencies ++ // TODO: We're not using the "F" in slf4j. Core only supports logback, specifically the WorkflowLogger. slf4jBindingDependencies - val htCondorBackendDependencies = List( - "com.twitter" %% "chill" % "0.8.0", - "org.mongodb" %% "casbah" % "3.0.0" - ) + val databaseMigrationDependencies = List( + "com.github.pathikrit" %% "better-files" % betterFilesV % Test + ) ++ liquibaseDependencies ++ dbmsDependencies - val sparkBackendDependencies = List( - "io.spray" %% "spray-client" % sprayV - ) ++ sprayServerDependencies + val cromwellApiClientDependencies = List( + "com.typesafe.akka" %% "akka-actor" % akkaV, + "com.typesafe.akka" %% "akka-http-spray-json" % akkaHttpV, + "com.github.pathikrit" %% "better-files" % betterFilesV, + "org.scalatest" %% "scalatest" % scalatestV % Test, + "org.pegdown" % "pegdown" % pegdownV % Test + ) ++ akkaHttpDependencies val engineDependencies = List( - "com.typesafe.scala-logging" %% "scala-logging" % "3.4.0", - "org.webjars" % "swagger-ui" % "2.1.1", "commons-codec" % "commons-codec" % "1.10", "commons-io" % "commons-io" % "2.5", - "org.scalaz" %% "scalaz-core" % scalazCoreV, - "com.github.pathikrit" %% "better-files" % betterFilesV, + "com.storm-enroute" %% "scalameter" % "0.8.2" + exclude("com.fasterxml.jackson.core", "jackson-databind") + exclude("com.fasterxml.jackson.module", "jackson-module-scala") + exclude("org.scala-tools.testing", "test-interface"), + "com.fasterxml.jackson.core" % "jackson-databind" % "2.8.9", + "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.8.9", "io.swagger" % "swagger-parser" % "1.0.22" % Test, "org.yaml" % "snakeyaml" % "1.17" % Test - ) ++ sprayServerDependencies + ) ++ akkaHttpServerDependencies val rootDependencies = slf4jBindingDependencies + + val jesBackendDependencies = refinedTypeDependenciesList + val tesBackendDependencies = 
akkaHttpDependencies + val sparkBackendDependencies = akkaHttpDependencies } diff --git a/project/Merging.scala b/project/Merging.scala index 0591a09f9..2c941a76e 100644 --- a/project/Merging.scala +++ b/project/Merging.scala @@ -25,10 +25,15 @@ object Merging { MergeStrategy.filterDistinctLines case ("spring.schemas" :: Nil) | ("spring.handlers" :: Nil) => MergeStrategy.filterDistinctLines + case "io.netty.versions.properties" :: Nil => + MergeStrategy.first + case "maven" :: "com.google.guava" :: xs => + MergeStrategy.first case _ => MergeStrategy.deduplicate } case "asm-license.txt" | "overview.html" | "cobertura.properties" => MergeStrategy.discard + case _ => MergeStrategy.deduplicate } } \ No newline at end of file diff --git a/project/Publishing.scala b/project/Publishing.scala index 244ece80b..56301c899 100644 --- a/project/Publishing.scala +++ b/project/Publishing.scala @@ -7,7 +7,7 @@ object Publishing { private def artifactoryResolver(isSnapshot: Boolean): Resolver = { val repoType = if (isSnapshot) "snapshot" else "release" val repoUrl = - s"https://artifactory.broadinstitute.org/artifactory/libs-$repoType-local;build.timestamp=$buildTimestamp" + s"https://broadinstitute.jfrog.io/broadinstitute/libs-$repoType-local;build.timestamp=$buildTimestamp" val repoName = "artifactory-publish" repoName at repoUrl } @@ -15,7 +15,7 @@ object Publishing { private val artifactoryCredentials: Credentials = { val username = sys.env.getOrElse("ARTIFACTORY_USERNAME", "") val password = sys.env.getOrElse("ARTIFACTORY_PASSWORD", "") - Credentials("Artifactory Realm", "artifactory.broadinstitute.org", username, password) + Credentials("Artifactory Realm", "broadinstitute.jfrog.io", username, password) } def publishingSettings: Seq[Setting[_]] = diff --git a/project/Settings.scala b/project/Settings.scala index cfb0032d3..cbfc0a807 100644 --- a/project/Settings.scala +++ b/project/Settings.scala @@ -6,14 +6,15 @@ import Version._ import sbt.Keys._ import sbt._ import 
sbtassembly.AssemblyPlugin.autoImport._ -import sbtrelease.ReleasePlugin import sbtdocker.DockerPlugin.autoImport._ +import sbtrelease.ReleasePlugin object Settings { val commonResolvers = List( - "Broad Artifactory Releases" at "https://artifactory.broadinstitute.org/artifactory/libs-release/", - "Broad Artifactory Snapshots" at "https://artifactory.broadinstitute.org/artifactory/libs-snapshot/" + Resolver.jcenterRepo, + "Broad Artifactory Releases" at "https://broadinstitute.jfrog.io/broadinstitute/libs-release/", + "Broad Artifactory Snapshots" at "https://broadinstitute.jfrog.io/broadinstitute/libs-snapshot/" ) /* @@ -23,13 +24,60 @@ object Settings { https://github.com/sbt/sbt-assembly/issues/69 https://github.com/scala/pickling/issues/10 + + Other fancy flags from https://tpolecat.github.io/2017/04/25/scalac-flags.html. + + The following isn't used (yet), and in general is an exercise in pain for 2.12 with Cromwell. + It'd certainly be nice to have, but params causes a world of hurt. Interested parties are encouraged + to take a stab at it. + + "-Ywarn-unused:params" // Warn if a value parameter is unused. 
*/ val compilerSettings = List( - "-deprecation", - "-unchecked", + "-explaintypes", "-feature", - "-Xmax-classfile-name", - "200" + "-Xmax-classfile-name", "200", + "-target:jvm-1.8", + "-encoding", "UTF-8", + "-unchecked", + "-deprecation", + "-Xfuture", + "-Xlint:adapted-args", + "-Xlint:by-name-right-associative", + "-Xlint:constant", + "-Xlint:delayedinit-select", + "-Xlint:doc-detached", + "-Xlint:inaccessible", + "-Xlint:infer-any", + "-Xlint:missing-interpolator", + "-Xlint:nullary-override", + "-Xlint:nullary-unit", + "-Xlint:option-implicit", + "-Xlint:package-object-classes", + "-Xlint:poly-implicit-overload", + "-Xlint:private-shadow", + "-Xlint:stars-align", + "-Xlint:type-parameter-shadow", + "-Xlint:unsound-match", + "-Yno-adapted-args", + "-Ywarn-dead-code", + "-Ywarn-numeric-widen", + "-Ywarn-value-discard", + "-Ywarn-inaccessible", + "-Ywarn-unused:implicits", + "-Ywarn-unused:privates", + "-Ywarn-unused:locals", + "-Ywarn-unused:patvars" + ) + + val consoleHostileSettings = List( + "-Ywarn-unused:imports", // warns about every unused import on every command. + "-Xfatal-warnings" // makes those warnings fatal. 
+ ) + + val docSettings = List( + // http://stackoverflow.com/questions/31488335/scaladoc-2-11-6-fails-on-throws-tag-with-unable-to-find-any-member-to-link#31497874 + "-no-link-warnings" ) lazy val assemblySettings = Seq( @@ -39,14 +87,17 @@ object Settings { logLevel in assembly := Level.Info, assemblyMergeStrategy in assembly := customMergeStrategy ) - + lazy val dockerSettings = Seq( imageNames in docker := Seq( - ImageName( - namespace = Option("broadinstitute"), - repository = name.value, - tag = Some(s"${version.value}") - ) + ImageName( + namespace = Option("broadinstitute"), + repository = name.value, + tag = Option(cromwellVersion)), + ImageName( + namespace = Option("broadinstitute"), + repository = name.value, + tag = Option(version.value)) ), dockerfile in docker := { // The assembly task generates a fat JAR file @@ -57,22 +108,30 @@ object Settings { from("openjdk:8") expose(8000) add(artifact, artifactTargetPath) - entryPoint("java", "-jar", artifactTargetPath) + runRaw(s"ln -s $artifactTargetPath /app/cromwell.jar") + + // If you use the 'exec' form for an entry point, shell processing is not performed and + // environment variable substitution does not occur. 
Thus we have to /bin/bash here + // and pass along any subsequent command line arguments + // See https://docs.docker.com/engine/reference/builder/#/entrypoint + entryPoint("/bin/bash", "-c", "java ${JAVA_OPTS} -jar /app/cromwell.jar ${CROMWELL_ARGS} ${*}", "--") } }, buildOptions in docker := BuildOptions( cache = false, removeIntermediateContainers = BuildOptions.Remove.Always ) - ) - + ) + val ScalaVersion = "2.12.3" val commonSettings = ReleasePlugin.projectSettings ++ testSettings ++ assemblySettings ++ dockerSettings ++ cromwellVersionWithGit ++ publishingSettings ++ List( organization := "org.broadinstitute", - scalaVersion := "2.11.8", + scalaVersion := ScalaVersion, resolvers ++= commonResolvers, - scalacOptions ++= compilerSettings, + scalacOptions ++= (compilerSettings ++ consoleHostileSettings), + scalacOptions in (Compile, doc) ++= docSettings, + scalacOptions in (Compile, console) := compilerSettings, parallelExecution := false ) @@ -100,6 +159,22 @@ object Settings { libraryDependencies ++= databaseMigrationDependencies ) ++ commonSettings + val cromwellApiClientSettings = List( + name := "cromwell-api-client", + libraryDependencies ++= cromwellApiClientDependencies, + organization := "org.broadinstitute", + scalaVersion := ScalaVersion, + scalacOptions ++= (compilerSettings ++ consoleHostileSettings), + scalacOptions in (Compile, doc) ++= docSettings, + scalacOptions in (Compile, console) := compilerSettings, + resolvers ++= commonResolvers + ) ++ ReleasePlugin.projectSettings ++ testSettings ++ assemblySettings ++ + cromwellVersionWithGit ++ publishingSettings + + val dockerHashingSettings = List( + name := "cromwell-docker-hashing" + ) ++ commonSettings + val backendSettings = List( name := "cromwell-backend" ) ++ commonSettings @@ -108,9 +183,9 @@ object Settings { name := "cromwell-sfs-backend" ) ++ commonSettings - val htCondorBackendSettings = List( - name := "cromwell-htcondor-backend", - libraryDependencies ++= htCondorBackendDependencies + 
val tesBackendSettings = List( + name := "cromwell-tes-backend", + libraryDependencies ++= tesBackendDependencies ) ++ commonSettings val sparkBackendSettings = List( @@ -119,13 +194,14 @@ object Settings { ) ++ commonSettings val jesBackendSettings = List( - name := "cromwell-jes-backend" + name := "cromwell-jes-backend", + libraryDependencies ++= jesBackendDependencies ) ++ commonSettings val engineSettings = List( name := "cromwell-engine", libraryDependencies ++= engineDependencies - ) ++ commonSettings + ) ++ commonSettings ++ versionConfCompileSettings val rootSettings = List( name := "cromwell", diff --git a/project/Testing.scala b/project/Testing.scala index 82df707d5..157e6bc98 100644 --- a/project/Testing.scala +++ b/project/Testing.scala @@ -6,27 +6,28 @@ object Testing { lazy val DockerTest = config("docker") extend Test lazy val NoDockerTest = config("nodocker") extend Test lazy val CromwellIntegrationTest = config("integration") extend Test + lazy val CromwellBenchmarkTest = config("benchmark") extend Test lazy val CromwellNoIntegrationTest = config("nointegration") extend Test lazy val DbmsTest = config("dbms") extend Test lazy val DockerTestTag = "DockerTest" - lazy val UseDockerTaggedTests = Tests.Argument("-n", DockerTestTag) - lazy val DontUseDockerTaggedTests = Tests.Argument("-l", DockerTestTag) + lazy val UseDockerTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-n", DockerTestTag) + lazy val DontUseDockerTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", DockerTestTag) lazy val CromwellIntegrationTestTag = "CromwellIntegrationTest" - lazy val UseCromwellIntegrationTaggedTests = Tests.Argument("-n", CromwellIntegrationTestTag) - lazy val DontUseCromwellIntegrationTaggedTests = Tests.Argument("-l", CromwellIntegrationTestTag) + lazy val UseCromwellIntegrationTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-n", CromwellIntegrationTestTag) + lazy val DontUseCromwellIntegrationTaggedTests = 
Tests.Argument(TestFrameworks.ScalaTest, "-l", CromwellIntegrationTestTag) lazy val GcsIntegrationTestTag = "GcsIntegrationTest" - lazy val UseGcsIntegrationTaggedTests = Tests.Argument("-n", GcsIntegrationTestTag) - lazy val DontUseGcsIntegrationTaggedTests = Tests.Argument("-l", GcsIntegrationTestTag) + lazy val UseGcsIntegrationTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-n", GcsIntegrationTestTag) + lazy val DontUseGcsIntegrationTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", GcsIntegrationTestTag) lazy val DbmsTestTag = "DbmsTest" - lazy val UseDbmsTaggedTests = Tests.Argument("-n", DbmsTestTag) - lazy val DontUseDbmsTaggedTests = Tests.Argument("-l", DbmsTestTag) + lazy val UseDbmsTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-n", DbmsTestTag) + lazy val DontUseDbmsTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", DbmsTestTag) lazy val PostMVPTag = "PostMVP" - lazy val DontUsePostMVPTaggedTests = Tests.Argument("-l", PostMVPTag) + lazy val DontUsePostMVPTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", PostMVPTag) lazy val TestReportArgs = Tests.Argument(TestFrameworks.ScalaTest, "-oDSI", "-h", "target/test-reports") @@ -52,7 +53,11 @@ object Testing { // `nointegration:test` - Run all tests, except integration testOptions in CromwellNoIntegrationTest := (testOptions in AllTests).value ++ Seq(DontUseCromwellIntegrationTaggedTests, DontUseGcsIntegrationTaggedTests, DontUsePostMVPTaggedTests), // `dbms:test` - Run database management tests. 
- testOptions in DbmsTest := (testOptions in AllTests).value ++ Seq(UseDbmsTaggedTests) + testOptions in DbmsTest := (testOptions in AllTests).value ++ Seq(UseDbmsTaggedTests), + // Add scalameter as a test framework in the CromwellBenchmarkTest scope + testFrameworks in CromwellBenchmarkTest += new TestFramework("org.scalameter.ScalaMeterFramework"), + // Don't execute benchmarks in parallel + parallelExecution in CromwellBenchmarkTest := false ) /* TODO: This syntax of test in (NoTests, assembly) isn't correct @@ -82,6 +87,7 @@ object Testing { .configs(DockerTest).settings(inConfig(DockerTest)(Defaults.testTasks): _*) .configs(NoDockerTest).settings(inConfig(NoDockerTest)(Defaults.testTasks): _*) .configs(CromwellIntegrationTest).settings(inConfig(CromwellIntegrationTest)(Defaults.testTasks): _*) + .configs(CromwellBenchmarkTest).settings(inConfig(CromwellBenchmarkTest)(Defaults.testTasks): _*) .configs(CromwellNoIntegrationTest).settings(inConfig(CromwellNoIntegrationTest)(Defaults.testTasks): _*) .configs(DbmsTest).settings(inConfig(DbmsTest)(Defaults.testTasks): _*) } diff --git a/project/Version.scala b/project/Version.scala index 99e00f9f6..3e4ee2bf0 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -3,8 +3,8 @@ import sbt.Keys._ import sbt._ object Version { - // Upcoming release, or current if we're on the master branch - val cromwellVersion = "0.20" + // Upcoming release, or current if we're on a master / hotfix branch + val cromwellVersion = "29" // Adapted from SbtGit.versionWithGit def cromwellVersionWithGit: Seq[Setting[_]] = @@ -19,6 +19,19 @@ object Version { shellPrompt in ThisBuild := { state => "%s| %s> ".format(GitCommand.prompt.apply(state), cromwellVersion) } ) + val writeVersionConf: Def.Initialize[Task[Seq[File]]] = Def.task { + val file = (resourceManaged in Compile).value / "cromwell-version.conf" + val contents = + s"""|version { + | cromwell: "${version.value}" + |} + |""".stripMargin + IO.write(file, contents) + 
Seq(file) + } + + val versionConfCompileSettings = List(resourceGenerators in Compile <+= writeVersionConf) + private def makeVersion(versionProperty: String, baseVersion: Option[String], headCommit: Option[String]): String = { @@ -39,6 +52,7 @@ object Version { // The project isSnapshot string passed in via command line settings, if desired. val isSnapshot = sys.props.get("project.isSnapshot").forall(_.toBoolean) - if (isSnapshot) s"$version-SNAPSHOT" else version + // For now, obfuscate SNAPSHOTs from sbt's developers: https://github.com/sbt/sbt/issues/2687#issuecomment-236586241 + if (isSnapshot) s"$version-SNAP" else version } } diff --git a/project/build.properties b/project/build.properties index 35c88bab7..64317fdae 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=0.13.12 +sbt.version=0.13.15 diff --git a/project/plugins.sbt b/project/plugins.sbt index 4de0d6862..c4bfd6abc 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,4 +1,4 @@ -addSbtPlugin("se.marcuslonnberg" % "sbt-docker" % "1.4.0") +addSbtPlugin("se.marcuslonnberg" % "sbt-docker" % "1.4.1") addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3") addSbtPlugin("io.spray" % "sbt-revolver" % "0.8.0") /* @@ -7,5 +7,5 @@ See https://github.com/broadinstitute/cromwell/issues/645 */ addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "0.7.1") addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.3") -addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.3.5") +addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.0") addSbtPlugin("org.scoverage" % "sbt-coveralls" % "1.1.0") diff --git a/release/release_inputs.json b/release/release_inputs.json new file mode 100644 index 000000000..5a018c652 --- /dev/null +++ b/release/release_inputs.json @@ -0,0 +1,4 @@ +{ + "release_cromwell.githubToken": "<>", + "release_cromwell.organization": "broadinstitute" +} diff --git a/release/release_workflow.wdl b/release/release_workflow.wdl new file mode 100644 index 
000000000..a1297556d --- /dev/null +++ b/release/release_workflow.wdl @@ -0,0 +1,377 @@ +task do_release { + # Repo to release + String repo + # Current version being released + String releaseV + # Next version + String nextV + # Command that will update the appropriate file for the current release + String updateVersionCommand + + # Commands that will update previously released/published dependencies in this repo + Array[String] dependencyCommands = [] + + # Can be swapped out to try this on a fork + String organization + + command { + set -e + set -x + + # Clone repo and checkout develop + git clone git@github.com:${organization}/${repo}.git + cd ${repo} + git checkout develop + git pull --rebase + + # Expect the version number on develop to be the version TO BE RELEASED + + echo "Updating dependencies" + ${sep='\n' dependencyCommands} + + git add . + # If there is nothing to commit, git commit will return 1 which will fail the script. + # This ensures we only commit if build.sbt was effectively updated + git diff-index --quiet HEAD || git commit -m "Update ${repo} version to ${releaseV}" + + # wdl4s needs a scala docs update + if [ ${repo} == "wdl4s" ]; then + + # Generate new scaladoc + sbt 'set scalacOptions in (Compile, doc) := List("-skip-packages", "better")' doc + git checkout gh-pages + for subproj in cwl wdl wom; do + mkdir -p ${releaseV}/$subproj + mv $subproj/target/scala-2.12/api ${releaseV}/$subproj + done + git add ${releaseV} + + # Update latest pointer + git rm --ignore-unmatch latest + ln -s ${releaseV} latest + git add latest + + git diff-index --quiet HEAD || git commit -m "Update Scaladoc" + git push origin gh-pages + + # Update badges on README + git checkout develop + curl -o scaladoc.png https://img.shields.io/badge/scaladoc-${releaseV}-blue.png + curl -o version.png https://img.shields.io/badge/version-${releaseV}-blue.png + + git add scaladoc.png + git add version.png + + git diff-index --quiet HEAD || git commit -m "Update README badges" 
+ git push origin develop + fi + + # Merge develop into master + git checkout master + git pull --rebase + git merge develop --no-edit + + # Make sure tests pass + sbt update + JAVA_OPTS=-XX:MaxMetaspaceSize=1024m sbt test + + # Tag the release + git tag ${releaseV} + + # Push master and push the tags + git push origin master + git push --tags + + # Create and push the hotfix branch + git checkout -b ${releaseV}_hotfix + + # Pin centaur for cromwell + if [ ${repo} == "cromwell" ]; then + centaurDevelopHEAD=$(git ls-remote git://github.com/${organization}/centaur.git | grep refs/heads/develop | cut -f 1) + sed -i '' s/CENTAUR_BRANCH=.*/CENTAUR_BRANCH="$centaurDevelopHEAD"/g .travis.yml + git add .travis.yml + git commit -m "Pin release to centaur branch" + fi + + git push origin ${releaseV}_hotfix + + # Assemble jar for cromwell + if [ ${repo} == "cromwell" ]; then + sbt -Dproject.version=${releaseV} -Dproject.isSnapshot=false assembly + fi + + # Update develop to point to next release version + git checkout develop + ${updateVersionCommand} + git add . + git diff-index --quiet HEAD || git commit -m "Update ${repo} version from ${releaseV} to ${nextV}" + git push origin develop + + pwd > executionDir.txt + } + + output { + String version = releaseV + String executionDir = read_string(repo + "/executionDir.txt") + } +} + +task wait_for_published_artifact { + String repo + String version + + command <<< + checkIfPresent() { + isPresent=$(curl -s --head https://broadinstitute.jfrog.io/broadinstitute/libs-release-local/org/broadinstitute/${repo}/${version}/ | head -n 1 | grep -q "HTTP/1.[01] [23]..") + } + + elapsedTime=0 + checkIfPresent + + # Allow 1 hour for the file to appear as a published artifact + while [ $? -ne 0 ] && [ $elapsedTime -lt 3600 ]; do + sleep 10; + let "elapsedTime+=10" + checkIfPresent + done + + exit $? 
+ >>> + + output { + String publishedVersion = version + } +} + +task create_update_dependency_command { + String dependencyName + String newVersion + String dependencyFilePath = "build.sbt" + + command { + echo "sed -i '' \"s/${dependencyName}[[:space:]]=.*/${dependencyName} = \\\"${newVersion}\\\"/g\" ${dependencyFilePath}" + } + + output { + String updateCommand = read_string(stdout()) + } +} + +task versionPrep { + String organization + String repo + String file + String regexPrefix + String updateCommandTemplate + + String bash_rematch = "{BASH_REMATCH[1]}" + command <<< + curl -o versionFile https://raw.githubusercontent.com/${organization}/${repo}/develop/${file} + regex="${regexPrefix}\"(([0-9]+\.)?([0-9]+))\"" + + if [[ $(cat versionFile) =~ $regex ]] + then + version="$${bash_rematch}" + echo $version > version + echo $version | perl -ne 'if (/^([0-9]+\.)?([0-9]+)$/) { $incr = $2 + 1; print "$1$incr\n" }' > nextVersion + else + exit 1 + fi + >>> + + output { + String version = read_string("version") + String nextVersion = read_string("nextVersion") + String updateCommand = sub(updateCommandTemplate, "<>", nextVersion) + } +} + +task makeGithubRelease { + String githubToken + String organization + File cromwellJar + Int oldVersion + Int newVersion + + command <<< + set -e + set -x + + # download changelog from master + curl https://raw.githubusercontent.com/${organization}/cromwell/master/CHANGELOG.md -o CHANGELOG.md + + # Extract the latest piece of the changelog corresponding to this release + # head remove the last line, next sed escapes all ", and last sed/tr replaces all new lines with \n so it can be used as a JSON string + BODY=$(sed -n '/## ${newVersion}/,/## ${oldVersion}/p' CHANGELOG.md | head -n -1 | sed -e 's/"/\\"/g' | sed 's/$/\\n/' | tr -d '\n') + + # Build the json body for the POST release + API_JSON="{\"tag_name\": \"${newVersion}\",\"name\": \"${newVersion}\",\"body\": \"$BODY\",\"draft\": true,\"prerelease\": false}" + + # POST the 
release as a draft + curl --data "$API_JSON" https://api.github.com/repos/${organization}/cromwell/releases?access_token=${githubToken} -o release_response + + # parse the response to get the release id and the asset upload url + RELEASE_ID=$(python -c "import sys, json; print json.load(sys.stdin)['id']" < release_response) + UPLOAD_URL=$(python -c "import sys, json; print json.load(sys.stdin)['upload_url']" < release_response) + # Maybe update this when we have basename ! + UPLOAD_URL=$(sed 's/{.*}/?name=cromwell-${newVersion}.jar/' <<< "$UPLOAD_URL") + + # Upload the cromwell jar as an asset + curl -X POST --data-binary @${cromwellJar} -H "Authorization: token ${githubToken}" -H "Content-Type: application/octet-stream" "$UPLOAD_URL" + + # Publish the draft + curl -X PATCH -d '{"draft": false}' https://api.github.com/repos/${organization}/cromwell/releases/"$RELEASE_ID"?access_token=${githubToken} + >>> + runtime { + docker: "python:2.7" + } +} + +workflow release_cromwell { + String githubToken + String organization + + Pair[String, String] lenthallAsDependency = ("lenthallV", waitForLenthall.publishedVersion) + Pair[String, String] wdl4sAsDependency = ("wdl4sV", waitForWdl4s.publishedVersion) + + Array[Pair[String, String]] wdl4sDependencies = [lenthallAsDependency] + Array[Pair[String, String]] wdltoolDependencies = [wdl4sAsDependency] + Array[Pair[String, String]] cromwellDependencies = [lenthallAsDependency, wdl4sAsDependency] + + # Regex to find the line setting the current version + String dependencyRegexPrefix = "git\\.baseVersion[[:space:]]:=[[:space:]]" + # Template command to update the version + String dependencyTemplate = "sed -i '' \"s/git\\.baseVersion[[:space:]]:=.*/git.baseVersion := \\\"<>\\\",/g\" build.sbt" + + String cromwellTemplate = "sed -i '' \"s/cromwellVersion[[:space:]]=.*/cromwellVersion = \\\"<>\\\"/g\" project/Version.scala" + String cromwellRegexPrefix = "cromwellVersion[[:space:]]=[[:space:]]" + + String wdl4sTemplate = "sed -i '' 
\"s/wdl4sVersion[[:space:]]=.*/wdl4sVersion = \\\"<>\\\"/g\" project/Version.scala" + String wdl4sRegexPrefix = "wdl4sVersion[[:space:]]=[[:space:]]" + + # Prepare releases by finding out the current version, next version, and update version command + call versionPrep as lenthallPrep { input: + organization = organization, + repo = "lenthall", + file = "build.sbt", + regexPrefix = dependencyRegexPrefix, + updateCommandTemplate = dependencyTemplate + } + + call versionPrep as wdl4sPrep { input: + organization = organization, + repo = "wdl4s", + file = "project/Version.scala", + regexPrefix = wdl4sRegexPrefix, + updateCommandTemplate = wdl4sTemplate + } + + call versionPrep as wdltoolPrep { input: + organization = organization, + repo = "wdltool", + file = "build.sbt", + regexPrefix = dependencyRegexPrefix, + updateCommandTemplate = dependencyTemplate + } + + call versionPrep as cromwellPrep { input: + organization = organization, + repo = "cromwell", + file = "project/Version.scala", + regexPrefix = cromwellRegexPrefix, + updateCommandTemplate = cromwellTemplate + } + + # Release calls + call do_release as release_lenthall { input: + organization = organization, + repo = "lenthall", + releaseV = lenthallPrep.version, + nextV = lenthallPrep.nextVersion, + updateVersionCommand = lenthallPrep.updateCommand, + } + + call do_release as release_wdl4s { input: + organization = organization, + repo = "wdl4s", + releaseV = wdl4sPrep.version, + nextV = wdl4sPrep.nextVersion, + updateVersionCommand = wdl4sPrep.updateCommand, + dependencyCommands = wdl4sDependencyCommands.updateCommand + } + + call do_release as release_wdltool { input: + organization = organization, + repo = "wdltool", + releaseV = wdltoolPrep.version, + nextV = wdltoolPrep.nextVersion, + updateVersionCommand = wdltoolPrep.updateCommand, + dependencyCommands = wdltoolDependencyCommands.updateCommand + } + + call do_release as release_cromwell { input: + organization = organization, + repo = "cromwell", + 
releaseV = cromwellPrep.version, + nextV = cromwellPrep.nextVersion, + updateVersionCommand = cromwellPrep.updateCommand, + dependencyCommands = cromwellDependencyCommands.updateCommand + } + + call wait_for_published_artifact as waitForLenthall { input: repo = "lenthall_2.12", version = release_lenthall.version } + call wait_for_published_artifact as waitForWdl4s { input: repo = "wdl4s-wdl_2.12", version = release_wdl4s.version } + + # Generates commands to update wdl4s dependencies + scatter(wdl4sDependency in wdl4sDependencies) { + String wdl4sDepName = wdl4sDependency.left + String wdl4sVersionName = wdl4sDependency.right + + call create_update_dependency_command as wdl4sDependencyCommands { input: + dependencyName = wdl4sDepName, + newVersion = wdl4sVersionName, + dependencyFilePath = "build.sbt" + } + } + + # Generates commands to update wdltool dependencies + scatter(wdltoolDependency in wdltoolDependencies) { + String wdltoolDepName = wdltoolDependency.left + String wdltoolVersionName = wdltoolDependency.right + + call create_update_dependency_command as wdltoolDependencyCommands { input: + dependencyName = wdltoolDepName, + newVersion = wdltoolVersionName, + dependencyFilePath = "build.sbt" + } + } + + # Generates commands to update cromwell dependencies + scatter(cromwellDependency in cromwellDependencies) { + String cromwellDepName = cromwellDependency.left + String cromwellVersionName = cromwellDependency.right + + call create_update_dependency_command as cromwellDependencyCommands { input: + dependencyName = cromwellDepName, + newVersion = cromwellVersionName, + dependencyFilePath = "project/Dependencies.scala" + } + } + + File cromwellJar = release_cromwell.executionDir + "/target/scala-2.12/cromwell-" + cromwellPrep.version + ".jar" + # Version that was just released + Int cromwellVersionAsInt = cromwellPrep.version + # Previous version + Int cromwellPreviousVersion = cromwellVersionAsInt - 1 + + call makeGithubRelease { input: + githubToken = 
githubToken, + organization = organization, + cromwellJar = cromwellJar, + newVersion = cromwellVersionAsInt, + oldVersion = cromwellPreviousVersion + } + + output { + File cromwellReleasedJar = cromwellJar + } +} diff --git a/scripts/docker-compose-mysql/README.md b/scripts/docker-compose-mysql/README.md new file mode 100644 index 000000000..c6ea57d08 --- /dev/null +++ b/scripts/docker-compose-mysql/README.md @@ -0,0 +1,32 @@ +# Cromwell server on MySQL Database + +Uses docker-compose to link together a Cromwell docker image (built locally with `sbt docker` or available on [dockerhub](https://hub.docker.com/r/broadinstitute/cromwell/)) with a MySQL docker image. +To change the version of Cromwell used, change the tag in `compose/cromwell/Dockerfile` + +## Local + +`docker-compose up` from this directory will start a Cromwell server running on a mysql instance with local backend. +The default configuration file used can be found at `compose/cromwell/app-config/application.conf` +To override it, simply mount a volume containing your custom `application.conf` to `/app-config` (see `jes-cromwell/docker-compose.yml` for an example) + +## JES + +The `jes-cromwell` directory is an example of how to customize the original compose file with a configuration file and environment variables. +It uses the application default credentials of the host machine. To use it make sure your gcloud is up to date and that your [application-default credentials](https://developers.google.com/identity/protocols/application-default-credentials) are set up. +Then run `docker-compose -f docker-compose.yml -f jes-cromwell/docker-compose.yml up` to start a Cromwell server with a JES backend on MySQL. + +## MySQL + +The data directory in the MySQL container is mounted to `compose/mysql/data`, which allows the data to survive a `docker-compose down`. +To disable this feature, simply remove the `./compose/mysql/data:/var/lib/mysql` line in the volume section of `docker-compose.yml`. 
+Note that in such case, the data will still be preserved by a `docker-compose stop` that stops the container but doesn't delete it. + +## Notes + +To run Cromwell in the background, add `-d` at the end of the command: +`docker-compose up -d` + +To then see the logs for a specific service, run `docker-compose logs -f `. +For example `docker-compose logs -f cromwell`. + +For more information about docker compose: [Docker compose doc](https://docs.docker.com/compose/) diff --git a/scripts/docker-compose-mysql/compose/cromwell/Dockerfile b/scripts/docker-compose-mysql/compose/cromwell/Dockerfile new file mode 100644 index 000000000..6bf9fd88b --- /dev/null +++ b/scripts/docker-compose-mysql/compose/cromwell/Dockerfile @@ -0,0 +1,9 @@ +FROM broadinstitute/cromwell:develop + +RUN git clone https://github.com/vishnubob/wait-for-it.git +RUN mkdir cromwell-working-dir +WORKDIR cromwell-working-dir + +COPY ./app-config /app-config + +ENTRYPOINT ["/bin/sh", "-c"] diff --git a/scripts/docker-compose-mysql/compose/cromwell/app-config/application.conf b/scripts/docker-compose-mysql/compose/cromwell/app-config/application.conf new file mode 100644 index 000000000..72feb2457 --- /dev/null +++ b/scripts/docker-compose-mysql/compose/cromwell/app-config/application.conf @@ -0,0 +1,55 @@ +call-caching { + enabled = false +} + +backend { + default = "Local" + providers { + Local { + actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" + config { + run-in-background = true + runtime-attributes = "String? docker Int? max_runtime = 2" + submit = "/bin/bash ${script}" + submit-docker = "docker run --rm -v ${cwd}:${docker_cwd} -i ${docker} /bin/bash < ${script}" + + # Root directory where Cromwell writes job results. This directory must be + # visible and writeable by the Cromwell process as well as the jobs that Cromwell + # launches. 
+ root: "cromwell-executions" + + filesystems { + local { + localization: [ + "soft-link", "hard-link", "copy" + ] + + caching { + duplication-strategy: [ + "soft-link" + ] + + # Possible values: file, path + # "file" will compute an md5 hash of the file content. + # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link", + # in order to allow for the original file path to be hashed. + hashing-strategy: "path" + + # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash. + # If false or the md5 does not exist, will proceed with the above-defined hashing strategy. + check-sibling-md5: false + } + } + } + } + } + } +} + +database { + db.url = "jdbc:mysql://mysql-db/cromwell_db?useSSL=false&rewriteBatchedStatements=true" + db.user = "cromwell" + db.password = "cromwell" + db.driver = "com.mysql.jdbc.Driver" + profile = "slick.jdbc.MySQLProfile$" +} diff --git a/scripts/docker-compose-mysql/compose/mysql/init/init_user.sql b/scripts/docker-compose-mysql/compose/mysql/init/init_user.sql new file mode 100644 index 000000000..b07938189 --- /dev/null +++ b/scripts/docker-compose-mysql/compose/mysql/init/init_user.sql @@ -0,0 +1,4 @@ +CREATE USER 'cromwell'@'localhost' IDENTIFIED BY 'cromwell'; +GRANT ALL PRIVILEGES ON cromwell_db.* TO 'cromwell'@'localhost' WITH GRANT OPTION; +CREATE USER 'cromwell'@'%' IDENTIFIED BY 'cromwell'; +GRANT ALL PRIVILEGES ON cromwell_db.* TO 'cromwell'@'%' WITH GRANT OPTION; diff --git a/scripts/docker-compose-mysql/docker-compose.yml b/scripts/docker-compose-mysql/docker-compose.yml new file mode 100644 index 000000000..c1a8e21f3 --- /dev/null +++ b/scripts/docker-compose-mysql/docker-compose.yml @@ -0,0 +1,22 @@ +version: '2' +services: + cromwell: + build: + context: ./compose/cromwell + volumes: + - ./cromwell-executions:/cromwell-working-dir/cromwell-executions + 
command: ["/wait-for-it/wait-for-it.sh mysql-db:3306 -t 120 -- java -Dconfig.file=/app-config/application.conf -jar /app/cromwell.jar server"] + links: + - mysql-db + ports: + - "80:8000" + mysql-db: + image: "mysql:5.7" + environment: + - MYSQL_ROOT_PASSWORD=cromwell + - MYSQL_DATABASE=cromwell_db + volumes: + - ./compose/mysql/init:/docker-entrypoint-initdb.d + - ./compose/mysql/data:/var/lib/mysql + ports: + - "3307:3306" diff --git a/scripts/docker-compose-mysql/jes-cromwell/docker-compose.yml b/scripts/docker-compose-mysql/jes-cromwell/docker-compose.yml new file mode 100644 index 000000000..c7f0e1cee --- /dev/null +++ b/scripts/docker-compose-mysql/jes-cromwell/docker-compose.yml @@ -0,0 +1,9 @@ +version: '2' +services: + cromwell: + volumes: + - $HOME/.config/gcloud:/root/.config/gcloud + - ./jes-cromwell/jes-config:/app-config + environment: + - GOOGLE_APPLICATION_CREDENTIALS=/root/.config/gcloud/application_default_credentials.json + diff --git a/scripts/docker-compose-mysql/jes-cromwell/jes-config/application.conf b/scripts/docker-compose-mysql/jes-cromwell/jes-config/application.conf new file mode 100644 index 000000000..64b47218f --- /dev/null +++ b/scripts/docker-compose-mysql/jes-cromwell/jes-config/application.conf @@ -0,0 +1,68 @@ +system.new-workflow-poll-rate=1 + +google { + + application-name = "cromwell" + + auths = [ + { + name = "application-default" + scheme = "application_default" + } + ] +} + +engine { + filesystems { + gcs { + auth = "application-default" + } + } +} + +call-caching { + enabled = false +} + +backend { + default = "Jes" + providers { + Jes { + actor-factory = "cromwell.backend.impl.jes.JesBackendLifecycleActorFactory" + config { + // Google project + project = "broad-dsde-cromwell-dev" + + // Base bucket for workflow executions + root = "gs://tjeandet-cromwell-execs" + + // Polling for completion backs-off gradually for slower-running jobs. 
+ // This is the maximum polling interval (in seconds): + maximum-polling-interval = 600 + + genomics { + // A reference to an auth defined in the `google` stanza at the top. This auth is used to create + // Pipelines and manipulate auth JSONs. + auth = "application-default" + // Endpoint for APIs, no reason to change this unless directed by Google. + endpoint-url = "https://genomics.googleapis.com/" + } + + filesystems { + gcs { + // A reference to a potentially different auth for manipulating files via engine functions. + auth = "application-default" + } + } + } + } + } +} + +database { + db.url = "jdbc:mysql://mysql-db/cromwell_db?useSSL=false&rewriteBatchedStatements=true" + db.user = "cromwell" + db.password = "cromwell" + db.driver = "com.mysql.jdbc.Driver" + profile = "slick.jdbc.MySQLProfile$" +} diff --git a/scripts/test_upgrade/custom_labels.json b/scripts/test_upgrade/custom_labels.json new file mode 100644 index 000000000..d3e90d2bf --- /dev/null +++ b/scripts/test_upgrade/custom_labels.json @@ -0,0 +1,3 @@ +{ + "test-upgrade-label": "goodvalue" +} diff --git a/scripts/test_upgrade/defaultDocker.json b/scripts/test_upgrade/defaultDocker.json new file mode 100644 index 000000000..f4713ec3e --- /dev/null +++ b/scripts/test_upgrade/defaultDocker.json @@ -0,0 +1,5 @@ +{ + "default_runtime_attributes": { + "docker": "library/ubuntu:latest" + } +} diff --git a/scripts/test_upgrade/scatter_files.wdl b/scripts/test_upgrade/scatter_files.wdl new file mode 100644 index 000000000..636faae37 --- /dev/null +++ b/scripts/test_upgrade/scatter_files.wdl @@ -0,0 +1,60 @@ +task mkFile { + + Int index + + command { + echo "content-${index}" + } + + output { + File f = stdout() + } + runtime { docker: "ubuntu:latest" } +} + +task catFile { + File f_in + + command { + sleep 50 + cat ${f_in} + } + + output { + File f = stdout() + } +} + +task gather { + Array[File] inputs + + command { + cat ${sep=" " inputs} + } + + output { + String result = stdout() + } + runtime { 
docker: "ubuntu:latest" } +} + +workflow scatter_files { + + Int scatter_width_part_1 + Int scatter_width_part_2 + Int scatter_width_part_3 + Int scatter_width_part_4 + Int scatter_width_part_5 + Int scatter_width_part_6 + + Int sum = scatter_width_part_1 + scatter_width_part_2 + scatter_width_part_3 + scatter_width_part_4 + scatter_width_part_5 + scatter_width_part_6 + + Array[Int] xs = range(sum / 4) + + scatter (x in xs) { + call mkFile { input: index = x } + call catFile { input: f_in = mkFile.f} + } + + call gather { input: inputs = catFile.f } +} diff --git a/scripts/test_upgrade/scatter_files_input_part1_ab.json b/scripts/test_upgrade/scatter_files_input_part1_ab.json new file mode 100644 index 000000000..a706d3f72 --- /dev/null +++ b/scripts/test_upgrade/scatter_files_input_part1_ab.json @@ -0,0 +1,5 @@ +[{ + "scatter_files.scatter_width_part_1": 2 +},{ + "scatter_files.scatter_width_part_1": 2 +}] diff --git a/scripts/test_upgrade/scatter_files_input_part1_c.json b/scripts/test_upgrade/scatter_files_input_part1_c.json new file mode 100644 index 000000000..1b04da787 --- /dev/null +++ b/scripts/test_upgrade/scatter_files_input_part1_c.json @@ -0,0 +1,3 @@ +{ + "scatter_files.scatter_width_part_1": 2 +} diff --git a/scripts/test_upgrade/scatter_files_input_part2.json b/scripts/test_upgrade/scatter_files_input_part2.json new file mode 100644 index 000000000..5208451da --- /dev/null +++ b/scripts/test_upgrade/scatter_files_input_part2.json @@ -0,0 +1,3 @@ +{ + "scatter_files.scatter_width_part_2": 2 +} diff --git a/scripts/test_upgrade/scatter_files_input_part3.json b/scripts/test_upgrade/scatter_files_input_part3.json new file mode 100644 index 000000000..a9611598c --- /dev/null +++ b/scripts/test_upgrade/scatter_files_input_part3.json @@ -0,0 +1,3 @@ +{ + "scatter_files.scatter_width_part_3": 2 +} diff --git a/scripts/test_upgrade/scatter_files_input_part4.json b/scripts/test_upgrade/scatter_files_input_part4.json new file mode 100644 index 
000000000..e61982340 --- /dev/null +++ b/scripts/test_upgrade/scatter_files_input_part4.json @@ -0,0 +1,3 @@ +{ + "scatter_files.scatter_width_part_4": 2 +} diff --git a/scripts/test_upgrade/scatter_files_input_part5.json b/scripts/test_upgrade/scatter_files_input_part5.json new file mode 100644 index 000000000..4cdabf350 --- /dev/null +++ b/scripts/test_upgrade/scatter_files_input_part5.json @@ -0,0 +1,3 @@ +{ + "scatter_files.scatter_width_part_5": 2 +} diff --git a/scripts/test_upgrade/scatter_files_input_part6.json b/scripts/test_upgrade/scatter_files_input_part6.json new file mode 100644 index 000000000..d0d5e4ecd --- /dev/null +++ b/scripts/test_upgrade/scatter_files_input_part6.json @@ -0,0 +1,3 @@ +{ + "scatter_files.scatter_width_part_6": 2 +} diff --git a/scripts/test_upgrade/test_upgrade.sh b/scripts/test_upgrade/test_upgrade.sh new file mode 100755 index 000000000..7eb6a22e6 --- /dev/null +++ b/scripts/test_upgrade/test_upgrade.sh @@ -0,0 +1,182 @@ +#!/bin/bash +# +# test_upgrade.sh +# +# What this script does: +# - Starts Cromwell using the "previous" cromwell JAR/configuration +# - Sends 3 jobs to Cromwell (2 batched, one single) +# - Waits 6 minutes +# - Sends the same 3 jobs again, in the same way +# - Shutdown Cromwell with jobs still running +# - Restarts Cromwell using the "new" cromwell JAR/configuration +# - Sends the same three jobs again +# - Waits for everything to complete +# +# What it doesn't do (yet... but maybe for the C27 release!): +# - Guarantee that any jobs are in any specific states at the point that it shuts down. +# - Check the status of jobs after completing. +# - Check that operations IDs weren't duplicated after restarting. +# - DRY it up with functions replacing the outrageous copy/pasting. 
+# + +PREVIOUS_CROMWELL_JAR=cromwell-25-31ae549-SNAP.jar +PREVIOUS_CROMWELL_CONF=jes.conf + +NEW_CROMWELL_JAR=cromwell-26-88630db-SNAP.jar +NEW_CROMWELL_CONF=jes.conf + +OUT_DIR=out + +PREVIOUS_CROMWELL_LOG="$OUT_DIR/previous_version.log" +NEW_CROMWELL_LOG="$OUT_DIR/new_version.log" + +START_DT=$(date "+%Y-%m-%dT%H:%M:%S.000-04:00") +echo "Starting $0 at: $START_DT" +# Now we've printed it, make it URL-safe +START_DT="${START_DT//:/%3A}" + +pgrep -q cromwell +ALREADY_RUNNING=$? +if [ $ALREADY_RUNNING -ne 1 ]; +then + echo "Oops! Cromwell (🐖 ) is already running!" + pgrep cromwell + exit 1 +fi + +rm -r $OUT_DIR +mkdir $OUT_DIR + +echo -n "Starting Cromwell (🐖 )..." +java -Dconfig.file="$PREVIOUS_CROMWELL_CONF" -jar "$PREVIOUS_CROMWELL_JAR" server &> "$PREVIOUS_CROMWELL_LOG" & +CROMWELL_PID=$! +echo "started (PID: $CROMWELL_PID)." + +echo -n "Waiting for (🐖 ) API..." +READY=-1 +while [ $READY -ne 0 ]; +do + sleep 1 + curl -X GET --header "Accept: application/json" "http://localhost:8000/api/workflows/v1/backends" &>/dev/null + READY=$? +done +echo "ready." + +for i in 1 2 +do + echo -n "submitting job ${i}a and ${i}b..." + RESULT_FILE="$OUT_DIR/submitResult_${i}_ab.json" + curl -X POST --header "Accept: application/json" "http://localhost:8000/api/workflows/v1/batch" \ + -F workflowSource=@scatter_files.wdl \ + -F workflowInputs=@scatter_files_input_part1_ab.json \ + -F workflowInputs_2=@scatter_files_input_part2.json \ + -F workflowInputs_3=@scatter_files_input_part3.json \ + -F workflowInputs_4=@scatter_files_input_part4.json \ + -F workflowInputs_5=@scatter_files_input_part5.json \ + -F workflowInputs_6=@scatter_files_input_part6.json \ + -F workflowOptions=@defaultDocker.json \ + -F customLabels=@custom_labels.json &> $"RESULT_FILE" + echo "done (Response in: $RESULT_FILE)." + echo -n "submitting job ${i}c..." 
+ RESULT_FILE="$OUT_DIR/submitResult_${i}_c.json" + curl -X POST --header "Accept: application/json" "http://localhost:8000/api/workflows/v1/batch" \ + -F workflowSource=@scatter_files.wdl \ + -F workflowInputs=@scatter_files_input_part1_c.json \ + -F workflowInputs_2=@scatter_files_input_part2.json \ + -F workflowInputs_3=@scatter_files_input_part3.json \ + -F workflowInputs_4=@scatter_files_input_part4.json \ + -F workflowInputs_5=@scatter_files_input_part5.json \ + -F workflowInputs_6=@scatter_files_input_part6.json \ + -F workflowOptions=@defaultDocker.json \ + -F customLabels=@custom_labels.json &> $"RESULT_FILE" + echo "done (Response in: $RESULT_FILE)." + [ "$i" -eq 1 ] && sleep 360 +done + +# Step two: upgrade cromwell +kill $CROMWELL_PID + +pgrep -q cromwell +STILL_RUNNING=$? +while [ $STILL_RUNNING -eq 0 ]; +do + echo "Waiting for Cromwell(🐖 ) to exit..." + sleep 1 + pgrep -q cromwell + STILL_RUNNING=$? +done + +echo -n "Starting Cromwell(🐖 )... " +java -Dconfig.file=$NEW_CROMWELL_CONF -jar $NEW_CROMWELL_JAR server &> $NEW_CROMWELL_LOG & +CROMWELL_PID=$! +echo "started (PID=$CROMWELL_PID)." + +echo -n "Waiting for 🐖 API..." +READY=-1 +while [ "$READY" -ne "0" ]; +do + sleep 1 + curl -X GET --header "Accept: application/json" "http://localhost:8000/api/workflows/v1/backends" &>/dev/null + READY=$? +done +echo "ready." + +i=3 +echo -n "submitting job ${i}a and ${i}b..." 
+RESULT_FILE="$OUT_DIR/submitResult_${i}_ab.json"
+curl -X POST --header "Accept: application/json" "http://localhost:8000/api/workflows/v1/batch" \
+ -F workflowSource=@scatter_files.wdl \
+ -F workflowInputs=@scatter_files_input_part1_ab.json \
+ -F workflowInputs_2=@scatter_files_input_part2.json \
+ -F workflowInputs_3=@scatter_files_input_part3.json \
+ -F workflowInputs_4=@scatter_files_input_part4.json \
+ -F workflowInputs_5=@scatter_files_input_part5.json \
+ -F workflowInputs_6=@scatter_files_input_part6.json \
+ -F workflowOptions=@defaultDocker.json \
+ -F customLabels=@custom_labels.json &> "$RESULT_FILE"
+echo "done (Response in: $RESULT_FILE)."
+echo -n "submitting job ${i}c..."
+RESULT_FILE="$OUT_DIR/submitResult_${i}_c.json"
+curl -X POST --header "Accept: application/json" "http://localhost:8000/api/workflows/v1/batch" \
+ -F workflowSource=@scatter_files.wdl \
+ -F workflowInputs=@scatter_files_input_part1_c.json \
+ -F workflowInputs_2=@scatter_files_input_part2.json \
+ -F workflowInputs_3=@scatter_files_input_part3.json \
+ -F workflowInputs_4=@scatter_files_input_part4.json \
+ -F workflowInputs_5=@scatter_files_input_part5.json \
+ -F workflowInputs_6=@scatter_files_input_part6.json \
+ -F workflowOptions=@defaultDocker.json \
+ -F customLabels=@custom_labels.json &> "$RESULT_FILE"
+echo "done (Response in: $RESULT_FILE)."
+
+# Step 3: Wait until everything's done:
+echo -n "Waiting for the run to complete at a rate of one 🐷 per minute..."
+DONE=1
+while [ "$DONE" -ne "0" ];
+do
+ CURLED=$(curl -X GET --header "Accept: application/json" "http://localhost:8000/api/workflows/v1/query?start=${START_DT}&status=Running" 2>/dev/null)
+ grep -q ": \[\]" <<< "$CURLED"
+ FINISHED=$?
+ grep -q '"status": "fail"' <<< "$CURLED"
+ ERROR=$?
+ [ $ERROR -eq 0 ] && ( echo "Error: $CURLED" )
+ if [ $ERROR -eq 0 ] || [ $FINISHED -eq 0 ];
+ then
+ DONE=0
+ else
+ echo -n ".🐷 ."
+ sleep 60 + fi +done +echo "...done" + +kill $CROMWELL_PID + +# Step 4: analyse logs to make sure things worked out: +echo "Previous version's operations IDs:" +grep operations "$PREVIOUS_CROMWELL_LOG" | sed "s/.*\ - \(.*\)/\1/g" + +echo +echo "New version's operations IDs:" +grep operations "$NEW_CROMWELL_LOG" | sed "s/.*\ - \(.*\)/\1/g" +# TODO... diff --git a/services/src/main/scala/cromwell/services/ServiceRegistryActor.scala b/services/src/main/scala/cromwell/services/ServiceRegistryActor.scala index 357f5d33f..66ec1dbe4 100644 --- a/services/src/main/scala/cromwell/services/ServiceRegistryActor.scala +++ b/services/src/main/scala/cromwell/services/ServiceRegistryActor.scala @@ -2,8 +2,12 @@ package cromwell.services import akka.actor.SupervisorStrategy.Escalate import akka.actor.{Actor, ActorInitializationException, ActorLogging, ActorRef, OneForOneStrategy, Props} +import cats.data.NonEmptyList import com.typesafe.config.{Config, ConfigFactory, ConfigObject} -import lenthall.config.ScalaConfig._ +import cromwell.core.Dispatcher.ServiceDispatcher +import cromwell.util.GracefulShutdownHelper +import cromwell.util.GracefulShutdownHelper.ShutdownCommand +import net.ceedubs.ficus.Ficus._ import scala.collection.JavaConverters._ @@ -14,12 +18,12 @@ object ServiceRegistryActor { def serviceName: String } - def props(config: Config) = Props(new ServiceRegistryActor(serviceNameToPropsMap(config))) + def props(config: Config) = Props(new ServiceRegistryActor(serviceNameToPropsMap(config))).withDispatcher(ServiceDispatcher) // To enable testing, this lets us override a config value with a Props of our choice: def props(config: Config, overrides: Map[String, Props]) = { val fromConfig = serviceNameToPropsMap(config).filterNot { case (name: String, _: Props) => overrides.keys.toList.contains(name) } - Props(new ServiceRegistryActor(fromConfig ++ overrides)) + Props(new ServiceRegistryActor(fromConfig ++ overrides)).withDispatcher(ServiceDispatcher) } def 
serviceNameToPropsMap(globalConfig: Config): Map[String, Props] = { @@ -31,17 +35,24 @@ object ServiceRegistryActor { } private def serviceProps(serviceName: String, globalConfig: Config, serviceStanza: Config): Props = { - val serviceConfigStanza = serviceStanza.getConfigOr("config", ConfigFactory.parseString("")) - val className = serviceStanza.getStringOr( - "class", + val serviceConfigStanza = serviceStanza.as[Option[Config]]("config").getOrElse(ConfigFactory.parseString("")) + + val dispatcher = serviceStanza.as[Option[String]]("dispatcher").getOrElse(ServiceDispatcher) + val className = serviceStanza.as[Option[String]]("class").getOrElse( throw new IllegalArgumentException(s"Invalid configuration for service $serviceName: missing 'class' definition") ) - Props.create(Class.forName(className), serviceConfigStanza, globalConfig) + try { + Props.create(Class.forName(className), serviceConfigStanza, globalConfig).withDispatcher(dispatcher) + } catch { + case e: ClassNotFoundException => throw new RuntimeException( + s"Class $className for service $serviceName cannot be found in the class path.", e + ) + } } } -class ServiceRegistryActor(serviceProps: Map[String, Props]) extends Actor with ActorLogging { +class ServiceRegistryActor(serviceProps: Map[String, Props]) extends Actor with ActorLogging with GracefulShutdownHelper { import ServiceRegistryActor._ val services: Map[String, ActorRef] = serviceProps map { @@ -56,6 +67,11 @@ class ServiceRegistryActor(serviceProps: Map[String, Props]) extends Actor with log.error("Received ServiceRegistryMessage requesting service '{}' for which no service is configured. Message: {}", msg.serviceName, msg) sender ! 
ServiceRegistryFailure(msg.serviceName) } + case ShutdownCommand => + services.values.toList match { + case Nil => context stop self + case head :: tail => waitForActorsAndShutdown(NonEmptyList.of(head, tail: _*)) + } case fool => log.error("Received message which is not a ServiceRegistryMessage: {}", fool) sender ! ServiceRegistryFailure("Message is not a ServiceRegistryMessage: " + fool) @@ -66,7 +82,7 @@ class ServiceRegistryActor(serviceProps: Map[String, Props]) extends Actor with * the error up the chain */ override val supervisorStrategy = OneForOneStrategy() { - case aie: ActorInitializationException => Escalate + case _: ActorInitializationException => Escalate case t => super.supervisorStrategy.decider.applyOrElse(t, (_: Any) => Escalate) } } diff --git a/services/src/main/scala/cromwell/services/ServicesStore.scala b/services/src/main/scala/cromwell/services/ServicesStore.scala index a73c3f3ec..e8a8b940d 100644 --- a/services/src/main/scala/cromwell/services/ServicesStore.scala +++ b/services/src/main/scala/cromwell/services/ServicesStore.scala @@ -4,8 +4,7 @@ import com.typesafe.config.ConfigFactory import cromwell.database.migration.liquibase.LiquibaseUtils import cromwell.database.slick.SlickDatabase import cromwell.database.sql.SqlDatabase -import lenthall.config.ScalaConfig._ -import org.slf4j.LoggerFactory +import net.ceedubs.ficus.Ficus._ trait ServicesStore { def databaseInterface: SqlDatabase @@ -15,7 +14,7 @@ object ServicesStore { implicit class EnhancedSqlDatabase[A <: SqlDatabase](val sqlDatabase: A) extends AnyVal { def initialized: A = { - if (sqlDatabase.databaseConfig.getBooleanOr("liquibase.updateSchema", default = true)) { + if (sqlDatabase.databaseConfig.as[Option[Boolean]]("liquibase.updateSchema").getOrElse(true)) { sqlDatabase withConnection LiquibaseUtils.updateSchema } sqlDatabase @@ -30,20 +29,45 @@ trait SingletonServicesStore extends ServicesStore { object SingletonServicesStore { - private lazy val log = 
LoggerFactory.getLogger("SingletonServicesStore") - private val databaseConfig = { - val config = ConfigFactory.load.getConfig("database") - if (config.hasPath("config")) { - log.warn( - """ - |Use of configuration path 'database.config' is deprecated. - | - |Move the configuration directly under the 'database' element, and remove the key 'database.config'. - |""".stripMargin) - config.getConfig(config.getString("config")) - } else { - config - } + private val databaseConfig = ConfigFactory.load.getConfig("database") + + if (databaseConfig.hasPath("config")) { + val msg = """ + |******************************* + |***** DEPRECATION MESSAGE ***** + |******************************* + | + |Use of configuration path 'database.config' has been deprecated. + | + |Move the configuration directly under the 'database' element, and remove the key 'database.config'. + | + |""".stripMargin + throw new Exception(msg) + } else if (databaseConfig.hasPath("driver")) { + val msg = + """ + |******************************* + |***** DEPRECATION MESSAGE ***** + |******************************* + | + |Use of configuration path 'database.driver' has been deprecated. Replace with a "profile" element instead, e.g: + | + |database { + | #driver = "slick.driver.MySQLDriver$" #old + | profile = "slick.jdbc.MySQLProfile$" #new + | db { + | driver = "com.mysql.jdbc.Driver" + | url = "jdbc:mysql://host/cromwell?rewriteBatchedStatements=true" + | user = "user" + | password = "pass" + | connectionTimeout = 5000 + | } + |} + | + |Cromwell thanks you. 
+ |""".stripMargin + throw + new Exception(msg) } import ServicesStore.EnhancedSqlDatabase diff --git a/services/src/main/scala/cromwell/services/keyvalue/KeyValueServiceActor.scala b/services/src/main/scala/cromwell/services/keyvalue/KeyValueServiceActor.scala index 1d25ee88f..49e220936 100644 --- a/services/src/main/scala/cromwell/services/keyvalue/KeyValueServiceActor.scala +++ b/services/src/main/scala/cromwell/services/keyvalue/KeyValueServiceActor.scala @@ -1,45 +1,57 @@ package cromwell.services.keyvalue import akka.actor.{Actor, ActorRef} -import com.typesafe.config.Config -import cromwell.core.WorkflowId +import cromwell.core.{JobKey, MonitoringCompanionHelper, WorkflowId} import cromwell.services.ServiceRegistryActor.ServiceRegistryMessage import cromwell.services.keyvalue.KeyValueServiceActor._ import scala.concurrent.{ExecutionContextExecutor, Future} import scala.util.{Failure, Success} - object KeyValueServiceActor { - sealed trait KvMessage + final case class KvJobKey(callFqn: String, callIndex: Option[Int], callAttempt: Int) + object KvJobKey { + def apply(jobKey: JobKey): KvJobKey = KvJobKey(jobKey.scope.fullyQualifiedName, jobKey.index, jobKey.attempt) + } + + final case class ScopedKey(workflowId: WorkflowId, jobKey: KvJobKey, key: String) - sealed trait KvAction extends KvMessage with ServiceRegistryMessage { - def serviceName = "KeyValue" + sealed trait KvMessage { + def key: ScopedKey } - case class KvJobKey(callFqn: String, callIndex: Option[Int], callAttempt: Int) - case class ScopedKey(workflowId: WorkflowId, jobKey: KvJobKey, key: String) - case class KvPut(pair: KvPair) extends KvAction - case class KvGet(key: ScopedKey) extends KvAction + sealed trait KvMessageWithAction extends KvMessage { + val action: KvAction + def key = action.key + } + + sealed trait KvAction extends KvMessage with ServiceRegistryMessage { override val serviceName = "KeyValue" } + + final case class KvPut(pair: KvPair) extends KvAction { override def key = pair.key 
} + final case class KvGet(key: ScopedKey) extends KvAction sealed trait KvResponse extends KvMessage - case class KvPair(key: ScopedKey, value: Option[String]) extends KvResponse - case class KvFailure(action: KvAction, failure: Throwable) extends KvResponse - case class KvKeyLookupFailed(action: KvGet) extends KvResponse - case class KvPutSuccess(action: KvPut) extends KvResponse + + final case class KvPair(key: ScopedKey, value: Option[String]) extends KvResponse + final case class KvFailure(action: KvAction, failure: Throwable) extends KvResponse with KvMessageWithAction + final case class KvKeyLookupFailed(action: KvGet) extends KvResponse with KvMessageWithAction + final case class KvPutSuccess(action: KvPut) extends KvResponse with KvMessageWithAction } -trait KeyValueServiceActor extends Actor { +trait KeyValueServiceActor extends Actor with MonitoringCompanionHelper { implicit val ec: ExecutionContextExecutor - val serviceConfig: Config - val globalConfig: Config - def receive = { + val kvReceive: Receive = { case action: KvGet => respond(sender(), action, doGet(action)) - case action: KvPut => respond(sender(), action, doPut(action)) + case action: KvPut => + addWork() + val putAction = doPut(action) + putAction andThen { case _ => removeWork() } + respond(sender(), action, putAction) } - def doPut(put: KvPut): Future[KvResponse] + override def receive = kvReceive.orElse(monitoringReceive) + def doPut(put: KvPut): Future[KvResponse] def doGet(get: KvGet): Future[KvResponse] private def respond(replyTo: ActorRef, action: KvAction, response: Future[KvResponse]): Unit = { diff --git a/services/src/main/scala/cromwell/services/keyvalue/KvClient.scala b/services/src/main/scala/cromwell/services/keyvalue/KvClient.scala new file mode 100644 index 000000000..c8ddbcb88 --- /dev/null +++ b/services/src/main/scala/cromwell/services/keyvalue/KvClient.scala @@ -0,0 +1,41 @@ +package cromwell.services.keyvalue + +import akka.actor.{Actor, ActorLogging, ActorRef} 
+import cromwell.services.keyvalue.KeyValueServiceActor._ + +import scala.concurrent.{ExecutionContext, Future, Promise} + +trait KvClient { this: Actor with ActorLogging => + + def serviceRegistryActor: ActorRef + private[keyvalue] var currentKvClientRequests: Map[ScopedKey, Promise[KvResponse]] = Map.empty + + final def makeKvRequest(actions: Seq[KvAction])(implicit ec: ExecutionContext): Future[Seq[KvResponse]] = { + if (actions.exists(action => currentKvClientRequests.contains(action.key))) { + val msg = "Programmer Error! KvClient does not support multiple KvActions active for the same ScopedKey concurrently. Mi Scusi!" + log.error(msg) + Future.failed(new RuntimeException(msg)) + } else { + createResponseSet(actions) + } + } + + final def kvClientReceive: Actor.Receive = { + case response: KvResponse => fulfillOrLog(response) + } + + private def createResponseSet(newActions: Seq[KvAction])(implicit ec: ExecutionContext) = { + val actionsAndPromises = newActions.map(a => a.key -> Promise[KvResponse]()) + currentKvClientRequests ++= actionsAndPromises.toMap + newActions foreach { serviceRegistryActor ! _ } + Future.sequence(actionsAndPromises.map(_._2.future)) + } + + private def fulfillOrLog(response: KvResponse) = currentKvClientRequests.get(response.key) match { + case Some(fulfilledPromise) => + fulfilledPromise.success(response) + currentKvClientRequests -= response.key + case None => log.error(s"Programmer Error: Got a KV response for a request that was never sent: $response. Did you use the KV store without KvClient? 
Current key set: ${currentKvClientRequests.keys.mkString("")}") + } +} + diff --git a/services/src/main/scala/cromwell/services/keyvalue/impl/SqlKeyValueServiceActor.scala b/services/src/main/scala/cromwell/services/keyvalue/impl/SqlKeyValueServiceActor.scala index a1909d8f2..3ea62743d 100644 --- a/services/src/main/scala/cromwell/services/keyvalue/impl/SqlKeyValueServiceActor.scala +++ b/services/src/main/scala/cromwell/services/keyvalue/impl/SqlKeyValueServiceActor.scala @@ -2,6 +2,7 @@ package cromwell.services.keyvalue.impl import akka.actor.Props import com.typesafe.config.Config +import cromwell.core.Dispatcher.ServiceDispatcher import cromwell.services.SingletonServicesStore import cromwell.services.keyvalue.KeyValueServiceActor import cromwell.services.keyvalue.KeyValueServiceActor._ @@ -9,17 +10,17 @@ import cromwell.services.keyvalue.KeyValueServiceActor._ import scala.concurrent.Future object SqlKeyValueServiceActor { - def props(serviceConfig: Config, globalConfig: Config) = Props(SqlKeyValueServiceActor(serviceConfig, globalConfig)) + def props(serviceConfig: Config, globalConfig: Config) = Props(SqlKeyValueServiceActor(serviceConfig, globalConfig)).withDispatcher(ServiceDispatcher) } -case class SqlKeyValueServiceActor(override val serviceConfig: Config, override val globalConfig: Config) +final case class SqlKeyValueServiceActor(serviceConfig: Config, globalConfig: Config) extends KeyValueServiceActor with BackendKeyValueDatabaseAccess with SingletonServicesStore { override implicit val ec = context.dispatcher private implicit val system = context.system override def doPut(put: KvPut): Future[KvResponse] = { put.pair.value match { - case Some(backendVal) => updateBackendKeyValuePair(put.pair.key.workflowId, + case Some(_) => updateBackendKeyValuePair(put.pair.key.workflowId, put.pair.key.jobKey, put.pair.key.key, put.pair.value.get).map(_ => KvPutSuccess(put)) diff --git a/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala 
b/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala index 5284a9a8e..5d1db5e4f 100644 --- a/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala +++ b/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala @@ -15,6 +15,22 @@ object CallMetadataKeys { val Stdout = "stdout" val Stderr = "stderr" val BackendLogsPrefix = "backendLogs" + val BackendStatus = "backendStatus" val JobId = "jobId" val CallRoot = "callRoot" + val SubWorkflowId = "subWorkflowId" + val SubWorkflowMetadata = "subWorkflowMetadata" + val CallCaching = "callCaching" + val BackendLabels = "backendLabels" + val Labels = "labels" + + object CallCachingKeys { + val EffectiveModeKey = CallCaching + MetadataKey.KeySeparator + "effectiveCallCachingMode" + val ReadResultMetadataKey = CallCaching + MetadataKey.KeySeparator + "result" + val HitResultMetadataKey = CallCaching + MetadataKey.KeySeparator + "hit" + val AllowReuseMetadataKey = CallCaching + MetadataKey.KeySeparator + "allowResultReuse" + val HitFailuresKey = CallCaching + MetadataKey.KeySeparator + "hitFailures" + val HashFailuresKey = CallCaching + MetadataKey.KeySeparator + "hashFailures" + val HashesKey = CallCaching + MetadataKey.KeySeparator + "hashes" + } } diff --git a/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala b/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala index 979a6474c..818a4def6 100644 --- a/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala +++ b/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala @@ -2,25 +2,32 @@ package cromwell.services.metadata import java.time.OffsetDateTime +import cats.data.NonEmptyList import cromwell.core.WorkflowId -import org.slf4j.LoggerFactory -import wdl4s.values.{WdlBoolean, WdlFloat, WdlInteger, WdlValue} - -import scalaz.NonEmptyList +import org.slf4j.{Logger, LoggerFactory} +import wdl4s.wdl.values._ case class MetadataJobKey(callFqn: String, 
index: Option[Int], attempt: Int) -case class MetadataKey(workflowId: WorkflowId, jobKey: Option[MetadataJobKey], key: String) +case class MetadataKey private (workflowId: WorkflowId, jobKey: Option[MetadataJobKey], key: String) object MetadataKey { + implicit class KeyMetacharacterEscaper(val key: String) extends AnyVal { + // The escapes are necessary on the first arguments to `replaceAll` since they're treated like regular expressions + // and square braces are character class delimiters. Backslashes must be escaped in both parameters. + // Ignore the red in some of the "raw" strings, IntelliJ and GitHub don't seem to understand them. + def escapeMeta = key.replaceAll(raw"\[", raw"\\[").replaceAll(raw"\]", raw"\\]").replaceAll(":", raw"\\:") + def unescapeMeta = key.replaceAll(raw"\\\[", "[").replaceAll(raw"\\\]", "]").replaceAll(raw"\\:", ":") + } + val KeySeparator = ':' def apply(workflowId: WorkflowId, jobKey: Option[MetadataJobKey], keys: String*): MetadataKey = { new MetadataKey(workflowId, jobKey, compositeKey(keys:_*)) } - def compositeKey(keys: String*) = keys.toList.mkString(KeySeparator.toString) + def compositeKey(keys: String*): String = keys.toList.mkString(KeySeparator.toString) } object MetadataEvent { @@ -36,24 +43,25 @@ case object MetadataNumber extends MetadataType { override val typeName = "numbe case object MetadataBoolean extends MetadataType { override val typeName = "boolean" } object MetadataValue { - def apply(value: Any) = { - value match { + def apply(value: Any): MetadataValue = { + Option(value).getOrElse("") match { case WdlInteger(i) => new MetadataValue(i.toString, MetadataInt) case WdlFloat(f) => new MetadataValue(f.toString, MetadataNumber) case WdlBoolean(b) => new MetadataValue(b.toString, MetadataBoolean) + case WdlOptionalValue(_, Some(o)) => apply(o) case value: WdlValue => new MetadataValue(value.valueString, MetadataString) case _: Int | Long => new MetadataValue(value.toString, MetadataInt) case _: Double | Float => 
new MetadataValue(value.toString, MetadataNumber) case _: Boolean => new MetadataValue(value.toString, MetadataBoolean) - case _ => new MetadataValue(value.toString, MetadataString) + case other => new MetadataValue(other.toString, MetadataString) } } } object MetadataType { - val log = LoggerFactory.getLogger("Metadata Type") + val log: Logger = LoggerFactory.getLogger("Metadata Type") - def fromString(s: String) = s match { + def fromString(s: String): MetadataType = s match { case MetadataString.typeName => MetadataString case MetadataInt.typeName => MetadataInt case MetadataNumber.typeName => MetadataNumber @@ -68,24 +76,25 @@ case class MetadataValue(value: String, valueType: MetadataType) case class MetadataEvent(key: MetadataKey, value: Option[MetadataValue], offsetDateTime: OffsetDateTime) -case class MetadataQueryJobKey(callFqn: String, index: Option[Int], attempt: Int) +case class MetadataQueryJobKey(callFqn: String, index: Option[Int], attempt: Option[Int]) object MetadataQueryJobKey { - def forMetadataJobKey(jobKey: MetadataJobKey) = MetadataQueryJobKey(jobKey.callFqn, jobKey.index, jobKey.attempt) + def forMetadataJobKey(jobKey: MetadataJobKey) = MetadataQueryJobKey(jobKey.callFqn, jobKey.index, Option(jobKey.attempt)) } case class MetadataQuery(workflowId: WorkflowId, jobKey: Option[MetadataQueryJobKey], key: Option[String], includeKeysOption: Option[NonEmptyList[String]], - excludeKeysOption: Option[NonEmptyList[String]]) + excludeKeysOption: Option[NonEmptyList[String]], + expandSubWorkflows: Boolean) object MetadataQuery { - def forWorkflow(workflowId: WorkflowId) = MetadataQuery(workflowId, None, None, None, None) + def forWorkflow(workflowId: WorkflowId) = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) - def forJob(workflowId: WorkflowId, jobKey: MetadataJobKey) = { - MetadataQuery(workflowId, Option(MetadataQueryJobKey.forMetadataJobKey(jobKey)), None, None, None) + def forJob(workflowId: WorkflowId, jobKey: 
MetadataJobKey): MetadataQuery = { + MetadataQuery(workflowId, Option(MetadataQueryJobKey.forMetadataJobKey(jobKey)), None, None, None, expandSubWorkflows = false) } - def forKey(key: MetadataKey) = { - MetadataQuery(key.workflowId, key.jobKey map MetadataQueryJobKey.forMetadataJobKey, Option(key.key), None, None) + def forKey(key: MetadataKey): MetadataQuery = { + MetadataQuery(key.workflowId, key.jobKey map MetadataQueryJobKey.forMetadataJobKey, Option(key.key), None, None, expandSubWorkflows = false) } } diff --git a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala index cce4f6eb1..daf567c7e 100644 --- a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala +++ b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala @@ -2,23 +2,25 @@ package cromwell.services.metadata import java.time.OffsetDateTime -import akka.actor.{ActorRef, DeadLetterSuppression} -import cromwell.core.{JobKey, WorkflowId, WorkflowState} +import akka.actor.ActorRef +import cats.data.NonEmptyList +import cromwell.core.{FullyQualifiedName, JobKey, WorkflowId, WorkflowState} import cromwell.services.ServiceRegistryActor.ServiceRegistryMessage -import wdl4s.values._ +import lenthall.exception.{MessageAggregation, ThrowableAggregation} +import wdl4s.wdl.values._ + +import scala.util.Random -import scala.language.postfixOps -import scalaz.NonEmptyList object MetadataService { final val MetadataServiceName = "MetadataService" - case class WorkflowQueryResult(id: String, name: Option[String], status: Option[String], start: Option[OffsetDateTime], end: Option[OffsetDateTime]) + final case class WorkflowQueryResult(id: String, name: Option[String], status: Option[String], start: Option[OffsetDateTime], end: Option[OffsetDateTime]) - case class WorkflowQueryResponse(results: Seq[WorkflowQueryResult]) + final case class WorkflowQueryResponse(results: 
Seq[WorkflowQueryResult]) - case class QueryMetadata(page: Option[Int], pageSize: Option[Int], totalRecords: Option[Int]) + final case class QueryMetadata(page: Option[Int], pageSize: Option[Int], totalRecords: Option[Int]) trait MetadataServiceMessage /** @@ -46,16 +48,29 @@ object MetadataService { } serviceRegistryActor ! PutMetadataAction(events) } + + def putMetadataWithRawKey(workflowId: WorkflowId, jobKey: Option[(FullyQualifiedName, Option[Int], Int)], keyValue: Map[String, Any]) = { + val metadataJobKey = jobKey map { case (fullyQualifiedName, index, attempt) => MetadataJobKey(fullyQualifiedName, index, attempt) } + + val events = keyValue map { case (key, value) => + val metadataKey = MetadataKey(workflowId, metadataJobKey, key) + MetadataEvent(metadataKey, MetadataValue(value)) + } + serviceRegistryActor ! PutMetadataAction(events) + } } } - case class PutMetadataAction(events: Iterable[MetadataEvent]) extends MetadataServiceAction + final case class PutMetadataAction(events: Iterable[MetadataEvent]) extends MetadataServiceAction + final case class PutMetadataActionAndRespond(events: Iterable[MetadataEvent], replyTo: ActorRef) extends MetadataServiceAction + case class GetSingleWorkflowMetadataAction(workflowId: WorkflowId, includeKeysOption: Option[NonEmptyList[String]], - excludeKeysOption: Option[NonEmptyList[String]]) + excludeKeysOption: Option[NonEmptyList[String]], + expandSubWorkflows: Boolean) extends ReadAction case class GetMetadataQueryAction(key: MetadataQuery) extends ReadAction case class GetStatus(workflowId: WorkflowId) extends ReadAction - case class WorkflowQuery[A](uri: A, parameters: Seq[(String, String)]) extends ReadAction + case class WorkflowQuery(parameters: Seq[(String, String)]) extends ReadAction case class WorkflowOutputs(workflowId: WorkflowId) extends ReadAction case class GetLogs(workflowId: WorkflowId) extends ReadAction case object RefreshSummary extends MetadataServiceAction @@ -65,8 +80,8 @@ object MetadataService { 
def onUnrecognized(possibleWorkflowId: String): Unit def onFailure(possibleWorkflowId: String, throwable: Throwable): Unit } - final case class ValidateWorkflowIdAndExecute(possibleWorkflowId: String, - validationCallback: ValidationCallback) extends MetadataServiceAction + + final case class ValidateWorkflowId(possibleWorkflowId: WorkflowId) extends MetadataServiceAction /** * Responses @@ -76,18 +91,11 @@ object MetadataService { def reason: Throwable } - case class MetadataPutAcknowledgement(putRequest: PutMetadataAction) extends MetadataServiceResponse with DeadLetterSuppression - case class MetadataPutFailed(putRequest: PutMetadataAction, reason: Throwable) extends MetadataServiceFailure - - case class MetadataLookupResponse(query: MetadataQuery, eventList: Seq[MetadataEvent]) extends MetadataServiceResponse - case class MetadataServiceKeyLookupFailed(query: MetadataQuery, reason: Throwable) extends MetadataServiceFailure - - case class StatusLookupResponse(workflowId: WorkflowId, status: WorkflowState) extends MetadataServiceResponse - case class StatusLookupFailed(workflowId: WorkflowId, reason: Throwable) extends MetadataServiceFailure + final case class MetadataLookupResponse(query: MetadataQuery, eventList: Seq[MetadataEvent]) extends MetadataServiceResponse + final case class MetadataServiceKeyLookupFailed(query: MetadataQuery, reason: Throwable) extends MetadataServiceFailure - final case class WorkflowQuerySuccess[A](uri: A, response: WorkflowQueryResponse, meta: Option[QueryMetadata]) - extends MetadataServiceResponse - final case class WorkflowQueryFailure(reason: Throwable) extends MetadataServiceFailure + final case class StatusLookupResponse(workflowId: WorkflowId, status: WorkflowState) extends MetadataServiceResponse + final case class StatusLookupFailed(workflowId: WorkflowId, reason: Throwable) extends MetadataServiceFailure final case class WorkflowOutputsResponse(id: WorkflowId, outputs: Seq[MetadataEvent]) extends MetadataServiceResponse 
final case class WorkflowOutputsFailure(id: WorkflowId, reason: Throwable) extends MetadataServiceFailure @@ -95,6 +103,18 @@ object MetadataService { final case class LogsResponse(id: WorkflowId, logs: Seq[MetadataEvent]) extends MetadataServiceResponse final case class LogsFailure(id: WorkflowId, reason: Throwable) extends MetadataServiceFailure + final case class MetadataWriteSuccess(events: Iterable[MetadataEvent]) extends MetadataServiceResponse + final case class MetadataWriteFailure(reason: Throwable, events: Iterable[MetadataEvent]) extends MetadataServiceFailure + + sealed abstract class WorkflowValidationResponse extends MetadataServiceResponse + case object RecognizedWorkflowId extends WorkflowValidationResponse + case object UnrecognizedWorkflowId extends WorkflowValidationResponse + final case class FailedToCheckWorkflowId(cause: Throwable) extends WorkflowValidationResponse + + sealed abstract class MetadataQueryResponse extends MetadataServiceResponse + final case class WorkflowQuerySuccess(response: WorkflowQueryResponse, meta: Option[QueryMetadata]) extends MetadataQueryResponse + final case class WorkflowQueryFailure(reason: Throwable) extends MetadataQueryResponse + def wdlValueToMetadataEvents(metadataKey: MetadataKey, wdlValue: WdlValue): Iterable[MetadataEvent] = wdlValue match { case WdlArray(_, valueSeq) => if (valueSeq.isEmpty) { @@ -109,14 +129,52 @@ object MetadataService { } else { valueMap.toList flatMap { case (key, value) => wdlValueToMetadataEvents(metadataKey.copy(key = metadataKey.key + s":${key.valueString}"), value) } } + case WdlOptionalValue(_, Some(value)) => + wdlValueToMetadataEvents(metadataKey, value) + case WdlPair(left, right) => + wdlValueToMetadataEvents(metadataKey.copy(key = metadataKey.key + ":left"), left) ++ + wdlValueToMetadataEvents(metadataKey.copy(key = metadataKey.key + ":right"), right) case value => List(MetadataEvent(metadataKey, MetadataValue(value))) } - def throwableToMetadataEvents(metadataKey: 
MetadataKey, t: Throwable): List[MetadataEvent] = { - val message = List(MetadataEvent(metadataKey.copy(key = s"${metadataKey.key}:message"), MetadataValue(t.getMessage))) - val cause = Option(t.getCause) map { cause => throwableToMetadataEvents(metadataKey.copy(key = s"${metadataKey.key}:causedBy"), cause) } getOrElse List.empty - message ++ cause + def throwableToMetadataEvents(metadataKey: MetadataKey, t: Throwable, failureIndex: Int = Random.nextInt(Int.MaxValue)): List[MetadataEvent] = { + val emptyCauseList = List(MetadataEvent.empty(metadataKey.copy(key = metadataKey.key + s"[$failureIndex]:causedBy[]"))) + val metadataKeyAndFailureIndex = s"${metadataKey.key}[$failureIndex]" + + t match { + case aggregation: ThrowableAggregation => + val message = List(MetadataEvent(metadataKey.copy(key = s"$metadataKeyAndFailureIndex:message"), MetadataValue(aggregation.exceptionContext))) + val indexedCauses = aggregation.throwables.toList.zipWithIndex + val indexedCauseEvents = if (indexedCauses.nonEmpty) { + indexedCauses flatMap { case (cause, index) => + val causeKey = metadataKey.copy(key = s"$metadataKeyAndFailureIndex:causedBy") + throwableToMetadataEvents(causeKey, cause, index) + } + } else { + emptyCauseList + } + message ++ indexedCauseEvents + case aggregation: MessageAggregation => + val message = List(MetadataEvent(metadataKey.copy(key = s"$metadataKeyAndFailureIndex:message"), MetadataValue(aggregation.exceptionContext))) + val indexedCauses = aggregation.errorMessages.toList.zipWithIndex + val indexedCauseEvents = if (indexedCauses.nonEmpty) { + indexedCauses flatMap { case (cause, index) => + val causeMessageKey = metadataKey.copy(key = s"$metadataKeyAndFailureIndex:causedBy[$index]:message") + val causeCausedByKey = metadataKey.copy(key = s"$metadataKeyAndFailureIndex:causedBy[$index]:causedBy[]") + List(MetadataEvent(causeMessageKey, MetadataValue(cause)), MetadataEvent.empty(causeCausedByKey)) + } + } else { + emptyCauseList + } + message ++ 
indexedCauseEvents + + case other @ _ => + val message = List(MetadataEvent(metadataKey.copy(key = s"$metadataKeyAndFailureIndex:message"), MetadataValue(t.getMessage))) + val causeKey = metadataKey.copy(key = s"$metadataKeyAndFailureIndex:causedBy") + val cause = Option(t.getCause) map { cause => throwableToMetadataEvents(causeKey, cause, 0) } getOrElse emptyCauseList + message ++ cause + } } } diff --git a/services/src/main/scala/cromwell/services/metadata/WorkflowQueryKey.scala b/services/src/main/scala/cromwell/services/metadata/WorkflowQueryKey.scala index 0cdba09bd..1e3847712 100644 --- a/services/src/main/scala/cromwell/services/metadata/WorkflowQueryKey.scala +++ b/services/src/main/scala/cromwell/services/metadata/WorkflowQueryKey.scala @@ -2,15 +2,19 @@ package cromwell.services.metadata import java.time.OffsetDateTime -import cromwell.core.{ErrorOr, WorkflowId, WorkflowState} +import cats.data +import cats.syntax.traverse._ +import cats.syntax.validated._ +import cromwell.core.labels.Label +import cromwell.core.{WorkflowId, WorkflowState} +import lenthall.validation.ErrorOr._ +import cats.data.Validated._ +import cats.instances.list._ -import scala.language.postfixOps import scala.util.{Success, Try} -import scalaz.Scalaz._ -import scalaz.ValidationNel object WorkflowQueryKey { - val ValidKeys = Set(StartDate, EndDate, Name, Id, Status, Page, PageSize) map { _.name } + val ValidKeys = Set(StartDate, EndDate, Name, Id, Status, LabelKeyValue, Page, PageSize) map { _.name } case object StartDate extends DateTimeWorkflowQueryKey { override val name = "Start" @@ -32,41 +36,57 @@ object WorkflowQueryKey { override def displayName = "page size" } - case object Name extends SeqStringWorkflowQueryKey { + case object Name extends SeqWorkflowQueryKey[String] { override val name = "Name" - override def validate(grouped: Map[String, Seq[(String, String)]]): ErrorOr[Seq[String]] = { + override def validate(grouped: Map[String, Seq[(String, String)]]): 
ErrorOr[List[String]] = { + val values = valuesFromMap(grouped).toList + val nels:List[data.ValidatedNel[String,String]] = values map { + case Patterns.WorkflowName(n) => n.validNel[String] + case v => v.invalidNel[String] + } + sequenceListOfValidatedNels("Name values do not match allowed workflow naming pattern", nels) + } + } + case object LabelKeyValue extends SeqWorkflowQueryKey[Label] { + override val name = "Label" + + override def validate(grouped: Map[String, Seq[(String, String)]]): ErrorOr[List[Label]] = { val values = valuesFromMap(grouped).toList - val nels = values map { - case Patterns.WorkflowName(n) => n.successNel[String] - case v => v.failureNel + + def validateLabelRegex(labelKeyValue: String): ErrorOr[Label] = { + labelKeyValue.split("\\:", 2) match { + case Array(k, v) => Label.validateLabel(k, v) + case other @ _ => s"$labelKeyValue".invalidNel + } } - sequenceListOfValidationNels(s"Name values do not match allowed workflow naming pattern", nels) + val nels: List[ErrorOr[Label]] = values map validateLabelRegex + sequenceListOfValidatedNels("Label values do not match allowed pattern label-key:label-value", nels) } } - case object Id extends SeqStringWorkflowQueryKey { + case object Id extends SeqWorkflowQueryKey[String] { override val name = "Id" - override def validate(grouped: Map[String, Seq[(String, String)]]): ErrorOr[Seq[String]] = { + override def validate(grouped: Map[String, Seq[(String, String)]]): ErrorOr[List[String]] = { val values = valuesFromMap(grouped).toList val nels = values map { v => - if (Try(WorkflowId.fromString(v.toLowerCase.capitalize)).isSuccess) v.successNel[String] else v.failureNel + if (Try(WorkflowId.fromString(v.toLowerCase.capitalize)).isSuccess) v.validNel[String] else v.invalidNel[String] } - sequenceListOfValidationNels(s"Id values do match allowed workflow id pattern", nels) + sequenceListOfValidatedNels("Id values do match allowed workflow id pattern", nels) } } - case object Status extends 
SeqStringWorkflowQueryKey { + case object Status extends SeqWorkflowQueryKey[String] { override val name = "Status" - override def validate(grouped: Map[String, Seq[(String, String)]]): ErrorOr[Seq[String]] = { + override def validate(grouped: Map[String, Seq[(String, String)]]): ErrorOr[List[String]] = { val values = valuesFromMap(grouped).toList val nels = values map { v => - if (Try(WorkflowState.fromString(v.toLowerCase.capitalize)).isSuccess) v.successNel[String] else v.failureNel + if (Try(WorkflowState.withName(v.toLowerCase.capitalize)).isSuccess) v.validNel[String] else v.invalidNel[String] } - sequenceListOfValidationNels("Unrecognized status values", nels) + sequenceListOfValidatedNels("Unrecognized status values", nels) } } } @@ -83,25 +103,25 @@ sealed trait DateTimeWorkflowQueryKey extends WorkflowQueryKey[Option[OffsetDate override def validate(grouped: Map[String, Seq[(String, String)]]): ErrorOr[Option[OffsetDateTime]] = { valuesFromMap(grouped) match { case vs if vs.size > 1 => - s"Found ${vs.size} values for key '$name' but at most one is allowed.".failureNel - case Nil => None.successNel + s"Found ${vs.size} values for key '$name' but at most one is allowed.".invalidNel[Option[OffsetDateTime]] + case Nil => None.validNel[String] case v :: Nil => Try(OffsetDateTime.parse(v)) match { - case Success(dt) => Option(dt).successNel - case _ => s"Value given for $displayName does not parse as a datetime: $v".failureNel + case Success(dt) => Option(dt).validNel[String] + case _ => s"Value given for $displayName does not parse as a datetime: $v".invalidNel[Option[OffsetDateTime]] } } } def displayName: String } -sealed trait SeqStringWorkflowQueryKey extends WorkflowQueryKey[Seq[String]] { - /** `sequence` the `List[ErrorOr[String]]` to a single `ErrorOr[List[String]]` */ - protected def sequenceListOfValidationNels(prefix: String, errorOrList: List[ErrorOr[String]]): ErrorOr[List[String]] = { - val errorOr = errorOrList.sequence[ErrorOr, String] +sealed 
trait SeqWorkflowQueryKey[A] extends WorkflowQueryKey[Seq[A]] { + /** `sequence` the `List[ErrorOr[A]]` to a single `ErrorOr[List[A]]` */ + protected def sequenceListOfValidatedNels(prefix: String, errorOrList: List[ErrorOr[A]]): ErrorOr[List[A]] = { + val errorOr = errorOrList.sequence[ErrorOr, A] // With a leftMap, prepend an error message to the concatenated error values if there are error values. - // This turns the ValidationNel into a Validation, force it back to a ValidationNel with toValidationNel. - errorOr.leftMap(prefix + ": " + _.list.toList.mkString(", ")).toValidationNel + // This turns the ValidatedNel into a Validated, force it back to a ValidatedNel with toValidationNel. + errorOr.leftMap(prefix + ": " + _.toList.mkString(", ")).toValidatedNel } } @@ -109,12 +129,12 @@ sealed trait IntWorkflowQueryKey extends WorkflowQueryKey[Option[Int]] { override def validate(grouped: Map[String, Seq[(String, String)]]): ErrorOr[Option[Int]] = { valuesFromMap(grouped) match { case vs if vs.size > 1 => - s"Found ${vs.size} values for key '$name' but at most one is allowed.".failureNel - case Nil => None.successNel + s"Found ${vs.size} values for key '$name' but at most one is allowed.".invalidNel[Option[Int]] + case Nil => None.validNel case v :: Nil => Try(v.toInt) match { - case Success(intVal) => if (intVal > 0) Option(intVal).successNel else s"Integer value not greater than 0".failureNel - case _ => s"Value given for $displayName does not parse as a integer: $v".failureNel + case Success(intVal) => if (intVal > 0) Option(intVal).validNel else s"Integer value not greater than 0".invalidNel[Option[Int]] + case _ => s"Value given for $displayName does not parse as a integer: $v".invalidNel[Option[Int]] } } } diff --git a/services/src/main/scala/cromwell/services/metadata/WorkflowQueryParameters.scala b/services/src/main/scala/cromwell/services/metadata/WorkflowQueryParameters.scala index a7952d1b4..ba2c7256d 100644 --- 
a/services/src/main/scala/cromwell/services/metadata/WorkflowQueryParameters.scala +++ b/services/src/main/scala/cromwell/services/metadata/WorkflowQueryParameters.scala @@ -2,17 +2,18 @@ package cromwell.services.metadata import java.time.OffsetDateTime +import cats.data.Validated._ +import cats.syntax.cartesian._ +import cats.syntax.validated._ import cromwell.core.WorkflowId +import cromwell.core.labels.Label import cromwell.services.metadata.WorkflowQueryKey._ - -import scala.language.postfixOps -import scalaz.Scalaz._ -import scalaz.{Name => _, _} - +import lenthall.validation.ErrorOr._ case class WorkflowQueryParameters private(statuses: Set[String], names: Set[String], ids: Set[WorkflowId], + labels: Set[Label], startDate: Option[OffsetDateTime], endDate: Option[OffsetDateTime], page: Option[Int], @@ -20,7 +21,7 @@ case class WorkflowQueryParameters private(statuses: Set[String], object WorkflowQueryParameters { - private def validateStartBeforeEnd(start: Option[OffsetDateTime], end: Option[OffsetDateTime]): ValidationNel[String, Unit] = { + private def validateStartBeforeEnd(start: Option[OffsetDateTime], end: Option[OffsetDateTime]): ErrorOr[Unit] = { // Invert the notion of success/failure here to only "successfully" generate an error message if // both start and end dates have been specified and start is after end. val startAfterEndError = for { @@ -30,10 +31,10 @@ object WorkflowQueryParameters { } yield s"Specified start date is after specified end date: start: $s, end: $e" // If the Option is defined this represents a failure, if it's empty this is a success. 
- startAfterEndError map { _.failureNel } getOrElse ().successNel + startAfterEndError map { _.invalidNel } getOrElse ().validNel } - private def validateOnlyRecognizedKeys(rawParameters: Seq[(String, String)]): ValidationNel[String, Unit] = { + private def validateOnlyRecognizedKeys(rawParameters: Seq[(String, String)]): ErrorOr[Unit] = { // Create a map of keys by canonical capitalization (capitalized first letter, lowercase everything else). // The values are the keys capitalized as actually given to the API, which is what will be used in any // error messages. @@ -43,8 +44,8 @@ object WorkflowQueryParameters { keysByCanonicalCapitalization.keys.toSet -- WorkflowQueryKey.ValidKeys match { case set if set.nonEmpty => val unrecognized = set flatMap keysByCanonicalCapitalization - ("Unrecognized query keys: " + unrecognized.mkString(", ")).failureNel - case _ => ().successNel + ("Unrecognized query keys: " + unrecognized.mkString(", ")).invalidNel + case _ => ().validNel } } @@ -52,7 +53,7 @@ object WorkflowQueryParameters { * Run the validation logic over the specified raw parameters, creating a `WorkflowQueryParameters` if all * validation succeeds, otherwise accumulate all validation messages within the `ValidationNel`. 
*/ - private [metadata] def runValidation(rawParameters: Seq[(String, String)]): ValidationNel[String, WorkflowQueryParameters] = { + private [metadata] def runValidation(rawParameters: Seq[(String, String)]): ErrorOr[WorkflowQueryParameters] = { val onlyRecognizedKeys = validateOnlyRecognizedKeys(rawParameters) @@ -60,29 +61,32 @@ object WorkflowQueryParameters { rawParameters groupBy { case (key, _) => key.toLowerCase.capitalize } val Seq(startDate, endDate) = Seq(StartDate, EndDate) map { _.validate(valuesByCanonicalCapitalization) } + val Seq(statuses, names, ids) = Seq(Status, Name, WorkflowQueryKey.Id) map { _.validate(valuesByCanonicalCapitalization) } + val Seq(labels) = Seq(WorkflowQueryKey.LabelKeyValue) map { _.validate(valuesByCanonicalCapitalization) } + val Seq(page, pageSize) = Seq(Page, PageSize) map { _.validate(valuesByCanonicalCapitalization) } // Only validate start before end if both of the individual date parsing validations have already succeeded. val startBeforeEnd = (startDate, endDate) match { - case (Success(s), Success(e)) => validateStartBeforeEnd(s, e) - case _ => ().successNel[String] + case (Valid(s), Valid(e)) => validateStartBeforeEnd(s, e) + case _ => ().validNel[String] } - (onlyRecognizedKeys |@| startBeforeEnd |@| statuses |@| names |@| ids |@| startDate |@| endDate |@| page |@| pageSize) { - case (_, _, status, name, uuid, start, end, _page, _pageSize) => + (onlyRecognizedKeys |@| startBeforeEnd |@| statuses |@| names |@| ids |@| labels |@| startDate |@| endDate |@| page |@| pageSize) map { + case (_, _, status, name, uuid, label, start, end, _page, _pageSize) => val workflowId = uuid map WorkflowId.fromString - WorkflowQueryParameters(status.toSet, name.toSet, workflowId.toSet, start, end, _page, _pageSize) + WorkflowQueryParameters(status.toSet, name.toSet, workflowId.toSet, label.toSet, start, end, _page, _pageSize) } } def apply(rawParameters: Seq[(String, String)]): WorkflowQueryParameters = { runValidation(rawParameters) 
match { - case Success(queryParameters) => queryParameters - case Failure(x) => throw new IllegalArgumentException(x.list.toList.mkString("\n")) + case Valid(queryParameters) => queryParameters + case Invalid(x) => throw new IllegalArgumentException(x.toList.mkString("\n")) } } } diff --git a/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala b/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala index dfd14c94a..6e1b356df 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala @@ -1,28 +1,28 @@ package cromwell.services.metadata.impl -import java.time.OffsetDateTime - +import cats.Semigroup +import cats.data.NonEmptyList +import cats.syntax.semigroup._ import cromwell.core.{WorkflowId, WorkflowMetadataKeys, WorkflowState} import cromwell.database.sql.SqlConverters._ +import cromwell.database.sql.joins.{CallOrWorkflowQuery, CallQuery, WorkflowQuery} import cromwell.database.sql.tables.{MetadataEntry, WorkflowMetadataSummaryEntry} import cromwell.services.ServicesStore import cromwell.services.metadata.MetadataService.{QueryMetadata, WorkflowQueryResponse} import cromwell.services.metadata._ import scala.concurrent.{ExecutionContext, Future} -import scalaz.Scalaz._ -import scalaz.{NonEmptyList, Semigroup} object MetadataDatabaseAccess { private lazy val WorkflowMetadataSummarySemigroup = new Semigroup[WorkflowMetadataSummaryEntry] { - override def append(summary1: WorkflowMetadataSummaryEntry, - summary2: => WorkflowMetadataSummaryEntry): WorkflowMetadataSummaryEntry = { + override def combine(summary1: WorkflowMetadataSummaryEntry, + summary2: WorkflowMetadataSummaryEntry): WorkflowMetadataSummaryEntry = { // Resolve the status if both `this` and `that` have defined statuses. This will evaluate to `None` // if one or both of the statuses is not defined. 
val resolvedStatus = for { - thisStatus <- summary1.workflowStatus map WorkflowState.fromString - thatStatus <- summary2.workflowStatus map WorkflowState.fromString + thisStatus <- summary1.workflowStatus map WorkflowState.withName + thatStatus <- summary2.workflowStatus map WorkflowState.withName } yield (thisStatus |+| thatStatus).toString WorkflowMetadataSummaryEntry( @@ -37,16 +37,17 @@ object MetadataDatabaseAccess { def baseSummary(workflowUuid: String) = WorkflowMetadataSummaryEntry(workflowUuid, None, None, None, None, None) - private implicit class MetadatumEnhancer(val metadatum: MetadataEntry) extends AnyVal { + // If visibility is made `private`, there's a bogus warning about this being unused. + implicit class MetadatumEnhancer(val metadatum: MetadataEntry) extends AnyVal { def toSummary: WorkflowMetadataSummaryEntry = { val base = baseSummary(metadatum.workflowExecutionUuid) metadatum.metadataKey match { - case WorkflowMetadataKeys.Name => base.copy(workflowName = metadatum.metadataValue) - case WorkflowMetadataKeys.Status => base.copy(workflowStatus = metadatum.metadataValue) + case WorkflowMetadataKeys.Name => base.copy(workflowName = metadatum.metadataValue.toRawStringOption) + case WorkflowMetadataKeys.Status => base.copy(workflowStatus = metadatum.metadataValue.toRawStringOption) case WorkflowMetadataKeys.StartTime => - base.copy(startTimestamp = metadatum.metadataValue map OffsetDateTime.parse map { _.toSystemTimestamp }) + base.copy(startTimestamp = metadatum.metadataValue.parseSystemTimestampOption) case WorkflowMetadataKeys.EndTime => - base.copy(endTimestamp = metadatum.metadataValue map OffsetDateTime.parse map { _.toSystemTimestamp }) + base.copy(endTimestamp = metadatum.metadataValue.parseSystemTimestampOption) } } } @@ -76,10 +77,9 @@ trait MetadataDatabaseAccess { val value = metadataEvent.value map { _.value } val valueType = metadataEvent.value map { _.valueType.typeName } val jobKey = key.jobKey map { jk => (jk.callFqn, jk.index, 
jk.attempt) } - MetadataEntry( - workflowUuid, jobKey.map(_._1), jobKey.flatMap(_._2), jobKey.map(_._3), key.key, value, valueType, timestamp) + MetadataEntry(workflowUuid, jobKey.map(_._1), jobKey.flatMap(_._2), jobKey.map(_._3), + key.key, value.toClobOption, valueType, timestamp) } - databaseInterface.addMetadataEntries(metadata) } @@ -92,10 +92,9 @@ trait MetadataDatabaseAccess { } yield MetadataJobKey(callFqn, m.jobIndex, attempt) val key = MetadataKey(workflowId, metadataJobKey, m.metadataKey) - val value = for { - mValue <- m.metadataValue - mType <- m.metadataValueType - } yield MetadataValue(mValue, MetadataType.fromString(mType)) + val value = m.metadataValueType.map(mType => + MetadataValue(m.metadataValue.toRawString, MetadataType.fromString(mType)) + ) MetadataEvent(key, value, m.metadataTimestamp.toSystemOffsetDateTime) } @@ -105,23 +104,28 @@ trait MetadataDatabaseAccess { val uuid = query.workflowId.id.toString val futureMetadata: Future[Seq[MetadataEntry]] = query match { - case MetadataQuery(_, None, None, None, None) => databaseInterface.queryMetadataEntries(uuid) - case MetadataQuery(_, None, Some(key), None, None) => databaseInterface.queryMetadataEntries(uuid, key) - case MetadataQuery(_, Some(jobKey), None, None, None) => + case MetadataQuery(_, None, None, None, None, _) => databaseInterface.queryMetadataEntries(uuid) + case MetadataQuery(_, None, Some(key), None, None, _) => databaseInterface.queryMetadataEntries(uuid, key) + case MetadataQuery(_, Some(jobKey), None, None, None, _) => databaseInterface.queryMetadataEntries(uuid, jobKey.callFqn, jobKey.index, jobKey.attempt) - case MetadataQuery(_, Some(jobKey), Some(key), None, None) => + case MetadataQuery(_, Some(jobKey), Some(key), None, None, _) => databaseInterface.queryMetadataEntries(uuid, key, jobKey.callFqn, jobKey.index, jobKey.attempt) - case MetadataQuery(_, None, None, Some(includeKeys), None) => + case MetadataQuery(_, None, None, Some(includeKeys), None, _) => + 
databaseInterface. + queryMetadataEntriesLikeMetadataKeys(uuid, includeKeys.map(_ + "%"), CallOrWorkflowQuery) + case MetadataQuery(_, Some(MetadataQueryJobKey(callFqn, index, attempt)), None, Some(includeKeys), None, _) => databaseInterface. - queryMetadataEntriesLikeMetadataKeys(uuid, includeKeys.map(_ + "%"), requireEmptyJobKey = false) - case MetadataQuery(_, None, None, None, Some(excludeKeys)) => + queryMetadataEntriesLikeMetadataKeys(uuid, includeKeys.map(_ + "%"), CallQuery(callFqn, index, attempt)) + case MetadataQuery(_, None, None, None, Some(excludeKeys), _) => databaseInterface. - queryMetadataEntryNotLikeMetadataKeys(uuid, excludeKeys.map(_ + "%"), requireEmptyJobKey = false) - case MetadataQuery(_, None, None, Some(includeKeys), Some(excludeKeys)) => Future.failed( + queryMetadataEntryNotLikeMetadataKeys(uuid, excludeKeys.map(_ + "%"), CallOrWorkflowQuery) + case MetadataQuery(_, Some(MetadataQueryJobKey(callFqn, index, attempt)), None, None, Some(excludeKeys), _) => + databaseInterface. + queryMetadataEntryNotLikeMetadataKeys(uuid, excludeKeys.map(_ + "%"), CallQuery(callFqn, index, attempt)) + case MetadataQuery(_, None, None, Some(includeKeys), Some(excludeKeys), _) => Future.failed( new IllegalArgumentException( s"Include/Exclude keys may not be mixed: include = $includeKeys, exclude = $excludeKeys")) - case invalidQuery => Future.failed(new IllegalArgumentException( - s"Include/Exclude keys are only supported when querying the workflow, not when querying calls: $invalidQuery")) + case _ => Future.failed(new IllegalArgumentException(s"Invalid MetadataQuery: $query")) } futureMetadata map metadataToMetadataEvents(query.workflowId) @@ -131,7 +135,7 @@ trait MetadataDatabaseAccess { (implicit ec: ExecutionContext): Future[Seq[MetadataEvent]] = { val uuid = id.id.toString databaseInterface.queryMetadataEntriesLikeMetadataKeys( - uuid, s"${WorkflowMetadataKeys.Outputs}:%".wrapNel, requireEmptyJobKey = true). 
+ uuid, NonEmptyList.of(s"${WorkflowMetadataKeys.Outputs}:%"), WorkflowQuery). map(metadataToMetadataEvents(id)) } @@ -139,19 +143,19 @@ trait MetadataDatabaseAccess { (implicit ec: ExecutionContext): Future[Seq[MetadataEvent]] = { import cromwell.services.metadata.CallMetadataKeys._ - val keys = NonEmptyList(Stdout, Stderr, BackendLogsPrefix + ":%") - databaseInterface.queryMetadataEntriesLikeMetadataKeys(id.id.toString, keys, requireEmptyJobKey = false) map + val keys = NonEmptyList.of(Stdout, Stderr, BackendLogsPrefix + ":%") + databaseInterface.queryMetadataEntriesLikeMetadataKeys(id.id.toString, keys, CallOrWorkflowQuery) map metadataToMetadataEvents(id) } def refreshWorkflowMetadataSummaries()(implicit ec: ExecutionContext): Future[Long] = { databaseInterface.refreshMetadataSummaryEntries(WorkflowMetadataKeys.StartTime, WorkflowMetadataKeys.EndTime, WorkflowMetadataKeys.Name, - WorkflowMetadataKeys.Status, MetadataDatabaseAccess.buildUpdatedSummary) + WorkflowMetadataKeys.Status, WorkflowMetadataKeys.Labels, MetadataDatabaseAccess.buildUpdatedSummary) } def getWorkflowStatus(id: WorkflowId) (implicit ec: ExecutionContext): Future[Option[WorkflowState]] = { - databaseInterface.getWorkflowStatus(id.toString) map { _ map WorkflowState.fromString } + databaseInterface.getWorkflowStatus(id.toString) map { _ map WorkflowState.withName } } def workflowExistsWithId(possibleWorkflowId: String)(implicit ec: ExecutionContext): Future[Boolean] = { @@ -160,24 +164,22 @@ trait MetadataDatabaseAccess { def queryWorkflowSummaries(queryParameters: WorkflowQueryParameters) (implicit ec: ExecutionContext): Future[(WorkflowQueryResponse, Option[QueryMetadata])] = { + + val labelsToQuery = queryParameters.labels.map(label => (label.key, label.value)) + val workflowSummaries = databaseInterface.queryWorkflowSummaries( - queryParameters.statuses, queryParameters.names, queryParameters.ids.map(_.toString), + queryParameters.statuses, queryParameters.names, 
queryParameters.ids.map(_.toString), labelsToQuery, queryParameters.startDate.map(_.toSystemTimestamp), queryParameters.endDate.map(_.toSystemTimestamp), queryParameters.page, queryParameters.pageSize) val workflowSummaryCount = databaseInterface.countWorkflowSummaries( - queryParameters.statuses, queryParameters.names, queryParameters.ids.map(_.toString), + queryParameters.statuses, queryParameters.names, queryParameters.ids.map(_.toString), queryParameters.labels.map(label => (label.key, label.value)), queryParameters.startDate.map(_.toSystemTimestamp), queryParameters.endDate.map(_.toSystemTimestamp)) workflowSummaryCount flatMap { count => workflowSummaries map { workflows => (WorkflowQueryResponse(workflows.toSeq map { workflow => - MetadataService.WorkflowQueryResult( - id = workflow.workflowExecutionUuid, - name = workflow.workflowName, - status = workflow.workflowStatus, - start = workflow.startTimestamp map { _.toSystemOffsetDateTime }, - end = workflow.endTimestamp map { _.toSystemOffsetDateTime }) + MetadataService.WorkflowQueryResult(id = workflow.workflowExecutionUuid, name = workflow.workflowName, status = workflow.workflowStatus, start = workflow.startTimestamp map { _.toSystemOffsetDateTime }, end = workflow.endTimestamp map { _.toSystemOffsetDateTime }) }), //only return metadata if page is defined queryParameters.page map { _ => QueryMetadata(queryParameters.page, queryParameters.pageSize, Option(count)) }) diff --git a/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala index 48a85b71e..2312d0df9 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala @@ -1,44 +1,71 @@ package cromwell.services.metadata.impl -import java.time.OffsetDateTime -import java.util.UUID -import akka.actor.{Actor, ActorLogging, 
ActorRef, Props} +import akka.actor.SupervisorStrategy.{Decider, Directive, Escalate, Resume} +import akka.actor.{Actor, ActorContext, ActorInitializationException, ActorLogging, ActorRef, Cancellable, OneForOneStrategy, Props} +import cats.data.NonEmptyList import com.typesafe.config.{Config, ConfigFactory} +import cromwell.core.Dispatcher.ServiceDispatcher import cromwell.core.WorkflowId import cromwell.services.SingletonServicesStore -import cromwell.services.metadata.MetadataService.{PutMetadataAction, ReadAction, RefreshSummary, ValidateWorkflowIdAndExecute} +import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata.impl.MetadataServiceActor._ import cromwell.services.metadata.impl.MetadataSummaryRefreshActor.{MetadataSummaryFailure, MetadataSummarySuccess, SummarizeMetadata} -import lenthall.config.ScalaConfig._ +import cromwell.services.metadata.impl.WriteMetadataActor.CheckPendingWrites +import cromwell.util.GracefulShutdownHelper +import cromwell.util.GracefulShutdownHelper.ShutdownCommand +import net.ceedubs.ficus.Ficus._ -import scala.concurrent.duration.{Duration, FiniteDuration} +import scala.concurrent.duration._ import scala.language.postfixOps -import scala.util.{Failure, Success, Try} +import scala.util.{Failure, Success} object MetadataServiceActor { val MetadataSummaryRefreshInterval: Option[FiniteDuration] = { - val duration = Duration(ConfigFactory.load().getStringOr("services.MetadataService.metadata-summary-refresh-interval", "2 seconds")) + val duration = Duration(ConfigFactory.load().as[Option[String]]("services.MetadataService.config.metadata-summary-refresh-interval").getOrElse("2 seconds")) if (duration.isFinite()) Option(duration.asInstanceOf[FiniteDuration]) else None } - def props(serviceConfig: Config, globalConfig: Config) = Props(MetadataServiceActor(serviceConfig, globalConfig)) + def props(serviceConfig: Config, globalConfig: Config) = Props(MetadataServiceActor(serviceConfig, 
globalConfig)).withDispatcher(ServiceDispatcher) } case class MetadataServiceActor(serviceConfig: Config, globalConfig: Config) - extends Actor with ActorLogging with MetadataDatabaseAccess with SingletonServicesStore { + extends Actor with ActorLogging with MetadataDatabaseAccess with SingletonServicesStore with GracefulShutdownHelper { + + private val decider: Decider = { + case _: ActorInitializationException => Escalate + case _ => Resume + } + + override val supervisorStrategy = new OneForOneStrategy()(decider) { + override def logFailure(context: ActorContext, child: ActorRef, cause: Throwable, decision: Directive) = { + val childName = if (child == readActor) "Read" else "Write" + log.error(s"The $childName Metadata Actor died unexpectedly, metadata events might have been lost. Restarting it...", cause) + } + } private val summaryActor: Option[ActorRef] = buildSummaryActor val readActor = context.actorOf(ReadMetadataActor.props(), "read-metadata-actor") - val writeActor = context.actorOf(WriteMetadataActor.props(), "write-metadata-actor") - implicit val ec = context.dispatcher + val dbFlushRate = serviceConfig.as[Option[FiniteDuration]]("services.MetadataService.db-flush-rate").getOrElse(5 seconds) + val dbBatchSize = serviceConfig.as[Option[Int]]("services.MetadataService.db-batch-size").getOrElse(200) + val writeActor = context.actorOf(WriteMetadataActor.props(dbBatchSize, dbFlushRate), "write-metadata-actor") + implicit val ec = context.dispatcher + private var summaryRefreshCancellable: Option[Cancellable] = None + summaryActor foreach { _ => self ! 
RefreshSummary } - private def scheduleSummary = { - MetadataSummaryRefreshInterval map { context.system.scheduler.scheduleOnce(_, self, RefreshSummary)(context.dispatcher, self) } + private def scheduleSummary(): Unit = { + MetadataSummaryRefreshInterval foreach { interval => + summaryRefreshCancellable = Option(context.system.scheduler.scheduleOnce(interval, self, RefreshSummary)(context.dispatcher, self)) + } + } + + override def postStop(): Unit = { + summaryRefreshCancellable foreach { _.cancel() } + super.postStop() } private def buildSummaryActor: Option[ActorRef] = { @@ -53,32 +80,25 @@ case class MetadataServiceActor(serviceConfig: Config, globalConfig: Config) actor } - private def validateWorkflowId(validation: ValidateWorkflowIdAndExecute): Unit = { - val possibleWorkflowId = validation.possibleWorkflowId - val callback = validation.validationCallback - - Try(UUID.fromString(possibleWorkflowId)) match { - case Failure(t) => callback.onMalformed(possibleWorkflowId) - case Success(uuid) => - workflowExistsWithId(possibleWorkflowId) onComplete { - case Success(true) => - callback.onRecognized(WorkflowId(uuid)) - case Success(false) => - callback.onUnrecognized(possibleWorkflowId) - case Failure(t) => - callback.onFailure(possibleWorkflowId, t) - } + private def validateWorkflowId(possibleWorkflowId: WorkflowId, sender: ActorRef): Unit = { + workflowExistsWithId(possibleWorkflowId.toString) onComplete { + case Success(true) => sender ! RecognizedWorkflowId + case Success(false) => sender ! UnrecognizedWorkflowId + case Failure(e) => sender ! 
FailedToCheckWorkflowId(new RuntimeException(s"Failed lookup attempt for workflow ID $possibleWorkflowId", e)) } } def receive = { - case action@PutMetadataAction(events) => writeActor forward action - case v: ValidateWorkflowIdAndExecute => validateWorkflowId(v) + case ShutdownCommand => waitForActorsAndShutdown(NonEmptyList.of(writeActor)) + case action: PutMetadataAction => writeActor forward action + case action: PutMetadataActionAndRespond => writeActor forward action + case CheckPendingWrites => writeActor forward CheckPendingWrites + case v: ValidateWorkflowId => validateWorkflowId(v.possibleWorkflowId, sender()) case action: ReadAction => readActor forward action case RefreshSummary => summaryActor foreach { _ ! SummarizeMetadata(sender()) } - case MetadataSummarySuccess => scheduleSummary + case MetadataSummarySuccess => scheduleSummary() case MetadataSummaryFailure(t) => log.error(t, "Error summarizing metadata") - scheduleSummary + scheduleSummary() } } diff --git a/services/src/main/scala/cromwell/services/metadata/impl/MetadataSummaryRefreshActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/MetadataSummaryRefreshActor.scala index ad176567e..172d0012b 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/MetadataSummaryRefreshActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/MetadataSummaryRefreshActor.scala @@ -1,9 +1,9 @@ package cromwell.services.metadata.impl -import java.time.OffsetDateTime import akka.actor.{ActorRef, LoggingFSM, Props} import com.typesafe.config.ConfigFactory +import cromwell.core.Dispatcher.ServiceDispatcher import cromwell.services.SingletonServicesStore import cromwell.services.metadata.impl.MetadataSummaryRefreshActor._ @@ -22,7 +22,7 @@ object MetadataSummaryRefreshActor { case object MetadataSummarySuccess extends MetadataSummaryActorMessage final case class MetadataSummaryFailure(t: Throwable) extends MetadataSummaryActorMessage - def props() = Props(new 
MetadataSummaryRefreshActor()) + def props() = Props(new MetadataSummaryRefreshActor()).withDispatcher(ServiceDispatcher) sealed trait SummaryRefreshState case object WaitingForRequest extends SummaryRefreshState @@ -41,9 +41,9 @@ class MetadataSummaryRefreshActor() startWith(WaitingForRequest, SummaryRefreshData) when (WaitingForRequest) { - case (Event(SummarizeMetadata(respondTo), data)) => + case (Event(SummarizeMetadata(respondTo), _)) => refreshWorkflowMetadataSummaries() onComplete { - case Success(id) => + case Success(_) => respondTo ! MetadataSummarySuccess self ! MetadataSummaryComplete case Failure(t) => diff --git a/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala index ebdc500de..2ad5a9d5c 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala @@ -5,7 +5,7 @@ import cromwell.core.Dispatcher.ApiDispatcher import cromwell.core.{WorkflowId, WorkflowSubmitted} import cromwell.services.SingletonServicesStore import cromwell.services.metadata.MetadataService._ -import cromwell.services.metadata.{MetadataQuery, WorkflowQueryParameters} +import cromwell.services.metadata.{CallMetadataKeys, MetadataQuery, WorkflowQueryParameters} import scala.concurrent.Future import scala.util.{Failure, Success, Try} @@ -19,12 +19,15 @@ class ReadMetadataActor extends Actor with ActorLogging with MetadataDatabaseAcc implicit val ec = context.dispatcher def receive = { - case GetSingleWorkflowMetadataAction(workflowId, includeKeysOption, excludeKeysOption) => - queryAndRespond(MetadataQuery(workflowId, None, None, includeKeysOption, excludeKeysOption)) - case GetMetadataQueryAction(query@MetadataQuery(_, _, _, _, _)) => queryAndRespond(query) + case GetSingleWorkflowMetadataAction(workflowId, includeKeysOption, excludeKeysOption, 
expandSubWorkflows) => + val includeKeys = if (expandSubWorkflows) { + includeKeysOption map { _.::(CallMetadataKeys.SubWorkflowId) } + } else includeKeysOption + queryAndRespond(MetadataQuery(workflowId, None, None, includeKeys, excludeKeysOption, expandSubWorkflows)) + case GetMetadataQueryAction(query@MetadataQuery(_, _, _, _, _, _)) => queryAndRespond(query) case GetStatus(workflowId) => queryStatusAndRespond(workflowId) case GetLogs(workflowId) => queryLogsAndRespond(workflowId) - case query: WorkflowQuery[_] => queryWorkflowsAndRespond(query.uri, query.parameters) + case query: WorkflowQuery => queryWorkflowsAndRespond(query.parameters) case WorkflowOutputs(id) => queryWorkflowOutputsAndRespond(id) } @@ -47,7 +50,7 @@ class ReadMetadataActor extends Actor with ActorLogging with MetadataDatabaseAcc } } - private def queryWorkflowsAndRespond[A](uri: A, rawParameters: Seq[(String, String)]): Unit = { + private def queryWorkflowsAndRespond(rawParameters: Seq[(String, String)]): Unit = { def queryWorkflows: Future[(WorkflowQueryResponse, Option[QueryMetadata])] = { for { // Future/Try to wrap the exception that might be thrown from WorkflowQueryParameters.apply. @@ -59,7 +62,7 @@ class ReadMetadataActor extends Actor with ActorLogging with MetadataDatabaseAcc val sndr = sender() queryWorkflows onComplete { - case Success((response, metadata)) => sndr ! WorkflowQuerySuccess(uri, response, metadata) + case Success((response, metadata)) => sndr ! WorkflowQuerySuccess(response, metadata) case Failure(t) => sndr ! 
WorkflowQueryFailure(t) } } diff --git a/services/src/main/scala/cromwell/services/metadata/impl/WriteMetadataActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/WriteMetadataActor.scala index 0a2c31f4a..cdd6e9237 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/WriteMetadataActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/WriteMetadataActor.scala @@ -1,28 +1,96 @@ package cromwell.services.metadata.impl -import akka.actor.{Actor, ActorLogging, Props} +import akka.actor.{ActorLogging, ActorRef, LoggingFSM, Props} +import cromwell.core.Dispatcher.ServiceDispatcher +import cromwell.core.actor.BatchingDbWriter._ +import cromwell.core.actor.{BatchingDbWriter, BatchingDbWriterActor} import cromwell.services.SingletonServicesStore -import cromwell.services.metadata.MetadataService.{MetadataPutAcknowledgement, MetadataPutFailed, PutMetadataAction} +import cromwell.services.metadata.MetadataEvent +import cromwell.services.metadata.MetadataService._ +import scala.concurrent.ExecutionContext +import scala.concurrent.duration._ import scala.util.{Failure, Success} -object WriteMetadataActor { - def props() = Props(new WriteMetadataActor()) -} -class WriteMetadataActor extends Actor with ActorLogging with MetadataDatabaseAccess with SingletonServicesStore { +class WriteMetadataActor(batchSize: Int, override val dbFlushRate: FiniteDuration) + extends LoggingFSM[BatchingDbWriterState, BatchingDbWriter.BatchingDbWriterData] with ActorLogging with + MetadataDatabaseAccess with SingletonServicesStore with BatchingDbWriterActor { + import WriteMetadataActor._ + + implicit val ec: ExecutionContext = context.dispatcher - implicit val ec = context.dispatcher + startWith(WaitingToWrite, NoData) - def receive = { - case action@PutMetadataAction(events) => - val sndr = sender() - addMetadataEvents(events) onComplete { - case Success(_) => sndr ! 
MetadataPutAcknowledgement(action) - case Failure(t) => - val msg = MetadataPutFailed(action, t) - log.error(t, "Sending {} failure message {}", sndr, msg) - sndr ! msg + when(WaitingToWrite) { + case Event(PutMetadataAction(events), curData) => + curData.addData(events) match { + case newData: HasData[_] if newData.length > batchSize => goto(WritingToDb) using newData + case newData => stay using newData + } + case Event(ScheduledFlushToDb, curData) => + log.debug("Initiating periodic metadata flush to DB") + goto(WritingToDb) using curData + case Event(CheckPendingWrites, NoData) => + sender() ! NoPendingWrites + stay() + case Event(CheckPendingWrites, _: HasData[_]) => + sender() ! HasPendingWrites + stay() + case Event(e: PutMetadataActionAndRespond, curData) => + curData.addData(e) match { + case newData: HasData[_] if newData.length > batchSize => goto(WritingToDb) using newData + case newData => stay using newData } } + + when(WritingToDb) { + case Event(CheckPendingWrites, _) => + sender() ! 
HasPendingWrites + stay() + case Event(ScheduledFlushToDb, curData) => stay using curData + case Event(PutMetadataAction(events), curData) => stay using curData.addData(events) + case Event(FlushBatchToDb, NoData) => + log.debug("Attempted metadata flush to DB but had nothing to write") + goto(WaitingToWrite) using NoData + case Event(FlushBatchToDb, HasData(e)) => + log.debug("Flushing {} metadata events to the DB", e.length) + // blech + //Partitioning the current data into put events that require a response and those that don't + val empty = (Vector.empty[MetadataEvent], Map.empty[Iterable[MetadataEvent], ActorRef]) + val (putWithoutResponse, putWithResponse) = e.toVector.foldLeft(empty)({ + case ((putEvents, putAndRespondEvents), events) => + events match { + case putEvent: MetadataEvent => (putEvents :+ putEvent, putAndRespondEvents) + case PutMetadataActionAndRespond(ev, replyTo) => (putEvents, putAndRespondEvents + (ev -> replyTo)) + } + }) + val allPutEvents: Iterable[MetadataEvent] = putWithoutResponse ++ putWithResponse.keys.flatten + addMetadataEvents(allPutEvents) onComplete { + case Success(_) => + self ! DbWriteComplete + putWithResponse foreach { case(ev, replyTo) => replyTo ! MetadataWriteSuccess(ev) } + case Failure(regerts) => + log.error(regerts, "Failed to properly flush metadata to database") + self ! DbWriteComplete + putWithResponse foreach { case(ev, replyTo) => replyTo ! MetadataWriteFailure(regerts, ev) } + } + stay using NoData + case Event(DbWriteComplete, curData) => + log.debug("Flush of metadata events complete") + goto(WaitingToWrite) using curData + // When receiving a put&respond message, add it to the current data so that when flushing metadata events, we have + // enough information to be able to send an acknowledgement of success/failure of metadata event writes to the original requester. 
+ case Event(PutMetadataActionAndRespond(events, replyTo), curData) => + stay using curData.addData(PutMetadataActionAndRespond(events, replyTo)) + } +} + +object WriteMetadataActor { + def props(batchSize: Int, flushRate: FiniteDuration): Props = Props(new WriteMetadataActor(batchSize, flushRate)).withDispatcher(ServiceDispatcher) + + sealed trait WriteMetadataActorMessage + case object CheckPendingWrites extends WriteMetadataActorMessage with MetadataServiceAction + case object HasPendingWrites extends WriteMetadataActorMessage + case object NoPendingWrites extends WriteMetadataActorMessage } diff --git a/services/src/main/scala/cromwell/services/metadata/metadata.scala b/services/src/main/scala/cromwell/services/metadata/metadata.scala new file mode 100644 index 000000000..9e621cfad --- /dev/null +++ b/services/src/main/scala/cromwell/services/metadata/metadata.scala @@ -0,0 +1,39 @@ +package cromwell.services.metadata + +case class QueryParameter(key: String, value: String) + +object Patterns { + val WorkflowName = """ + (?x) # Turn on comments and whitespace insensitivity. + + ( # Begin capture. + + [a-zA-Z][a-zA-Z0-9_]* # WDL identifier naming pattern of an initial alpha character followed by zero + # or more alphanumeric or underscore characters. + + ) # End capture. + """.trim.r + + val CallFullyQualifiedName = """ + (?x) # Turn on comments and whitespace insensitivity. + + ( # Begin outer capturing group for FQN. + + (?:[a-zA-Z][a-zA-Z0-9_]*) # Inner noncapturing group for top-level workflow name. This is the WDL + # identifier naming pattern of an initial alpha character followed by zero + # or more alphanumeric or underscore characters. + + (?:\.[a-zA-Z][a-zA-Z0-9_]*){1} # Inner noncapturing group for call name, a literal dot followed by a WDL + # identifier. Currently this is quantified to {1} since the call name is + # mandatory and nested workflows are not supported. This could be changed + # to + or a different quantifier if these assumptions change. 
+ + ) # End outer capturing group for FQN. + + + (?: # Begin outer noncapturing group for shard. + \. # Literal dot. + (\d+) # Captured shard digits. + )? # End outer optional noncapturing group for shard. + """.trim.r // The trim is necessary as (?x) must be at the beginning of the regex. +} diff --git a/services/src/main/scala/cromwell/services/metadata/package.scala b/services/src/main/scala/cromwell/services/metadata/package.scala index a6ed193a2..f35408296 100644 --- a/services/src/main/scala/cromwell/services/metadata/package.scala +++ b/services/src/main/scala/cromwell/services/metadata/package.scala @@ -1,42 +1,5 @@ package cromwell.services package object metadata { - case class QueryParameter(key: String, value: String) type QueryParameters = Seq[QueryParameter] - - object Patterns { - val WorkflowName = """ - (?x) # Turn on comments and whitespace insensitivity. - - ( # Begin capture. - - [a-zA-Z][a-zA-Z0-9_]* # WDL identifier naming pattern of an initial alpha character followed by zero - # or more alphanumeric or underscore characters. - - ) # End capture. - """.trim.r - - val CallFullyQualifiedName = """ - (?x) # Turn on comments and whitespace insensitivity. - - ( # Begin outer capturing group for FQN. - - (?:[a-zA-Z][a-zA-Z0-9_]*) # Inner noncapturing group for top-level workflow name. This is the WDL - # identifier naming pattern of an initial alpha character followed by zero - # or more alphanumeric or underscore characters. - - (?:\.[a-zA-Z][a-zA-Z0-9_]*){1} # Inner noncapturing group for call name, a literal dot followed by a WDL - # identifier. Currently this is quantified to {1} since the call name is - # mandatory and nested workflows are not supported. This could be changed - # to + or a different quantifier if these assumptions change. - - ) # End outer capturing group for FQN. - - - (?: # Begin outer noncapturing group for shard. - \. # Literal dot. - (\d+) # Captured shard digits. - )? # End outer optional noncapturing group for shard. 
- """.trim.r // The trim is necessary as (?x) must be at the beginning of the regex. - } } diff --git a/services/src/test/scala/cromwell/services/ServiceRegistryActorSpec.scala b/services/src/test/scala/cromwell/services/ServiceRegistryActorSpec.scala index bb8658b24..ae51d6283 100644 --- a/services/src/test/scala/cromwell/services/ServiceRegistryActorSpec.scala +++ b/services/src/test/scala/cromwell/services/ServiceRegistryActorSpec.scala @@ -124,8 +124,12 @@ class ServiceRegistryActorSpec extends TestKitSuite("service-registry-actor-spec val probe = buildProbeForInitializationException(ConfigFactory.parseString(missingService)) probe.expectMsgPF(AwaitTimeout) { case e: ActorInitializationException => - e.getCause shouldBe a [ClassNotFoundException] - e.getCause.getMessage shouldBe "cromwell.services.FooWhoServiceActor" + // The class not found exception is wrapped in a Runtime Exception giving the name of the faulty service + val cause = e.getCause + cause shouldBe a [RuntimeException] + val classNotFound = cause.getCause + classNotFound shouldBe a [ClassNotFoundException] + classNotFound.getMessage shouldBe "cromwell.services.FooWhoServiceActor" } } diff --git a/services/src/test/scala/cromwell/services/ServicesSpec.scala b/services/src/test/scala/cromwell/services/ServicesSpec.scala index 629079a5c..71d56d39a 100644 --- a/services/src/test/scala/cromwell/services/ServicesSpec.scala +++ b/services/src/test/scala/cromwell/services/ServicesSpec.scala @@ -21,6 +21,7 @@ object ServicesSpec { | debug { | receive = on | } + | guardian-supervisor-strategy = "akka.actor.DefaultSupervisorStrategy" | } | dispatchers { | # A dispatcher for actors performing blocking io operations @@ -39,7 +40,7 @@ object ServicesSpec { | } | | # A dispatcher for engine actors - | # Because backends behaviour is unpredictable (potentially blocking, slow) the engine runs + | # Because backends behavior is unpredictable (potentially blocking, slow) the engine runs | # on its own dispatcher to 
prevent backends from affecting its performance. | engine-dispatcher { | type = Dispatcher diff --git a/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala b/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala index 291b41988..b62669e2e 100644 --- a/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala +++ b/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala @@ -2,111 +2,224 @@ package cromwell.services import java.io.{ByteArrayOutputStream, PrintStream} import java.sql.Connection +import java.time.OffsetDateTime +import javax.sql.rowset.serial.{SerialBlob, SerialClob, SerialException} import better.files._ import com.typesafe.config.ConfigFactory import cromwell.core.Tags._ +import cromwell.core.WorkflowId import cromwell.database.migration.liquibase.LiquibaseUtils import cromwell.database.slick.SlickDatabase +import cromwell.database.sql.SqlConverters._ +import cromwell.database.sql.joins.JobStoreJoin +import cromwell.database.sql.tables.{JobStoreEntry, JobStoreSimpletonEntry, WorkflowStoreEntry} import liquibase.diff.DiffResult import liquibase.diff.output.DiffOutputControl import liquibase.diff.output.changelog.DiffToChangeLog import org.hsqldb.persist.HsqlDatabaseProperties import org.scalactic.StringNormalizations +import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.concurrent.ScalaFutures import org.scalatest.time.{Millis, Seconds, Span} import org.scalatest.{FlatSpec, Matchers} -import slick.driver.JdbcProfile +import slick.jdbc.JdbcProfile +import slick.jdbc.meta._ -import scala.concurrent.duration.Duration +import scala.concurrent.duration._ import scala.concurrent.{Await, ExecutionContext, Future} +import scala.util.Try import scala.xml._ class ServicesStoreSpec extends FlatSpec with Matchers with ScalaFutures with StringNormalizations { - behavior of "ServicesStore" - import ServicesStoreSpec._ implicit val ec = ExecutionContext.global implicit val defaultPatience = 
PatienceConfig(timeout = Span(5, Seconds), interval = Span(100, Millis)) - it should "have the same liquibase and slick schema" in { + behavior of "ServicesStore" + + it should "not deadlock" in { + // Test based on https://github.com/kwark/slick-deadlock/blob/82525fc/src/main/scala/SlickDeadlock.scala + val databaseConfig = ConfigFactory.parseString( + s"""|db.url = "jdbc:hsqldb:mem:$${uniqueSchema};shutdown=false;hsqldb.tx=mvcc" + |db.driver = "org.hsqldb.jdbcDriver" + |db.connectionTimeout = 3000 + |db.numThreads = 2 + |profile = "slick.jdbc.HsqldbProfile$$" + |""".stripMargin) + import ServicesStore.EnhancedSqlDatabase for { - liquibaseDatabase <- databaseForSchemaManager("liquibase").autoClosed - slickDatabase <- databaseForSchemaManager("slick").autoClosed + database <- new SlickDatabase(databaseConfig).initialized.autoClosed } { - compare( - liquibaseDatabase.dataAccess.driver, liquibaseDatabase.database, - slickDatabase.dataAccess.driver, slickDatabase.database) { diffResult => - - // TODO PBE get rid of this after the migration of #789 has run. 
- val oldeTables = Seq( - "EXECUTION", - "EXECUTION_INFO", - "EXECUTION_EVENT", - "FAILURE_EVENT", - "RUNTIME_ATTRIBUTES", - "SYMBOL", - "WORKFLOW_EXECUTION", - "WORKFLOW_EXECUTION_AUX" - ) - - import cromwell.database.migration.liquibase.DiffResultFilter._ - val diffFilters = StandardTypeFilters :+ UniqueIndexFilter - val filteredDiffResult = diffResult - .filterLiquibaseObjects - .filterTableObjects(oldeTables) - .filterChangedObjects(diffFilters) - - val totalChanged = - filteredDiffResult.getChangedObjects.size + - filteredDiffResult.getMissingObjects.size + - filteredDiffResult.getUnexpectedObjects.size - - if (totalChanged > 0) { - val outputStream = new ByteArrayOutputStream - val printStream = new PrintStream(outputStream, true) - val diffOutputControl = new DiffOutputControl(false, false, false, Array.empty) - val diffToChangeLog = new DiffToChangeLog(filteredDiffResult, diffOutputControl) - diffToChangeLog.print(printStream) - val changeSetsScoped = XML.loadString(outputStream.toString) \ "changeSet" \ "_" - val changeSets = changeSetsScoped map stripNodeScope - fail(changeSets.mkString( - "The following changes are in liquibase but not in slick:\n ", - "\n ", - "\nEither add the changes to slick or remove them from liquibase.")) - } + val futures = 1 to 20 map { _ => + val workflowUuid = WorkflowId.randomId().toString + val callFqn = "call.fqn" + val jobIndex = 1 + val jobAttempt = 1 + val jobSuccessful = false + val jobStoreEntry = JobStoreEntry(workflowUuid, callFqn, jobIndex, jobAttempt, jobSuccessful, None, None, None) + val jobStoreJoins = Seq(JobStoreJoin(jobStoreEntry, Seq())) + // NOTE: This test just needs to repeatedly read/write from a table that acts as a PK for a FK. 
+ for { + _ <- database.addJobStores(jobStoreJoins, 10) + queried <- database.queryJobStores(workflowUuid, callFqn, jobIndex, jobAttempt) + _ = queried.get.jobStoreEntry.workflowExecutionUuid should be(workflowUuid) + } yield () } + Future.sequence(futures).futureValue(Timeout(10.seconds)) } } - it should "not deadlock" taggedAs PostMVP ignore { - // // Test based on https://github.com/kwark/slick-deadlock/blob/82525fc/src/main/scala/SlickDeadlock.scala - // val databaseConfig = ConfigFactory.parseString( - // s""" - // |db.url = "jdbc:hsqldb:mem:$${slick.uniqueSchema};shutdown=false;hsqldb.tx=mvcc" - // |db.driver = "org.hsqldb.jdbcDriver" - // |db.numThreads = 2 - // |driver = "slick.driver.HsqldbDriver$$" - // |""".stripMargin) - // - // for { - // dataAccess <- (new SlickDatabase(databaseConfig) with DataAccess).autoClosed - // } { - // val futures = 1 to 20 map { _ => - // val workflowId = WorkflowId.randomId() - // val workflowInfo = createMaterializedEngineWorkflowDescriptor(id = workflowId, workflowSources = test1Sources) - // for { - // _ <- dataAccess.createWorkflow(workflowInfo, test1Sources, Nil, Nil, localBackend) - // _ <- dataAccess.getWorkflowExecutionAndAux(workflowInfo.id) map { result => - // result.execution.workflowExecutionUuid should be(workflowId.toString) - // } - // } yield () - // } - // Future.sequence(futures).futureValue(Timeout(10.seconds)) - // } + "Slick" should behave like testSchemaManager("slick") + + "Liquibase" should behave like testSchemaManager("liquibase") + + def testSchemaManager(schemaManager: String): Unit = { + val otherSchemaManager = if (schemaManager == "slick") "liquibase" else "slick" + + it should s"have the same schema as $otherSchemaManager" in { + for { + actualDatabase <- databaseForSchemaManager(schemaManager).autoClosed + expectedDatabase <- databaseForSchemaManager(otherSchemaManager).autoClosed + } { + compare( + actualDatabase.dataAccess.driver, actualDatabase.database, + 
expectedDatabase.dataAccess.driver, expectedDatabase.database) { diffResult => + + import cromwell.database.migration.liquibase.DiffResultFilter._ + + /* + NOTE: Unique indexes no longer need to be filtered, as WE SHOULD NOT BE USING THEM! + See notes at the bottom of changelog.xml + */ + val diffFilters = StandardTypeFilters + val filteredDiffResult = diffResult + .filterLiquibaseObjects + .filterChangedObjects(diffFilters) + + val totalChanged = + filteredDiffResult.getChangedObjects.size + + filteredDiffResult.getMissingObjects.size + + filteredDiffResult.getUnexpectedObjects.size + + if (totalChanged > 0) { + val outputStream = new ByteArrayOutputStream + val printStream = new PrintStream(outputStream, true) + val diffOutputControl = new DiffOutputControl(false, false, false, Array.empty) + val diffToChangeLog = new DiffToChangeLog(filteredDiffResult, diffOutputControl) + diffToChangeLog.print(printStream) + val changeSetsScoped = XML.loadString(outputStream.toString) \ "changeSet" \ "_" + val changeSets = changeSetsScoped map stripNodeScope + fail(changeSets.mkString( + s"The following changes are in $schemaManager but not in $otherSchemaManager:\n ", + "\n ", + "\nEnsure that the columns/fields exist, with the same lengths in " + + s"$schemaManager and $otherSchemaManager and synchronize the two.")) + } + } + } + } + + it should "match expected generated names" in { + var schemaMetadata: SchemaMetadata = null + + for { + slickDatabase <- databaseForSchemaManager(schemaManager).autoClosed + } { + import slickDatabase.dataAccess.driver.api._ + val schemaMetadataFuture = + for { + tables <- slickDatabase.database.run(MTable.getTables(Option("PUBLIC"), Option("PUBLIC"), None, None)) + workingTables = tables + .filterNot(_.name.name.startsWith("DATABASECHANGELOG")) + // NOTE: MetadataEntry column names are perma-busted due to the large size of the table. 
+ .filterNot(_.name.name == "METADATA_ENTRY") + columns <- slickDatabase.database.run(DBIO.sequence(workingTables.map(_.getColumns))) + indexes <- slickDatabase.database.run(DBIO.sequence(workingTables.map(_.getIndexInfo()))) + primaryKeys <- slickDatabase.database.run(DBIO.sequence(workingTables.map(_.getPrimaryKeys))) + foreignKeys <- slickDatabase.database.run(DBIO.sequence(workingTables.map(_.getExportedKeys))) + } yield SchemaMetadata(tables, columns.flatten, indexes.flatten.filterNot(isGenerated), + primaryKeys.flatten.filterNot(isGenerated), foreignKeys.flatten) + + schemaMetadata = schemaMetadataFuture.futureValue + } + + var misnamed = Seq.empty[String] + + schemaMetadata.primaryKeyMetadata foreach { primaryKey => + val actual = primaryKey.pkName.get + val expected = s"PK_${primaryKey.table.name}" + if (actual != expected) { + misnamed :+= + s"""| PrimaryKey: $actual + | Should be: $expected + |""".stripMargin + } + } + + schemaMetadata.foreignKeyMetadata foreach { foreignKey => + val actual = foreignKey.fkName.get + val expected = s"FK_${foreignKey.fkTable.name}_${foreignKey.fkColumn}" + if (actual != expected) { + misnamed :+= + s"""| ForeignKey: $actual + | Should be: $expected + |""".stripMargin + } + } + + schemaMetadata.indexMetadata.groupBy(getIndexName) foreach { + case (indexName, indexColumns) => + val index = indexColumns.head + val prefix = if (index.nonUnique) "IX" else "UC" + val tableName = index.table.name + val sortedColumns = indexColumns.sortBy(_.ordinalPosition) + val abbrColumns = sortedColumns.map(indexColumn => snakeAbbreviate(indexColumn.column.get)) + + val actual = indexName + val expected = abbrColumns.mkString(s"${prefix}_${tableName}_", "_", "") + + if (actual != expected) { + misnamed :+= + s"""| Index: $actual + | Should be: $expected + |""".stripMargin + } + } + + var missing = Seq.empty[String] + + schemaMetadata.columns foreach { column => + if (!schemaMetadata.existsTableItem(column)) { + missing :+= s" 
${tableClassName(column.tableName)}.${column.itemName}" + } + } + + schemaMetadata.slickItems foreach { databaseItem => + if (!schemaMetadata.existsSlickMapping(databaseItem)) { + missing :+= s" ${slickClassName(databaseItem.tableName)}.${databaseItem.itemName}" + } + } + + if (missing.nonEmpty || misnamed.nonEmpty) { + var failMessage = "" + + if (misnamed.nonEmpty) { + failMessage += misnamed.mkString(s"The following items are misnamed in $schemaManager:\n", "\n", "\n") + } + + if (missing.nonEmpty) { + failMessage += missing.mkString( + s"Based on the schema in $schemaManager, please ensure that the following tables/columns exist:\n", + "\n", "\n") + } + + fail(failMessage) + } + } } "SlickDatabase (hsqldb)" should behave like testWith("database") @@ -119,10 +232,13 @@ class ServicesStoreSpec extends FlatSpec with Matchers with ScalaFutures with St lazy val databaseConfig = ConfigFactory.load.getConfig(configPath) lazy val dataAccess = new SlickDatabase(databaseConfig).initialized - it should "(if hsqldb) have transaction isolation mvcc" taggedAs DbmsTest in { + lazy val getProduct = { import dataAccess.dataAccess.driver.api._ + SimpleDBIO[String](_.connection.getMetaData.getDatabaseProductName) + } - val getProduct = SimpleDBIO[String](_.connection.getMetaData.getDatabaseProductName) + it should "(if hsqldb) have transaction isolation mvcc" taggedAs DbmsTest in { + import dataAccess.dataAccess.driver.api._ //noinspection SqlDialectInspection val getHsqldbTx = sql"""SELECT PROPERTY_VALUE FROM INFORMATION_SCHEMA.SYSTEM_PROPERTIES @@ -140,6 +256,180 @@ class ServicesStoreSpec extends FlatSpec with Matchers with ScalaFutures with St } yield ()).futureValue } + it should "fail to store and retrieve empty clobs" taggedAs DbmsTest in { + // See notes in StringToClobOption + val emptyClob = new SerialClob(Array.empty[Char]) + + val workflowUuid = WorkflowId.randomId().toString + val callFqn = "call.fqn" + val jobIndex = 1 + val jobAttempt = 1 + val jobSuccessful = 
false + val jobStoreEntry = JobStoreEntry(workflowUuid, callFqn, jobIndex, jobAttempt, jobSuccessful, None, None, None) + val jobStoreSimpletonEntries = Seq(JobStoreSimpletonEntry("empty", Option(emptyClob), "WdlString")) + val jobStoreJoins = Seq(JobStoreJoin(jobStoreEntry, jobStoreSimpletonEntries)) + + val future = for { + product <- dataAccess.database.run(getProduct) + _ <- product match { + case "HSQL Database Engine" => + // HSQLDB doesn't crash because it calls getCharacterStream instead of getSubString. + dataAccess.addJobStores(jobStoreJoins, 1) + case "MySQL" => + dataAccess.addJobStores(jobStoreJoins, 1).failed map { exception => + exception should be(a[SerialException]) + exception.getMessage should be("Invalid position in SerialClob object set") + } + } + } yield () + + future.futureValue + } + + it should "fail to store and retrieve empty blobs" taggedAs DbmsTest in { + // See notes in BytesToBlobOption + import eu.timepit.refined.auto._ + import eu.timepit.refined.collection._ + val clob = "".toClob(default = "{}") + val clobOption = "{}".toClobOption + val emptyBlob = new SerialBlob(Array.empty[Byte]) + + val workflowUuid = WorkflowId.randomId().toString + val workflowStoreEntry = WorkflowStoreEntry( + workflowExecutionUuid = workflowUuid, + workflowType = WdlWorkflowType, + workflowTypeVersion = None, + workflowDefinition = clobOption, + workflowInputs = clobOption, + workflowOptions = clobOption, + workflowState = "Testing", + submissionTime = OffsetDateTime.now.toSystemTimestamp, + importsZip = Option(emptyBlob), + customLabels = clob) + + val workflowStoreEntries = Seq(workflowStoreEntry) + + val future = for { + product <- dataAccess.database.run(getProduct) + _ <- product match { + case "HSQL Database Engine" => + // HSQLDB doesn't crash because it calls getBinaryStream instead of getBytes. 
+ dataAccess.addWorkflowStoreEntries(workflowStoreEntries) + case "MySQL" => + dataAccess.addWorkflowStoreEntries(workflowStoreEntries).failed map { exception => + exception should be(a[SerialException]) + exception.getMessage should + be("Invalid arguments: position cannot be less than 1 or greater than the length of the SerialBlob") + } + } + } yield () + + future.futureValue + } + + it should "store and retrieve empty clobs" taggedAs DbmsTest in { + // See notes in StringToClobOption + val workflowUuid = WorkflowId.randomId().toString + val callFqn = "call.fqn" + val jobIndex = 1 + val jobAttempt = 1 + val jobSuccessful = false + val jobStoreEntry = JobStoreEntry(workflowUuid, callFqn, jobIndex, jobAttempt, jobSuccessful, None, None, None) + val jobStoreSimpletonEntries = Seq( + JobStoreSimpletonEntry("empty", "".toClobOption, "WdlString"), + JobStoreSimpletonEntry("aEntry", "a".toClobOption, "WdlString") + ) + val jobStoreJoins = Seq(JobStoreJoin(jobStoreEntry, jobStoreSimpletonEntries)) + + val future = for { + _ <- dataAccess.addJobStores(jobStoreJoins, 1) + queried <- dataAccess.queryJobStores(workflowUuid, callFqn, jobIndex, jobAttempt) + _ = { + val jobStoreJoin = queried.get + jobStoreJoin.jobStoreEntry.workflowExecutionUuid should be(workflowUuid) + + val emptyEntry = jobStoreJoin.jobStoreSimpletonEntries.find(_.simpletonKey == "empty").get + emptyEntry.simpletonValue.toRawString should be("") + + val aEntry = jobStoreJoin.jobStoreSimpletonEntries.find(_.simpletonKey == "aEntry").get + aEntry.simpletonValue.toRawString should be("a") + } + _ <- dataAccess.removeJobStores(Seq(workflowUuid)) + } yield () + future.futureValue + } + + it should "store and retrieve empty blobs" taggedAs DbmsTest in { + // See notes in BytesToBlobOption + import eu.timepit.refined.auto._ + import eu.timepit.refined.collection._ + + val testWorkflowState = "Testing" + val clob = "".toClob(default = "{}") + val clobOption = "{}".toClobOption + + val emptyWorkflowUuid = 
WorkflowId.randomId().toString + val emptyWorkflowStoreEntry = WorkflowStoreEntry( + workflowExecutionUuid = emptyWorkflowUuid, + workflowType = WdlWorkflowType, + workflowTypeVersion = None, + workflowDefinition = clobOption, + workflowInputs = clobOption, + workflowOptions = clobOption, + workflowState = testWorkflowState, + submissionTime = OffsetDateTime.now.toSystemTimestamp, + importsZip = Option(Array.empty[Byte]).toBlobOption, + customLabels = clob) + + val noneWorkflowUuid = WorkflowId.randomId().toString + val noneWorkflowStoreEntry = WorkflowStoreEntry( + workflowExecutionUuid = noneWorkflowUuid, + workflowType = WdlWorkflowType, + workflowTypeVersion = None, + workflowDefinition = clobOption, + workflowInputs = clobOption, + workflowOptions = clobOption, + workflowState = testWorkflowState, + submissionTime = OffsetDateTime.now.toSystemTimestamp, + importsZip = None, + customLabels = clob) + + val aByte = 'a'.toByte + val aByteWorkflowUuid = WorkflowId.randomId().toString + val aByteWorkflowStoreEntry = WorkflowStoreEntry( + workflowExecutionUuid = aByteWorkflowUuid, + workflowType = WdlWorkflowType, + workflowTypeVersion = None, + workflowDefinition = clobOption, + workflowInputs = clobOption, + workflowOptions = clobOption, + workflowState = testWorkflowState, + submissionTime = OffsetDateTime.now.toSystemTimestamp, + importsZip = Option(Array(aByte)).toBlobOption, + customLabels = clob) + + val workflowStoreEntries = Seq(emptyWorkflowStoreEntry, noneWorkflowStoreEntry, aByteWorkflowStoreEntry) + + val future = for { + _ <- dataAccess.addWorkflowStoreEntries(workflowStoreEntries) + queried <- dataAccess.queryWorkflowStoreEntries(Int.MaxValue, testWorkflowState, testWorkflowState) + _ = { + val emptyEntry = queried.find(_.workflowExecutionUuid == emptyWorkflowUuid).get + emptyEntry.importsZip.toBytesOption should be(None) + + val noneEntry = queried.find(_.workflowExecutionUuid == noneWorkflowUuid).get + noneEntry.importsZip.toBytesOption should 
be(None) + + val aByteEntry = queried.find(_.workflowExecutionUuid == aByteWorkflowUuid).get + aByteEntry.importsZip.toBytesOption.get.toSeq should be(Seq(aByte)) + } + _ <- dataAccess.removeWorkflowStoreEntry(emptyWorkflowUuid) + _ <- dataAccess.removeWorkflowStoreEntry(noneWorkflowUuid) + _ <- dataAccess.removeWorkflowStoreEntry(aByteWorkflowUuid) + } yield () + future.futureValue + } + it should "close the database" taggedAs DbmsTest in { dataAccess.close() } @@ -160,7 +450,8 @@ object ServicesStoreSpec { s""" |db.url = "jdbc:hsqldb:mem:$${uniqueSchema};shutdown=false;hsqldb.tx=mvcc" |db.driver = "org.hsqldb.jdbcDriver" - |driver = "slick.driver.HsqldbDriver$$" + |db.connectionTimeout = 3000 + |profile = "slick.jdbc.HsqldbProfile$$" |liquibase.updateSchema = false |""".stripMargin) val database = new SlickDatabase(databaseConfig) @@ -177,9 +468,7 @@ object ServicesStoreSpec { (referenceProfile: ReferenceProfile, referenceDatabase: ReferenceProfile#Backend#Database, comparisonProfile: ComparisonProfile, - comparisonDatabase: ComparisonProfile#Backend#Database)(block: DiffResult => T) - (implicit executor: ExecutionContext): T = { - + comparisonDatabase: ComparisonProfile#Backend#Database)(block: DiffResult => T): T = { withConnections(referenceProfile, referenceDatabase, comparisonProfile, comparisonDatabase) { LiquibaseUtils.compare(_, _)(block) } @@ -227,4 +516,94 @@ object ServicesStoreSpec { } } } + + private val SnakeRegex = "_([a-z])".r + + private def snakeToCamel(value: String): String = { + SnakeRegex.replaceAllIn(value.toLowerCase, _.group(1).toUpperCase) + } + + private def snakeAbbreviate(value: String): String = { + SnakeRegex.findAllMatchIn("_" + value.toLowerCase).map(_.group(1)).mkString("").toUpperCase + } + + private val SlickPrimaryKeyRegex = """SYS_PK_\d+""".r + + private def isGenerated(primaryKey: MPrimaryKey): Boolean = { + primaryKey.pkName.get match { + case SlickPrimaryKeyRegex(_*) => true + case _ => false + } + } + + private val 
LiquibasePrimaryKeyIndexRegex = """SYS_IDX_PK_[A-Z_]+_\d+""".r + private val SlickPrimaryKeyIndexRegex = """SYS_IDX_SYS_PK_\d+_\d+""".r + private val SlickForeignKeyIndexRegex = """SYS_IDX_\d+""".r + + private def isGenerated(index: MIndexInfo): Boolean = { + index.indexName.get match { + case LiquibasePrimaryKeyIndexRegex(_*) => true + case SlickPrimaryKeyIndexRegex(_*) => true + case SlickForeignKeyIndexRegex(_*) => true + case _ => false + } + } + + private def tableClassName(tableName: String) = s"cromwell.database.sql.tables.$tableName" + + private def slickClassName(tableName: String) = + s"cromwell.database.slick.tables.${tableName}Component$$${tableName.replace("Entry", "Entries")}" + + private def getIndexName(index: MIndexInfo) = index.indexName.get.replaceAll("(^SYS_IDX_|_\\d+$)", "") + + case class TableClass(tableName: String) { + private def getClass(name: String): Try[Class[_]] = Try(Class.forName(name)) + + private lazy val tableColumns = getClass(tableClassName(tableName)).map(_.getDeclaredFields).getOrElse(Array.empty) + private lazy val slickMapping = getClass(slickClassName(tableName)).map(_.getDeclaredMethods).getOrElse(Array.empty) + + def existsTableField(name: String): Boolean = tableColumns.exists(_.getName == name) + + def existsSlickMapping(name: String): Boolean = slickMapping.exists(_.getName == name) + } + + case class DatabaseItem(tableName: String, itemName: String) + + case class SchemaMetadata(tableMetadata: Seq[MTable], columnMetadata: Seq[MColumn], indexMetadata: Seq[MIndexInfo], + primaryKeyMetadata: Seq[MPrimaryKey], foreignKeyMetadata: Seq[MForeignKey]) { + lazy val tables: Seq[TableClass] = tableMetadata.map({ table => + val tableName = snakeToCamel(table.name.name).capitalize + TableClass(tableName) + }).distinct + + lazy val columns: Seq[DatabaseItem] = columnMetadata.map({ column => + val tableName = snakeToCamel(column.table.name).capitalize + val columnName = snakeToCamel(column.name) + DatabaseItem(tableName, 
columnName) + }).distinct + + lazy val indexes: Seq[DatabaseItem] = indexMetadata.map({ index => + val tableName = snakeToCamel(index.table.name).capitalize + val indexName = snakeToCamel(getIndexName(index)) + DatabaseItem(tableName, indexName) + }).distinct + + lazy val foreignKeys: Seq[DatabaseItem] = foreignKeyMetadata.map({ foreignKey => + val tableName = snakeToCamel(foreignKey.fkTable.name).capitalize + val indexName = snakeToCamel(foreignKey.fkName.get) + DatabaseItem(tableName, indexName) + }).distinct + + lazy val slickItems: Seq[DatabaseItem] = columns ++ indexes ++ foreignKeys + + def existsTableItem(tableItem: DatabaseItem): Boolean = { + tables.find(_.tableName == tableItem.tableName).exists(_.existsTableField(tableItem.itemName)) + } + + def existsSlickMapping(tableItem: DatabaseItem): Boolean = { + tables.find(_.tableName == tableItem.tableName).exists(_.existsSlickMapping(tableItem.itemName)) + } + } + + private val WdlWorkflowType = Option("WDL") } diff --git a/services/src/test/scala/cromwell/services/keyvalue/InMemoryKvServiceActor.scala b/services/src/test/scala/cromwell/services/keyvalue/InMemoryKvServiceActor.scala new file mode 100644 index 000000000..96ebd30c3 --- /dev/null +++ b/services/src/test/scala/cromwell/services/keyvalue/InMemoryKvServiceActor.scala @@ -0,0 +1,21 @@ +package cromwell.services.keyvalue + +import cromwell.services.keyvalue.KeyValueServiceActor._ + +import scala.concurrent.{ExecutionContextExecutor, Future} + +final class InMemoryKvServiceActor extends KeyValueServiceActor { + override implicit val ec: ExecutionContextExecutor = context.dispatcher + + var kvStore = Map.empty[ScopedKey, Option[String]] + + override def doGet(get: KvGet): Future[KvResponse] = kvStore.get(get.key).map(KvPair(get.key, _)) match { + case Some(kvPair) => Future.successful(kvPair) + case None => Future.successful(KvKeyLookupFailed(get)) + } + + override def doPut(put: KvPut): Future[KvResponse] = { + kvStore += (put.key -> put.pair.value) + 
Future.successful(KvPutSuccess(put)) + } +} \ No newline at end of file diff --git a/services/src/test/scala/cromwell/services/keyvalue/KvClientSpec.scala b/services/src/test/scala/cromwell/services/keyvalue/KvClientSpec.scala new file mode 100644 index 000000000..2a38f46e1 --- /dev/null +++ b/services/src/test/scala/cromwell/services/keyvalue/KvClientSpec.scala @@ -0,0 +1,60 @@ +package cromwell.services.keyvalue + +import java.io.IOException + +import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem} +import akka.testkit.{TestActorRef, TestKit, TestProbe} +import cromwell.services.keyvalue.KeyValueServiceActor._ +import org.scalatest.{FlatSpecLike, Matchers} + +import scala.concurrent.Await +import scala.concurrent.duration._ +import scala.language.postfixOps + +class KvClientSpec extends TestKit(ActorSystem("KvClientSpec")) with FlatSpecLike with Matchers { + + implicit val ec = system.dispatcher + + behavior of "KvClient" + + it should "Correctly forward multiple requests and responses" in { + val serviceActorProbe = TestProbe() + val kvTestClient = TestActorRef(new KvTestClientActor(serviceActorProbe.ref)) + + val scopedKey1 = ScopedKey(null, null, "key1") + val scopedKey2 = ScopedKey(null, null, "key2") + val putRequest = KvPut(KvPair(scopedKey1, Some("value1"))) + val getRequest = KvGet(scopedKey2) + val putResponse = KvFailure(putRequest, new IOException()) + val getResponse = KvPair(scopedKey2, Some("value2")) + + val requests = Seq(putRequest, getRequest) + val futureResult = kvTestClient.underlyingActor.makeKvRequest(requests) + + serviceActorProbe.expectMsgAllOf(putRequest, getRequest) + serviceActorProbe.expectNoMsg(max = 50 milliseconds) + + kvTestClient.underlyingActor.currentKvClientRequests.size should be(2) + + kvTestClient.tell(getResponse, sender = serviceActorProbe.ref) + serviceActorProbe.expectNoMsg(max = 50 milliseconds) + futureResult.isCompleted should be(false) + + kvTestClient.underlyingActor.currentKvClientRequests.size should 
be(1) + + kvTestClient.tell(putResponse, sender = serviceActorProbe.ref) + serviceActorProbe.expectNoMsg(max = 50 milliseconds) + + // Make sure the future completes promptly and the original order is preserved: + Await.result(futureResult, atMost = 100 milliseconds) should be(Seq(putResponse, getResponse)) + serviceActorProbe.expectNoMsg(max = 50 milliseconds) + + kvTestClient.underlyingActor.currentKvClientRequests.size should be(0) + } +} + +class KvTestClientActor(val serviceRegistryActor: ActorRef) extends Actor with ActorLogging with KvClient { + override def receive = kvClientReceive orElse Actor.ignoringBehavior +} + + diff --git a/services/src/test/scala/cromwell/services/metadata/MetadataServiceSpec.scala b/services/src/test/scala/cromwell/services/metadata/MetadataServiceSpec.scala index 0ef7d1976..a655234ff 100644 --- a/services/src/test/scala/cromwell/services/metadata/MetadataServiceSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/MetadataServiceSpec.scala @@ -1,11 +1,14 @@ package cromwell.services.metadata +import java.util.UUID + import cromwell.core.WorkflowId +import lenthall.exception.AggregatedException import org.scalactic.Equality import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{FlatSpec, Matchers} -import wdl4s.types.{WdlArrayType, WdlMapType, WdlStringType} -import wdl4s.values._ +import wdl4s.wdl.types.{WdlArrayType, WdlMapType, WdlStringType} +import wdl4s.wdl.values._ class MetadataServiceSpec extends FlatSpec with Matchers with TableDrivenPropertyChecks { @@ -74,4 +77,135 @@ class MetadataServiceSpec extends FlatSpec with Matchers with TableDrivenPropert } } + // For the metadata event tests! 
+ val failureMessageRegex = "([^\\[]*)\\[([0-9]+)\\](.*)\\:message".r + val pathToFailures = "path:to:failures" + + it should "convert an exception into a failure event with an empty causedBy block" in { + import MetadataService.throwableToMetadataEvents + + val workflowId = WorkflowId(UUID.randomUUID()) + val mdkey = MetadataKey(workflowId, None, pathToFailures) + + val tMsg = "The Oscars suck!" + val t = new RuntimeException(tMsg) + + val events = throwableToMetadataEvents(mdkey, t) + events.size should be(2) + val (keyPrefix, causedBys, failureIndex) = validateExceptionMessage(events.head, workflowId, tMsg) + keyPrefix should be(pathToFailures) + causedBys should be("") + events(1).key.key should be(s"$keyPrefix[$failureIndex]:causedBy[]") + events(1).key.workflowId should be(workflowId) + events(1).value should be(None) + + } + + it should "convert nested exceptions into a sequence of failure events" in { + import MetadataService.throwableToMetadataEvents + + val workflowId = WorkflowId(UUID.randomUUID()) + val mdkey = MetadataKey(workflowId, None, pathToFailures) + + val innerCauseMsg = "Envelope malfunctions" + val innerCause = new RuntimeException(innerCauseMsg) + + val causeMsg = "Wrong recipients" + val cause = new RuntimeException(causeMsg, innerCause) + + val tMsg = "The Oscars suck!" 
+ val t = new RuntimeException(tMsg, cause) + + val events = throwableToMetadataEvents(mdkey, t) + events.size should be(4) + + val (outerPrefix, outerCausedBys, outerFailureId) = validateExceptionMessage(events.head, workflowId, tMsg) + val (cause1Prefix, cause1CausedBys, cause1FailureId) = validateExceptionMessage(events(1), workflowId, causeMsg) + val (cause2Prefix, cause2CausedBys, cause2FailureId) = validateExceptionMessage(events(2), workflowId, innerCauseMsg) + events(3).key.key should be(s"$cause2Prefix[$cause2FailureId]$cause2CausedBys:causedBy[]") + + outerPrefix should be(pathToFailures) + cause1Prefix should be(pathToFailures) + cause2Prefix should be(pathToFailures) + outerCausedBys should be("") + cause1CausedBys should be(":causedBy[0]") + cause2CausedBys should be(":causedBy[0]:causedBy[0]") + cause1FailureId should be(outerFailureId) + cause2FailureId should be(cause1FailureId) + } + + it should "convert aggregated exceptions into a sequence of failure events" in { + import MetadataService.throwableToMetadataEvents + + val workflowId = WorkflowId(UUID.randomUUID()) + val mdkey = MetadataKey(workflowId, None, "path:to:failures") + + val innerCauseMsg = "Envelope malfunctions" + val innerCause = new RuntimeException(innerCauseMsg) + + val cause1Msg = "Wrong recipients" + val cause1 = new RuntimeException(cause1Msg) + val cause2Msg = "Self congratulation" + val cause2 = new RuntimeException(cause2Msg, innerCause) + val cause3Msg = "The Globes are better anyway" + val cause3 = new RuntimeException(cause3Msg) + + val causeContext = "Compound Entertainment Failure" + val cause = new AggregatedException(causeContext, List(cause1, cause2, cause3)) + + val tMsg = "The Oscars suck!" 
+ val t = new RuntimeException(tMsg, cause) + + val events = throwableToMetadataEvents(mdkey, t) + events.size should be(9) + + // Outer runtime exception: + val (runtimePrefix, runtimeCausedBys, runtimeFailureId) = validateExceptionMessage(events.head, workflowId, tMsg) + runtimePrefix should be(pathToFailures) + runtimeCausedBys should be("") + + // Aggregate exception: + val (aggregatePrefix, aggregateCausedBys, aggregateFailureId) = validateExceptionMessage(events(1), workflowId, causeContext) + aggregatePrefix should be(pathToFailures) + aggregateCausedBys should be(":causedBy[0]") + aggregateFailureId should be(runtimeFailureId) + + // cause1, caused by [] + val (cause1Prefix, cause1CausedBys, cause1FailureId) = validateExceptionMessage(events(2), workflowId, cause1Msg) + cause1Prefix should be(pathToFailures) + cause1CausedBys should be(":causedBy[0]:causedBy[0]") + cause1FailureId should be(runtimeFailureId) + events(3).key.key should be(s"$cause1Prefix[$runtimeFailureId]$cause1CausedBys:causedBy[]") + + // cause2, caused by innerCause caused by [] + val (cause2Prefix, cause2CausedBys, cause2FailureId) = validateExceptionMessage(events(4), workflowId, cause2Msg) + val (innerCausePrefix, innerCauseCausedBys, innerCauseFailureIds) = validateExceptionMessage(events(5), workflowId, innerCauseMsg) + cause2Prefix should be(pathToFailures) + cause2CausedBys should be(":causedBy[0]:causedBy[1]") + cause2FailureId should be(runtimeFailureId) + innerCausePrefix should be(pathToFailures) + innerCauseCausedBys should be(":causedBy[0]:causedBy[1]:causedBy[0]") + innerCauseFailureIds should be(runtimeFailureId) + events(6).key.key should be(s"$innerCausePrefix[$runtimeFailureId]$innerCauseCausedBys:causedBy[]") + + // cause3, caused by [] + val (cause3Prefix, cause3CausedBys, cause3FailureId) = validateExceptionMessage(events(7), workflowId, cause3Msg) + cause3Prefix should be(pathToFailures) + cause3CausedBys should be(":causedBy[0]:causedBy[2]") + cause3FailureId 
should be(runtimeFailureId) + events(8).key.key should be(s"$cause3Prefix[$cause3FailureId]$cause3CausedBys:causedBy[]") + } + + def validateExceptionMessage(event: MetadataEvent, workflowId: WorkflowId, message: String) = event match { + case MetadataEvent(k, Some(MetadataValue(v, _)), _) => + k.workflowId should be(workflowId) + v should be(message) + + // Return the ID so that we can check for uniqueness later: + k.key match { + case failureMessageRegex(prefix, failureIndex, causedBys) => (prefix, causedBys, failureIndex) + case _ => fail("Unexpected failure key format: " + k.key) + } + case _ => fail("throwableToMetadataEvents generated a metadata event without a metadata value! Bad throwableToMetadataEvents! Very bad!") + } } diff --git a/services/src/test/scala/cromwell/services/metadata/WorkflowQueryParametersSpec.scala b/services/src/test/scala/cromwell/services/metadata/WorkflowQueryParametersSpec.scala index 50ae75fe0..3f503f075 100644 --- a/services/src/test/scala/cromwell/services/metadata/WorkflowQueryParametersSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/WorkflowQueryParametersSpec.scala @@ -2,10 +2,10 @@ package cromwell.services.metadata import java.time.OffsetDateTime +import cats.data.Validated._ +import cromwell.core.labels.Label import cromwell.services.metadata.WorkflowQueryKey._ -import org.scalatest.{WordSpec, Matchers} - -import scalaz.{Name => _, _} +import org.scalatest.{Matchers, WordSpec} class WorkflowQueryParametersSpec extends WordSpec with Matchers { @@ -17,13 +17,14 @@ class WorkflowQueryParametersSpec extends WordSpec with Matchers { "be accepted if empty" in { val result = WorkflowQueryParameters.runValidation(Seq.empty) result match { - case Success(r) => + case Valid(r) => r.startDate should be('empty) r.endDate should be('empty) r.names should be('empty) r.statuses should be('empty) - case Failure(fs) => - throw new RuntimeException(fs.list.toList.mkString(", ")) + r.labels should be ('empty) + case 
Invalid(fs) => + throw new RuntimeException(fs.toList.mkString(", ")) } } @@ -33,18 +34,20 @@ class WorkflowQueryParametersSpec extends WordSpec with Matchers { Status.name -> "Succeeded", Name.name -> "my_other_workflow", Status.name -> "Running", + LabelKeyValue.name -> "label-key:label-value", StartDate.name -> StartDateString, EndDate.name -> EndDateString ) val result = WorkflowQueryParameters.runValidation(rawParameters) result match { - case Success(r) => + case Valid(r) => r.startDate.get.toInstant should equal(OffsetDateTime.parse(StartDateString).toInstant) r.endDate.get.toInstant should equal(OffsetDateTime.parse(EndDateString).toInstant) r.names should be(Set("my_workflow", "my_other_workflow")) r.statuses should be(Set("Succeeded", "Running")) - case Failure(fs) => - throw new RuntimeException(fs.list.toList.mkString(", ")) + r.labels should be(Set(Label("label-key", "label-value"))) + case Invalid(fs) => + throw new RuntimeException(fs.toList.mkString(", ")) } } @@ -55,11 +58,11 @@ class WorkflowQueryParametersSpec extends WordSpec with Matchers { ) val result = WorkflowQueryParameters.runValidation(rawParameters) result match { - case Success(r) => + case Valid(r) => throw new RuntimeException(s"Unexpected success: $r") - case Failure(fs) => - fs.list.toList should have size 1 - fs.list.toList.head should include("Unrecognized query keys: Bogosity") + case Invalid(fs) => + fs.toList should have size 1 + fs.toList.head should include("Unrecognized query keys: Bogosity") } } @@ -71,11 +74,11 @@ class WorkflowQueryParametersSpec extends WordSpec with Matchers { ) val result = WorkflowQueryParameters.runValidation(rawParameters) result match { - case Success(r) => + case Valid(r) => throw new RuntimeException(s"Unexpected success: $r") - case Failure(fs) => - fs.list.toList should have size 1 - fs.list.toList.head should include("Specified start date is after specified end date") + case Invalid(fs) => + fs.toList should have size 1 + fs.toList.head 
should include("Specified start date is after specified end date") } } @@ -88,11 +91,11 @@ class WorkflowQueryParametersSpec extends WordSpec with Matchers { ) val result = WorkflowQueryParameters.runValidation(rawParameters) result match { - case Success(r) => + case Valid(r) => throw new RuntimeException(s"Unexpected success: $r") - case Failure(fs) => - fs.list.toList should have size 1 - fs.list.toList.head should include("Name values do not match allowed workflow naming pattern") + case Invalid(fs) => + fs.toList should have size 1 + fs.toList.head should include("Name values do not match allowed workflow naming pattern") } } @@ -104,11 +107,11 @@ class WorkflowQueryParametersSpec extends WordSpec with Matchers { ) val result = WorkflowQueryParameters.runValidation(rawParameters) result match { - case Success(r) => + case Valid(r) => throw new RuntimeException(s"Unexpected success: $r") - case Failure(fs) => - fs.list.toList should have size 1 - fs.list.toList.head should include("does not parse as a datetime") + case Invalid(fs) => + fs.toList should have size 1 + fs.toList.head should include("does not parse as a datetime") } } @@ -119,11 +122,11 @@ class WorkflowQueryParametersSpec extends WordSpec with Matchers { ) val result = WorkflowQueryParameters.runValidation(rawParameters) result match { - case Success(r) => + case Valid(r) => throw new RuntimeException(s"Unexpected success: $r") - case Failure(fs) => - fs.list.toList should have size 1 - fs.list.toList.head should include("at most one is allowed") + case Invalid(fs) => + fs.toList should have size 1 + fs.toList.head should include("at most one is allowed") } } @@ -135,11 +138,43 @@ class WorkflowQueryParametersSpec extends WordSpec with Matchers { ) val result = WorkflowQueryParameters.runValidation(rawParameters) result match { - case Success(r) => + case Valid(r) => + throw new RuntimeException(s"Unexpected success: $r") + case Invalid(fs) => + fs.toList should have size 1 + fs.toList.head should 
be("Unrecognized status values: Moseying") + } + } + + "reject labels with invalid format" in { + val badLabelKey = "0-label-key" + val rawParameters = Seq( + LabelKeyValue.name -> "label-key:label-value", + LabelKeyValue.name -> s"$badLabelKey:label-value" + ) + val result = WorkflowQueryParameters.runValidation(rawParameters) + result match { + case Valid(r) => + throw new RuntimeException(s"Unexpected success: $r") + case Invalid(fs) => + fs.toList should have size 1 + fs.toList.head should include(s"Invalid label: `$badLabelKey` did not match the regex ${Label.LabelKeyRegex}") + } + } + + "reject bad label syntax" in { + val badLabelSyntax = "label-keyLabel-value" + val rawParameters = Seq( + LabelKeyValue.name -> "label-key:label-value", + LabelKeyValue.name -> badLabelSyntax + ) + val result = WorkflowQueryParameters.runValidation(rawParameters) + result match { + case Valid(r) => throw new RuntimeException(s"Unexpected success: $r") - case Failure(fs) => - fs.list.toList should have size 1 - fs.list.toList.head should be("Unrecognized status values: Moseying") + case Invalid(fs) => + fs.toList should have size 1 + fs.toList.head should include("Label values do not match allowed pattern label-key:label-value") } } @@ -152,13 +187,13 @@ class WorkflowQueryParametersSpec extends WordSpec with Matchers { ) val result = WorkflowQueryParameters.runValidation(rawParameters) result match { - case Success(r) => + case Valid(r) => throw new RuntimeException(s"Unexpected success: $r") - case Failure(fs) => - fs.list.toList should have size 3 - fs.list.toList find { _ == "Unrecognized status values: Moseying" } getOrElse fail - fs.list.toList find { _ contains "does not parse as a datetime" } getOrElse fail - fs.list.toList find { _ contains "Name values do not match allowed workflow naming pattern" } getOrElse fail + case Invalid(fs) => + fs.toList should have size 3 + fs.toList find { _ == "Unrecognized status values: Moseying" } getOrElse fail + fs.toList find { _ 
contains "does not parse as a datetime" } getOrElse fail + fs.toList find { _ contains "Name values do not match allowed workflow naming pattern" } getOrElse fail } } } diff --git a/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala index f9f555abf..85e077652 100644 --- a/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala @@ -5,6 +5,7 @@ import java.time.OffsetDateTime import com.typesafe.config.ConfigFactory import cromwell.core.Tags.DbmsTest import cromwell.core._ +import cromwell.core.labels.Label import cromwell.database.slick.SlickDatabase import cromwell.services.ServicesStore import cromwell.services.metadata._ @@ -50,14 +51,18 @@ class MetadataDatabaseAccessSpec extends FlatSpec with Matchers with ScalaFuture dataAccess.addMetadataEvents(events) } - def baseWorkflowMetadata(name: String): Future[WorkflowId] = { + def baseWorkflowMetadata(name: String, labels: Set[Label] = Set.empty): Future[WorkflowId] = { val workflowId = WorkflowId.randomId() + val defaultLabels = Set(Label("cromwell-workflow-name", name)) + val labelMetadata = (labels ++ defaultLabels).map(label => (s"${WorkflowMetadataKeys.Labels}:${label.key}", label.value)).toArray + val workflowKey = MetadataKey(workflowId, jobKey = None, key = null) def keyAndValue(name: String) = Array( (WorkflowMetadataKeys.StartTime, OffsetDateTime.now.toString), (WorkflowMetadataKeys.Status, WorkflowSubmitted.toString), (WorkflowMetadataKeys.Name, name) - ) + ) ++ labelMetadata + publishMetadataEvents(workflowKey, keyAndValue(name)).map(_ => workflowId) } @@ -66,27 +71,57 @@ class MetadataDatabaseAccessSpec extends FlatSpec with Matchers with ScalaFuture workflow1Id <- baseWorkflowMetadata(Workflow1Name) //get metadata when page and pagesize are specified 
_ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq( - WorkflowQueryKey.Page.name -> "1", WorkflowQueryKey.PageSize.name -> "50"))) map { case (response, meta) => + WorkflowQueryKey.Page.name -> "1", WorkflowQueryKey.PageSize.name -> "50"))) map { case (response @ _, meta) => meta match { - case Some(metadata) => + case Some(_) => case None => fail("Should have metadata when page and pagesize are specified.") } } //don't get metadata when page and pagesize are not specified _ <- dataAccess.queryWorkflowSummaries( - WorkflowQueryParameters(Seq())) map { case(response, meta) => + WorkflowQueryParameters(Seq())) map { case(response @ _, meta) => meta match { - case Some(metadata) => fail("Should not have metadata when page and pagesize are not specified") + case Some(_) => fail("Should not have metadata when page and pagesize are not specified") case None => } } } yield()).futureValue } + + it should "sort metadata events by timestamp from older to newer" taggedAs DbmsTest in { + def unorderedEvents(id: WorkflowId): Future[Vector[MetadataEvent]] = { + val workflowKey = MetadataKey(id, jobKey = None, key = null) + val now = OffsetDateTime.now() + val yesterday = now.minusDays(1) + val tomorrow = now.plusDays(1) + + val yesterdayEvent = MetadataEvent(workflowKey.copy(key = WorkflowMetadataKeys.WorkflowRoot), Option(MetadataValue("A")), yesterday) + val nowEvent = MetadataEvent(workflowKey.copy(key = WorkflowMetadataKeys.WorkflowRoot), Option(MetadataValue("B")), now) + val tomorrowEvent = MetadataEvent(workflowKey.copy(key = WorkflowMetadataKeys.WorkflowRoot), Option(MetadataValue("C")), tomorrow) + + val events = Vector(tomorrowEvent, yesterdayEvent, nowEvent) + + val expectedEvents = Vector(yesterdayEvent, nowEvent, tomorrowEvent) + + dataAccess.addMetadataEvents(events) map { _ => expectedEvents } + } + + (for { + workflow1Id <- baseWorkflowMetadata(Workflow1Name) + expected <- unorderedEvents(workflow1Id) + response <- 
dataAccess.queryMetadataEvents(MetadataQuery(workflow1Id, None, Option(WorkflowMetadataKeys.WorkflowRoot), None, None, expandSubWorkflows = false)) + _ = response shouldBe expected + } yield()).futureValue + } it should "create and query a workflow" taggedAs DbmsTest in { val randomIds = Seq.fill(10)(WorkflowId.randomId().toString) + val testLabel1 = Label("testing-key-1", "testing-value-1") + val testLabel2 = Label("testing-key-2", "testing-value-2") + val testLabel3 = Label("testing-key-3", "testing-value-3") + def succeededWorkflowMetadata(id: WorkflowId): Future[Unit] = { val workflowKey = MetadataKey(id, jobKey = None, key = null) val keyAndValue = Array( @@ -98,11 +133,11 @@ class MetadataDatabaseAccessSpec extends FlatSpec with Matchers with ScalaFuture } (for { - workflow1Id <- baseWorkflowMetadata(Workflow1Name) + workflow1Id <- baseWorkflowMetadata(Workflow1Name, Set(testLabel1, testLabel2)) _ <- succeededWorkflowMetadata(workflow1Id) // Put a bit of space between the two workflows _ = Thread.sleep(50) - workflow2Id <- baseWorkflowMetadata(Workflow2Name) + workflow2Id <- baseWorkflowMetadata(Workflow2Name, Set(testLabel2, testLabel3)) // refresh the metadata _ <- dataAccess.refreshWorkflowMetadataSummaries() map { max => @@ -111,7 +146,7 @@ class MetadataDatabaseAccessSpec extends FlatSpec with Matchers with ScalaFuture // Query with no filters (workflowQueryResult, workflowQueryResult2) <- - dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq.empty)) map { case (response, meta) => + dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq.empty)) map { case (response, meta @ _) => val result = response.results find { r => r.name.contains(Workflow1Name) && r.end.isDefined } getOrElse fail(s"$Workflow1Name with an end not found in ${response.results}") val result2 = response.results find { @@ -120,14 +155,14 @@ class MetadataDatabaseAccessSpec extends FlatSpec with Matchers with ScalaFuture (result, result2) } // Filter by name - _ <- 
dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq(WorkflowQueryKey.Name.name -> Workflow1Name))) map { case (response, meta) => + _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq(WorkflowQueryKey.Name.name -> Workflow1Name))) map { case (response, meta @ _) => val resultsByName = response.results groupBy { _.name } resultsByName.keys.toSet.flatten should equal(Set(Workflow1Name)) } // Filter by multiple names - _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq(WorkflowQueryKey.Name.name -> Workflow1Name, WorkflowQueryKey.Name.name -> Workflow2Name))) map { case (response, meta) => + _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq(WorkflowQueryKey.Name.name -> Workflow1Name, WorkflowQueryKey.Name.name -> Workflow2Name))) map { case (response, meta @ _) => val resultsByName = response.results groupBy { _.name } @@ -135,7 +170,7 @@ class MetadataDatabaseAccessSpec extends FlatSpec with Matchers with ScalaFuture } // Filter by workflow id _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters( - Seq(WorkflowQueryKey.Id.name -> workflow1Id.toString))) map { case (response, meta) => + Seq(WorkflowQueryKey.Id.name -> workflow1Id.toString))) map { case (response, meta @ _) => val resultsById = response.results groupBy { _.name } @@ -143,7 +178,7 @@ class MetadataDatabaseAccessSpec extends FlatSpec with Matchers with ScalaFuture } // Filter by multiple workflow ids _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters( - Seq(workflow1Id, workflow2Id).map(id => WorkflowQueryKey.Id.name -> id.toString))) map { case (response, meta) => + Seq(workflow1Id, workflow2Id).map(id => WorkflowQueryKey.Id.name -> id.toString))) map { case (response, meta @ _) => val resultsById = response.results groupBy { _.name } @@ -151,25 +186,36 @@ class MetadataDatabaseAccessSpec extends FlatSpec with Matchers with ScalaFuture } // Filter by workflow id within random Ids _ <- 
dataAccess.queryWorkflowSummaries(WorkflowQueryParameters( - (randomIds :+ workflow1Id).map(id => WorkflowQueryKey.Id.name -> id.toString))) map { case (response, meta) => + (randomIds :+ workflow1Id).map(id => WorkflowQueryKey.Id.name -> id.toString))) map { case (response, meta @ _) => val resultsById = response.results groupBy { _.name } resultsById.keys.toSet.flatten should equal(Set(Workflow1Name)) } // Filter by status - _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq(WorkflowQueryKey.Status.name -> "Submitted"))) map { case (response, meta) => + _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq(WorkflowQueryKey.Status.name -> "Submitted"))) map { case (response, meta @ _) => val resultsByStatus = response.results groupBy (_.status) resultsByStatus.keys.toSet.flatten should equal(Set("Submitted")) } // Filter by multiple statuses - _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq(WorkflowQueryKey.Status.name -> "Submitted", WorkflowQueryKey.Status.name -> "Succeeded"))) map { case (response, meta) => + _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq(WorkflowQueryKey.Status.name -> "Submitted", WorkflowQueryKey.Status.name -> "Succeeded"))) map { case (response, meta @ _) => val resultsByStatus = response.results groupBy (_.status) resultsByStatus.keys.toSet.flatten should equal(Set("Submitted", "Succeeded")) } + // Filter by label + _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq(WorkflowQueryKey.LabelKeyValue.name -> s"${testLabel2.key}:${testLabel2.value}"))) map { case (response, meta @ _) => + val resultByName = response.results groupBy (_.name) + resultByName.keys.toSet.flatten should equal(Set(Workflow1Name, Workflow2Name)) + } + // Filter by multiple labels + _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters( + Seq(testLabel2, testLabel3).map(label => WorkflowQueryKey.LabelKeyValue.name -> s"${label.key}:${label.value}"))) map { case (response, meta 
@ _) => + val resultByName = response.results groupBy (_.name) + resultByName.keys.toSet.flatten should equal(Set(Workflow2Name)) + } // Filter by start date _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq( - WorkflowQueryKey.StartDate.name -> workflowQueryResult2.start.get.toString))) map { case (response, meta) => + WorkflowQueryKey.StartDate.name -> workflowQueryResult2.start.get.toString))) map { case (response, meta @ _) => response.results partition { r => r.start.isDefined && r.start.get.compareTo(workflowQueryResult.start.get) >= 0 } match { case (y, n) if y.nonEmpty && n.isEmpty => // good case (y, n) => fail(s"Found ${y.size} later workflows and ${n.size} earlier") @@ -177,7 +223,7 @@ class MetadataDatabaseAccessSpec extends FlatSpec with Matchers with ScalaFuture } // Filter by end date _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters(Seq( - WorkflowQueryKey.EndDate.name -> workflowQueryResult.end.get.toString))) map { case (response, meta) => + WorkflowQueryKey.EndDate.name -> workflowQueryResult.end.get.toString))) map { case (response, meta @ _) => response.results partition { r => r.end.isDefined && r.end.get.compareTo(workflowQueryResult.end.get) <= 0 } match { case (y, n) if y.nonEmpty && n.isEmpty => // good case (y, n) => fail(s"Found ${y.size} earlier workflows and ${n.size} later") diff --git a/services/src/test/scala/cromwell/services/metadata/impl/MetadataServiceActorSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/MetadataServiceActorSpec.scala index 72e6b370a..a14dab597 100644 --- a/services/src/test/scala/cromwell/services/metadata/impl/MetadataServiceActorSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/impl/MetadataServiceActorSpec.scala @@ -8,69 +8,77 @@ import cromwell.core.WorkflowId import cromwell.services.ServicesSpec import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ +import org.scalatest.concurrent.Eventually._ +import 
org.scalatest.concurrent.PatienceConfiguration.{Interval, Timeout} -class MetadataServiceActorSpec extends ServicesSpec("Metadata") { +import scala.concurrent.duration._ - val config = ConfigFactory.empty() +class MetadataServiceActorSpec extends ServicesSpec("Metadata") { + import MetadataServiceActorSpec.Config + val config = ConfigFactory.parseString(Config) val actor = system.actorOf(MetadataServiceActor.props(config, config)) - val workflowId = WorkflowId.randomId() + val workflowId = WorkflowId.randomId() - /* - Simple store / retrieve - */ + /* + Simple store / retrieve + */ - val key1 = MetadataKey(workflowId, None, "key1") - val key2 = MetadataKey(workflowId, None, "key2") - val supJob = MetadataJobKey("sup.sup", None, 1) - val key3 = MetadataKey(workflowId, Option(supJob), "dog") - val moment = OffsetDateTime.now + val key1 = MetadataKey(workflowId, None, "key1") + val key2 = MetadataKey(workflowId, None, "key2") + val supJob = MetadataJobKey("sup.sup", None, 1) + val key3 = MetadataKey(workflowId, Option(supJob), "dog") + val moment = OffsetDateTime.now - val event1_1 = MetadataEvent(key1, Option(MetadataValue("value1")), moment) - val event1_2 = MetadataEvent(key1, Option(MetadataValue("value2")), moment) - val event2_1 = MetadataEvent(key2, Option(MetadataValue("value1")), moment) - val event3_1 = MetadataEvent(key3, Option(MetadataValue("value3")), moment) - val event3_2 = MetadataEvent(key3, None, moment) + val event1_1 = MetadataEvent(key1, Option(MetadataValue("value1")), moment) + val event1_2 = MetadataEvent(key1, Option(MetadataValue("value2")), moment) + val event2_1 = MetadataEvent(key2, Option(MetadataValue("value1")), moment) + val event3_1 = MetadataEvent(key3, Option(MetadataValue("value3")), moment) + val event3_2 = MetadataEvent(key3, None, moment) "MetadataServiceActor" should { - "Store values for different keys" in { + "Store values for different keys and then retrieve those values" in { val putAction1 = PutMetadataAction(event1_1) 
val putAction2 = PutMetadataAction(event1_2) val putAction3 = PutMetadataAction(event2_1, event3_1, event3_2) - (for { - response1 <- (actor ? putAction1).mapTo[MetadataServiceResponse] - response2 <- (actor ? putAction2).mapTo[MetadataServiceResponse] - response3 <- (actor ? putAction3).mapTo[MetadataServiceResponse] - _ = response1 shouldBe MetadataPutAcknowledgement(putAction1) - _ = response2 shouldBe MetadataPutAcknowledgement(putAction2) - _ = response3 shouldBe MetadataPutAcknowledgement(putAction3) - } yield ()).futureValue - } - "Retrieve the correct values for different keys" in { + actor ! putAction1 + actor ! putAction2 + actor ! putAction3 + val query1 = MetadataQuery.forKey(key1) val query2 = MetadataQuery.forKey(key2) val query3 = MetadataQuery.forKey(key3) val query4 = MetadataQuery.forWorkflow(workflowId) val query5 = MetadataQuery.forJob(workflowId, supJob) - (for { - response1 <- (actor ? GetMetadataQueryAction(query1)).mapTo[MetadataServiceResponse] - _ = response1 shouldBe MetadataLookupResponse(query1, Seq(event1_1, event1_2)) + eventually(Timeout(10.seconds), Interval(2.seconds)) { + (for { + response1 <- (actor ? GetMetadataQueryAction(query1)).mapTo[MetadataServiceResponse] + _ = response1 shouldBe MetadataLookupResponse(query1, Seq(event1_1, event1_2)) - response2 <- (actor ? GetMetadataQueryAction(query2)).mapTo[MetadataServiceResponse] - _ = response2 shouldBe MetadataLookupResponse(query2, Seq(event2_1)) + response2 <- (actor ? GetMetadataQueryAction(query2)).mapTo[MetadataServiceResponse] + _ = response2 shouldBe MetadataLookupResponse(query2, Seq(event2_1)) - response3 <- (actor ? GetMetadataQueryAction(query3)).mapTo[MetadataServiceResponse] - _ = response3 shouldBe MetadataLookupResponse(query3, Seq(event3_1, event3_2)) + response3 <- (actor ? GetMetadataQueryAction(query3)).mapTo[MetadataServiceResponse] + _ = response3 shouldBe MetadataLookupResponse(query3, Seq(event3_1, event3_2)) - response4 <- (actor ? 
GetMetadataQueryAction(query4)).mapTo[MetadataServiceResponse] - _ = response4 shouldBe MetadataLookupResponse(query4, Seq(event1_1, event1_2, event2_1, event3_1, event3_2)) + response4 <- (actor ? GetMetadataQueryAction(query4)).mapTo[MetadataServiceResponse] + _ = response4 shouldBe MetadataLookupResponse(query4, Seq(event1_1, event1_2, event2_1, event3_1, event3_2)) - response5 <- (actor ? GetMetadataQueryAction(query5)).mapTo[MetadataServiceResponse] - _ = response5 shouldBe MetadataLookupResponse(query5, Seq(event3_1, event3_2)) + response5 <- (actor ? GetMetadataQueryAction(query5)).mapTo[MetadataServiceResponse] + _ = response5 shouldBe MetadataLookupResponse(query5, Seq(event3_1, event3_2)) - } yield ()).futureValue + } yield ()).futureValue + } } } } + +object MetadataServiceActorSpec { + val Config = + """ + |services.MetadataService.db-batch-size = 3 + |services.MetadataService.db-flush-rate = 100 millis + """.stripMargin +} diff --git a/services/src/test/scala/cromwell/services/metadata/impl/MetadataValueSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/MetadataValueSpec.scala new file mode 100644 index 000000000..316ad5578 --- /dev/null +++ b/services/src/test/scala/cromwell/services/metadata/impl/MetadataValueSpec.scala @@ -0,0 +1,11 @@ +package cromwell.services.metadata.impl + +import cromwell.services.metadata.MetadataValue +import org.scalatest.{FlatSpec, Matchers} + +class MetadataValueSpec extends FlatSpec with Matchers { + "MetadataValue" should "not NPE with a null value" in { + val metadataValue = MetadataValue(null) + metadataValue.value shouldBe "" + } +} diff --git a/services/src/test/scala/cromwell/services/metadata/impl/WriteMetadataActorSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/WriteMetadataActorSpec.scala new file mode 100644 index 000000000..3b16acd72 --- /dev/null +++ b/services/src/test/scala/cromwell/services/metadata/impl/WriteMetadataActorSpec.scala @@ -0,0 +1,102 @@ +package 
cromwell.services.metadata.impl + +import java.time.OffsetDateTime + +import akka.testkit.TestFSMRef +import cats.data.NonEmptyVector +import cromwell.core.WorkflowId +import cromwell.core.actor.BatchingDbWriter +import cromwell.core.actor.BatchingDbWriter._ +import cromwell.services.ServicesSpec +import cromwell.services.metadata.MetadataService.PutMetadataAction +import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} +import org.scalatest.BeforeAndAfter +import org.scalatest.concurrent.Eventually + +import scala.concurrent.duration._ +import scala.concurrent.{ExecutionContext, Future, Promise} +import scala.util.Success + +class WriteMetadataActorSpec extends ServicesSpec("Metadata") with Eventually with BeforeAndAfter { + import WriteMetadataActorSpec.Action + + var actor: TestFSMRef[BatchingDbWriterState, BatchingDbWriter.BatchingDbWriterData, DelayingWriteMetadataActor] = _ + + before { + actor = TestFSMRef(new DelayingWriteMetadataActor()) + } + + "WriteMetadataActor" should { + "start with no events and waiting to write" in { + actor.stateName shouldBe WaitingToWrite + actor.stateData shouldBe NoData + } + + "Have one event and be waiting after one event is sent" in { + actor ! Action + eventually { + actor.stateName shouldBe WaitingToWrite + actor.stateData shouldBe HasData(NonEmptyVector.fromVectorUnsafe(Action.events.toVector)) + } + } + + "Have one event after batch size + 1 is reached" in { + 1 to WriteMetadataActorSpec.BatchRate foreach { _ => actor ! Action } + actor.stateName shouldBe WaitingToWrite + + eventually { + actor.stateData match { + case HasData(e) => e.toVector.size shouldBe WriteMetadataActorSpec.BatchRate + case _ => fail("Expecting the actor to have events queued up") + } + } + actor ! Action + eventually { + actor.stateName shouldBe WritingToDb + actor.underlyingActor.writeToDbInProgress shouldBe true + actor.stateData shouldBe NoData + } + actor ! 
Action + eventually { + actor.stateName shouldBe WritingToDb + actor.stateData shouldBe HasData(NonEmptyVector.fromVectorUnsafe(Action.events.toVector)) + } + actor.underlyingActor.completeWritePromise() + eventually { + actor.stateName shouldBe WaitingToWrite + actor.stateData shouldBe HasData(NonEmptyVector.fromVectorUnsafe(Action.events.toVector)) + } + } + } +} + +object WriteMetadataActorSpec { + val Event = MetadataEvent(MetadataKey(WorkflowId.randomId(), None, "key"), Option(MetadataValue("value")), OffsetDateTime.now) + val Action = PutMetadataAction(Event) + + val BatchRate: Int = 10 + val FunctionallyForever: FiniteDuration = 100.days +} + +// A WMA that won't (hopefully!) perform a time based flush during this test +final class DelayingWriteMetadataActor extends WriteMetadataActor(WriteMetadataActorSpec.BatchRate, WriteMetadataActorSpec.FunctionallyForever) { + + var writeToDbInProgress: Boolean = false + var writeToDbCompletionPromise: Option[Promise[Unit]] = None + + override def addMetadataEvents(metadataEvents: Iterable[MetadataEvent])(implicit ec: ExecutionContext): Future[Unit] = { + writeToDbCompletionPromise = Option(Promise[Unit]()) + writeToDbInProgress = true + writeToDbCompletionPromise.get.future + } + + def completeWritePromise(): Unit = { + writeToDbCompletionPromise match { + case Some(promise) => + promise.complete(Success(())) + writeToDbInProgress = false + writeToDbCompletionPromise = None + case None => throw new Exception("BAD TEST! 
Cannot complete the actor's write future if the actor hasn't requested it yet!") + } + } +} diff --git a/src/bin/travis/afterSuccess.sh b/src/bin/travis/afterSuccess.sh new file mode 100755 index 000000000..09e5be1ce --- /dev/null +++ b/src/bin/travis/afterSuccess.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +set -e + +echo "BUILD_TYPE='$BUILD_TYPE'" +echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'" +echo "TRAVIS_PULL_REQUEST='$TRAVIS_PULL_REQUEST'" + +if [ "$BUILD_TYPE" == "sbt" ] && [ "$TRAVIS_PULL_REQUEST" == "false" ]; then + + if [ "$TRAVIS_BRANCH" == "develop" ]; then + # Publish images for both the "cromwell develop branch" and the "cromwell dev environment". + docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD" + sbt \ + 'set test in Test := {}' \ + 'set imageNames in docker := Seq("develop", "dev").map(tag => ImageName(s"broadinstitute/cromwell:$tag"))' \ + publish \ + dockerBuildAndPush + + elif [[ "$TRAVIS_BRANCH" =~ ^[0-9\.]+_hotfix$ ]]; then + docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD" + sbt 'set test in Test := {}' -Dproject.isSnapshot=false dockerBuildAndPush + + fi + +fi diff --git a/src/bin/travis/publishSnapshot.sh b/src/bin/travis/publishSnapshot.sh deleted file mode 100755 index 9c18a97fd..000000000 --- a/src/bin/travis/publishSnapshot.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -set -e - -echo "BUILD_TYPE='$BUILD_TYPE'" -echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'" -echo "TRAVIS_PULL_REQUEST='$TRAVIS_PULL_REQUEST'" - -if [ "$BUILD_TYPE" == "sbt" ] && [ "$TRAVIS_BRANCH" == "develop" ] && [ "$TRAVIS_PULL_REQUEST" == "false" ]; then - sbt 'set test in Test := {}' publish -fi diff --git a/src/bin/travis/resources/centaur.inputs b/src/bin/travis/resources/centaur.inputs index c378c0f71..67822dbd2 100644 --- a/src/bin/travis/resources/centaur.inputs +++ b/src/bin/travis/resources/centaur.inputs @@ -1,7 +1,8 @@ { - "centaur.centaur.cromwell_jar":"gs://cloud-cromwell-dev/travis-centaur/CROMWELL_JAR", - 
"centaur.centaur.cromwell_branch":"BRANCH", - "centaur.centaur.conf":"gs://cloud-cromwell-dev/travis-centaur/multiBackend.conf", - "centaur.centaur.pem":"gs://cloud-cromwell-dev/travis-centaur/cromwell-account.pem", - "centaur.centaur.token": "gs://cloud-cromwell-dev/travis-centaur/token.txt" + "centaur_workflow.centaur.cromwell_jar":"gs://cloud-cromwell-dev/travis-centaur/CROMWELL_JAR", + "centaur_workflow.centaur.centaur_branch":"CENTAUR_BRANCH", + "centaur_workflow.centaur.conf":"gs://cloud-cromwell-dev/travis-centaur/multiBackend.conf", + "centaur_workflow.centaur.pem":"gs://cloud-cromwell-dev/travis-centaur/cromwell-account.pem", + "centaur_workflow.centaur.integration_tests_dir": "INTEGRATION_TESTS_DIR", + "centaur_workflow.centaur.token": "gs://cloud-cromwell-dev/travis-centaur/token.txt" } diff --git a/src/bin/travis/resources/centaur.wdl b/src/bin/travis/resources/centaur.wdl index 62e13c98c..c385d23b6 100644 --- a/src/bin/travis/resources/centaur.wdl +++ b/src/bin/travis/resources/centaur.wdl @@ -1,17 +1,39 @@ task centaur { - String cromwell_branch + String centaur_branch File conf File pem File cromwell_jar File token - String secret = read_string(token) + String? integration_tests_dir command<<< + # start mysql server + mysqld & + + # give time to server to start + git clone https://github.com/vishnubob/wait-for-it.git + cd wait-for-it + ./wait-for-it.sh -t 10 localhost:3306 + if (($? != 0)); then + echo "Timed out waiting for mysql server. Exiting." + exit 1 + fi + + cd .. + + # setup mysql + mysql -u root -e "SET GLOBAL sql_mode = 'STRICT_ALL_TABLES';" + mysql -u root -e "CREATE DATABASE IF NOT EXISTS cromwell_test;" + mysql -u root -e "CREATE USER 'travis'@'localhost' IDENTIFIED BY '';" + mysql -u root -e "GRANT ALL PRIVILEGES ON cromwell_test . 
* TO 'travis'@'localhost';" + mkdir -p /cromwell_root/tmp/ivy2 export SBT_OPTS=-Dsbt.ivy.home=/cromwell_root/tmp/.ivy2 git clone https://github.com/broadinstitute/centaur.git cd centaur - ./test_cromwell.sh -j${cromwell_jar} -c${conf} -r/cromwell_root -t ${secret} -elocaldockertest + git checkout ${centaur_branch} + cd .. + centaur/test_cromwell.sh -j${cromwell_jar} -c${conf} -r/cromwell_root -t${token} -elocaldockertest -p100 ${ "-i" + integration_tests_dir} >>> output { @@ -20,12 +42,13 @@ task centaur { } runtime { - docker: "geoffjentry/centaur-cromwell:latest" + docker: "us.gcr.io/broad-dsde-cromwell-dev/centaur:latest" cpu: "8" zones: "us-central1-b" failOnStderr: false } } -workflow centaur { +workflow centaur_workflow { call centaur } + diff --git a/src/bin/travis/resources/cromwell-service-account.json.ctmpl b/src/bin/travis/resources/cromwell-service-account.json.ctmpl new file mode 100644 index 000000000..2cbdcea92 --- /dev/null +++ b/src/bin/travis/resources/cromwell-service-account.json.ctmpl @@ -0,0 +1,5 @@ +{{with $cromwellServiceAccount := vault (printf "secret/dsde/cromwell/common/cromwell-service-account.json")}} + +{{$cromwellServiceAccount.Data | toJSONPretty}} + +{{end}} diff --git a/src/bin/travis/resources/funnel.conf b/src/bin/travis/resources/funnel.conf new file mode 100644 index 000000000..08dbab623 --- /dev/null +++ b/src/bin/travis/resources/funnel.conf @@ -0,0 +1,10 @@ +HttpPort: 9000 +Storage: + - Local: + AllowedDirs: + - /home/ + - /cromwell-executions + - /tmp/ +DBPath: /tmp/tes_task.db +Scheduler: local +LogLevel: info diff --git a/src/bin/travis/resources/jesConf.tar.enc b/src/bin/travis/resources/jesConf.tar.enc deleted file mode 100644 index 3aa3b4af0..000000000 Binary files a/src/bin/travis/resources/jesConf.tar.enc and /dev/null differ diff --git a/src/bin/travis/resources/jes_centaur.conf.ctmpl b/src/bin/travis/resources/jes_centaur.conf.ctmpl new file mode 100644 index 000000000..0324162a7 --- /dev/null +++ 
b/src/bin/travis/resources/jes_centaur.conf.ctmpl @@ -0,0 +1,71 @@ +{{with $refreshToken := vault (printf "secret/dsde/cromwell/common/refresh-token")}} + +include "application.conf" + +backend { + default = "Jes" + enabled = ["Jes", "Jes-Refresh"] + providers { + Local.config.filesystems.gcs.auth = "service_account" + Jes { + actor-factory = "cromwell.backend.impl.jes.JesBackendLifecycleActorFactory" + config { + project = "broad-dsde-cromwell-dev" + root = "gs://cloud-cromwell-dev/cromwell_execution/travis" + maximum-polling-interval = 600 + genomics { + auth = "service_account" + endpoint-url = "https://genomics.googleapis.com/" + } + filesystems { + gcs.auth = "service_account" + } + } + } + Jes-Refresh { + actor-factory = "cromwell.backend.impl.jes.JesBackendLifecycleActorFactory" + config { + project = "broad-dsde-cromwell-dev" + root = "gs://centaur-refresh-private/cromwell_execution/travis" + maximum-polling-interval = 600 + genomics { + auth = "service_account" + endpoint-url = "https://genomics.googleapis.com/" + } + filesystems { + gcs.auth = "refresh_token" + } + } + } + } +} + +google { + application-name = "cromwell" + auths = [ + { + name = "service_account" + scheme = "service_account" + json-file = "cromwell-service-account.json" + } + { + name = "refresh_token" + scheme = "refresh_token" + client-id = "{{$refreshToken.Data.client_id}}" + client-secret = "{{$refreshToken.Data.client_secret}}" + } + ] +} + +call-caching { + enabled = true + lookup-docker-hash = false +} + +engine { + filesystems { + gcs.auth = "service_account" + } +} + +{{end}} diff --git a/src/bin/travis/resources/local_centaur.conf b/src/bin/travis/resources/local_centaur.conf new file mode 100644 index 000000000..86dcce84b --- /dev/null +++ b/src/bin/travis/resources/local_centaur.conf @@ -0,0 +1,70 @@ +include "application.conf" + +akka { + loggers = ["akka.event.slf4j.Slf4jLogger"] + logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" +} + +spray.can { + server { + 
request-timeout = 40s + } + client { + request-timeout = 40s + connecting-timeout = 40s + } +} + +call-caching { + enabled = true +} + +system.graceful-server-shutdown = true + +backend.providers { + LocalNoDocker { + actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" + config { + run-in-background = true + runtime-attributes = "" + submit = "/bin/bash ${script}" + root: "cromwell-executions" + + filesystems { + local { + localization: [ + "soft-link", "hard-link", "copy" + ] + + caching { + duplication-strategy: [ + "soft-link" + ] + + # Possible values: file, path + # "file" will compute an md5 hash of the file content. + # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link", + # in order to allow for the original file path to be hashed. + hashing-strategy: "path" + + # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash. + # If false or the md5 does not exist, will proceed with the above-defined hashing strategy. 
+ check-sibling-md5: false + } + } + } + } + } +} + +database { + db.url = "jdbc:mysql://localhost/cromwell_test?rewriteBatchedStatements=true" + db.user = "travis" + db.password = "" + db.driver = "com.mysql.jdbc.Driver" + profile = "slick.jdbc.MySQLProfile$" +} + +backend.providers.Local.config.filesystems.local.caching.duplication-strategy = ["copy"] +backend.providers.Local.config.filesystems.local.localization = ["soft-link", "copy"] +backend.providers.Local.config.concurrent-job-limit = 20 diff --git a/src/bin/travis/resources/tes_centaur.conf b/src/bin/travis/resources/tes_centaur.conf new file mode 100644 index 000000000..fe7420017 --- /dev/null +++ b/src/bin/travis/resources/tes_centaur.conf @@ -0,0 +1,44 @@ +include "application.conf" + +akka { + loggers = ["akka.event.slf4j.Slf4jLogger"] + logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" +} + +spray.can { + server { + request-timeout = 40s + } + client { + request-timeout = 40s + connecting-timeout = 40s + } +} + +call-caching { + enabled = true +} + +system.graceful-server-shutdown = true + +backend { + default = "TES" + providers { + TES { + actor-factory = "cromwell.backend.impl.tes.TesBackendLifecycleActorFactory" + config { + root = "cromwell-executions" + dockerRoot = "/cromwell-executions" + endpoint = "http://127.0.0.1:9000/v1/tasks" + } + } + } +} + +database { + db.url = "jdbc:mysql://localhost/cromwell_test?rewriteBatchedStatements=true" + db.user = "travis" + db.password = "" + db.driver = "com.mysql.jdbc.Driver" + profile = "slick.jdbc.MySQLProfile$" +} diff --git a/src/bin/travis/test.sh b/src/bin/travis/test.sh index 8730db3af..0cabb3e50 100755 --- a/src/bin/travis/test.sh +++ b/src/bin/travis/test.sh @@ -4,14 +4,48 @@ set -e SCRIPT_DIR=src/bin/travis -# BUILD_TYPE is coming in from the Travis build matrix -if [ "$BUILD_TYPE" = "centaurJes" ]; then - "${SCRIPT_DIR}"/testCentaurJes.sh -elif [ "$BUILD_TYPE" = "centaurLocal" ]; then - "${SCRIPT_DIR}"/testCentaurLocal.sh -elif [ 
"$BUILD_TYPE" = "sbt" ]; then - "${SCRIPT_DIR}"/testSbt.sh -else - echo "Unknown BUILD_TYPE: '$BUILD_TYPE'" - exit 1 -fi +# $TRAVIS_EVENT_TYPE will be cron if this build was initiated by a cron job +case "$TRAVIS_EVENT_TYPE" in + push|pull_request|api) + # BUILD_TYPE is coming in from the Travis build matrix + case "$BUILD_TYPE" in + centaurJes) + "${SCRIPT_DIR}"/testCentaurJes.sh + ;; + centaurTes) + "${SCRIPT_DIR}"/testCentaurTes.sh + ;; + centaurLocal) + "${SCRIPT_DIR}"/testCentaurLocal.sh + ;; + sbt) + "${SCRIPT_DIR}"/testSbt.sh + ;; + checkPublish) + "${SCRIPT_DIR}"/testCheckPublish.sh + ;; + *) + echo "Unknown BUILD_TYPE: '$BUILD_TYPE'" + exit 1 + ;; + esac + ;; + cron) + case "$BUILD_TYPE" in + centaurJes) + "${SCRIPT_DIR}"/testCentaurJes.sh -i + ;; + centaurTes|centaurLocal|sbt|checkPublish) + exit 0 + ;; + *) + echo "Unknown BUILD_TYPE: '$BUILD_TYPE'" + exit 1 + ;; + esac + ;; + *) + echo "Unknown TRAVIS_EVENT_TYPE: '$TRAVIS_EVENT_TYPE'" + exit 1 + ;; + esac diff --git a/src/bin/travis/testCentaurJes.sh b/src/bin/travis/testCentaurJes.sh index 04813698d..e112a8f04 100755 --- a/src/bin/travis/testCentaurJes.sh +++ b/src/bin/travis/testCentaurJes.sh @@ -2,6 +2,17 @@ set -e +if [ "$TRAVIS_SECURE_ENV_VARS" = "false" ]; then + echo "************************************************************************************************" + echo "************************************************************************************************" + echo "** **" + echo "** WARNING: Encrypted keys are unavailable to automatically test JES with centaur. Exiting. 
**" + echo "** **" + echo "************************************************************************************************" + echo "************************************************************************************************" + exit 0 +fi + removeCromwellJar() { gsutil rm "${JAR_GCS_PATH}" || true } @@ -31,9 +42,59 @@ printTravisHeartbeat set -x -# Unpack our credentials and such -openssl aes-256-cbc -K "$encrypted_5b9e82629fa8_key" -iv "$encrypted_5b9e82629fa8_iv" -in src/bin/travis/resources/jesConf.tar.enc -out jesConf.tar -d -tar xvf jesConf.tar +PROGNAME="$(basename "$0")" +RUN_INTEGRATION_TESTS=0 + +usage=" +$PROGNAME [-i ] + +Builds and runs specified branch of Cromwell and runs Centaur against it. + +Arguments: + -i Flag that if supplied, will run centaur integration tests instead of standardtests +" + +while getopts ":hi" option; do + case "$option" in + h) echo "$usage" + exit + ;; + i) RUN_INTEGRATION_TESTS=1 + ;; + :) printf "Missing argument for -%s\n" "$OPTARG" >&2 + echo "$usage" >&2 + exit 1 + ;; + \?) printf "Illegal option: -%s\n" "$OPTARG" >&2 + echo "$usage" >&2 + exit 1 + ;; + esac +done + +# TURN OFF LOGGING WHILE WE TALK TO DOCKER/VAULT +set +x + +# Login to docker to access the dsde-toolbox +docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD" + +# Login to vault to access secrets +docker run --rm \ + -v $HOME:/root:rw \ + broadinstitute/dsde-toolbox \ + vault auth "$JES_TOKEN" < /dev/null > /dev/null && echo vault auth success + +set -x + +# Render secrets +docker run --rm \ + -v $HOME:/root:rw \ + -v $PWD/src/bin/travis/resources:/working \ + -v $PWD:/output \ + -e ENVIRONMENT=not_used \ + -e INPUT_PATH=/working \ + -e OUT_PATH=/output \ + broadinstitute/dsde-toolbox render-templates.sh # Do a bunch of crap to enable gsutil. 
It's possible this is overkill but it doesn't take long anyways sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 1397BC53640DB551 @@ -45,33 +106,53 @@ export PYTHONPATH="/usr/lib/python2.7/site-packages:/usr/local/lib/python2.7/sit export CONFIGURE_OPTS="--enable-unicode=ucs4" pyenv install 2.7.10 pyenv global 2.7.10 -sudo pip install --upgrade pip -sudo pip install pyopenssl ndg-httpsclient pyasn1 --upgrade +sudo -H pip install --upgrade pip +sudo -H pip install pyopenssl ndg-httpsclient pyasn1 --upgrade export CLOUDSDK_PYTHON_SITEPACKAGES=1 -gcloud auth activate-service-account --key-file=broad-dsde-cromwell-dev-d9c443bb4a94.json "$CROMWELL_SERVICE_ACCOUNT" + +# Use sed to redact the service account from the stderr +gcloud -q \ + auth \ + activate-service-account \ + --key-file=cromwell-service-account.json 2>&1 | \ + sed 's/[A-Za-z0-9._-]*@[A-Za-z0-9._-]*/REDACTED/g' echo "RUNNING TRAVIS CENTAUR" sbt assembly -# Update the inputs file with stuff specific to this run -sed -i "s/BRANCH/${TRAVIS_BRANCH}/g" src/bin/travis/resources/centaur.inputs + +# Update the .inputs file with stuff specific to this run +sed -i "s/CENTAUR_BRANCH/${CENTAUR_BRANCH}/g" src/bin/travis/resources/centaur.inputs CROMWELL_JAR=cromwell_${TRAVIS_BUILD_ID}.jar sed -i "s/CROMWELL_JAR/${CROMWELL_JAR}/g" src/bin/travis/resources/centaur.inputs +# pass integration directory to the inputs json otherwise remove it from the inputs file +if [ $RUN_INTEGRATION_TESTS -ne 1 ]; then + sed -i "/INTEGRATION_TESTS_DIR/d" src/bin/travis/resources/centaur.inputs +else + sed -i "s|INTEGRATION_TESTS_DIR|$INTEGRATION_TESTS_DIR|g" src/bin/travis/resources/centaur.inputs +fi + # Upload the built Cromwell jar to GCS so we can use it in our centaur test. 
Set an exit trap to clean it up on failure JAR_GCS_PATH=gs://cloud-cromwell-dev/travis-centaur/${CROMWELL_JAR} -gsutil cp target/scala-2.11/cromwell-*.jar "${JAR_GCS_PATH}" - -java -Dconfig.file=./jes.conf -jar target/scala-2.11/cromwell-*.jar run src/bin/travis/resources/centaur.wdl src/bin/travis/resources/centaur.inputs | tee log.txt +gsutil cp target/scala-2.12/cromwell-*.jar "${JAR_GCS_PATH}" + +java \ + -Dconfig.file=./jes_centaur.conf \ + -jar target/scala-2.12/cromwell-*.jar \ + run \ + src/bin/travis/resources/centaur.wdl \ + --inputs src/bin/travis/resources/centaur.inputs | \ + tee log.txt EXIT_CODE="${PIPESTATUS[0]}" # The perl code below is to remove our lovely color highlighting -export WORKFLOW_ID=`grep "SingleWorkflowRunnerActor: Workflow submitted " log.txt | perl -pe 's/\e\[?.*?[\@-~]//g' | cut -f7 -d" "` - -# Grab the Centaur log from GCS and cat it so we see it in the main travis log. -export CENTAUR_LOG_PATH="gs://cloud-cromwell-dev/cromwell_execution/travis/centaur/${WORKFLOW_ID}/call-centaur//cromwell_root/logs/centaur.log" -gsutil cp ${CENTAUR_LOG_PATH} centaur.log +WORKFLOW_ID=$(grep "SingleWorkflowRunnerActor: Workflow submitted " log.txt | perl -pe 's/\e\[?.*?[\@-~]//g' | cut -f7 -d" ") +export WORKFLOW_ID +# Grab the Centaur log from GCS and cat it so we see it in the main travis log. 
+export CENTAUR_LOG_PATH="gs://cloud-cromwell-dev/cromwell_execution/travis/centaur_workflow/${WORKFLOW_ID}/call-centaur/cromwell_root/logs/centaur.log" +gsutil cp "${CENTAUR_LOG_PATH}" centaur.log cat centaur.log -echo "More logs for this run are available at https://console.cloud.google.com/storage/browser/cloud-cromwell-dev/cromwell_execution/travis/centaur/${WORKFLOW_ID}/call-centaur/" +echo "More logs for this run are available at https://console.cloud.google.com/storage/browser/cloud-cromwell-dev/cromwell_execution/travis/centaur_workflow/${WORKFLOW_ID}/call-centaur/" exit "${EXIT_CODE}" diff --git a/src/bin/travis/testCentaurLocal.sh b/src/bin/travis/testCentaurLocal.sh index 203d87c18..ba8138f2c 100755 --- a/src/bin/travis/testCentaurLocal.sh +++ b/src/bin/travis/testCentaurLocal.sh @@ -9,28 +9,92 @@ printTravisHeartbeat() { TRAVIS_HEARTBEAT_PID=$! } +cromwellLogTail() { + ( + while [ ! -f logs/cromwell.log ]; + do + sleep 2 + printf "(Cr)" + done + tail -f logs/cromwell.log & + CROMWELL_LOG_TAIL_PID=$! + ) & + CROMWELL_LOG_WAIT_PID=$! +} + +centaurLogTail() { + ( + while [ ! -f logs/centaur.log ]; + do + sleep 2 + printf "(Ce)" + done + tail -f logs/centaur.log & + CENTAUR_LOG_TAIL_PID=$! + ) & + CENTAUR_LOG_WAIT_PID=$! 
+} + killTravisHeartbeat() { if [ -n "${TRAVIS_HEARTBEAT_PID+set}" ]; then kill ${TRAVIS_HEARTBEAT_PID} || true fi } +killCromwellLogTail() { + if [ -n "${CROMWELL_LOG_TAIL_PID+set}" ]; then + kill ${CROMWELL_LOG_TAIL_PID} || true + else + if [ -n "${CROMWELL_LOG_WAIT_PID+set}" ]; then + kill ${CROMWELL_LOG_WAIT_PID} || true + fi + fi +} + +killCentaurLogTail() { + if [ -n "${CENTAUR_LOG_TAIL_PID+set}" ]; then + kill ${CENTAUR_LOG_TAIL_PID} || true + else + if [ -n "${CENTAUR_LOG_WAIT_PID+set}" ]; then + kill ${CENTAUR_LOG_WAIT_PID} || true + fi + fi +} + exitScript() { - echo "CROMWELL LOG" - cat logs/cromwell.log echo "CENTAUR LOG" cat logs/centaur.log killTravisHeartbeat + killCromwellLogTail + killCentaurLogTail } trap exitScript EXIT +trap exitScript TERM +cromwellLogTail +centaurLogTail printTravisHeartbeat set -x set -e +sudo apt-get update -qq +sudo apt-get install -qq mysql-server-5.6 mysql-client-5.6 mysql-client-core-5.6 +docker pull ubuntu:latest +mysql -u root -e "SET GLOBAL sql_mode = 'STRICT_ALL_TABLES';" +mysql -u root -e "CREATE DATABASE IF NOT EXISTS cromwell_test;" +mysql -u root -e "CREATE USER 'travis'@'localhost' IDENTIFIED BY '';" +mysql -u root -e "GRANT ALL PRIVILEGES ON cromwell_test . * TO 'travis'@'localhost';" + sbt assembly -CROMWELL_JAR=$(find "$(pwd)/target/scala-2.11" -name "cromwell-*.jar") +CROMWELL_JAR=$(find "$(pwd)/target/scala-2.12" -name "cromwell-*.jar") +LOCAL_CONF="$(pwd)/src/bin/travis/resources/local_centaur.conf" git clone https://github.com/broadinstitute/centaur.git cd centaur -./test_cromwell.sh -j"${CROMWELL_JAR}" +git checkout ${CENTAUR_BRANCH} +cd .. 
+# All tests use ubuntu:latest - make sure it's there before starting the tests +# because pulling the image during some of the tests would cause them to fail +# (specifically output_redirection which expects a specific value in stderr) +docker pull ubuntu:latest +centaur/test_cromwell.sh -j"${CROMWELL_JAR}" -c${LOCAL_CONF} diff --git a/src/bin/travis/testCentaurTes.sh b/src/bin/travis/testCentaurTes.sh new file mode 100755 index 000000000..8fb829bac --- /dev/null +++ b/src/bin/travis/testCentaurTes.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash + +printTravisHeartbeat() { + # Sleep one minute between printouts, but don't zombie for more than two hours + for ((i=0; i < 120; i++)); do + sleep 60 + printf "…" + done & + TRAVIS_HEARTBEAT_PID=$! +} + +killTravisHeartbeat() { + if [ -n "${TRAVIS_HEARTBEAT_PID+set}" ]; then + kill ${TRAVIS_HEARTBEAT_PID} || true + fi +} + +exitScript() { + echo "FUNNEL LOG" + cat logs/funnel.log + echo "CROMWELL LOG" + cat logs/cromwell.log + echo "CENTAUR LOG" + cat logs/centaur.log + killTravisHeartbeat +} + +trap exitScript EXIT +printTravisHeartbeat + +set -x +set -e + +sudo apt-get update -qq +sudo apt-get install -qq mysql-server-5.6 mysql-client-5.6 mysql-client-core-5.6 +docker pull ubuntu:latest +mysql -u root -e "SET GLOBAL sql_mode = 'STRICT_ALL_TABLES';" +mysql -u root -e "CREATE DATABASE IF NOT EXISTS cromwell_test;" +mysql -u root -e "CREATE USER 'travis'@'localhost' IDENTIFIED BY '';" +mysql -u root -e "GRANT ALL PRIVILEGES ON cromwell_test . 
* TO 'travis'@'localhost';" + +WORKDIR=$(pwd) + +sbt assembly +CROMWELL_JAR=$(find "$(pwd)/target/scala-2.12" -name "cromwell-*.jar") +TES_CENTAUR_CONF="$(pwd)/src/bin/travis/resources/tes_centaur.conf" +git clone https://github.com/broadinstitute/centaur.git +cd centaur +git checkout ${CENTAUR_BRANCH} +cd $WORKDIR + + +FUNNEL_CONF="$(pwd)/src/bin/travis/resources/funnel.conf" +wget https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz +tar xfz go1.8.1.linux-amd64.tar.gz +export GOROOT=$WORKDIR/go +mkdir go-lib +export GOPATH=$WORKDIR/go-lib +go get github.com/ohsu-comp-bio/funnel +cd $GOPATH/src/github.com/ohsu-comp-bio/funnel +git checkout c4d9134 +make +cd $WORKDIR +mkdir logs +nohup $GOPATH/bin/funnel server --config ${FUNNEL_CONF} > logs/funnel.log 2>&1 & + + +# All tests use ubuntu:latest - make sure it's there before starting the tests +# because pulling the image during some of the tests would cause them to fail +# (specifically output_redirection which expects a specific value in stderr) +docker pull ubuntu:latest + +# The following tests are skipped: +# +# non_root_specified_user: TES doesn't support switching users in the image +# write_lines_files: all inputs are read-only in TES +# lots_of_inputs: Funnel mounts in each input separately, this task surpasses the docker limit for volumes +# call_cache_capoeira_local: fails on task 'read_files_without_docker' since the 'docker' runtime key is required for this backend +# +centaur/test_cromwell.sh \ +-j ${CROMWELL_JAR} \ +-c ${TES_CENTAUR_CONF} \ +-e non_root_specified_user \ +-e write_lines_files \ +-e lots_of_inputs \ +-e call_cache_capoeira_local \ +-e non_root_default_user diff --git a/src/bin/travis/testCheckPublish.sh b/src/bin/travis/testCheckPublish.sh new file mode 100755 index 000000000..541b55385 --- /dev/null +++ b/src/bin/travis/testCheckPublish.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +set -e +set -x + +sbt clean assembly doc diff --git a/src/main/resources/application.conf 
b/src/main/resources/application.conf index aaf3d4148..e397980ad 100644 --- a/src/main/resources/application.conf +++ b/src/main/resources/application.conf @@ -1,14 +1,22 @@ akka { loggers = ["akka.event.slf4j.Slf4jLogger"] logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" -} + actor.guardian-supervisor-strategy = "cromwell.core.CromwellUserGuardianStrategy" + + http { + server { + request-timeout = 40s + bind-timeout = 5s + } -spray.can { - server { - request-timeout = 40s + client.connecting-timeout = 40s } - client { - request-timeout = 40s - connecting-timeout = 40s + + coordinated-shutdown.phases { + # Adds stop-io-activity to the dependencies of before-actor-system-terminate + before-actor-system-terminate.depends-on = ["cluster-shutdown", "stop-io-activity"] + + # Adds abort-all-workflows-activity to the dependencies of service-stop + service-stop.depends-on = ["service-requests-done", "abort-all-workflows"] } } diff --git a/src/main/scala/cromwell/CommandLineParser.scala b/src/main/scala/cromwell/CommandLineParser.scala new file mode 100644 index 000000000..da909505e --- /dev/null +++ b/src/main/scala/cromwell/CommandLineParser.scala @@ -0,0 +1,107 @@ +package cromwell + +import com.typesafe.config.ConfigFactory +import cromwell.core.path.{DefaultPathBuilder, Path} +import scopt.OptionParser + +object CommandLineParser extends App { + + sealed trait Command + case object Run extends Command + case object Server extends Command + + case class CommandLineArguments(command: Option[Command] = None, + workflowSource: Option[Path] = None, + workflowInputs: Option[Path] = None, + workflowOptions: Option[Path] = None, + workflowType: Option[String] = Option("WDL"), + workflowTypeVersion: Option[String] = Option("v2.0-draft"), + workflowLabels: Option[Path] = None, + imports: Option[Path] = None, + metadataOutput: Option[Path] = None + ) + + lazy val cromwellVersion = ConfigFactory.load("cromwell-version.conf").getConfig("version").getString("cromwell") + + 
case class ParserAndCommand(parser: OptionParser[CommandLineArguments], command: Option[Command]) + + // cromwell 29 + // Usage: java -jar /path/to/cromwell.jar [server|run] [options] ... + // + // --help Cromwell - Workflow Execution Engine + // --version + // Command: server + // Starts a web server on port 8000. See the web server documentation for more details about the API endpoints. + // Command: run [options] workflow-source + // Run the workflow and print out the outputs in JSON format. + // workflow-source Workflow source file. + // -i, --inputs Workflow inputs file. + // -o, --options Workflow options file. + // -t, --type Workflow type. + // -v, --type-version + // Workflow type version. + // -l, --labels Workflow labels file. + // -p, --imports A directory or zipfile to search for workflow imports. + // -m, --metadata-output + // An optional directory path to output metadata. + + def buildParser(): scopt.OptionParser[CommandLineArguments] = { + new scopt.OptionParser[CommandLineArguments]("java -jar /path/to/cromwell.jar") { + head("cromwell", cromwellVersion) + + help("help").text("Cromwell - Workflow Execution Engine") + + version("version") + + cmd("server").action((_, c) => c.copy(command = Option(Server))).text( + "Starts a web server on port 8000. See the web server documentation for more details about the API endpoints.") + + cmd("run"). + action((_, c) => c.copy(command = Option(Run))). + text("Run the workflow and print out the outputs in JSON format."). + children( + arg[String]("workflow-source").text("Workflow source file.").required(). + action((s, c) => c.copy(workflowSource = Option(DefaultPathBuilder.get(s)))), + opt[String]('i', "inputs").text("Workflow inputs file."). + action((s, c) => + c.copy(workflowInputs = Option(DefaultPathBuilder.get(s)))), + opt[String]('o', "options").text("Workflow options file."). 
+ action((s, c) => + c.copy(workflowOptions = Option(DefaultPathBuilder.get(s)))), + opt[String]('t', "type").text("Workflow type."). + action((s, c) => + c.copy(workflowType = Option(s))), + opt[String]('v', "type-version").text("Workflow type version."). + action((s, c) => + c.copy(workflowTypeVersion = Option(s))), + opt[String]('l', "labels").text("Workflow labels file."). + action((s, c) => + c.copy(workflowLabels = Option(DefaultPathBuilder.get(s)))), + opt[String]('p', "imports").text( + "A directory or zipfile to search for workflow imports."). + action((s, c) => + c.copy(imports = Option(DefaultPathBuilder.get(s)))), + opt[String]('m', "metadata-output").text( + "An optional directory path to output metadata."). + action((s, c) => + c.copy(metadataOutput = Option(DefaultPathBuilder.get(s)))) + ) + } + } + + def runCromwell(args: CommandLineArguments): Unit = { + args.command match { + case Some(Run) => CromwellEntryPoint.runSingle(args) + case Some(Server) => CromwellEntryPoint.runServer() + case None => parser.showUsage() + } + } + + val parser = buildParser() + + val parsedArgs = parser.parse(args, CommandLineArguments()) + parsedArgs match { + case Some(pa) => runCromwell(pa) + case None => parser.showUsage() + } +} diff --git a/src/main/scala/cromwell/CromwellCommandLine.scala b/src/main/scala/cromwell/CromwellCommandLine.scala deleted file mode 100644 index 192242a5e..000000000 --- a/src/main/scala/cromwell/CromwellCommandLine.scala +++ /dev/null @@ -1,113 +0,0 @@ -package cromwell - -import java.nio.file.{Files, Path, Paths} - -import better.files._ -import cromwell.core.{ErrorOr, WorkflowSourceFiles} -import cromwell.util.FileUtil._ -import lenthall.exception.MessageAggregation - -import scala.util.{Failure, Success, Try} -import scalaz.Scalaz._ - -sealed abstract class CromwellCommandLine -case object UsageAndExit extends CromwellCommandLine -case object RunServer extends CromwellCommandLine -final case class RunSingle(wdlPath: Path, - sourceFiles: 
WorkflowSourceFiles, - inputsPath: Option[Path], - optionsPath: Option[Path], - metadataPath: Option[Path]) extends CromwellCommandLine - -object CromwellCommandLine { - def apply(args: Seq[String]): CromwellCommandLine = { - args.headOption match { - case Some("server") if args.size == 1 => RunServer - case Some("run") if args.size >= 2 && args.size <= 5 => RunSingle(args.tail) - case _ => UsageAndExit - } - } -} - -object RunSingle { - def apply(args: Seq[String]): RunSingle = { - val wdlPath = Paths.get(args.head).toAbsolutePath - val inputsPath = argPath(args, 1, Option(".inputs"), checkDefaultExists = false) - val optionsPath = argPath(args, 2, Option(".options"), checkDefaultExists = true) - val metadataPath = argPath(args, 3, None) - - val wdl = readContent("WDL file", wdlPath) - val inputsJson = readJson("Inputs", inputsPath) - val optionsJson = readJson("Workflow Options", optionsPath) - - val sourceFiles = (wdl |@| inputsJson |@| optionsJson) { WorkflowSourceFiles.apply } - - import scalaz.Validation.FlatMap._ - val runSingle = for { - sources <- sourceFiles - _ <- writeableMetadataPath(metadataPath) - } yield RunSingle(wdlPath, sources, inputsPath, optionsPath, metadataPath) - - runSingle match { - case scalaz.Success(r) => r - case scalaz.Failure(nel) => throw new RuntimeException with MessageAggregation { - override def exceptionContext: String = "ERROR: Unable to run Cromwell:" - override def errorMessages: Traversable[String] = nel.list.toList - } - } - } - - private def writeableMetadataPath(path: Option[Path]): ErrorOr[Unit] = { - path match { - case Some(p) if !metadataPathIsWriteable(p) => s"Unable to write to metadata directory: $p".failureNel - case otherwise => ().successNel - } - } - - /** Read the path to a string. 
*/ - private def readContent(inputDescription: String, path: Path): ErrorOr[String] = { - if (!Files.exists(path)) { - s"$inputDescription does not exist: $path".failureNel - } else if (!Files.isReadable(path)) { - s"$inputDescription is not readable: $path".failureNel - } else File(path).contentAsString.successNel - } - - /** Read the path to a string, unless the path is None, in which case returns "{}". */ - private def readJson(inputDescription: String, pathOption: Option[Path]): ErrorOr[String] = { - pathOption match { - case Some(path) => readContent(inputDescription, path) - case None => "{}".successNel - } - } - - private def metadataPathIsWriteable(metadataPath: Path): Boolean = { - Try(File(metadataPath).createIfNotExists(asDirectory = false, createParents = true).append("")) match { - case Success(_) => true - case Failure(_) => false - } - } - - /** - * Retrieve the arg at index as path, or return some default. Args specified as "-" will be returned as None. - * - * @param args The run command arguments, with the wdl path at arg.head. - * @param index The index of the path we're looking for. - * @param defaultExt The default extension to use if the argument was not specified at all. - * @param checkDefaultExists If true, verify that our computed default file exists before using it. - * @return The argument as a Path resolved as a sibling to the wdl path. - */ - private def argPath(args: Seq[String], index: Int, defaultExt: Option[String], - checkDefaultExists: Boolean = true): Option[Path] = { - - // To return a default, swap the extension, and then maybe check if the file exists. - def defaultPath = defaultExt - .map(ext => swapExt(args.head, ".wdl", ext)) - .filter(path => !checkDefaultExists || Files.exists(Paths.get(path))) - - // Return the path for the arg index, or the default, but remove "-" paths. 
- for { - path <- args.lift(index) orElse defaultPath filterNot (_ == "-") - } yield Paths.get(path).toAbsolutePath - } -} diff --git a/src/main/scala/cromwell/CromwellEntryPoint.scala b/src/main/scala/cromwell/CromwellEntryPoint.scala new file mode 100644 index 000000000..ab8926fbf --- /dev/null +++ b/src/main/scala/cromwell/CromwellEntryPoint.scala @@ -0,0 +1,202 @@ +package cromwell + +import akka.pattern.GracefulStopSupport +import cats.data.Validated._ +import cats.syntax.cartesian._ +import cats.syntax.validated._ +import com.typesafe.config.ConfigFactory +import cromwell.CommandLineParser._ +import cromwell.core.path.Path +import cromwell.core.{WorkflowSourceFilesCollection, WorkflowSourceFilesWithDependenciesZip, WorkflowSourceFilesWithoutImports} +import cromwell.engine.workflow.SingleWorkflowRunnerActor +import cromwell.engine.workflow.SingleWorkflowRunnerActor.RunWorkflow +import cromwell.server.{CromwellServer, CromwellSystem} +import lenthall.exception.MessageAggregation +import lenthall.validation.ErrorOr._ +import net.ceedubs.ficus.Ficus._ +import org.slf4j.LoggerFactory + +import scala.collection.JavaConverters._ +import scala.concurrent.duration.{Duration, _} +import scala.concurrent.{Await, Future, TimeoutException} +import scala.language.postfixOps +import scala.util.{Failure, Success, Try} + +object CromwellEntryPoint extends GracefulStopSupport { + + lazy val EntryPointLogger = LoggerFactory.getLogger("Cromwell EntryPoint") + private lazy val config = ConfigFactory.load() + + // Only abort jobs on SIGINT if the config explicitly sets system.abort-jobs-on-terminate = true. + val abortJobsOnTerminate = config.as[Boolean]("system.abort-jobs-on-terminate") + + val gracefulShutdown = config.as[Boolean]("system.graceful-server-shutdown") + + /** + * Run Cromwell in server mode. 
+ */ + def runServer() = { + val system = buildCromwellSystem(Server) + waitAndExit(CromwellServer.run(gracefulShutdown, abortJobsOnTerminate), system) + } + + /** + * Run a single workflow using the successfully parsed but as yet not validated arguments. + */ + def runSingle(args: CommandLineArguments): Unit = { + val cromwellSystem = buildCromwellSystem(Run) + implicit val actorSystem = cromwellSystem.actorSystem + + val sources = validateRunArguments(args) + val runnerProps = SingleWorkflowRunnerActor.props(sources, args.metadataOutput, gracefulShutdown, abortJobsOnTerminate)(cromwellSystem.materializer) + + val runner = cromwellSystem.actorSystem.actorOf(runnerProps, "SingleWorkflowRunnerActor") + + import cromwell.util.PromiseActor.EnhancedActorRef + waitAndExit(_ => runner.askNoTimeout(RunWorkflow), cromwellSystem) + } + + private def buildCromwellSystem(command: Command): CromwellSystem = { + initLogging(command) + lazy val Log = LoggerFactory.getLogger("cromwell") + Try { + new CromwellSystem {} + } recoverWith { + case t: Throwable => + Log.error("Failed to instantiate Cromwell System. Shutting down Cromwell.") + Log.error(t.getMessage) + System.exit(1) + Failure(t) + } get + } + + /** + * If a cromwell server is going to be run, makes adjustments to the default logback configuration. + * Overwrites LOG_MODE system property used in our logback.xml, _before_ the logback classes load. + * Restored from similar functionality in + * https://github.com/broadinstitute/cromwell/commit/2e3f45b#diff-facc2160a82442932c41026c9a1e4b2bL28 + * TODO: Logback is configurable programmatically. We don't have to overwrite system properties like this. + * + * Also copies variables from config/system/environment/defaults over to the system properties. + * Fixes issue where users are trying to specify Java properties as environment variables. 
+ */ + private def initLogging(command: Command): Unit = { + val logbackSetting = command match { + case Server => "STANDARD" + case Run => "PRETTY" + } + + val defaultProps = Map( + "LOG_MODE" -> logbackSetting, + "LOG_LEVEL" -> "INFO" + ) + + val configWithFallbacks = config + .withFallback(ConfigFactory.systemEnvironment()) + .withFallback(ConfigFactory.parseMap(defaultProps.asJava, "Defaults")) + + val props = sys.props + defaultProps.keys foreach { key => + props += key -> configWithFallbacks.getString(key) + } + + /* + We've possibly copied values from the environment, or our defaults, into the system properties. + Make sure that the next time one uses the ConfigFactory that our updated system properties are loaded. + */ + ConfigFactory.invalidateCaches() + } + + private def waitAndExit(runner: CromwellSystem => Future[Any], workflowManagerSystem: CromwellSystem): Unit = { + val futureResult = runner(workflowManagerSystem) + Await.ready(futureResult, Duration.Inf) + + try { + Await.ready(workflowManagerSystem.shutdownActorSystem(), 30 seconds) + } catch { + case _: TimeoutException => Console.err.println("Timed out trying to shutdown actor system") + case other: Exception => Console.err.println(s"Unexpected error trying to shutdown actor system: ${other.getMessage}") + } + + val returnCode = futureResult.value.get match { + case Success(_) => 0 + case Failure(e) => + Console.err.println(e.getMessage) + 1 + } + + sys.exit(returnCode) + } + + def validateRunArguments(args: CommandLineArguments): WorkflowSourceFilesCollection = { + + val workflowSource = readContent("Workflow source", args.workflowSource.get) + val inputsJson = readJson("Workflow inputs", args.workflowInputs) + val optionsJson = readJson("Workflow options", args.workflowOptions) + val labelsJson = readJson("Workflow labels", args.workflowLabels) + + val sourceFileCollection = args.imports match { + case Some(p) => (workflowSource |@| inputsJson |@| optionsJson |@| labelsJson) map { (w, i, o, l) 
=> + WorkflowSourceFilesWithDependenciesZip.apply( + workflowSource = w, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = i, + workflowOptionsJson = o, + labelsJson = l, + importsZip = p.loadBytes) + } + case None => (workflowSource |@| inputsJson |@| optionsJson |@| labelsJson) map { (w, i, o, l) => + WorkflowSourceFilesWithoutImports.apply( + workflowSource = w, + workflowType = Option("WDL"), + workflowTypeVersion = None, + inputsJson = i, + workflowOptionsJson = o, + labelsJson = l + ) + } + } + + val sourceFiles = for { + sources <- sourceFileCollection + _ <- writeableMetadataPath(args.metadataOutput) + } yield sources + + sourceFiles match { + case Valid(r) => r + case Invalid(nel) => throw new RuntimeException with MessageAggregation { + override def exceptionContext: String = "ERROR: Unable to run Cromwell:" + override def errorMessages: Traversable[String] = nel.toList + } + } + } + + private def writeableMetadataPath(path: Option[Path]): ErrorOr[Unit] = { + path match { + case Some(p) if !metadataPathIsWriteable(p) => s"Unable to write to metadata directory: $p".invalidNel + case _ => ().validNel + } + } + + /** Read the path to a string. */ + private def readContent(inputDescription: String, path: Path): ErrorOr[String] = { + if (!path.exists) { + s"$inputDescription does not exist: $path".invalidNel + } else if (!path.isReadable) { + s"$inputDescription is not readable: $path".invalidNel + } else path.contentAsString.validNel + } + + /** Read the path to a string, unless the path is None, in which case returns "{}". 
*/ + private def readJson(inputDescription: String, pathOption: Option[Path]): ErrorOr[String] = { + pathOption match { + case Some(path) => readContent(inputDescription, path) + case None => "{}".validNel + } + } + + private def metadataPathIsWriteable(metadataPath: Path): Boolean = + Try(metadataPath.createIfNotExists(createParents = true).append("")).isSuccess + +} diff --git a/src/main/scala/cromwell/Main.scala b/src/main/scala/cromwell/Main.scala deleted file mode 100644 index a54a982de..000000000 --- a/src/main/scala/cromwell/Main.scala +++ /dev/null @@ -1,125 +0,0 @@ -package cromwell - -import com.typesafe.config.ConfigFactory -import cromwell.engine.workflow.SingleWorkflowRunnerActor -import cromwell.engine.workflow.SingleWorkflowRunnerActor.RunWorkflow -import cromwell.server.{CromwellServer, CromwellSystem} -import cromwell.util.PromiseActor -import org.slf4j.LoggerFactory - -import scala.collection.JavaConverters._ -import scala.concurrent.duration._ -import scala.concurrent.{Await, Future} -import scala.language.postfixOps -import scala.util.{Failure, Success} - -object Main extends App { - val CommandLine = CromwellCommandLine(args) - initLogging(CommandLine) - - lazy val Log = LoggerFactory.getLogger("cromwell") - lazy val CromwellSystem = new CromwellSystem {} - - CommandLine match { - case UsageAndExit => usageAndExit() - case RunServer => waitAndExit(CromwellServer.run(CromwellSystem), CromwellSystem) - case r: RunSingle => runWorkflow(r) - } - - /** - * If a cromwell server is going to be run, makes adjustments to the default logback configuration. - * Overwrites LOG_MODE system property used in our logback.xml, _before_ the logback classes load. - * Restored from similar functionality in - * https://github.com/broadinstitute/cromwell/commit/2e3f45b#diff-facc2160a82442932c41026c9a1e4b2bL28 - * TODO: Logback is configurable programmatically. We don't have to overwrite system properties like this. 
- * - * Also copies variables from config/system/environment/defaults over to the system properties. - * Fixes issue where users are trying to specify Java properties as environment variables. - */ - private def initLogging(commandLine: CromwellCommandLine): Unit = { - val defaultLogMode = commandLine match { - case RunServer => "STANDARD" - case _ => "PRETTY" - } - - val defaultProps = Map("LOG_MODE" -> defaultLogMode, "LOG_LEVEL" -> "INFO") - - val config = ConfigFactory.load - .withFallback(ConfigFactory.systemEnvironment()) - .withFallback(ConfigFactory.parseMap(defaultProps.asJava, "Defaults")) - - val props = sys.props - defaultProps.keys foreach { key => - props += key -> config.getString(key) - } - - /* - We've possibly copied values from the environment, or our defaults, into the system properties. - Make sure that the next time one uses the ConfigFactory that our updated system properties are loaded. - */ - ConfigFactory.invalidateCaches() - } - - private def runWorkflow(commandLine: RunSingle): Unit = { - implicit val actorSystem = CromwellSystem.actorSystem - - Log.info(s"RUN sub-command") - Log.info(s" WDL file: ${commandLine.wdlPath}") - commandLine.inputsPath foreach { i => Log.info(s" Inputs: $i") } - commandLine.optionsPath foreach { o => Log.info(s" Workflow Options: $o") } - commandLine.metadataPath foreach { m => Log.info(s" Workflow Metadata Output: $m") } - - val runnerProps = SingleWorkflowRunnerActor.props(commandLine.sourceFiles, commandLine.metadataPath) - - val runner = CromwellSystem.actorSystem.actorOf(runnerProps, "SingleWorkflowRunnerActor") - - import PromiseActor.EnhancedActorRef - import scala.concurrent.ExecutionContext.Implicits.global - - val promise = runner.askNoTimeout(RunWorkflow) - waitAndExit(promise, CromwellSystem) - } - - - private def waitAndExit(futureResult: Future[Any], workflowManagerSystem: CromwellSystem): Unit = { - Await.ready(futureResult, Duration.Inf) - - workflowManagerSystem.shutdownActorSystem() - - val 
returnCode = futureResult.value.get match { - case Success(_) => 0 - case Failure(e) => - Console.err.println(e.getMessage) - 1 - } - - sys.exit(returnCode) - } - - def usageAndExit(): Unit = { - println( - """ - |java -jar cromwell.jar - | - |Actions: - |run [ [ - | []]] - | - | Given a WDL file and JSON file containing the value of the - | workflow inputs, this will run the workflow locally and - | print out the outputs in JSON format. The workflow - | options file specifies some runtime configuration for the - | workflow (see README for details). The workflow metadata - | output is an optional file path to output the metadata. - | Use a single dash ("-") to skip optional files. Ex: - | run noinputs.wdl - - metadata.json - | - |server - | - | Starts a web server on port 8000. See the web server - | documentation for more details about the API endpoints. - """.stripMargin) - - System.exit(1) - } -} diff --git a/src/test/scala/cromwell/CromwellCommandLineSpec.scala b/src/test/scala/cromwell/CromwellCommandLineSpec.scala index ec85f3ab7..1f452175d 100644 --- a/src/test/scala/cromwell/CromwellCommandLineSpec.scala +++ b/src/test/scala/cromwell/CromwellCommandLineSpec.scala @@ -1,82 +1,123 @@ package cromwell -import better.files._ -import cromwell.core.PathFactory._ +import cromwell.CommandLineParser.{CommandLineArguments, Run, Server} +import cromwell.CromwellCommandLineSpec.WdlAndInputs +import cromwell.core.path.{DefaultPathBuilder, Path} import cromwell.util.SampleWdl -import cromwell.util.SampleWdl.ThreeStep -import org.scalatest.{FlatSpec, Matchers} +import cromwell.util.SampleWdl.{FileClobber, FilePassingWorkflow, ThreeStep} +import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} -import scala.language.postfixOps import scala.util.Try -class CromwellCommandLineSpec extends FlatSpec with Matchers { - import CromwellCommandLineSpec._ +class CromwellCommandLineSpec extends FlatSpec with Matchers with BeforeAndAfter { + + var parser: 
scopt.OptionParser[CommandLineArguments] = _ behavior of "CromwellCommandLine" - it should "UsageAndExit with no arguments" in { - CromwellCommandLine(List.empty[String]) shouldBe UsageAndExit + before { + parser = CommandLineParser.buildParser() } - it should "RunServer when specified" in { - CromwellCommandLine(List("server")) shouldBe RunServer + it should "fail to parse with no arguments" in { + parser.parse(Array.empty[String], CommandLineArguments()).get.command shouldBe None } - it should "UsageAndExit when supplying an argument to server" in { - CromwellCommandLine(List("server", "foo")) shouldBe UsageAndExit + it should "run server when specified" in { + parser.parse(Array("server"), CommandLineArguments()).get.command shouldBe Some(Server) } - it should "UsageAndExit with no arguments to run" in { - CromwellCommandLine(List("run")) shouldBe UsageAndExit + it should "fail to parse when supplying an argument to server" in { + parser.parse(Array("server", "foo"), CommandLineArguments()) shouldBe None } - it should "fail with too many arguments to run" in { - CromwellCommandLine(List("run", "bork", "bork", "bork", "bork", "bork")) + it should "fail to parse with no arguments to run" in { + parser.parse(Array("run"), CommandLineArguments()) shouldBe None } - it should "RunSingle when supplying wdl and inputs" in { - CromwellCommandLine(List("run", ThreeStepWithoutOptions.wdl, ThreeStepWithoutOptions.inputs)) shouldBe a [RunSingle] + it should "fail to parse with too many arguments to run" in { + parser.parse(Array("run", "forrest", "run"), CommandLineArguments()) shouldBe None } - it should "RunSingle with default inputs when only supplying wdl" in { - val ccl = CromwellCommandLine(List("run", ThreeStepWithoutOptions.wdl)).asInstanceOf[RunSingle] - ccl.sourceFiles.inputsJson shouldBe ThreeStepInputs + // --version exits the JVM which is not great in a test suite. Haven't figure out a way to test this yet. 
+ // it should "handle version output when the `-version` flag is passed" in { + // // I don't see a way to see that --version is printing just the version, but this at least confirms a `None` + // // output that should generate a usage and version. + // parser.parse(Array("--version"), CommandLineArguments()) shouldBe None + // } + + it should "run single when supplying wdl and inputs" in { + val optionsLast = parser.parse(Array("run", "3step.wdl", "--inputs", "3step.inputs"), CommandLineArguments()).get + optionsLast.command shouldBe Some(Run) + optionsLast.workflowSource.get.pathAsString shouldBe "3step.wdl" + optionsLast.workflowInputs.get.pathAsString shouldBe "3step.inputs" + + val optionsFirst = parser.parse(Array("run", "--inputs", "3step.inputs", "3step.wdl"), CommandLineArguments()).get + optionsFirst.command shouldBe Some(Run) + optionsFirst.workflowSource.get.pathAsString shouldBe "3step.wdl" + optionsFirst.workflowInputs.get.pathAsString shouldBe "3step.inputs" } - it should "RunSingle with defaults if you use dashes" in { - val ccl = CromwellCommandLine(List("run", ThreeStepWithoutOptions.wdl, "-", "-", "-")).asInstanceOf[RunSingle] - ccl.sourceFiles.inputsJson shouldBe "{}" - ccl.sourceFiles.workflowOptionsJson shouldBe "{}" + it should "run single when supplying wdl and inputs and options" in { + val optionsLast = parser.parse(Array("run", "3step.wdl", "--inputs", "3step.inputs", "--options", "3step.options"), CommandLineArguments()).get + optionsLast.command shouldBe Some(Run) + optionsLast.workflowSource.get.pathAsString shouldBe "3step.wdl" + optionsLast.workflowInputs.get.pathAsString shouldBe "3step.inputs" + optionsLast.workflowOptions.get.pathAsString shouldBe "3step.options" + + val optionsFirst = parser.parse(Array("run", "--inputs", "3step.inputs", "--options", "3step.options", "3step.wdl"), CommandLineArguments()).get + optionsFirst.command shouldBe Some(Run) + optionsFirst.workflowSource.get.pathAsString shouldBe "3step.wdl" + 
optionsFirst.workflowInputs.get.pathAsString shouldBe "3step.inputs" + optionsFirst.workflowOptions.get.pathAsString shouldBe "3step.options" } - it should "RunSingle with options, if passed in" in { - val threeStep = WdlAndInputs(ThreeStep, optionsJson = """{ foobar bad json! }""") - val ccl = CromwellCommandLine(List("run", threeStep.wdl, threeStep.inputs, threeStep.options)).asInstanceOf[RunSingle] - ccl.sourceFiles.workflowOptionsJson shouldBe threeStep.optionsJson - } + it should "fail if input files do not exist" in { + val parsedArgs = parser.parse(Array("run", "3step.wdl", "--inputs", "3step.inputs", "--options", "3step.options"), CommandLineArguments()).get + val validation = Try(CromwellEntryPoint.validateRunArguments(parsedArgs)) - it should "fail if inputs path does not exist" in { - val ccl = Try(CromwellCommandLine(List("run", ThreeStepWithoutOptions.wdl, "/some/path/that/doesnt/exit"))) - ccl.isFailure shouldBe true - ccl.failed.get.getMessage should include("Inputs does not exist") + validation.isFailure shouldBe true + validation.failed.get.getMessage should include("Workflow source does not exist") + validation.failed.get.getMessage should include("Workflow inputs does not exist") + validation.failed.get.getMessage should include("Workflow options does not exist") } - it should "fail if inputs path is not writeable" in { + it should "fail if inputs path is not readable" in { val threeStep = WdlAndInputs(ThreeStep) + val parsedArgs = parser.parse(Array("run", threeStep.wdl, "--inputs", threeStep.inputs), CommandLineArguments()).get threeStep.inputsFile setPermissions Set.empty - val ccl = Try(CromwellCommandLine(List("run", threeStep.wdl, threeStep.inputs))) + val ccl = Try(CromwellEntryPoint.validateRunArguments(parsedArgs)) ccl.isFailure shouldBe true - ccl.failed.get.getMessage should include("Inputs is not readable") + ccl.failed.get.getMessage should include("Workflow inputs is not readable") } - it should "fail if metadata path is not 
writeable" in { + it should "fail if metadata output path is not writeable" in { val threeStep = WdlAndInputs(ThreeStep) + val parsedArgs = parser.parse(Array("run", threeStep.wdl, "--inputs", threeStep.inputs, "--metadata-output", threeStep.metadata), CommandLineArguments()).get threeStep.metadataFile write "foo" threeStep.metadataFile setPermissions Set.empty - val ccl = Try(CromwellCommandLine(List("run", threeStep.wdl, threeStep.inputs, "-", threeStep.metadata))) + val ccl = Try(CromwellEntryPoint.validateRunArguments(parsedArgs)) ccl.isFailure shouldBe true ccl.failed.get.getMessage should include("Unable to write to metadata directory:") } + + it should "run if the imports path is a .zip file" in { + val wdlDir = DefaultPathBuilder.createTempDirectory("wdlDirectory") + + val filePassing = DefaultPathBuilder.createTempFile("filePassing", ".wdl", Option(wdlDir)) + val fileClobber = DefaultPathBuilder.createTempFile("fileClobber", ".wdl", Option(wdlDir)) + filePassing write FilePassingWorkflow.workflowSource() + fileClobber write FileClobber.workflowSource() + + val zippedDir = wdlDir.zip() + val zippedPath = zippedDir.pathAsString + + val parsedArgs = parser.parse(Array("run", filePassing.pathAsString, "--imports", zippedPath), CommandLineArguments()).get + val ccl = Try(CromwellEntryPoint.validateRunArguments(parsedArgs)) + ccl.isFailure shouldBe false + + zippedDir.delete(swallowIOExceptions = true) + } } object CromwellCommandLineSpec { @@ -89,26 +130,26 @@ object CromwellCommandLineSpec { */ case class WdlAndInputs(sampleWdl: SampleWdl, optionsJson: String = "{}") { // Track all the temporary files we create, and delete them after the test. 
- private var tempFiles = Vector.empty[File] + private var tempFiles = Vector.empty[Path] lazy val wdlFile = { - val file = File.newTemporaryFile(s"${sampleWdl.name}.", ".wdl") + val file = DefaultPathBuilder.createTempFile(s"${sampleWdl.name}.", ".wdl") tempFiles :+= file - file write sampleWdl.wdlSource("") + file write sampleWdl.workflowSource() } lazy val wdl = wdlFile.pathAsString lazy val inputsFile = { - val file = File(wdlFile.path.swapExt(".wdl", ".inputs")) + val file = wdlFile.swapExt("wdl", "inputs") tempFiles :+= file - file write sampleWdl.wdlJson + file write sampleWdl.workflowJson } lazy val inputs = inputsFile.pathAsString lazy val optionsFile = { - val file = File(wdlFile.path.swapExt(".wdl", ".options")) + val file = wdlFile.swapExt("wdl", "options") tempFiles :+= file file write optionsJson } @@ -116,7 +157,7 @@ object CromwellCommandLineSpec { lazy val options = optionsFile.pathAsString lazy val metadataFile = { - val path = File(wdlFile.path.swapExt(".wdl", ".metadata.json")) + val path = wdlFile.swapExt("wdl", "metadata.json") tempFiles :+= path path } diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala deleted file mode 100644 index bd4ce3add..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala +++ /dev/null @@ -1,71 +0,0 @@ -package cromwell.backend.impl.htcondor - -import akka.actor.{ActorRef, Props} -import com.typesafe.config.Config -import com.typesafe.scalalogging.StrictLogging -import cromwell.backend._ -import cromwell.backend.impl.htcondor.caching.CacheActorFactory -import cromwell.backend.io.JobPaths -import cromwell.backend.sfs.SharedFileSystemExpressionFunctions -import cromwell.core.{CallContext, WorkflowOptions} -import wdl4s.Call -import wdl4s.expression.WdlStandardLibraryFunctions - -import 
scala.util.{Failure, Success, Try} - -case class HtCondorBackendFactory(configurationDescriptor: BackendConfigurationDescriptor) - extends BackendLifecycleActorFactory with StrictLogging { - - override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - serviceRegistryActor: ActorRef): Option[Props] = { - Option(HtCondorInitializationActor.props(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) - } - - override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, - initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef): Props = { - HtCondorJobExecutionActor.props(jobDescriptor, configurationDescriptor, serviceRegistryActor, resolveCacheProviderProps(jobDescriptor.workflowDescriptor.workflowOptions)) - } - - override def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, - jobKey: BackendJobDescriptorKey, - initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobKey) - val callContext = CallContext( - jobPaths.callExecutionRoot, - jobPaths.stdout.toAbsolutePath.toString, - jobPaths.stderr.toAbsolutePath.toString - ) - - new SharedFileSystemExpressionFunctions(HtCondorJobExecutionActor.fileSystems, callContext) - } - - private def resolveCacheProviderProps(workflowOptions: WorkflowOptions) = { - val defaultCacheEnabled = configurationDescriptor.backendConfig.getBoolean("cache.enabled") - val cacheEnabled: Boolean = getBooleanFromWfOptions(workflowOptions, "cacheEnabled", defaultCacheEnabled) - - if (cacheEnabled) { - val defaultForceRewrite = configurationDescriptor.backendConfig.getBoolean("cache.forceRewrite") - val cacheForceRewrite = getBooleanFromWfOptions(workflowOptions, "cacheForceRw", defaultForceRewrite) - val provider = configurationDescriptor.backendConfig.getString("cache.provider") - 
val cacheFactory = Class.forName(provider) - .getConstructor(classOf[Config]) - .newInstance(configurationDescriptor.backendConfig) - .asInstanceOf[CacheActorFactory] - Option(cacheFactory.getCacheActorProps(cacheForceRewrite)) - } - else None - } - - private def getBooleanFromWfOptions(workflowOptions: WorkflowOptions, optionKey: String, defaultValue: Boolean) = { - workflowOptions.get(optionKey) match { - case Success(value) => Try(value.toBoolean).getOrElse { - logger.warn(s"Could not get '$optionKey' attribute from workflow options. Falling back to default value.") - defaultValue - } - case Failure(_) => defaultValue - } - } -} - diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActor.scala deleted file mode 100644 index b0fdc75b2..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActor.scala +++ /dev/null @@ -1,69 +0,0 @@ -package cromwell.backend.impl.htcondor - -import akka.actor.{ActorRef, Props} -import cromwell.backend.impl.htcondor.HtCondorInitializationActor._ -import cromwell.backend.impl.htcondor.HtCondorRuntimeAttributes._ -import cromwell.backend.validation.RuntimeAttributesDefault -import cromwell.backend.validation.RuntimeAttributesKeys._ -import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} -import cromwell.core.WorkflowOptions -import wdl4s.types.{WdlBooleanType, WdlIntegerType, WdlStringType} -import wdl4s.values.WdlValue -import wdl4s.Call - -import scala.concurrent.Future -import scala.util.Try - -object HtCondorInitializationActor { - val SupportedKeys = Set(DockerKey, DockerWorkingDirKey, DockerOutputDirKey, FailOnStderrKey, - ContinueOnReturnCodeKey, CpuKey, MemoryKey, DiskKey) - - def props(workflowDescriptor: 
BackendWorkflowDescriptor, - calls: Seq[Call], - configurationDescriptor: BackendConfigurationDescriptor, - serviceRegistryActor: ActorRef): Props = - Props(new HtCondorInitializationActor(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) -} - -class HtCondorInitializationActor(override val workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], - override val configurationDescriptor: BackendConfigurationDescriptor, - override val serviceRegistryActor: ActorRef) extends BackendWorkflowInitializationActor { - - override protected def runtimeAttributeValidators: Map[String, (Option[WdlValue]) => Boolean] = Map( - DockerKey -> wdlTypePredicate(valueRequired = false, WdlStringType.isCoerceableFrom), - DockerWorkingDirKey -> wdlTypePredicate(valueRequired = false, WdlStringType.isCoerceableFrom), - DockerOutputDirKey -> wdlTypePredicate(valueRequired = false, WdlStringType.isCoerceableFrom), - FailOnStderrKey -> wdlTypePredicate(valueRequired = false, WdlBooleanType.isCoerceableFrom), - ContinueOnReturnCodeKey -> continueOnReturnCodePredicate(valueRequired = false), - CpuKey -> wdlTypePredicate(valueRequired = false, WdlIntegerType.isCoerceableFrom), - MemoryKey -> wdlTypePredicate(valueRequired = false, WdlStringType.isCoerceableFrom), - DiskKey -> wdlTypePredicate(valueRequired = false, WdlStringType.isCoerceableFrom) - ) - - /** - * A call which happens before anything else runs - */ - override def beforeAll(): Future[Option[BackendInitializationData]] = Future.successful(None) - - /** - * Validate that this WorkflowBackendActor can run all of the calls that it's been assigned - */ - override def validate(): Future[Unit] = { - Future { - calls foreach { call => - val runtimeAttributes = call.task.runtimeAttributes.attrs - val notSupportedAttributes = runtimeAttributes filterKeys { !SupportedKeys.contains(_) } - - if (notSupportedAttributes.nonEmpty) { - val notSupportedAttrString = notSupportedAttributes.keys mkString 
", " - log.warning(s"Key/s [$notSupportedAttrString] is/are not supported by HtCondorBackend. Unsupported attributes will not be part of jobs executions.") - } - } - } - } - - override protected def coerceDefaultRuntimeAttributes(options: WorkflowOptions): Try[Map[String, WdlValue]] = { - RuntimeAttributesDefault.workflowOptionsDefault(options, HtCondorRuntimeAttributes.coercionMap) - } -} diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActor.scala deleted file mode 100644 index f30111b3d..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActor.scala +++ /dev/null @@ -1,350 +0,0 @@ -package cromwell.backend.impl.htcondor - -import java.nio.file.attribute.PosixFilePermission -import java.nio.file.{FileSystems, Files, Path, Paths} - -import akka.actor.{ActorRef, Props} -import better.files.File -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse, FailedNonRetryableResponse, SucceededResponse} -import cromwell.backend._ -import cromwell.backend.impl.htcondor.caching.CacheActor._ -import cromwell.backend.impl.htcondor.caching.localization.CachedResultLocalization -import cromwell.backend.io.JobPaths -import cromwell.backend.sfs.{SharedFileSystem, SharedFileSystemExpressionFunctions} -import cromwell.core.{JobOutput, JobOutputs, LocallyQualifiedName} -import cromwell.services.keyvalue.KeyValueServiceActor._ -import org.apache.commons.codec.digest.DigestUtils -import wdl4s._ -import wdl4s.parser.MemoryUnit -import wdl4s.types.{WdlArrayType, WdlFileType} -import wdl4s.util.TryUtil -import wdl4s.values.{WdlArray, WdlFile, WdlSingleFile, WdlValue} - -import scala.concurrent.{Future, Promise} -import scala.sys.process.ProcessLogger -import scala.util.{Failure, Success, Try} -import scala.language.postfixOps - 
-object HtCondorJobExecutionActor { - val HtCondorJobIdKey = "htCondor_job_id" - - val fileSystems = List(FileSystems.getDefault) - - def props(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor, serviceRegistryActor: ActorRef, cacheActorProps: Option[Props]): Props = - Props(new HtCondorJobExecutionActor(jobDescriptor, configurationDescriptor, serviceRegistryActor, cacheActorProps)) -} - -class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, - override val configurationDescriptor: BackendConfigurationDescriptor, - serviceRegistryActor: ActorRef, - cacheActorProps: Option[Props]) - extends BackendJobExecutionActor with CachedResultLocalization with SharedFileSystem { - - import HtCondorJobExecutionActor._ - import better.files._ - import cromwell.core.PathFactory._ - - private val tag = s"CondorJobExecutionActor-${jobDescriptor.call.fullyQualifiedName}:" - - implicit val executionContext = context.dispatcher - - lazy val cmds = new HtCondorCommands - lazy val extProcess = new HtCondorProcess - - private val fileSystemsConfig = configurationDescriptor.backendConfig.getConfig("filesystems") - override val sharedFileSystemConfig = fileSystemsConfig.getConfig("local") - private val workflowDescriptor = jobDescriptor.workflowDescriptor - private val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobDescriptor.key) - - // Files - private val executionDir = jobPaths.callExecutionRoot - private val returnCodePath = jobPaths.returnCode - private val stdoutPath = jobPaths.stdout - private val stderrPath = jobPaths.stderr - private val scriptPath = jobPaths.script - - // stdout stderr writers for submit file logs - private val submitFilePath = executionDir.resolve("submitfile") - private val submitFileStderr = executionDir.resolve("submitfile.stderr") - private val submitFileStdout = executionDir.resolve("submitfile.stdout") - private val htCondorLog = 
executionDir.resolve(s"${jobDescriptor.call.unqualifiedName}.log") - - private lazy val stdoutWriter = extProcess.untailedWriter(submitFileStdout) - private lazy val stderrWriter = extProcess.tailedWriter(100, submitFileStderr) - - private val call = jobDescriptor.key.call - private val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, fileSystems) - - private val lookup = jobDescriptor.inputs.apply _ - - private val runtimeAttributes = { - val evaluateAttrs = call.task.runtimeAttributes.attrs mapValues (_.evaluate(lookup, callEngineFunction)) - // Fail the call if runtime attributes can't be evaluated - val runtimeMap = TryUtil.sequenceMap(evaluateAttrs, "Runtime attributes evaluation").get - HtCondorRuntimeAttributes(runtimeMap, jobDescriptor.workflowDescriptor.workflowOptions) - } - - private val cacheActor = cacheActorProps match { - case Some(props) => Some(context.actorOf(props, s"CacheActor-${jobDescriptor.call.fullyQualifiedName}")) - case None => None - } - - log.debug("{} Calculating hash for current job.", tag) - lazy private val jobHash = calculateHash - - private val executionResponse = Promise[BackendJobExecutionResponse]() - - // Message sent (by self, to self) wrapping over the response produced by HtCondor - private final case class JobExecutionResponse(resp: BackendJobExecutionResponse) - - // Message sent (by self, to self) to trigger a status check to HtCondor - private final case class TrackTaskStatus(id: String) - - private var condorJobId: Option[String] = None - - private val pollingInterval = configurationDescriptor.backendConfig.getInt("poll-interval") - - override def receive = super.receive orElse { - case JobExecutionResponse(resp) => - log.debug("{}: Completing job [{}] with response: [{}]", tag, jobDescriptor.key, resp) - executionResponse trySuccess resp - case TrackTaskStatus(id) => - // Avoid the redundant status check if the response is already completed (e.g. 
in case of abort) - if (!executionResponse.isCompleted) trackTask(id) - - // Messages received from Caching actor - case ExecutionResultFound(succeededResponse) => executionResponse trySuccess localizeCachedResponse(succeededResponse) - case ExecutionResultNotFound => prepareAndExecute() - case ExecutionResultStored(hash) => log.debug("{} Cache entry was stored for Job with hash {}.", tag, hash) - case ExecutionResultAlreadyExist => log.warning("{} Cache entry for hash {} already exist.", tag, jobHash) - - // Messages received from KV actor - case KvPair(scopedKey, Some(jobId)) if scopedKey.key == HtCondorJobIdKey => - log.info("{} Found job id {}. Trying to recover job now.", tag, jobId) - self ! TrackTaskStatus(jobId) - case KvKeyLookupFailed(_) => - log.debug("{} Job id not found. Falling back to execute.", tag) - execute - case KvFailure(_, e) => - log.error("{} Failure attempting to look up HtCondor job id. Exception message: {}. Falling back to execute.", tag, e.getMessage) - execute - } - - /** - * Restart or resume a previously-started job. - */ - override def recover: Future[BackendJobExecutionResponse] = { - log.warning("{} Trying to recover job {}.", tag, jobDescriptor.key.call.fullyQualifiedName) - serviceRegistryActor ! KvGet(ScopedKey(jobDescriptor.workflowDescriptor.id, - KvJobKey(jobDescriptor.key.call.fullyQualifiedName, jobDescriptor.key.index, jobDescriptor.key.attempt), - HtCondorJobIdKey)) - executionResponse.future - } - - /** - * Execute a new job. - */ - override def execute: Future[BackendJobExecutionResponse] = { - log.debug("{} Checking if hash {{}} is in the cache.", tag, jobHash) - cacheActor match { - case Some(actorRef) => actorRef ! ReadExecutionResult(jobHash) - case None => prepareAndExecute() - } - executionResponse.future - } - - /** - * Abort a running job. 
- */ - override def abort(): Unit = { - // Nothing to do in case `condorJobId` is not defined - condorJobId foreach { id => - log.info("{}: Aborting job [{}:{}].", tag, jobDescriptor.key.tag, id) - val abortProcess = new HtCondorProcess - val argv = Seq(HtCondorCommands.Remove, id) - val process = abortProcess.externalProcess(argv) - val exitVal = process.exitValue() - if (exitVal == 0) - log.info("{}: Job {} successfully killed and removed from the queue.", tag, id) - else - log.error("{}: Failed to kill / remove job {}. Exit Code: {}, Stderr: {}", tag, id, exitVal, abortProcess.processStderr) - } - } - - private def executeTask(): Unit = { - val argv = Seq(HtCondorCommands.Submit, submitFilePath.toString) - val process = extProcess.externalProcess(argv, ProcessLogger(stdoutWriter.writeWithNewline, stderrWriter.writeWithNewline)) - val condorReturnCode = process.exitValue() // blocks until process (i.e. condor submission) finishes - log.debug("{} Return code of condor submit command: {}", tag, condorReturnCode) - - List(stdoutWriter.writer, stderrWriter.writer).foreach(_.flushAndClose()) - - condorReturnCode match { - case 0 if File(submitFileStderr).lines.toList.isEmpty => - log.info("{} {} submitted to HtCondor. Waiting for the job to complete via. RC file status.", tag, jobDescriptor.call.fullyQualifiedName) - val job = HtCondorCommands.SubmitOutputPattern.r - //Number of lines in stdout for submit job will be 3 at max therefore reading all lines at once. - log.debug(s"{} Output of submit process : {}", tag, File(submitFileStdout).lines.toList) - val line = File(submitFileStdout).lines.toList.last - line match { - case job(jobId, clusterId) => - val overallJobIdentifier = s"$clusterId.${jobId.toInt - 1}" // Condor has 0 based indexing on the jobs, probably won't work on stuff like `queue 150` - log.info("{} {} mapped to HtCondor JobID: {}", tag, jobDescriptor.call.fullyQualifiedName, overallJobIdentifier) - serviceRegistryActor ! 
KvPut(KvPair(ScopedKey(jobDescriptor.workflowDescriptor.id, - KvJobKey(jobDescriptor.key.call.fullyQualifiedName, jobDescriptor.key.index, jobDescriptor.key.attempt), - HtCondorJobIdKey), Option(overallJobIdentifier))) - condorJobId = Option(overallJobIdentifier) - self ! TrackTaskStatus(overallJobIdentifier) - - case _ => self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, - new IllegalStateException("Failed to retrieve job(id) and cluster id"), Option(condorReturnCode))) - } - - case 0 => - log.error(s"Unexpected! Received return code for condor submission as 0, although stderr file is non-empty: {}", File(submitFileStderr).lines) - self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, - new IllegalStateException(s"Execution process failed. HtCondor returned zero status code but non empty stderr file: $condorReturnCode"), - Option(condorReturnCode))) - - case nonZeroExitCode: Int => - self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, - new IllegalStateException(s"Execution process failed. HtCondor returned non zero status code: $condorReturnCode"), Option(condorReturnCode))) - } - } - - private def trackTask(jobIdentifier: String): Unit = { - val jobReturnCode = Try(extProcess.jobReturnCode(jobIdentifier, returnCodePath)) - log.debug("{} Process complete. RC file now exists with value: {}", tag, jobReturnCode) - - jobReturnCode match { - case Success(None) => - import scala.concurrent.duration._ - // Job is still running in HtCondor. Check back again after `pollingInterval` seconds - context.system.scheduler.scheduleOnce(pollingInterval.seconds, self, TrackTaskStatus(jobIdentifier)) - case Success(Some(rc)) if runtimeAttributes.continueOnReturnCode.continueFor(rc) => self ! JobExecutionResponse(processSuccess(rc)) - case Success(Some(rc)) => self ! 
JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, - new IllegalStateException("Job exited with invalid return code: " + rc), Option(rc))) - case Failure(error) => self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, error, None)) - } - } - - private def processSuccess(rc: Int): BackendJobExecutionResponse = { - evaluateOutputs(callEngineFunction, outputMapper(jobPaths)) match { - case Success(outputs) => - val succeededResponse = SucceededResponse(jobDescriptor.key, Some(rc), outputs, None, Seq.empty) - log.debug("{} Storing data into cache for hash {}.", tag, jobHash) - // If cache fails to store data for any reason it should not stop the workflow/task execution but log the issue. - cacheActor foreach { _ ! StoreExecutionResult(jobHash, succeededResponse) } - succeededResponse - case Failure(e) => - val message = Option(e.getMessage) map { - ": " + _ - } getOrElse "" - FailedNonRetryableResponse(jobDescriptor.key, new Throwable("Failed post processing of outputs" + message, e), Option(rc)) - } - } - - private def calculateHash: String = { - val cmd = call.task.instantiateCommand(jobDescriptor.inputs, callEngineFunction, identity) match { - case Success(command) => command - case Failure(ex) => - val errMsg = s"$tag Cannot instantiate job command for caching purposes due to ${ex.getMessage}." 
- log.error(ex.getCause, errMsg) - throw new IllegalStateException(errMsg, ex.getCause) - } - val str = Seq(cmd, - runtimeAttributes.failOnStderr, - runtimeAttributes.dockerImage.getOrElse(""), - runtimeAttributes.dockerWorkingDir.getOrElse(""), - runtimeAttributes.dockerOutputDir.getOrElse(""), - runtimeAttributes.cpu.toString, - runtimeAttributes.memory.toString, - runtimeAttributes.disk.toString).mkString - DigestUtils.md5Hex(str) - } - - private def createExecutionFolderAndScript(): Unit = { - try { - log.debug("{} Creating execution folder: {}", tag, executionDir) - executionDir.toString.toFile.createIfNotExists(asDirectory = true, createParents = true) - - log.debug("{} Resolving job command", tag) - val command = localizeInputs(jobPaths.callInputsRoot, runtimeAttributes.dockerImage.isDefined, fileSystems, jobDescriptor.inputs) flatMap { - localizedInputs => resolveJobCommand(localizedInputs) - } - - log.debug("{} Creating bash script for executing command: {}", tag, command) - cmds.writeScript(command.get, scriptPath.toAbsolutePath, executionDir.toAbsolutePath) // Writes the bash script for executing the command - File(scriptPath).addPermission(PosixFilePermission.OWNER_EXECUTE) // Add executable permissions to the script. 
- //TODO: Need to append other runtime attributes from Wdl to Condor submit file - val attributes: Map[String, Any] = Map(HtCondorRuntimeKeys.Executable -> scriptPath.toAbsolutePath, - HtCondorRuntimeKeys.InitialWorkingDir -> jobPaths.callExecutionRoot.toAbsolutePath, - HtCondorRuntimeKeys.Output -> stdoutPath.toAbsolutePath, - HtCondorRuntimeKeys.Error -> stderrPath.toAbsolutePath, - HtCondorRuntimeKeys.Log -> htCondorLog.toAbsolutePath, - HtCondorRuntimeKeys.LogXml -> true, - HtCondorRuntimeKeys.LeaveInQueue -> true, - HtCondorRuntimeKeys.Cpu -> runtimeAttributes.cpu, - HtCondorRuntimeKeys.Memory -> runtimeAttributes.memory.to(MemoryUnit.MB).amount.toLong, - HtCondorRuntimeKeys.Disk -> runtimeAttributes.disk.to(MemoryUnit.KB).amount.toLong - ) - - cmds.generateSubmitFile(submitFilePath, attributes) // This writes the condor submit file - - } catch { - case ex: Exception => - log.error(ex, "Failed to prepare task: " + ex.getMessage) - throw ex - } - } - - private def resolveJobCommand(localizedInputs: CallInputs): Try[String] = { - if (runtimeAttributes.dockerImage.isDefined) - modifyCommandForDocker(call.task.instantiateCommand(localizedInputs, callEngineFunction, identity), localizedInputs) - else - call.task.instantiateCommand(localizedInputs, callEngineFunction, identity) - } - - private def modifyCommandForDocker(jobCmd: Try[String], localizedInputs: CallInputs): Try[String] = { - Try { - val dockerInputDataVol = localizedInputs.values.collect { - case file if file.wdlType == WdlFileType => - val limit = file.valueString.lastIndexOf("/") - Seq(file.valueString.substring(0, limit)) - case files if files.wdlType == WdlArrayType(WdlFileType) => files.asInstanceOf[WdlArray].value map { file => - val limit = file.valueString.lastIndexOf("/") - file.valueString.substring(0, limit) - } - }.flatten.toSeq - - log.debug("{} List of input volumes: {}", tag, dockerInputDataVol.mkString(",")) - val dockerCmd = configurationDescriptor.backendConfig.getString("docker.cmd") 
- val dockerVolume = "-v %s:%s" - val dockerVolumeInputs = s"$dockerVolume:ro" - // `v.get` is safe below since we filtered the list earlier with only defined elements - val inputVolumes = dockerInputDataVol.distinct.map(v => dockerVolumeInputs.format(v, v)).mkString(" ") - val outputVolume = dockerVolume.format(executionDir.toAbsolutePath.toString, runtimeAttributes.dockerOutputDir.getOrElse(executionDir.toAbsolutePath.toString)) - val cmd = dockerCmd.format(runtimeAttributes.dockerWorkingDir.getOrElse(executionDir.toAbsolutePath.toString), inputVolumes, outputVolume, runtimeAttributes.dockerImage.get, jobCmd.get) - log.debug("{} Docker command line to be used for task execution: {}.", tag, cmd) - cmd - } - } - - private def prepareAndExecute(): Unit = { - Try { - createExecutionFolderAndScript() - executeTask() - } recover { - case exception => self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, exception, None)) - } - } - - private def localizeCachedResponse(succeededResponse: SucceededResponse): BackendJobExecutionResponse = { - Try(localizeCachedOutputs(executionDir, succeededResponse.jobOutputs)) match { - case Success(outputs) => - executionDir.toString.toFile.createIfNotExists(asDirectory = true, createParents = true) - SucceededResponse(jobDescriptor.key, succeededResponse.returnCode, outputs, None, Seq.empty) - case Failure(exception) => FailedNonRetryableResponse(jobDescriptor.key, exception, None) - } - } -} diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributes.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributes.scala deleted file mode 100644 index 1ddb35c35..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributes.scala +++ /dev/null @@ -1,106 +0,0 @@ -package cromwell.backend.impl.htcondor - -import cromwell.backend.MemorySize -import 
cromwell.backend.validation.ContinueOnReturnCode -import cromwell.backend.validation.RuntimeAttributesDefault._ -import cromwell.backend.validation.RuntimeAttributesKeys._ -import cromwell.backend.validation.RuntimeAttributesValidation._ -import cromwell.core._ -import lenthall.exception.MessageAggregation -import wdl4s.types.{WdlIntegerType, WdlStringType, WdlBooleanType, WdlType} -import wdl4s.values.{WdlString, WdlBoolean, WdlInteger, WdlValue} - -import scalaz.Scalaz._ -import scalaz._ - -object HtCondorRuntimeAttributes { - private val FailOnStderrDefaultValue = false - private val ContinueOnRcDefaultValue = 0 - private val CpuDefaultValue = 1 - private val MemoryDefaultValue = "0.512 GB" - private val DisksDefaultValue = "1.024 GB" - - val DockerWorkingDirKey = "dockerWorkingDir" - val DockerOutputDirKey = "dockerOutputDir" - val DiskKey = "disk" - - val staticDefaults = Map( - FailOnStderrKey -> WdlBoolean(FailOnStderrDefaultValue), - ContinueOnReturnCodeKey -> WdlInteger(ContinueOnRcDefaultValue), - CpuKey -> WdlInteger(CpuDefaultValue), - MemoryKey -> WdlString(MemoryDefaultValue), - DiskKey -> WdlString(DisksDefaultValue) - ) - - private[htcondor] val coercionMap: Map[String, Set[WdlType]] = Map ( - FailOnStderrKey -> Set[WdlType](WdlBooleanType), - ContinueOnReturnCodeKey -> ContinueOnReturnCode.validWdlTypes, - DockerKey -> Set(WdlStringType), - DockerWorkingDirKey -> Set(WdlStringType), - DockerOutputDirKey -> Set(WdlStringType), - CpuKey -> Set(WdlIntegerType), - MemoryKey -> Set(WdlStringType), - DiskKey -> Set(WdlStringType) - ) - - def apply(attrs: Map[String, WdlValue], options: WorkflowOptions): HtCondorRuntimeAttributes = { - // Fail now if some workflow options are specified but can't be parsed correctly - val defaultFromOptions = workflowOptionsDefault(options, coercionMap).get - val withDefaultValues = withDefaults(attrs, List(defaultFromOptions, staticDefaults)) - - val docker = validateDocker(withDefaultValues.get(DockerKey), 
None.successNel) - val dockerWorkingDir = validateDockerWorkingDir(withDefaultValues.get(DockerWorkingDirKey), None.successNel) - val dockerOutputDir = validateDockerOutputDir(withDefaultValues.get(DockerOutputDirKey), None.successNel) - val failOnStderr = validateFailOnStderr(withDefaultValues.get(FailOnStderrKey), noValueFoundFor(FailOnStderrKey)) - val continueOnReturnCode = validateContinueOnReturnCode(withDefaultValues.get(ContinueOnReturnCodeKey), noValueFoundFor(ContinueOnReturnCodeKey)) - val cpu = validateCpu(withDefaultValues.get(CpuKey), noValueFoundFor(CpuKey)) - val memory = validateMemory(withDefaultValues.get(MemoryKey), noValueFoundFor(MemoryKey)) - val disk = validateDisk(withDefaultValues.get(DiskKey), noValueFoundFor(DiskKey)) - - (continueOnReturnCode |@| docker |@| dockerWorkingDir |@| dockerOutputDir |@| failOnStderr |@| cpu |@| memory |@| disk) { - new HtCondorRuntimeAttributes(_, _, _, _, _, _, _, _) - } match { - case Success(x) => x - case Failure(nel) => throw new RuntimeException with MessageAggregation { - override def exceptionContext: String = "Runtime attribute validation failed" - override def errorMessages: Traversable[String] = nel.list.toList - } - } - } - - private def validateDockerWorkingDir(dockerWorkingDir: Option[WdlValue], onMissingKey: => ErrorOr[Option[String]]): ErrorOr[Option[String]] = { - dockerWorkingDir match { - case Some(WdlString(s)) => Some(s).successNel - case None => onMissingKey - case _ => s"Expecting $DockerWorkingDirKey runtime attribute to be a String".failureNel - } - } - - private def validateDockerOutputDir(dockerOutputDir: Option[WdlValue], onMissingKey: => ErrorOr[Option[String]]): ErrorOr[Option[String]] = { - dockerOutputDir match { - case Some(WdlString(s)) => Some(s).successNel - case None => onMissingKey - case _ => s"Expecting $DockerOutputDirKey runtime attribute to be a String".failureNel - } - } - - private def validateDisk(value: Option[WdlValue], onMissingKey: => ErrorOr[MemorySize]): 
ErrorOr[MemorySize] = { - val diskWrongFormatMsg = s"Expecting $DiskKey runtime attribute to be an Integer or String with format '8 GB'. Exception: %s" - - value match { - case Some(i: WdlInteger) => parseMemoryInteger(i) - case Some(s: WdlString) => parseMemoryString(s) - case Some(_) => String.format(diskWrongFormatMsg, "Not supported WDL type value").failureNel - case None => onMissingKey - } - } -} - -case class HtCondorRuntimeAttributes(continueOnReturnCode: ContinueOnReturnCode, - dockerImage: Option[String], - dockerWorkingDir: Option[String], - dockerOutputDir: Option[String], - failOnStderr: Boolean, - cpu: Int, - memory: MemorySize, - disk: MemorySize) diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorWrapper.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorWrapper.scala deleted file mode 100644 index 27364b8f2..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorWrapper.scala +++ /dev/null @@ -1,149 +0,0 @@ -package cromwell.backend.impl.htcondor - -import java.nio.file.{Files, Path} - -import better.files._ -import com.typesafe.scalalogging.StrictLogging -import cromwell.backend.impl.htcondor -import cromwell.core.PathFactory.{EnhancedPath, FlushingAndClosingWriter} -import cromwell.core.{TailedWriter, UntailedWriter} - -import scala.language.postfixOps -import scala.sys.process._ - -object JobStatus { - val MapOfStatuses = Map( - 0 -> Created, // This is actually `unexpanded` in HtCondor, not sure what that actually means - 1 -> Created, // Idle - 2 -> Running, - 3 -> Removed, - 4 -> Completed, - 5 -> Failed, // SystemOnHold - 6 -> SubmissionError // Also the default - ) - - def fromCondorStatusCode(statusCode: Int): JobStatus = { - MapOfStatuses.getOrElse(statusCode, SubmissionError) // By default we return SubmissionError - } - - def isTerminal(jobStatus: JobStatus): Boolean = jobStatus.isInstanceOf[TerminalJobStatus] -} - 
-sealed trait JobStatus -sealed trait TerminalJobStatus extends JobStatus -case object Created extends JobStatus -case object Running extends JobStatus -case object Completed extends TerminalJobStatus -case object Removed extends TerminalJobStatus -case object Failed extends TerminalJobStatus -case object Aborted extends TerminalJobStatus -case object SubmissionError extends TerminalJobStatus - - -object HtCondorCommands { - val SubmitOutputPattern = "(\\d*) job\\(s\\) submitted to cluster (\\d*)\\." - val Submit = "condor_submit" - val Remove = "condor_rm" - private val JobStatus = "condor_q %s -autoformat JobStatus" - def generateJobStatusCommand(jobId: String): String = HtCondorCommands.JobStatus.format(jobId) -} - -class HtCondorCommands extends StrictLogging { - - /** - * Writes the script file containing the user's command from the WDL as well - * as some extra shell code for monitoring jobs - */ - def writeScript(instantiatedCommand: String, filePath: Path, containerRoot: Path): Unit = { - logger.debug(s"Writing bash script for execution. Command: $instantiatedCommand.") - File(filePath).write( - s"""#!/bin/sh - |cd $containerRoot - |$instantiatedCommand - |echo $$? 
> rc - |""".stripMargin) - } - - def generateSubmitFile(path: Path, attributes: Map[String, Any]): String = { - def htCondorSubmitCommand(filePath: Path) = { - s"${HtCondorCommands.Submit} ${filePath.toString}" - } - - val submitFileWriter = path.untailed - attributes.foreach(attribute => submitFileWriter.writeWithNewline(s"${attribute._1}=${attribute._2}")) - submitFileWriter.writeWithNewline(HtCondorRuntimeKeys.Queue) - submitFileWriter.writer.flushAndClose() - logger.debug(s"submit file name is : $path") - logger.debug(s"content of file is : ${File(path).lines.toList}") - htCondorSubmitCommand(path) - } - -} - -class HtCondorProcess extends StrictLogging { - private val stdout = new StringBuilder - private val stderr = new StringBuilder - - def processLogger: ProcessLogger = ProcessLogger(stdout append _, stderr append _) - def processStdout: String = stdout.toString().trim - def processStderr: String = stderr.toString().trim - def commandList(command: String): Seq[String] = Seq("/bin/bash",command) - def untailedWriter(path: Path): UntailedWriter = path.untailed - def tailedWriter(limit: Int, path: Path): TailedWriter = path.tailed(limit) - def externalProcess(cmdList: Seq[String], processLogger: ProcessLogger = processLogger): Process = cmdList.run(processLogger) - - /** - * Returns the RC of this job if it has finished. - */ - def jobReturnCode(jobId: String, returnCodeFilePath: Path): Option[Int] = { - - checkStatus(jobId) match { - case status if JobStatus.isTerminal(status) => - Files.exists(returnCodeFilePath) match { - case true => Option(File(returnCodeFilePath).contentAsString.stripLineEnd.toInt) - case false => - val msg = s"JobStatus from Condor is terminal ($status) and no RC file exists!" 
- logger.debug(msg) - throw new IllegalStateException(msg) - } - case nonTerminalStatus => None - } - } - - private def checkStatus(jobId: String): JobStatus = { - val htCondorProcess = new HtCondorProcess - val commandArgv = HtCondorCommands.generateJobStatusCommand(jobId).split(" ").toSeq - val process = htCondorProcess.externalProcess(commandArgv) - val returnCode = process.exitValue() - returnCode match { - case 0 => - val stdout = htCondorProcess.processStdout - // If stdout is empty, that means the job got removed from the queue. Return Completed in that case - val status = if (stdout.isEmpty) htcondor.Completed else JobStatus.fromCondorStatusCode(htCondorProcess.processStdout.toInt) - logger.info("Condor JobId {} current status: {}", jobId, status) - status - case errorCode => - val msg = "Could not retreive status from the queue: " + htCondorProcess.processStderr - logger.error(msg) - throw new IllegalStateException(msg) - } - } - -} - -object HtCondorRuntimeKeys { - val Executable = "executable" - val Arguments = "arguments" - val Error = "error" - val Output = "output" - val Log = "log" - val Queue = "queue" - val Rank = "rank" - val Requirements = "requirements" - val Cpu = "request_cpus" - val Memory = "request_memory" - val Disk = "request_disk" - val LogXml = "log_xml" - val LeaveInQueue = "leave_in_queue" - val InitialWorkingDir = "Iwd" -} diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActor.scala deleted file mode 100644 index 70799bd67..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActor.scala +++ /dev/null @@ -1,51 +0,0 @@ -package cromwell.backend.impl.htcondor.caching - -import akka.actor.{Actor, ActorLogging} -import cromwell.backend.BackendJobExecutionActor.SucceededResponse -import cromwell.backend.impl.htcondor.caching.CacheActor._ 
-import cromwell.backend.impl.htcondor.caching.exception.{CachedResultAlreadyExistException, CachedResultNotFoundException} -import cromwell.backend.impl.htcondor.caching.model.CachedExecutionResult - -object CacheActor { - - trait CacheActorCommand - case class ReadExecutionResult(hash: String) extends CacheActorCommand - case class StoreExecutionResult(hash: String, succeededResponse: SucceededResponse) extends CacheActorCommand - - trait CacheActorResponse - case class ExecutionResultFound(succeededResponse: SucceededResponse) extends CacheActorResponse - case object ExecutionResultNotFound extends CacheActorResponse - case class ExecutionResultStored(hash: String) extends CacheActorResponse - case object ExecutionResultAlreadyExist extends CacheActorResponse - -} - -trait CacheActor extends Actor with ActorLogging { - def tag: String = "[CacheActor]" - def forceRewrite: Boolean = false - - override def receive: Receive = { - case ReadExecutionResult(hash) => - try { - val executionResult = readExecutionResult(hash) - log.info(s"{} Execution result found in cache for hash {}. Returning result: {}.", tag, hash, executionResult) - sender() ! ExecutionResultFound(executionResult.succeededResponse) - } catch { - case ex: CachedResultNotFoundException => sender() ! ExecutionResultNotFound - } - - case StoreExecutionResult(hash, succeededResult) => - try { - storeExecutionResult(CachedExecutionResult(hash, succeededResult)) - log.info(s"{} Cache entry for job [{}] stored successfully.", tag, hash) - sender() ! ExecutionResultStored(hash) - } catch { - case ex: CachedResultAlreadyExistException => sender() ! 
ExecutionResultAlreadyExist - } - } - - def readExecutionResult(hash: String): CachedExecutionResult - - def storeExecutionResult(cachedExecutionResult: CachedExecutionResult): Unit - -} diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActorFactory.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActorFactory.scala deleted file mode 100644 index 9dd99cc24..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActorFactory.scala +++ /dev/null @@ -1,9 +0,0 @@ -package cromwell.backend.impl.htcondor.caching - -import akka.actor.Props - -trait CacheActorFactory { - - def getCacheActorProps(forceRewrite: Boolean): Props - -} diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/exception/CachedResultAlreadyExistException.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/exception/CachedResultAlreadyExistException.scala deleted file mode 100644 index 387252809..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/exception/CachedResultAlreadyExistException.scala +++ /dev/null @@ -1,5 +0,0 @@ -package cromwell.backend.impl.htcondor.caching.exception - -class CachedResultNotFoundException(message: String) extends RuntimeException(message) - -class CachedResultAlreadyExistException(message: String) extends RuntimeException(message) diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalization.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalization.scala deleted file mode 100644 index 2f8254e5c..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalization.scala +++ /dev/null @@ -1,39 +0,0 @@ -package 
cromwell.backend.impl.htcondor.caching.localization - -import java.nio.file.{Files, Path, Paths} - -import better.files.File -import cromwell.core.{JobOutput, _} -import wdl4s.types.{WdlArrayType, WdlFileType} -import wdl4s.values.{WdlArray, WdlSingleFile, WdlValue} - -trait CachedResultLocalization { - private[localization] def localizePathViaSymbolicLink(originalPath: Path, executionPath: Path): Path = { - if (File(originalPath).isDirectory) throw new UnsupportedOperationException("Cannot localize directory with symbolic links.") - else { - File(executionPath).parent.createDirectories() - Files.createSymbolicLink(executionPath, originalPath.toAbsolutePath) - } - } - - private[localization] def localizeCachedFile(executionPath: Path, output: WdlValue): WdlSingleFile = { - val origPath = Paths.get(output.valueString) - val newPath = executionPath.toAbsolutePath.resolve(origPath.getFileName) - val slPath = localizePathViaSymbolicLink(origPath, newPath) - WdlSingleFile(slPath.toString) - } - - def localizeCachedOutputs(executionPath: Path, outputs: JobOutputs): JobOutputs = { - outputs map { case (lqn, jobOutput) => - jobOutput.wdlValue.wdlType match { - case WdlFileType => (lqn -> JobOutput(localizeCachedFile(executionPath, jobOutput.wdlValue))) - case WdlArrayType(WdlFileType) => - val newArray: Seq[WdlSingleFile] = jobOutput.wdlValue.asInstanceOf[WdlArray].value map { - localizeCachedFile(executionPath, _) - } - (lqn -> JobOutput(WdlArray(WdlArrayType(WdlFileType), newArray))) - case _ => (lqn, jobOutput) - } - } - } -} diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/model/CachedExecutionResult.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/model/CachedExecutionResult.scala deleted file mode 100644 index fdff70804..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/model/CachedExecutionResult.scala +++ /dev/null @@ -1,6 +0,0 @@ 
-package cromwell.backend.impl.htcondor.caching.model - -import cromwell.backend.BackendJobExecutionActor.SucceededResponse - -case class CachedExecutionResult(hash: String, succeededResponse: SucceededResponse) - diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActor.scala deleted file mode 100644 index 7f99e1565..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActor.scala +++ /dev/null @@ -1,88 +0,0 @@ -package cromwell.backend.impl.htcondor.caching.provider.mongodb - -import com.mongodb.DBObject -import com.mongodb.casbah.MongoCollection -import com.mongodb.casbah.commons.{MongoDBObject, TypeImports} -import com.mongodb.util.JSON -import cromwell.backend.BackendJobExecutionActor.SucceededResponse -import cromwell.backend.impl.htcondor.caching.CacheActor -import cromwell.backend.impl.htcondor.caching.exception.{CachedResultAlreadyExistException, CachedResultNotFoundException} -import cromwell.backend.impl.htcondor.caching.model.CachedExecutionResult -import cromwell.backend.impl.htcondor.caching.provider.mongodb.model.{KryoSerializedObject, MongoCachedExecutionResult} -import cromwell.backend.impl.htcondor.caching.provider.mongodb.serialization.KryoSerDe - -class MongoCacheActor(collection: MongoCollection, - override val forceRewrite: Boolean = false) extends CacheActor with KryoSerDe { - - import cromwell.backend.impl.htcondor.caching.provider.mongodb.model.MongoCachedExecutionResultProtocol._ - import spray.json._ - - val ErrMsg = "Got an exception when storing execution result for hash {}." 
- val HashIdentifier = "hash" - override val tag = s"[MongoCacheActor]" - - override def readExecutionResult(hash: String): CachedExecutionResult = { - val query = MongoDBObject(HashIdentifier -> hash) - val result = collection.findOne(query) - result match { - case Some(mongoDbObject) => - if (forceRewrite) throwCachedResultNotFoundException(hash) - else deserializeSucceededResponse(mongoDbObject) - case None => throwCachedResultNotFoundException(hash) - } - } - - override def storeExecutionResult(cachedExecutionResult: CachedExecutionResult): Unit = { - try { - readExecutionResult(cachedExecutionResult.hash) - val warnMsg = s"$tag Execution result hash {${cachedExecutionResult.hash}} is already defined in database." - log.warning(warnMsg) - if (forceRewrite) { - removeExecutionResult(cachedExecutionResult.hash) - storeExecutionResultInMongoDb(cachedExecutionResult) - } - else throw new CachedResultAlreadyExistException(warnMsg) - } catch { - case e: CachedResultNotFoundException => - storeExecutionResultInMongoDb(cachedExecutionResult) - case e: CachedResultAlreadyExistException => - throw e - case e: Exception => - log.error(e, "{} Got an unhandled exception when trying to store execution result for hash {}.", tag, cachedExecutionResult.hash) - throw e - } - } - - private def deserializeSucceededResponse(mongoDbObject: TypeImports.DBObject): CachedExecutionResult = { - val cachedResult = JsonParser(mongoDbObject.toString).convertTo[MongoCachedExecutionResult] - val succeededResponse = deserialize(cachedResult.succeededResponse.byteArray, classOf[SucceededResponse]) - CachedExecutionResult(cachedResult.hash, succeededResponse) - } - - private def removeExecutionResult(hash: String): Unit = { - val query = MongoDBObject(HashIdentifier -> hash) - val result = collection.remove(query) - if (result.getN == 0) throwCachedResultNotFoundException(hash) - log.info("{} Removed execution result for hash: {}.", tag, hash) - } - - private def 
storeExecutionResultInMongoDb(cachedExecutionResult: CachedExecutionResult): Unit = { - val cachedResult = MongoCachedExecutionResult(cachedExecutionResult.hash, KryoSerializedObject(serialize(cachedExecutionResult.succeededResponse))) - val result = collection.insert(constructDbObjectForCachedExecutionResult(cachedResult)) - if (!result.wasAcknowledged()) { - log.error(ErrMsg, cachedExecutionResult) - throw new IllegalStateException(ErrMsg) - } - } - - private def constructDbObjectForCachedExecutionResult(cachedExecutionResult: MongoCachedExecutionResult): DBObject = { - val resultAsJsonString = cachedExecutionResult.toJson.toString() - JSON.parse(resultAsJsonString).asInstanceOf[DBObject] - } - - private def throwCachedResultNotFoundException(hash: String): Nothing = { - val warnMsg = s"$tag Execution result hash {$hash} does not exist in database." - log.warning(warnMsg) - throw new CachedResultNotFoundException(warnMsg) - } -} diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorFactory.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorFactory.scala deleted file mode 100644 index bdb1354f9..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorFactory.scala +++ /dev/null @@ -1,21 +0,0 @@ -package cromwell.backend.impl.htcondor.caching.provider.mongodb - -import akka.actor.Props -import com.mongodb.casbah.{MongoClient, MongoCollection} -import com.typesafe.config.Config -import cromwell.backend.impl.htcondor.caching.CacheActorFactory - -class MongoCacheActorFactory(config: Config) extends CacheActorFactory { - val dbHost = config.getString("cache.db.host") - val dbPort = config.getInt("cache.db.port") - val dbName = config.getString("cache.db.name") - val dbCollectionName = config.getString("cache.db.collection") - val dbInstance: MongoClient = 
MongoClient(dbHost, dbPort) - val db = dbInstance(dbName) - val collection: MongoCollection = db(dbCollectionName) - - override def getCacheActorProps(forceRewrite: Boolean): Props = { - Props(new MongoCacheActor(collection, forceRewrite)) - } - -} diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/model/MongoCachedExecutionResult.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/model/MongoCachedExecutionResult.scala deleted file mode 100644 index f13326257..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/model/MongoCachedExecutionResult.scala +++ /dev/null @@ -1,24 +0,0 @@ -package cromwell.backend.impl.htcondor.caching.provider.mongodb.model - -import spray.json - -/** - * Wrapper over the byte array that is stored in db - * - * @param byteArray Serialized data - */ -case class KryoSerializedObject(byteArray: Array[Byte]) - -/** - * SucceededResponse to be stored in MongoDB. - * - * @param hash Calculated hash for the Job. - * @param succeededResponse Serialized succeeded response. 
- */ -case class MongoCachedExecutionResult(hash: String, succeededResponse: KryoSerializedObject) - -object MongoCachedExecutionResultProtocol extends json.DefaultJsonProtocol { - implicit val kryoSerializedObject = jsonFormat1(KryoSerializedObject) - implicit val cachedExecutionResultProtocol = jsonFormat2(MongoCachedExecutionResult) -} - diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/serialization/KryoSerDe.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/serialization/KryoSerDe.scala deleted file mode 100644 index d3f564c61..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/serialization/KryoSerDe.scala +++ /dev/null @@ -1,55 +0,0 @@ -package cromwell.backend.impl.htcondor.caching.provider.mongodb.serialization - -import java.io.{ByteArrayInputStream, ByteArrayOutputStream} - -import com.esotericsoftware.kryo.io.{Input, Output} -import com.twitter.chill.ScalaKryoInstantiator - -/** - * This mixin provides access to `serialize` and `deserialize` methods that use Kryo underneath to - * perform the actual conversion to / from bytes - */ -trait KryoSerDe extends SerDe { - - /** - * - * @param data Any Scala / Java object that needs to be serialized - * @return A serialized byte array that can be transported across Network moved around with - */ - override def serialize[A <: AnyRef](data: A): Array[Byte] = { - try { - val instantiator = new ScalaKryoInstantiator - instantiator.setRegistrationRequired(false) // This makes it unnecessary to register all classes - val kryo = instantiator.newKryo() - val buffer = new ByteArrayOutputStream() - val output = new Output(buffer) - kryo.writeObject(output, data) - output.close() - buffer.toByteArray - } catch { - case exception: Exception => throw SerDeException("Failed to serialize data.", exception) - } - } - - /** - * - * @param byteArray 
Kryo serialized data. Expects only results of `writeObject` - * @param toClass Class to which the `byteArray` will be deserialized - * @return The deserialized object as an instance of A - */ - override def deserialize[A <: AnyRef](byteArray: Array[Byte], toClass: Class[A]): A = { - try { - val instantiator = new ScalaKryoInstantiator - instantiator.setRegistrationRequired(false) - val kryo = instantiator.newKryo() - val buffer = new ByteArrayInputStream(byteArray) - val input = new Input(buffer) - val result = kryo.readObject(input, toClass) - input.close() - result - } catch { - case exception: Exception => throw SerDeException("Failed to deserialize data.", exception) - } - } - -} diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/serialization/SerDe.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/serialization/SerDe.scala deleted file mode 100644 index 7ab8883d0..000000000 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/serialization/SerDe.scala +++ /dev/null @@ -1,25 +0,0 @@ -package cromwell.backend.impl.htcondor.caching.provider.mongodb.serialization - -/** - * A vanilla Serialization / Deserialization interface - */ -trait SerDe { - - case class SerDeException(message: String, error: Throwable) extends IllegalStateException(message, error) - - /** - * Serialize the given data - * @param data Any Scala / Java object that needs to be serialized - * @return A serialized byte array that can be transported across Network moved around with - */ - def serialize[A <: AnyRef](data: A): Array[Byte] - - /** - * Deserializes an array of Bytes to the given class - * @param byteArray Kryo serialized data. 
Expects only results of `writeObject` - * @param toClass Class to which the `byteArray` will be deserialized - * @return The deserialized object as an instance of A - */ - def deserialize[A <: AnyRef](byteArray: Array[Byte], toClass: Class[A]): A - -} diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorCommandSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorCommandSpec.scala deleted file mode 100644 index ce1097f4e..000000000 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorCommandSpec.scala +++ /dev/null @@ -1,29 +0,0 @@ -package cromwell.backend.impl.htcondor - -import better.files._ - -import org.scalatest.{Matchers, WordSpecLike} - -class HtCondorCommandSpec extends WordSpecLike with Matchers { - val attributes = Map("executable" -> "test.sh", "input" -> "/temp/test", "error"->"stderr") - val resultAttributes = List("executable=test.sh","input=/temp/test","error=stderr", "queue") - val htCondorCommands = new HtCondorCommands - - "submitCommand method" should { - "return submit file with content passed to it" in { - val dir = File.newTemporaryFile() - val command = htCondorCommands.generateSubmitFile(dir.path,attributes) - val file = dir - resultAttributes shouldEqual dir.lines.toList - dir.delete() - command shouldEqual s"condor_submit ${file.path}" - } - } - - "statusCommand method" should { - "return status command" in { - val command = HtCondorCommands.generateJobStatusCommand("96.0") - command shouldEqual s"condor_q 96.0 -autoformat JobStatus" - } - } -} \ No newline at end of file diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActorSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActorSpec.scala deleted file mode 100644 index 45da26644..000000000 --- 
a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActorSpec.scala +++ /dev/null @@ -1,53 +0,0 @@ -package cromwell.backend.impl.htcondor - -import akka.testkit.{EventFilter, ImplicitSender, TestDuration} -import cromwell.backend.BackendWorkflowInitializationActor.Initialize -import cromwell.backend.{BackendConfigurationDescriptor, BackendSpec, BackendWorkflowDescriptor} -import cromwell.core.TestKitSuite -import org.scalatest.{Matchers, WordSpecLike} -import wdl4s.Call - -import scala.concurrent.duration._ - -class HtCondorInitializationActorSpec extends TestKitSuite("HtCondorInitializationActorSpec") with WordSpecLike - with Matchers with ImplicitSender { - val Timeout = 5.second.dilated - - import BackendSpec._ - - val HelloWorld = - """ - |task hello { - | String addressee = "you" - | command { - | echo "Hello ${addressee}!" - | } - | output { - | String salutation = read_string(stdout()) - | } - | - | RUNTIME - |} - | - |workflow hello { - | call hello - |} - """.stripMargin - - private def getHtCondorBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], conf: BackendConfigurationDescriptor) = { - system.actorOf(HtCondorInitializationActor.props(workflowDescriptor, calls, conf, emptyActor)) - } - - "HtCondorInitializationActor" should { - "log a warning message when there are unsupported runtime attributes" in { - within(Timeout) { - EventFilter.warning(message = s"Key/s [proc] is/are not supported by HtCondorBackend. Unsupported attributes will not be part of jobs executions.", occurrences = 1) intercept { - val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { proc: 1 }""") - val backend = getHtCondorBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, - emptyBackendConfig) - backend ! 
Initialize - } - } - } - } -} diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActorSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActorSpec.scala deleted file mode 100644 index e44e66b4a..000000000 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActorSpec.scala +++ /dev/null @@ -1,466 +0,0 @@ -package cromwell.backend.impl.htcondor - -import java.io.Writer -import java.nio.file.{Files, Path} - -import akka.actor.{Actor, Props} -import akka.testkit.{ImplicitSender, TestActorRef} -import better.files._ -import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{FailedNonRetryableResponse, SucceededResponse} -import cromwell.backend.impl.htcondor.caching.CacheActor -import cromwell.backend.impl.htcondor.caching.exception.CachedResultNotFoundException -import cromwell.backend.impl.htcondor.caching.model.CachedExecutionResult -import cromwell.backend.io.JobPaths -import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendSpec} -import cromwell.core._ -import cromwell.services.keyvalue.KeyValueServiceActor.{KvGet, KvPair, KvPut} -import org.mockito.Matchers._ -import org.mockito.Mockito -import org.mockito.Mockito._ -import org.scalatest.concurrent.PatienceConfiguration.Timeout -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{BeforeAndAfter, Matchers, WordSpecLike} -import wdl4s.types.{WdlArrayType, WdlFileType} -import wdl4s.values.{WdlArray, WdlFile, WdlValue} - -import scala.concurrent.duration._ -import scala.io.Source -import scala.sys.process.{Process, ProcessLogger} - -class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionActorSpec") - with WordSpecLike - with Matchers - with MockitoSugar - with BeforeAndAfter - with ImplicitSender { - - import BackendSpec._ - - private val htCondorCommands: 
HtCondorCommands = new HtCondorCommands - private val htCondorProcess: HtCondorProcess = mock[HtCondorProcess] - private val cacheActorMockProps = Props(new CacheActorMock()) - - private val helloWorldWdl = - """ - |task hello { - | - | command { - | echo "Hello World!" - | } - | output { - | String salutation = read_string(stdout()) - | } - | RUNTIME - |} - | - |workflow hello { - | call hello - |} - """.stripMargin - - private val helloWorldWdlWithFileInput = - """ - |task hello { - | File inputFile - | - | command { - | echo ${inputFile} - | } - | output { - | String salutation = read_string(stdout()) - | } - | RUNTIME - |} - | - |workflow hello { - | call hello - |} - """.stripMargin - - private val helloWorldWdlWithFileArrayInput = - """ - |task hello { - | Array[File] inputFiles - | - | command { - | echo ${sep=' ' inputFiles} - | } - | output { - | String salutation = read_string(stdout()) - | } - | RUNTIME - |} - | - |workflow hello { - | call hello - |} - """.stripMargin - - private val backendConfig = ConfigFactory.parseString( - s"""{ - | root = "local-cromwell-executions" - | - | docker { - | cmd = "docker run -w %s %s %s --rm %s %s" - | } - | - | filesystems { - | local { - | localization = [ - | "hard-link", "soft-link", "copy" - | ] - | } - | } - | poll-interval = 3 - |} - """.stripMargin) - - private val timeout = Timeout(1.seconds) - - after { - Mockito.reset(htCondorProcess) - } - - "executeTask method" should { - "return succeeded task status with stdout" in { - val jobDescriptor = prepareJob() - val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) - val stubProcess = mock[Process] - val stubUntailed = new UntailedWriter(jobPaths.stdout) with MockPathWriter - val stubTailed = new TailedWriter(jobPaths.stderr, 100) with MockPathWriter - val stderrResult = "" - - when(htCondorProcess.commandList(any[String])).thenReturn(Seq.empty[String]) - 
when(htCondorProcess.externalProcess(any[Seq[String]], any[ProcessLogger])).thenReturn(stubProcess) - when(stubProcess.exitValue()).thenReturn(0) - when(htCondorProcess.tailedWriter(any[Int], any[Path])).thenReturn(stubTailed) - when(htCondorProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) - when(htCondorProcess.processStderr).thenReturn(stderrResult) - when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(0)) - - val backend = TestActorRef(new HtCondorJobExecutionActor(job, backendConfigDesc, system.deadLetters, None) { - override lazy val cmds = htCondorCommands - override lazy val extProcess = htCondorProcess - }).underlyingActor - - whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] - verify(htCondorProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) - verify(htCondorProcess, times(1)).tailedWriter(any[Int], any[Path]) - verify(htCondorProcess, times(1)).untailedWriter(any[Path]) - } - - cleanUpJob(jobPaths) - } - - "return succeeded task status when it recovers from a shutdown" in { - val jobDescriptor = prepareJob() - val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) - val stubProcess = mock[Process] - val stubUntailed = new UntailedWriter(jobPaths.stdout) with MockPathWriter - val stubTailed = new TailedWriter(jobPaths.stderr, 100) with MockPathWriter - val stderrResult = "" - val kVServiceActor = system.actorOf(Props(new KVServiceActor())) - - when(htCondorProcess.commandList(any[String])).thenReturn(Seq.empty[String]) - when(htCondorProcess.externalProcess(any[Seq[String]], any[ProcessLogger])).thenReturn(stubProcess) - when(stubProcess.exitValue()).thenReturn(0) - when(htCondorProcess.tailedWriter(any[Int], any[Path])).thenReturn(stubTailed) - when(htCondorProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) - 
when(htCondorProcess.processStderr).thenReturn(stderrResult) - when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(0)) - - val backend = TestActorRef(new HtCondorJobExecutionActor(job, backendConfigDesc, kVServiceActor, None) { - override lazy val cmds = htCondorCommands - override lazy val extProcess = htCondorProcess - }).underlyingActor - - whenReady(backend.recover, timeout) { response => - response shouldBe a[SucceededResponse] - } - - cleanUpJob(jobPaths) - } - - "return succeeded task status with stdout when cache is enabled" in { - val jobDescriptor = prepareJob() - val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) - val stubProcess = mock[Process] - val stubUntailed = new UntailedWriter(jobPaths.stdout) with MockPathWriter - val stubTailed = new TailedWriter(jobPaths.stderr, 100) with MockPathWriter - val stderrResult = "" - - when(htCondorProcess.commandList(any[String])).thenReturn(Seq.empty[String]) - when(htCondorProcess.externalProcess(any[Seq[String]], any[ProcessLogger])).thenReturn(stubProcess) - when(stubProcess.exitValue()).thenReturn(0) - when(htCondorProcess.tailedWriter(any[Int], any[Path])).thenReturn(stubTailed) - when(htCondorProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) - when(htCondorProcess.processStderr).thenReturn(stderrResult) - when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(0)) - - val backend = TestActorRef(new HtCondorJobExecutionActor(job, backendConfigDesc, system.deadLetters, Some(cacheActorMockProps)) { - override lazy val cmds = htCondorCommands - override lazy val extProcess = htCondorProcess - }).underlyingActor - - whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] - verify(htCondorProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) - verify(htCondorProcess, times(1)).tailedWriter(any[Int], any[Path]) - 
verify(htCondorProcess, times(1)).untailedWriter(any[Path]) - } - - cleanUpJob(jobPaths) - } - - "return failed task status with stderr on non-zero process exit" in { - val jobDescriptor = prepareJob() - val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) - - val backend = TestActorRef(new HtCondorJobExecutionActor(job, backendConfigDesc, system.deadLetters, Some(cacheActorMockProps)) { - override lazy val cmds = htCondorCommands - override lazy val extProcess = htCondorProcess - }).underlyingActor - val stubProcess = mock[Process] - val stubUntailed = new UntailedWriter(jobPaths.stdout) with MockPathWriter - val stubTailed = new TailedWriter(jobPaths.stderr, 100) with MockPathWriter - val stderrResult = "" - - when(htCondorProcess.externalProcess(any[Seq[String]], any[ProcessLogger])).thenReturn(stubProcess) - when(stubProcess.exitValue()).thenReturn(0) - when(htCondorProcess.tailedWriter(any[Int], any[Path])).thenReturn(stubTailed) - when(htCondorProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) - when(htCondorProcess.processStderr).thenReturn(stderrResult) - when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(-1)) - - whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("Job exited with invalid return code")) - } - - cleanUpJob(jobPaths) - } - - "return a successful task status even with a non-zero process exit" in { - val runtime = - """ - |runtime { - | continueOnReturnCode: [911] - |} - """.stripMargin - val jobDescriptor = prepareJob(runtimeString = runtime) - val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) - - val backend = TestActorRef(new HtCondorJobExecutionActor(job, backendConfigDesc, system.deadLetters, 
Some(cacheActorMockProps)) { - override lazy val cmds = htCondorCommands - override lazy val extProcess = htCondorProcess - }).underlyingActor - val stubProcess = mock[Process] - val stubUntailed = new UntailedWriter(jobPaths.stdout) with MockPathWriter - val stubTailed = new TailedWriter(jobPaths.stderr, 100) with MockPathWriter - val stderrResult = "" - - when(htCondorProcess.externalProcess(any[Seq[String]], any[ProcessLogger])).thenReturn(stubProcess) - when(stubProcess.exitValue()).thenReturn(0) - when(htCondorProcess.tailedWriter(any[Int], any[Path])).thenReturn(stubTailed) - when(htCondorProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) - when(htCondorProcess.processStderr).thenReturn(stderrResult) - when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(911)) - - whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] - } - - cleanUpJob(jobPaths) - } - - "return a successful task status when it runs a docker command with working and output directory" in { - val runtime = - """ - |runtime { - | docker: "ubuntu/latest" - | dockerWorkingDir: "/workingDir" - | dockerOutputDir: "/outputDir" - |} - """.stripMargin - val jsonInputFile = createCannedFile("testFile", "some content").pathAsString - val inputs = Map( - "inputFile" -> WdlFile(jsonInputFile) - ) - val jobDescriptor = prepareJob(helloWorldWdlWithFileInput, runtime, Option(inputs)) - val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) - - val backend = TestActorRef(new HtCondorJobExecutionActor(job, backendConfigDesc, system.deadLetters, Some(cacheActorMockProps)) { - override lazy val cmds = htCondorCommands - override lazy val extProcess = htCondorProcess - }).underlyingActor - val stubProcess = mock[Process] - val stubUntailed = new UntailedWriter(jobPaths.stdout) with MockPathWriter - val stubTailed = new TailedWriter(jobPaths.stderr, 100) with 
MockPathWriter - val stderrResult = "" - - when(htCondorProcess.externalProcess(any[Seq[String]], any[ProcessLogger])).thenReturn(stubProcess) - when(stubProcess.exitValue()).thenReturn(0) - when(htCondorProcess.tailedWriter(any[Int], any[Path])).thenReturn(stubTailed) - when(htCondorProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) - when(htCondorProcess.processStderr).thenReturn(stderrResult) - when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(0)) - - whenReady(backend.execute) { response => - response shouldBe a[SucceededResponse] - } - - val bashScript = Source.fromFile(jobPaths.script.toFile).getLines.mkString - - assert(bashScript.contains("docker run -w /workingDir -v")) - assert(bashScript.contains(":ro")) - assert(bashScript.contains("/call-hello/execution:/outputDir --rm ubuntu/latest echo")) - - cleanUpJob(jobPaths) - } - - "return failed when cmds fails to write script" in { - val htCondorCommandsMock: HtCondorCommands = mock[HtCondorCommands] - val jobDescriptor = prepareJob() - val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) - - val backend = TestActorRef(new HtCondorJobExecutionActor(job, backendConfigDesc, system.deadLetters, Some(cacheActorMockProps)) { - override lazy val cmds = htCondorCommandsMock - override lazy val extProcess = htCondorProcess - }).underlyingActor - val stubProcess = mock[Process] - val stubUntailed = new UntailedWriter(jobPaths.stdout) with MockPathWriter - val stubTailed = new TailedWriter(jobPaths.stderr, 100) with MockPathWriter - val stderrResult = "" - - when(htCondorCommandsMock.writeScript(any[String], any[Path], any[Path])).thenThrow(new IllegalStateException("Could not write the file.")) - when(htCondorProcess.externalProcess(any[Seq[String]], any[ProcessLogger])).thenReturn(stubProcess) - when(stubProcess.exitValue()).thenReturn(0) - when(htCondorProcess.tailedWriter(any[Int], 
any[Path])).thenReturn(stubTailed) - when(htCondorProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) - when(htCondorProcess.processStderr).thenReturn(stderrResult) - when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(-1)) - - whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("Could not write the file.")) - } - - cleanUpJob(jobPaths) - } - } - - "return a successful task status when it tries to run a docker command containing file data from a WDL file array" in { - val runtime = - """ - |runtime { - | docker: "ubuntu/latest" - | dockerWorkingDir: "/workingDir" - | dockerOutputDir: "/outputDir" - |} - """.stripMargin - - val tempDir1 = Files.createTempDirectory("dir1") - val tempDir2 = Files.createTempDirectory("dir2") - val jsonInputFile = - createCannedFile(prefix = "testFile", contents = "some content", dir = Some(tempDir1)).pathAsString - val jsonInputFile2 = - createCannedFile(prefix = "testFile2", contents = "some other content", dir = Some(tempDir2)).pathAsString - - val inputs = Map( - "inputFiles" -> WdlArray(WdlArrayType(WdlFileType), Seq(WdlFile(jsonInputFile), WdlFile(jsonInputFile2))) - ) - val jobDescriptor = prepareJob(helloWorldWdlWithFileArrayInput, runtime, Option(inputs)) - val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) - - val backend = TestActorRef(new HtCondorJobExecutionActor(job, backendConfigDesc, system.deadLetters, Some(cacheActorMockProps)) { - override lazy val cmds = htCondorCommands - override lazy val extProcess = htCondorProcess - }).underlyingActor - val stubProcess = mock[Process] - val stubUntailed = new UntailedWriter(jobPaths.stdout) with MockPathWriter - val stubTailed = new TailedWriter(jobPaths.stderr, 100) with MockPathWriter - val stderrResult = "" - - 
when(htCondorProcess.externalProcess(any[Seq[String]], any[ProcessLogger])).thenReturn(stubProcess) - when(stubProcess.exitValue()).thenReturn(0) - when(htCondorProcess.tailedWriter(any[Int], any[Path])).thenReturn(stubTailed) - when(htCondorProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) - when(htCondorProcess.processStderr).thenReturn(stderrResult) - when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(0)) - - whenReady(backend.execute) { response => - response shouldBe a[SucceededResponse] - } - - val bashScript = Source.fromFile(jobPaths.script.toFile).getLines.mkString - - assert(bashScript.contains("docker run -w /workingDir -v")) - assert(bashScript.contains(tempDir1.toAbsolutePath.toString)) - assert(bashScript.contains(tempDir2.toAbsolutePath.toString)) - assert(bashScript.contains("/call-hello/execution:/outputDir --rm ubuntu/latest echo")) - - cleanUpJob(jobPaths) - } - - private def cleanUpJob(jobPaths: JobPaths): Unit = File(jobPaths.workflowRoot).delete(true) - - private def createCannedFile(prefix: String, contents: String, dir: Option[Path] = None): File = { - val suffix = ".out" - val file = File.newTemporaryFile(prefix, suffix, dir.map(File.apply)) - file.write(contents) - } - - val emptyWorkflowOptions = WorkflowOptions.fromMap(Map.empty).get - - private def prepareJob(source: String = helloWorldWdl, runtimeString: String = "", inputFiles: Option[Map[String, WdlValue]] = None): TestJobDescriptor = { - val backendWorkflowDescriptor = buildWorkflowDescriptor(wdl = source, inputs = inputFiles.getOrElse(Map.empty), runtime = runtimeString) - val backendConfigurationDescriptor = BackendConfigurationDescriptor(backendConfig, ConfigFactory.load) - val jobDesc = jobDescriptorFromSingleCallWorkflow(backendWorkflowDescriptor, inputFiles.getOrElse(Map.empty), emptyWorkflowOptions, Set.empty) - val jobPaths = new JobPaths(backendWorkflowDescriptor, backendConfig, jobDesc.key) - val executionDir = 
File(jobPaths.callExecutionRoot) - val stdout = File(executionDir.pathAsString, "stdout") - stdout.createIfNotExists(asDirectory = false, createParents = true) - val submitFileStderr = executionDir./("submitfile.stderr") - val submitFileStdout = executionDir./("submitfile.stdout") - submitFileStdout.createIfNotExists(asDirectory = false, createParents = true) - submitFileStdout << - """Submitting job(s).. - |1 job(s) submitted to cluster 88. - """.stripMargin.trim - submitFileStderr.createIfNotExists(asDirectory = false, createParents = true) - TestJobDescriptor(jobDesc, jobPaths, backendConfigurationDescriptor) - } - - private case class TestJobDescriptor(jobDescriptor: BackendJobDescriptor, jobPaths: JobPaths, backendConfigurationDescriptor: BackendConfigurationDescriptor) - - trait MockWriter extends Writer { - var closed = false - - override def close() = closed = true - - override def flush() = {} - - override def write(a: Array[Char], b: Int, c: Int) = {} - } - - trait MockPathWriter extends PathWriter { - override lazy val writer: Writer = new MockWriter {} - override val path: Path = mock[Path] - } - - class CacheActorMock extends CacheActor { - override def readExecutionResult(hash: String): CachedExecutionResult = throw new CachedResultNotFoundException("Entry not found.") - - override def storeExecutionResult(cachedExecutionResult: CachedExecutionResult): Unit = () - } - - class KVServiceActor extends Actor { - override def receive: Receive = { - case KvPut => // Do nothing - case KvGet(kvKey) => sender ! 
KvPair(kvKey, Option("123")) - } - } - -} diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributesSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributesSpec.scala deleted file mode 100644 index a411a7aef..000000000 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributesSpec.scala +++ /dev/null @@ -1,274 +0,0 @@ -package cromwell.backend.impl.htcondor - -import cromwell.backend.{BackendSpec, MemorySize} -import cromwell.backend.validation.ContinueOnReturnCodeSet -import cromwell.backend.validation.RuntimeAttributesKeys._ -import cromwell.core.WorkflowOptions -import org.scalatest.{Matchers, WordSpecLike} -import spray.json._ -import wdl4s.WdlExpression._ -import wdl4s._ -import wdl4s.expression.NoFunctions -import wdl4s.util.TryUtil -import wdl4s.values.WdlValue - -class HtCondorRuntimeAttributesSpec extends WordSpecLike with Matchers { - - import BackendSpec._ - - val HelloWorld = - """ - |task hello { - | String addressee = "you" - | command { - | echo "Hello ${addressee}!" - | } - | output { - | String salutation = read_string(stdout()) - | } - | - | RUNTIME - |} - | - |workflow hello { - | call hello - |} - """.stripMargin - - val emptyWorkflowOptions = WorkflowOptions(JsObject(Map.empty[String, JsValue])) - - val memorySize = MemorySize.parse("0.512 GB").get - val diskSize = MemorySize.parse("1.024 GB").get - val staticDefaults = new HtCondorRuntimeAttributes(ContinueOnReturnCodeSet(Set(0)), None, None, None, false, 1, memorySize, diskSize) - - def workflowOptionsWithDefaultRA(defaults: Map[String, JsValue]) = { - WorkflowOptions(JsObject(Map( - "default_runtime_attributes" -> JsObject(defaults) - ))) - } - - "HtCondorRuntimeAttributes" should { - "return an instance of itself when there are no runtime attributes defined." 
in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, emptyWorkflowOptions, staticDefaults) - } - - "return an instance of itself when tries to validate a valid Docker entry" in { - val expectedRuntimeAttributes = staticDefaults.copy(dockerImage = Option("ubuntu:latest")) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { docker: "ubuntu:latest" }""").head - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, emptyWorkflowOptions, expectedRuntimeAttributes) - } - - "return an instance of itself when tries to validate a valid Docker entry based on input" in { - val expectedRuntimeAttributes = staticDefaults.copy(dockerImage = Option("you")) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { docker: "\${addressee}" }""").head - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, emptyWorkflowOptions, expectedRuntimeAttributes) - } - - "use workflow options as default if docker key is missing" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - val workflowOptions = workflowOptionsWithDefaultRA(Map(DockerKey -> JsString("ubuntu:latest"))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, workflowOptions, staticDefaults.copy(dockerImage = Some("ubuntu:latest"))) - } - - "throw an exception when tries to validate an invalid Docker entry" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { docker: 1 }""").head - assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting docker runtime attribute to be a String") - } - - "return an instance of itself when tries to validate a valid docker working directory entry" in { - val expectedRuntimeAttributes = staticDefaults.copy(dockerWorkingDir = Option("/workingDir")) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, 
"""runtime { dockerWorkingDir: "/workingDir" }""").head - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, emptyWorkflowOptions, expectedRuntimeAttributes) - } - - "return an instance of itself when tries to validate a valid docker working directory entry based on input" in { - val expectedRuntimeAttributes = staticDefaults.copy(dockerWorkingDir = Option("you")) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { dockerWorkingDir: "\${addressee}" }""").head - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, emptyWorkflowOptions, expectedRuntimeAttributes) - } - - "use workflow options as default if docker working directory key is missing" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - val workflowOptions = workflowOptionsWithDefaultRA(Map("dockerWorkingDir" -> JsString("/workingDir"))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, workflowOptions, staticDefaults.copy(dockerWorkingDir = Some("/workingDir"))) - } - - "throw an exception when tries to validate an invalid docker working directory entry" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { dockerWorkingDir: 1 }""").head - assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting dockerWorkingDir runtime attribute to be a String") - } - - "return an instance of itself when tries to validate a valid docker output directory entry" in { - val expectedRuntimeAttributes = staticDefaults.copy(dockerOutputDir = Option("/outputDir")) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { dockerOutputDir: "/outputDir" }""").head - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, emptyWorkflowOptions, expectedRuntimeAttributes) - } - - "return an instance of itself when tries to validate a valid docker output directory entry based on input" in { - val expectedRuntimeAttributes = 
staticDefaults.copy(dockerOutputDir = Option("you")) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { dockerOutputDir: "\${addressee}" }""").head - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, emptyWorkflowOptions, expectedRuntimeAttributes) - } - - "use workflow options as default if docker output directory key is missing" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - val workflowOptions = workflowOptionsWithDefaultRA(Map("dockerOutputDir" -> JsString("/outputDir"))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, workflowOptions, staticDefaults.copy(dockerOutputDir = Some("/outputDir"))) - } - - "throw an exception when tries to validate an invalid docker output directory entry" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { dockerOutputDir: 1 }""").head - assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting dockerOutputDir runtime attribute to be a String") - } - - "return an instance of itself when tries to validate a valid failOnStderr entry" in { - val expectedRuntimeAttributes = staticDefaults.copy(failOnStderr = true) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { failOnStderr: "true" }""").head - val shouldBeIgnored = workflowOptionsWithDefaultRA(Map(FailOnStderrKey -> JsBoolean(false))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) - } - - "throw an exception when tries to validate an invalid failOnStderr entry" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { failOnStderr: "yes" }""").head - assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting failOnStderr runtime attribute to be a Boolean or a String with values of 'true' or 'false'") - } - - "use workflow options as default if failOnStdErr key is missing" in { - val 
runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - val workflowOptions = workflowOptionsWithDefaultRA(Map(FailOnStderrKey -> JsBoolean(true))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, workflowOptions, staticDefaults.copy(failOnStderr = true)) - } - - "return an instance of itself when tries to validate a valid continueOnReturnCode entry" in { - val expectedRuntimeAttributes = staticDefaults.copy(continueOnReturnCode = ContinueOnReturnCodeSet(Set(1))) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { continueOnReturnCode: 1 }""").head - val shouldBeIgnored = workflowOptionsWithDefaultRA(Map(ContinueOnReturnCodeKey -> JsBoolean(false))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) - } - - "throw an exception when tries to validate an invalid continueOnReturnCode entry" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { continueOnReturnCode: "value" }""").head - assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting continueOnReturnCode runtime attribute to be either a Boolean, a String 'true' or 'false', or an Array[Int]") - } - - "use workflow options as default if continueOnReturnCode key is missing" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - val workflowOptions = workflowOptionsWithDefaultRA(Map(ContinueOnReturnCodeKey -> JsArray(Vector(JsNumber(1), JsNumber(2))))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, workflowOptions, staticDefaults.copy(continueOnReturnCode = ContinueOnReturnCodeSet(Set(1, 2)))) - } - - "return an instance of itself when tries to validate a valid cpu entry" in { - val expectedRuntimeAttributes = staticDefaults.copy(cpu = 2) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { cpu: 2 }""").head - val shouldBeIgnored = 
workflowOptionsWithDefaultRA(Map(CpuKey -> JsString("6"))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) - } - - "throw an exception when tries to validate an invalid cpu entry" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { cpu: "value" }""").head - assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting cpu runtime attribute to be an Integer") - } - - "use workflow options as default if cpu key is missing" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - val expectedRuntimeAttributes = staticDefaults.copy(cpu = 6) - val workflowOptions = workflowOptionsWithDefaultRA(Map(CpuKey -> JsString("6"))) - val workflowOptions2 = workflowOptionsWithDefaultRA(Map(CpuKey -> JsNumber("6"))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, workflowOptions, expectedRuntimeAttributes) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, workflowOptions2, expectedRuntimeAttributes) - } - - "use default cpu value when there is no cpu key entry" in { - val expectedRuntimeAttributes = staticDefaults.copy(cpu = 1) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - val shouldBeIgnored = workflowOptionsWithDefaultRA(Map()) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) - } - - "return an instance of itself when tries to validate a valid memory entry" in { - val expectedRuntimeAttributes = staticDefaults.copy(memory = MemorySize.parse("1 GB").get) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { memory: "1 GB" }""").head - val shouldBeIgnored = workflowOptionsWithDefaultRA(Map(MemoryKey -> JsString("blahaha"))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) - } - - "throw an exception when 
tries to validate an invalid memory entry" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { docker: "ubuntu:latest" memory: "value" }""").head - assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting memory runtime attribute to be an Integer or String with format '8 GB'") - } - - "use workflow options as default if memory key is missing" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - val expectedRuntimeAttributes = staticDefaults.copy(memory = MemorySize.parse("65 GB").get) - val workflowOptions = workflowOptionsWithDefaultRA(Map(MemoryKey -> JsString("65 GB"))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, workflowOptions, expectedRuntimeAttributes) - } - - "use default memory value when there is no memory key entry" in { - val expectedRuntimeAttributes = staticDefaults.copy(memory = MemorySize.parse("0.512 GB").get) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - val shouldBeIgnored = workflowOptionsWithDefaultRA(Map()) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) - } - - "return an instance of itself when tries to validate a valid disk entry" in { - val expectedRuntimeAttributes = staticDefaults.copy(disk = MemorySize.parse("1 GB").get) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { disk: "1 GB" }""").head - val shouldBeIgnored = workflowOptionsWithDefaultRA(Map("disk" -> JsString("blahaha"))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) - } - - "throw an exception when tries to validate an invalid disk entry" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { docker: "ubuntu:latest" disk: "value" }""").head - assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting memory runtime attribute 
to be an Integer or String with format '8 GB'") - } - - "use workflow options as default if disk key is missing" in { - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - val expectedRuntimeAttributes = staticDefaults.copy(disk = MemorySize.parse("65 GB").get) - val workflowOptions = workflowOptionsWithDefaultRA(Map("disk" -> JsString("65 GB"))) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, workflowOptions, expectedRuntimeAttributes) - } - - "use default disk value when there is no disk key entry" in { - val expectedRuntimeAttributes = staticDefaults.copy(disk = MemorySize.parse("1.024 GB").get) - val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head - val shouldBeIgnored = workflowOptionsWithDefaultRA(Map()) - assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) - } - } - - private def assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes: Map[String, WdlValue], workflowOptions: WorkflowOptions, expectedRuntimeAttributes: HtCondorRuntimeAttributes): Unit = { - try { - assert(HtCondorRuntimeAttributes(runtimeAttributes, workflowOptions) == expectedRuntimeAttributes) - } catch { - case ex: RuntimeException => fail(s"Exception was not expected but received: ${ex.getMessage}") - } - } - - private def assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes: Map[String, WdlValue], exMsg: String): Unit = { - try { - HtCondorRuntimeAttributes(runtimeAttributes, emptyWorkflowOptions) - fail("A RuntimeException was expected.") - } catch { - case ex: RuntimeException => assert(ex.getMessage.contains(exMsg)) - } - } - - private def createRuntimeAttributes(wdlSource: WdlSource, runtimeAttributes: String): Seq[Map[String, WdlValue]] = { - val workflowDescriptor = buildWorkflowDescriptor(wdlSource, runtime = runtimeAttributes) - - def createLookup(call: Call): ScopedLookupFunction = { - val declarations = 
workflowDescriptor.workflowNamespace.workflow.declarations ++ call.task.declarations - val knownInputs = workflowDescriptor.inputs - WdlExpression.standardLookupFunction(knownInputs, declarations, NoFunctions) - } - - workflowDescriptor.workflowNamespace.workflow.calls map { - call => - val ra = call.task.runtimeAttributes.attrs mapValues { _.evaluate(createLookup(call), NoFunctions) } - TryUtil.sequenceMap(ra, "Runtime attributes evaluation").get - } - } -} diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalizationSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalizationSpec.scala deleted file mode 100644 index c2c4d101f..000000000 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalizationSpec.scala +++ /dev/null @@ -1,64 +0,0 @@ -package cromwell.backend.impl.htcondor.caching.localization - -import java.nio.file.Files - -import cromwell.core.{JobOutput, JobOutputs} -import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} -import wdl4s.types.{WdlArrayType, WdlFileType} -import wdl4s.values.{WdlArray, WdlSingleFile, WdlString} - -class CachedResultLocalizationSpec extends WordSpecLike with Matchers with BeforeAndAfterAll { - private class CachedResultLocalizationMock extends CachedResultLocalization - private val defaultTmpDir = Files.createTempDirectory("cachedFiles").toAbsolutePath - private val defaultCachedFile = defaultTmpDir.resolve("input.txt") - private val newTmpDir = Files.createTempDirectory("newFiles").toAbsolutePath - private val newTmpFile = newTmpDir.resolve(defaultCachedFile.getFileName()) - private val cachedResults = new CachedResultLocalizationMock() - private val defaultFileArray = Seq("arrInput1.txt", "arrInput2.txt", "arrInput3.txt").map(defaultTmpDir.resolve(_).toAbsolutePath) - private val newFileArray = 
Seq("arrInput1.txt", "arrInput2.txt", "arrInput3.txt").map(newTmpDir.resolve(_).toAbsolutePath) - - override def afterAll() = { - Seq(defaultCachedFile, newTmpFile) ++ newFileArray ++ Seq(defaultTmpDir, newTmpDir) foreach { _.toFile.delete() } - } - - "CachedResultLocalization" should { - "localize file path via symbolic link" in { - val slPath = cachedResults.localizePathViaSymbolicLink(defaultCachedFile, newTmpFile) - assert(Files.isSymbolicLink(slPath)) - Files.delete(newTmpFile) - } - - "not localize dir path via symbolic link" in { - assertThrows[UnsupportedOperationException](cachedResults.localizePathViaSymbolicLink(defaultTmpDir, newTmpFile)) - } - - "localize cached job outputs which are WDL files using symbolic link" in { - val outputs: JobOutputs = Map("File1" -> JobOutput(WdlSingleFile(defaultCachedFile.toAbsolutePath.toString))) - val newJobOutputs = cachedResults.localizeCachedOutputs(newTmpDir, outputs) - newJobOutputs foreach { case (lqn, jobOutput) => - assert(jobOutput.wdlValue.valueString == newTmpFile.toString) - } - } - - "localize cached job outputs which are WDL File Array using symbolic link" in { - val wdlArray = WdlArray(WdlArrayType(WdlFileType), defaultFileArray.map(file => WdlSingleFile(file.toString()))) - val outputs = Map("File1" -> JobOutput(wdlArray)) - val newJobOutputs = cachedResults.localizeCachedOutputs(newTmpDir, outputs) - newJobOutputs foreach { case (lqn, jobOutput) => - val wdlArray = jobOutput.wdlValue.asInstanceOf[WdlArray].value - wdlArray foreach { entry => - assert(!entry.valueString.contains(defaultTmpDir.toString)) - assert(entry.valueString.contains(newTmpDir.toString)) - } - } - } - - "not localize cached job outputs which are not WDL files" in { - val outputs = Map("String1" -> JobOutput(WdlString(defaultCachedFile.toAbsolutePath.toString))) - val newJobOutputs = cachedResults.localizeCachedOutputs(newTmpDir, outputs) - newJobOutputs foreach { case (lqn, jobOutput) => - assert(jobOutput.wdlValue.valueString == 
defaultCachedFile.toString) - } - } - } -} diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorSpec.scala deleted file mode 100644 index fa675e758..000000000 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorSpec.scala +++ /dev/null @@ -1,104 +0,0 @@ -package cromwell.backend.impl.htcondor.caching.provider.mongodb - -import akka.actor.ActorSystem -import akka.testkit.{ImplicitSender, TestActorRef, TestKit} -import com.mongodb.casbah.MongoCollection -import com.mongodb.casbah.commons.MongoDBObject -import com.mongodb.util.JSON -import com.mongodb.{DBObject, WriteResult} -import com.typesafe.config.{Config, ConfigFactory} -import cromwell.backend.{MemorySize, BackendJobDescriptorKey} -import cromwell.backend.BackendJobExecutionActor.SucceededResponse -import cromwell.backend.impl.htcondor.HtCondorRuntimeAttributes -import cromwell.backend.impl.htcondor.caching.CacheActor._ -import cromwell.backend.impl.htcondor.caching.exception.CachedResultNotFoundException -import cromwell.backend.impl.htcondor.caching.provider.mongodb.model.{KryoSerializedObject, MongoCachedExecutionResult} -import cromwell.backend.impl.htcondor.caching.provider.mongodb.serialization.KryoSerDe -import cromwell.backend.validation.ContinueOnReturnCodeSet -import cromwell.core.JobOutput -import org.mockito.Mockito -import org.mockito.Mockito._ -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, MustMatchers, WordSpecLike} -import wdl4s.Call -import wdl4s.values.WdlString - -class MongoCacheActorSpec extends TestKit(ActorSystem("MongoCacheProviderActorSpecSystem")) with WordSpecLike with MustMatchers - with BeforeAndAfter with BeforeAndAfterAll with ImplicitSender with MockitoSugar with 
KryoSerDe { - - import spray.json._ - import cromwell.backend.impl.htcondor.caching.provider.mongodb.model.MongoCachedExecutionResultProtocol._ - - val config: Config = ConfigFactory.load() - val mongoDbCollectionMock = mock[MongoCollection] - val memorySize = MemorySize.parse("0.512 GB").get - val diskSize = MemorySize.parse("1.024 GB").get - val runtimeConfig = HtCondorRuntimeAttributes(ContinueOnReturnCodeSet(Set(0)), Some("tool-name"), Some("/workingDir"), Some("/outputDir"), true, 1, memorySize, diskSize) - val jobHash = "88dde49db10f1551299fb9937f313c10" - val taskStatus = "done" - val succeededResponseMock = SucceededResponse(BackendJobDescriptorKey(Call(None, "TestJob", null, null, null, None), None, 0), None, Map("test" -> JobOutput(WdlString("Test"))), None, Seq.empty) - val serSucceededRespMock = KryoSerializedObject(serialize(succeededResponseMock)) - val cachedExecutionResult = MongoCachedExecutionResult(jobHash, serSucceededRespMock) - val cachedExecutionResultDbObject = JSON.parse(cachedExecutionResult.toJson.toString).asInstanceOf[DBObject] - val query = MongoDBObject("hash" -> jobHash) - - after { - Mockito.reset(mongoDbCollectionMock) - } - - override def afterAll = shutdown() - - "A CacheActor" should { - "return an ExecutionResultFound when read an execution result from cache" in { - when(mongoDbCollectionMock.findOne(query)) thenReturn Some(cachedExecutionResultDbObject) - val cacheActor = TestActorRef(new MongoCacheActor(mongoDbCollectionMock)) - cacheActor ! ReadExecutionResult(jobHash) - expectMsg(ExecutionResultFound(succeededResponseMock)) - verify(mongoDbCollectionMock, atLeastOnce).findOne(query) - } - - "return an ExecutionResultNotFound when it can't find an execution result in cache" in { - when(mongoDbCollectionMock.findOne(query)) thenReturn None - val cacheActor = TestActorRef(new MongoCacheActor(mongoDbCollectionMock)) - cacheActor ! 
ReadExecutionResult(jobHash) - expectMsg(ExecutionResultNotFound) - verify(mongoDbCollectionMock, atLeastOnce).findOne(query) - } - - "return ExecutionResultStored when it stores an execution result" in { - when(mongoDbCollectionMock.findOne(query)) thenThrow new CachedResultNotFoundException("") - when(mongoDbCollectionMock.insert(cachedExecutionResultDbObject)) thenReturn new WriteResult(0, true, "") - val cacheActor = TestActorRef(new MongoCacheActor(mongoDbCollectionMock)) - cacheActor ! StoreExecutionResult(jobHash, succeededResponseMock) - expectMsg(ExecutionResultStored("88dde49db10f1551299fb9937f313c10")) - verify(mongoDbCollectionMock, atLeastOnce).insert(cachedExecutionResultDbObject) - } - - "return ExecutionResultAlreadyExist when it tries to store an existing execution result" in { - when(mongoDbCollectionMock.findOne(query)) thenReturn Some(cachedExecutionResultDbObject) - val cacheActor = TestActorRef(new MongoCacheActor(mongoDbCollectionMock)) - cacheActor ! StoreExecutionResult(jobHash, succeededResponseMock) - expectMsg(ExecutionResultAlreadyExist) - verify(mongoDbCollectionMock, atLeastOnce).findOne(query) - } - - "return ExecutionResultNotFound when try to read and force re-write flag is enabled" in { - when(mongoDbCollectionMock.findOne(query)) thenReturn Some(cachedExecutionResultDbObject) - val cacheActor = TestActorRef(new MongoCacheActor(mongoDbCollectionMock, true)) - cacheActor ! 
ReadExecutionResult(jobHash) - expectMsg(ExecutionResultNotFound) - verify(mongoDbCollectionMock, atLeastOnce).findOne(query) - } - - "return ExecutionResultStored when try to store and force re-write flag is enabled" in { - when(mongoDbCollectionMock.findOne(query)) thenThrow new CachedResultNotFoundException("") - when(mongoDbCollectionMock.insert(cachedExecutionResultDbObject)) thenReturn new WriteResult(0, true, "") - when(mongoDbCollectionMock.remove(query)) thenReturn new WriteResult(0, true, "") - val cacheActor = TestActorRef(new MongoCacheActor(mongoDbCollectionMock)) - cacheActor ! StoreExecutionResult(jobHash, succeededResponseMock) - expectMsg(ExecutionResultStored("88dde49db10f1551299fb9937f313c10")) - verify(mongoDbCollectionMock, atLeastOnce).insert(cachedExecutionResultDbObject) - verify(mongoDbCollectionMock, atLeastOnce).remove(query) - } - } -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala index 3427f3f09..edbdac0f6 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala @@ -2,23 +2,31 @@ package cromwell.backend.impl.jes import java.net.URL -import com.google.api.client.auth.oauth2.Credential -import com.google.api.client.http.HttpTransport -import com.google.api.client.json.JsonFactory +import com.google.api.client.http.{HttpRequest, HttpRequestInitializer} import com.google.api.services.genomics.Genomics -import cromwell.filesystems.gcs.GoogleConfiguration +import com.google.auth.Credentials +import com.google.auth.http.HttpCredentialsAdapter +import cromwell.filesystems.gcs.auth.GoogleAuthMode -object GenomicsFactory { +case class GenomicsFactory(applicationName: String, authMode: GoogleAuthMode, endpointUrl: URL) { - def apply(applicationName: String, credential: Credential, 
endpointUrl: URL): Genomics = { - GoogleGenomics.from(applicationName, endpointUrl, credential, credential.getJsonFactory, credential.getTransport) - } - - // Wrapper object around Google's Genomics class providing a convenience 'from' "method" - object GoogleGenomics { - def from(applicationName: String, endpointUrl: URL, credential: Credential, jsonFactory: JsonFactory, httpTransport: HttpTransport): Genomics = { - new Genomics.Builder(httpTransport, jsonFactory, credential).setApplicationName(applicationName).setRootUrl(endpointUrl.toString).build + def fromCredentials(credentials: Credentials) = { + val httpRequestInitializer = { + val delegate = new HttpCredentialsAdapter(credentials) + new HttpRequestInitializer() { + def initialize(httpRequest: HttpRequest) = { + delegate.initialize(httpRequest) + } + } } + + new Genomics.Builder( + GoogleAuthMode.httpTransport, + GoogleAuthMode.jsonFactory, + httpRequestInitializer) + .setApplicationName(applicationName) + .setRootUrl(endpointUrl.toString) + .build } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GoogleLabels.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GoogleLabels.scala new file mode 100644 index 000000000..a2bcbddf8 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GoogleLabels.scala @@ -0,0 +1,55 @@ +package cromwell.backend.impl.jes + +import cats.data.Validated.Valid +import cromwell.core.labels.{Label, Labels} + +object GoogleLabels { + + val MaxLabelLength = Label.MaxLabelLength + val GoogleLabelsRegexPattern = Label.LabelKeyRegex + + // This function is used to coerce a string into one that meets the requirements for a label submission to JES. 
+ // See 'labels' in https://cloud.google.com/genomics/reference/rpc/google.genomics.v1alpha2#google.genomics.v1alpha2.RunPipelineArgs + def safeGoogleName(mainText: String, emptyAllowed: Boolean = false): String = { + + Label.validateLabelRegex(mainText, GoogleLabelsRegexPattern.r) match { + case Valid(labelText) => labelText + case invalid @ _ if mainText.equals("") && emptyAllowed => mainText + case invalid @ _ => + def appendSafe(current: String, nextChar: Char): String = { + nextChar match { + case c if c.isLetterOrDigit || c == '-' => current + c.toLower + case _ => current + '-' + } + } + + val foldResult = mainText.toCharArray.foldLeft("")(appendSafe) + + val startsValid = foldResult.headOption.exists(_.isLetter) + val endsValid = foldResult.lastOption.exists(_.isLetterOrDigit) + + val validStart = if (startsValid) foldResult else "x--" + foldResult + val validStartAndEnd = if (endsValid) validStart else validStart + "--x" + + val length = validStartAndEnd.length + val tooLong = length > MaxLabelLength + + if (tooLong) { + val middleSeparator = "---" + val subSectionLength = (MaxLabelLength - middleSeparator.length) / 2 + validStartAndEnd.substring(0, subSectionLength) + middleSeparator + validStartAndEnd.substring(length - subSectionLength, length) + } else { + validStartAndEnd + } + } + } + + def toLabels(values: (String, String)*): Labels = { + + def safeGoogleLabel(key: String, value: String): Label = { + Label(safeGoogleName(key), safeGoogleName(value, emptyAllowed = true)) + } + + Labels(values.toVector map (safeGoogleLabel _ ).tupled) + } +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala index 3b2a4bc11..9d229b3ee 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala +++ 
b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala @@ -1,145 +1,125 @@ package cromwell.backend.impl.jes import java.net.SocketTimeoutException -import java.nio.file.{Path, Paths} -import akka.actor.{Actor, ActorLogging, ActorRef, Props} -import akka.event.LoggingReceive -import better.files._ +import akka.actor.ActorRef +import cats.data.Validated.{Invalid, Valid} import com.google.api.client.googleapis.json.GoogleJsonResponseException -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse} -import cromwell.backend.BackendLifecycleActor.AbortJobCommand -import cromwell.backend.async.AsyncBackendJobExecutionActor.{ExecutionMode, JobId} -import cromwell.backend.async.{AbortedExecutionHandle, AsyncBackendJobExecutionActor, ExecutionHandle, FailedNonRetryableExecutionHandle, FailedRetryableExecutionHandle, NonRetryableExecution, SuccessfulExecutionHandle} -import cromwell.backend.impl.jes.JesImplicits.PathString -import cromwell.backend.impl.jes.JesJobExecutionActor.JesOperationIdKey +import com.google.api.services.genomics.model.RunPipelineRequest +import com.google.cloud.storage.contrib.nio.CloudStorageOptions +import cromwell.backend._ import cromwell.backend.impl.jes.RunStatus.TerminalRunStatus +import cromwell.backend.async.{AbortedExecutionHandle, ExecutionHandle, FailedNonRetryableExecutionHandle, FailedRetryableExecutionHandle, PendingExecutionHandle} +import cromwell.backend.impl.jes.errors.FailedToDelocalizeFailure import cromwell.backend.impl.jes.io._ -import cromwell.backend.{AttemptedLookupResult, BackendJobDescriptor, BackendWorkflowDescriptor, PreemptedException} -import cromwell.core.Dispatcher.BackendDispatcher +import cromwell.backend.impl.jes.statuspolling.{JesRunCreationClient, JesStatusRequestClient} +import cromwell.backend.standard.{StandardAsyncExecutionActor, StandardAsyncExecutionActorParams, StandardAsyncJob} import cromwell.core._ -import 
cromwell.core.logging.JobLogging -import cromwell.core.retry.{Retry, SimpleExponentialBackoff} -import cromwell.filesystems.gcs.NioGcsPath +import cromwell.core.logging.JobLogger +import cromwell.core.path.{DefaultPathBuilder, Path} +import cromwell.core.retry.SimpleExponentialBackoff import cromwell.services.keyvalue.KeyValueServiceActor._ -import cromwell.services.metadata._ -import wdl4s.AstTools._ -import wdl4s.WdlExpression.ScopedLookupFunction -import wdl4s._ -import wdl4s.command.ParameterCommandPart -import wdl4s.expression.NoFunctions -import wdl4s.util.TryUtil -import wdl4s.values._ - +import cromwell.filesystems.gcs.GcsPath +import lenthall.validation.ErrorOr.ErrorOr +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder +import cromwell.services.keyvalue.KvClient +import org.slf4j.LoggerFactory +import wdl4s.wdl._ +import wdl4s.wdl.expression.PureStandardLibraryFunctions +import wdl4s.wdl.values._ + +import scala.collection.JavaConverters._ import scala.concurrent.duration._ -import scala.concurrent.{ExecutionContext, Future, Promise} +import scala.concurrent.Future import scala.language.postfixOps -import scala.util.{Failure, Success, Try} +import scala.util.{Success, Try} object JesAsyncBackendJobExecutionActor { - - def props(jobDescriptor: BackendJobDescriptor, - completionPromise: Promise[BackendJobExecutionResponse], - jesWorkflowInfo: JesConfiguration, - initializationData: JesBackendInitializationData, - serviceRegistryActor: ActorRef): Props = { - Props(new JesAsyncBackendJobExecutionActor(jobDescriptor, - completionPromise, - jesWorkflowInfo, - initializationData, - serviceRegistryActor)).withDispatcher(BackendDispatcher) - } + val JesOperationIdKey = "__jes_operation_id" object WorkflowOptionKeys { val MonitoringScript = "monitoring_script" val GoogleProject = "google_project" + val GoogleComputeServiceAccount = "google_compute_service_account" } + type JesPendingExecutionHandle = PendingExecutionHandle[StandardAsyncJob, Run, 
RunStatus] private val ExtraConfigParamName = "__extra_config_gcs_path" - /** - * Representing a running JES execution, instances of this class are never Done and it is never okay to - * ask them for results. - */ - case class JesPendingExecutionHandle(jobDescriptor: BackendJobDescriptor, - jesOutputs: Seq[JesFileOutput], - run: Run, - previousStatus: Option[RunStatus]) extends ExecutionHandle { - override val isDone = false - override val result = NonRetryableExecution(new IllegalStateException("JesPendingExecutionHandle cannot yield a result")) - } + private def stringifyMap(m: Map[String, String]): String = m map { case(k, v) => s" $k -> $v"} mkString "\n" - case class JesJobId(operationId: String) extends JobId -} + val maxUnexpectedRetries = 2 + val GoogleCancelledRpc = 1 + val GoogleNotFoundRpc = 5 + val GoogleAbortedRpc = 10 // Note "Aborted" here is not the same as our "abort" + val JesFailedToDelocalize = 5 + val JesUnexpectedTermination = 13 + val JesPreemption = 14 -class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, - override val completionPromise: Promise[BackendJobExecutionResponse], - override val jesConfiguration: JesConfiguration, - override val initializationData: JesBackendInitializationData, - override val serviceRegistryActor: ActorRef) - extends Actor with ActorLogging with AsyncBackendJobExecutionActor with JesJobCachingActorHelper with JobLogging { + def StandardException(errorCode: Int, message: String, jobTag: String) = { + new Exception(s"Task $jobTag failed. JES error code $errorCode. 
$message") + } +} + +class JesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyncExecutionActorParams) + extends BackendJobLifecycleActor with StandardAsyncExecutionActor with JesJobCachingActorHelper + with JesStatusRequestClient with JesRunCreationClient with GcsBatchCommandBuilder with KvClient { import JesAsyncBackendJobExecutionActor._ + val slf4jLogger = LoggerFactory.getLogger(JesAsyncBackendJobExecutionActor.getClass) + val logger = new JobLogger("JesRun", jobDescriptor.workflowDescriptor.id, jobDescriptor.key.tag, None, Set(slf4jLogger)) + + val jesBackendSingletonActor: ActorRef = + standardParams.backendSingletonActorOption.getOrElse( + throw new RuntimeException("JES Backend actor cannot exist without the JES backend singleton actor")) + + override type StandardAsyncRunInfo = Run + + override type StandardAsyncRunStatus = RunStatus + + override val pollingActor: ActorRef = jesBackendSingletonActor + override lazy val pollBackOff = SimpleExponentialBackoff( - initialInterval = 30 seconds, maxInterval = 10 minutes, multiplier = 1.1) + initialInterval = 30 seconds, maxInterval = jesAttributes.maxPollingInterval seconds, multiplier = 1.1) override lazy val executeOrRecoverBackOff = SimpleExponentialBackoff( initialInterval = 3 seconds, maxInterval = 20 seconds, multiplier = 1.1) - private lazy val workflowDescriptor = jobDescriptor.workflowDescriptor - - private lazy val call = jobDescriptor.key.call - - override lazy val retryable = jobDescriptor.key.attempt <= runtimeAttributes.preemptible private lazy val cmdInput = - JesFileInput(ExecParamName, jesCallPaths.gcsExecPath.toString, Paths.get(jesCallPaths.gcsExecFilename), workingDisk) + JesFileInput(ExecParamName, jesCallPaths.script.pathAsString, DefaultPathBuilder.get(jesCallPaths.scriptFilename), workingDisk) private lazy val jesCommandLine = s"/bin/bash ${cmdInput.containerPath}" - private lazy val rcJesOutput = JesFileOutput(returnCodeFilename, returnCodeGcsPath.toString, 
Paths.get(returnCodeFilename), workingDisk) + private lazy val rcJesOutput = JesFileOutput(returnCodeFilename, returnCodeGcsPath.pathAsString, DefaultPathBuilder.get(returnCodeFilename), workingDisk) private lazy val standardParameters = Seq(rcJesOutput) - private lazy val returnCodeContents = Try(File(returnCodeGcsPath).contentAsString) + private lazy val dockerConfiguration = jesConfiguration.dockerCredentials - private lazy val tag = s"${this.getClass.getSimpleName} [UUID(${workflowId.shortString}):${jobDescriptor.key.tag}]" - private var runId: Option[String] = None + private val previousRetryReasons: ErrorOr[PreviousRetryReasons] = PreviousRetryReasons.tryApply(jobDescriptor.prefetchedKvStoreEntries, jobDescriptor.key.attempt) - def jesReceiveBehavior: Receive = LoggingReceive { - case AbortJobCommand => - runId foreach { id => - Try(Run(id, initializationData.genomics).abort()) match { - case Success(_) => jobLogger.info("{} Aborted {}", tag: Any, id) - case Failure(ex) => jobLogger.warn("{} Failed to abort {}: {}", tag, id, ex.getMessage) - } - } - context.parent ! 
AbortedResponse(jobDescriptor.key) - context.stop(self) - case KvPutSuccess(_) => // expected after the KvPut for the operation ID + private lazy val jobDockerImage = jobDescriptor.maybeCallCachingEligible.dockerHash.getOrElse(runtimeAttributes.dockerImage) + + override lazy val dockerImageUsed: Option[String] = Option(jobDockerImage) + + override val preemptible: Boolean = previousRetryReasons match { + case Valid(PreviousRetryReasons(p, _)) => p < maxPreemption + case _ => false } - override def receive: Receive = jesReceiveBehavior orElse super.receive - - private def globOutputPath(glob: String) = callRootPath.resolve(s"glob-${glob.md5Sum}/") - - private def gcsAuthParameter: Option[JesInput] = { - if (jesAttributes.gcsFilesystemAuth.requiresAuthFile || dockerConfiguration.isDefined) - Option(JesLiteralInput(ExtraConfigParamName, jesCallPaths.gcsAuthFilePath.toString)) - else None + override def tryAbort(job: StandardAsyncJob): Unit = { + Run(job, initializationData.genomics).abort() } - private lazy val callContext = CallContext( - callRootPath, - jesStdoutFile.toString, - jesStderrFile.toString - ) + override def requestsAbortAndDiesImmediately: Boolean = true - private[jes] lazy val callEngineFunctions = new JesExpressionFunctions(List(jesCallPaths.gcsFileSystem), callContext) + override def receive: Receive = pollingActorClientReceive orElse runCreationClientReceive orElse ioReceive orElse kvClientReceive orElse super.receive - private val lookup: ScopedLookupFunction = { - val declarations = workflowDescriptor.workflowNamespace.workflow.declarations ++ call.task.declarations - WdlExpression.standardLookupFunction(jobDescriptor.inputs, declarations, callEngineFunctions) + private def gcsAuthParameter: Option[JesInput] = { + if (jesAttributes.auths.gcs.requiresAuthFile || dockerConfiguration.isDefined) + Option(JesLiteralInput(ExtraConfigParamName, jesCallPaths.workflowPaths.gcsAuthFilePath.pathAsString)) + else None } /** @@ -151,7 +131,7 @@ class 
JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes jobDescriptor: BackendJobDescriptor): Iterable[JesInput] = { (remotePathArray zip localPathArray zipWithIndex) flatMap { case ((remotePath, localPath), index) => - Seq(JesFileInput(s"$jesNamePrefix-$index", remotePath.valueString, Paths.get(localPath.valueString), workingDisk)) + Seq(JesFileInput(s"$jesNamePrefix-$index", remotePath.valueString, DefaultPathBuilder.get(localPath.valueString), workingDisk)) } } @@ -162,43 +142,28 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes * relativeLocalizationPath("gs://some/bucket/foo.txt") -> "some/bucket/foo.txt" */ private def relativeLocalizationPath(file: WdlFile): WdlFile = { - Try(getPath(file.value)) match { - case Success(gcsPath: NioGcsPath) => WdlFile(gcsPath.bucket + "/" + gcsPath.objectName, file.isGlob) - case Success(gcsPath) => file - case Failure(e) => file + getPath(file.value) match { + case Success(path) => WdlFile(path.pathWithoutScheme, file.isGlob) + case _ => file } } - private[jes] def generateJesInputs(jobDescriptor: BackendJobDescriptor): Iterable[JesInput] = { - /** - * Commands in WDL tasks can also generate input files. For example: ./my_exec --file=${write_lines(arr)} - * - * write_lines(arr) would produce a string-ified version of the array stored as a GCS path. 
The next block of code - * will go through each ${...} expression within the task's command section and find all write_*() ASTs and - * evaluate them so the files are written to GCS and the they can be included as inputs to Google's Pipeline object - */ - val commandExpressions = jobDescriptor.key.scope.task.commandTemplate.collect({ - case x: ParameterCommandPart => x.expression - }) - - val writeFunctionAsts = commandExpressions.map(_.ast).flatMap(x => AstTools.findAsts(x, "FunctionCall")).collect({ - case y if y.getAttribute("name").sourceString.startsWith("write_") => y - }) - - val evaluatedExpressionMap = writeFunctionAsts map { ast => - val expression = WdlExpression(ast) - val value = expression.evaluate(lookup, callEngineFunctions) - expression.toWdlString.md5SumShort -> value - } toMap + private[jes] def generateJesInputs(jobDescriptor: BackendJobDescriptor): Set[JesInput] = { - val writeFunctionFiles = evaluatedExpressionMap collect { case (k, v: Success[_]) => k -> v.get } collect { case (k, v: WdlFile) => k -> Seq(v)} + val fullyQualifiedPreprocessedInputs = jobDescriptor.inputDeclarations map { case (declaration, value) => declaration.fullyQualifiedName -> commandLineValueMapper(value) } + val writeFunctionFiles = call.task.evaluateFilesFromCommand(fullyQualifiedPreprocessedInputs, backendEngineFunctions) map { + case (expression, file) => expression.toWdlString.md5SumShort -> Seq(file) + } - /** Collect all WdlFiles from inputs to the call */ - val callInputFiles: Map[FullyQualifiedName, Seq[WdlFile]] = jobDescriptor.inputs mapValues { _.collectAsSeq { case w: WdlFile => w } } + /* Collect all WdlFiles from inputs to the call */ + val callInputFiles: Map[FullyQualifiedName, Seq[WdlFile]] = jobDescriptor.fullyQualifiedInputs mapValues { + _.collectAsSeq { case w: WdlFile => w } + } - (callInputFiles ++ writeFunctionFiles) flatMap { + val inputs = (callInputFiles ++ writeFunctionFiles) flatMap { case (name, files) => jesInputsFromWdlFiles(name, files, 
files.map(relativeLocalizationPath), jobDescriptor) } + inputs.toSet } /** @@ -208,7 +173,7 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes * @throws Exception if the `path` does not live in one of the supplied `disks` */ private def relativePathAndAttachedDisk(path: String, disks: Seq[JesAttachedDisk]): (Path, JesAttachedDisk) = { - val absolutePath = Paths.get(path) match { + val absolutePath = DefaultPathBuilder.get(path) match { case p if !p.isAbsolute => JesWorkingDisk.MountPoint.resolve(p) case p => p } @@ -228,401 +193,283 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes if (referenceName.length <= 127) referenceName else referenceName.md5Sum } - private[jes] def generateJesOutputs(jobDescriptor: BackendJobDescriptor): Seq[JesFileOutput] = { - val wdlFileOutputs = jobDescriptor.key.scope.task.outputs flatMap { taskOutput => - taskOutput.requiredExpression.evaluateFiles(lookup, NoFunctions, taskOutput.wdlType) match { - case Success(wdlFiles) => wdlFiles map relativeLocalizationPath - case Failure(ex) => - jobLogger.warn(s"Could not evaluate $taskOutput: ${ex.getMessage}", ex) - Seq.empty[WdlFile] - } - } + private[jes] def generateJesOutputs(jobDescriptor: BackendJobDescriptor): Set[JesFileOutput] = { + val wdlFileOutputs = call.task.findOutputFiles(jobDescriptor.fullyQualifiedInputs, PureStandardLibraryFunctions) map relativeLocalizationPath - // Create the mappings. GLOB mappings require special treatment (i.e. 
stick everything matching the glob in a folder) - wdlFileOutputs.distinct map { wdlFile => - val destination = wdlFile match { - case WdlSingleFile(filePath) => callRootPath.resolve(filePath).toString - case WdlGlobFile(filePath) => globOutputPath(filePath).toString + val outputs = wdlFileOutputs.distinct flatMap { wdlFile => + wdlFile match { + case singleFile: WdlSingleFile => List(generateJesSingleFileOutputs(singleFile)) + case globFile: WdlGlobFile => generateJesGlobFileOutputs(globFile) } - val (relpath, disk) = relativePathAndAttachedDisk(wdlFile.value, runtimeAttributes.disks) - JesFileOutput(makeSafeJesReferenceName(wdlFile.value), destination, relpath, disk) } + + outputs.toSet + } + + private def generateJesSingleFileOutputs(wdlFile: WdlSingleFile): JesFileOutput = { + val destination = callRootPath.resolve(wdlFile.value.stripPrefix("/")).pathAsString + val (relpath, disk) = relativePathAndAttachedDisk(wdlFile.value, runtimeAttributes.disks) + JesFileOutput(makeSafeJesReferenceName(wdlFile.value), destination, relpath, disk) } - private def instantiateCommand: Try[String] = { - val backendInputs = jobDescriptor.inputs mapValues gcsPathToLocal - jobDescriptor.call.instantiateCommandLine(backendInputs, callEngineFunctions, gcsPathToLocal) + private def generateJesGlobFileOutputs(wdlFile: WdlGlobFile): List[JesFileOutput] = { + val globName = backendEngineFunctions.globName(wdlFile.value) + val globDirectory = globName + "/" + val globListFile = globName + ".list" + val gcsGlobDirectoryDestinationPath = callRootPath.resolve(globDirectory).pathAsString + val gcsGlobListFileDestinationPath = callRootPath.resolve(globListFile).pathAsString + + val (_, globDirectoryDisk) = relativePathAndAttachedDisk(wdlFile.value, runtimeAttributes.disks) + + // We need both the glob directory and the glob list: + List( + // The glob directory: + JesFileOutput(makeSafeJesReferenceName(globDirectory), gcsGlobDirectoryDestinationPath, DefaultPathBuilder.get(globDirectory + "*"), 
globDirectoryDisk), + // The glob list file: + JesFileOutput(makeSafeJesReferenceName(globListFile), gcsGlobListFileDestinationPath, DefaultPathBuilder.get(globListFile), globDirectoryDisk) + ) } - private def uploadCommandScript(command: String, withMonitoring: Boolean): Future[Unit] = { - val monitoring = if (withMonitoring) { + override lazy val commandDirectory: Path = JesWorkingDisk.MountPoint + + override def commandScriptPreamble: String = { + if (monitoringOutput.isDefined) { s"""|touch $JesMonitoringLogFile |chmod u+x $JesMonitoringScript |$JesMonitoringScript > $JesMonitoringLogFile &""".stripMargin } else "" + } - val tmpDir = File(JesWorkingDisk.MountPoint)./("tmp").path - val rcPath = File(JesWorkingDisk.MountPoint)./(returnCodeFilename).path - - val fileContent = - s""" - |#!/bin/bash - |export _JAVA_OPTIONS=-Djava.io.tmpdir=$tmpDir - |export TMPDIR=$tmpDir - |$monitoring - |( - |cd ${JesWorkingDisk.MountPoint} - |$command - |) - |echo $$? > $rcPath - """.stripMargin.trim - - def writeScript(): Future[Unit] = Future(File(jesCallPaths.gcsExecPath).write(fileContent)) - - implicit val system = context.system - Retry.withRetry( - writeScript, - isTransient = isTransientJesException, - isFatal = isFatalJesException - ) + override def globParentDirectory(wdlGlobFile: WdlGlobFile): Path = { + val (_, disk) = relativePathAndAttachedDisk(wdlGlobFile.value, runtimeAttributes.disks) + disk.mountPoint } private def googleProject(descriptor: BackendWorkflowDescriptor): String = { descriptor.workflowOptions.getOrElse(WorkflowOptionKeys.GoogleProject, jesAttributes.project) } - private def createJesRun(jesParameters: Seq[JesParameter], runIdForResumption: Option[String] = None): Future[Run] = { + private def computeServiceAccount(descriptor: BackendWorkflowDescriptor): String = { + descriptor.workflowOptions.getOrElse(WorkflowOptionKeys.GoogleComputeServiceAccount, jesAttributes.computeServiceAccount) + } + + override def isTerminal(runStatus: RunStatus): Boolean = 
{ + runStatus match { + case _: TerminalRunStatus => true + case _ => false + } + } - def createRun() = Future(Run( - runIdForResumption, + private def createJesRunPipelineRequest(jesParameters: Seq[JesParameter]): RunPipelineRequest = { + val runPipelineParameters = Run.makeRunPipelineRequest( jobDescriptor = jobDescriptor, runtimeAttributes = runtimeAttributes, - callRootPath = callRootPath.toString, + dockerImage = jobDockerImage, + callRootPath = callRootPath.pathAsString, commandLine = jesCommandLine, logFileName = jesLogFilename, jesParameters, googleProject(jobDescriptor.workflowDescriptor), - retryable, + computeServiceAccount(jobDescriptor.workflowDescriptor), + backendLabels, + preemptible, initializationData.genomics - )) - - implicit val system = context.system - Retry.withRetry( - createRun, - isTransient = isTransientJesException, - isFatal = isFatalJesException - ) andThen { - case Success(run) => - // If this execution represents a resumption don't publish the operation ID since clearly it is already persisted. - runId = Option(run.runId) - if (runIdForResumption.isEmpty) { - serviceRegistryActor ! 
KvPut(KvPair(ScopedKey(jobDescriptor.workflowDescriptor.id, - KvJobKey(jobDescriptor.key.call.fullyQualifiedName, jobDescriptor.key.index, jobDescriptor.key.attempt), - JesOperationIdKey), runId)) - } - } + ) + logger.debug(s"Inputs:\n${stringifyMap(runPipelineParameters.getPipelineArgs.getInputs.asScala.toMap)}") + logger.debug(s"Outputs:\n${stringifyMap(runPipelineParameters.getPipelineArgs.getOutputs.asScala.toMap)}") + runPipelineParameters } - protected def runWithJes(command: String, - jesInputs: Seq[JesInput], - jesOutputs: Seq[JesFileOutput], - runIdForResumption: Option[String], - withMonitoring: Boolean): Future[ExecutionHandle] = { + override def isFatal(throwable: Throwable): Boolean = super.isFatal(throwable) || isFatalJesException(throwable) - tellStartMetadata() + override def isTransient(throwable: Throwable): Boolean = isTransientJesException(throwable) - val jesParameters = standardParameters ++ gcsAuthParameter ++ jesInputs ++ jesOutputs + override def executeAsync(): Future[ExecutionHandle] = runWithJes(None) - val jesJobSetup = for { - _ <- uploadCommandScript(command, withMonitoring) - run <- createJesRun(jesParameters, runIdForResumption) - _ = tellMetadata(Map(CallMetadataKeys.JobId -> run.runId)) - } yield run + val futureKvJobKey = KvJobKey(jobDescriptor.key.call.fullyQualifiedName, jobDescriptor.key.index, jobDescriptor.key.attempt + 1) - jesJobSetup map { run => JesPendingExecutionHandle(jobDescriptor, jesOutputs, run, previousStatus = None) } - } + override def recoverAsync(jobId: StandardAsyncJob): Future[ExecutionHandle] = runWithJes(Option(jobId)) - override def executeOrRecover(mode: ExecutionMode)(implicit ec: ExecutionContext): Future[ExecutionHandle] = { - // Force runtimeAttributes to evaluate so we can fail quickly now if we need to: - Try(runtimeAttributes) match { - case Success(_) => startExecuting(monitoringOutput, mode) - case Failure(e) => Future.successful(FailedNonRetryableExecutionHandle(e, None)) - } - } + private def 
runWithJes(jobForResumption: Option[StandardAsyncJob]): Future[ExecutionHandle] = { + // Want to force runtimeAttributes to evaluate so we can fail quickly now if we need to: + def evaluateRuntimeAttributes = Future.fromTry(Try(runtimeAttributes)) - private def startExecuting(monitoringOutput: Option[JesFileOutput], mode: ExecutionMode): Future[ExecutionHandle] = { - val jesInputs: Seq[JesInput] = generateJesInputs(jobDescriptor).toSeq ++ monitoringScript :+ cmdInput - val jesOutputs: Seq[JesFileOutput] = generateJesOutputs(jobDescriptor) ++ monitoringOutput + def generateJesParameters = Future.fromTry( Try { + val generatedJesInputs = generateJesInputs(jobDescriptor) + val jesInputs: Set[JesInput] = generatedJesInputs ++ monitoringScript + cmdInput + val jesOutputs: Set[JesFileOutput] = generateJesOutputs(jobDescriptor) ++ monitoringOutput - instantiateCommand match { - case Success(command) => runWithJes(command, jesInputs, jesOutputs, mode.jobId.collectFirst { case j: JesJobId => j.operationId }, monitoringScript.isDefined) - case Failure(ex: SocketTimeoutException) => Future.successful(FailedNonRetryableExecutionHandle(ex)) - case Failure(ex) => Future.successful(FailedNonRetryableExecutionHandle(ex)) - } - } + standardParameters ++ gcsAuthParameter ++ jesInputs ++ jesOutputs + }) - /** - * Update the ExecutionHandle - */ - override def poll(previous: ExecutionHandle)(implicit ec: ExecutionContext): Future[ExecutionHandle] = Future { - previous match { - case handle: JesPendingExecutionHandle => - val runId = handle.run.runId - jobLogger.debug(s"$tag Polling JES Job $runId") - val previousStatus = handle.previousStatus - val status = Try(handle.run.status()) - status foreach { currentStatus => - if (!(handle.previousStatus contains currentStatus)) { - // If this is the first time checking the status, we log the transition as '-' to 'currentStatus'. Otherwise - // just use the state names. 
- val prevStateName = previousStatus map { _.toString } getOrElse "-" - jobLogger.info(s"$tag Status change from $prevStateName to $currentStatus") - tellMetadata(Map("backendStatus" -> currentStatus)) - } - } - status match { - case Success(s: TerminalRunStatus) => - val metadata = Map( - JesMetadataKeys.MachineType -> s.machineType.getOrElse("unknown"), - JesMetadataKeys.InstanceName -> s.instanceName.getOrElse("unknown"), - JesMetadataKeys.Zone -> s.zone.getOrElse("unknown") - ) - - tellMetadata(metadata) - executionResult(s, handle) - case Success(s) => handle.copy(previousStatus = Option(s)).future // Copy the current handle with updated previous status. - case Failure(e: GoogleJsonResponseException) if e.getStatusCode == 404 => - jobLogger.error(s"$tag JES Job ID ${handle.run.runId} has not been found, failing call") - FailedNonRetryableExecutionHandle(e).future - case Failure(e: Exception) => - // Log exceptions and return the original handle to try again. - jobLogger.warn(s"Caught exception, retrying", e) - handle.future - case Failure(e: Error) => Future.failed(e) // JVM-ending calamity. - case Failure(throwable) => - // Someone has subclassed Throwable directly? 
- FailedNonRetryableExecutionHandle(throwable).future - } - case f: FailedNonRetryableExecutionHandle => f.future - case s: SuccessfulExecutionHandle => s.future - case badHandle => Future.failed(new IllegalArgumentException(s"Unexpected execution handle: $badHandle")) - } - } flatten + def uploadScriptFile = writeAsync(jobPaths.script, commandScriptContents, Seq(CloudStorageOptions.withMimeType("text/plain"))) - /** - * Fire and forget start info to the metadata service - */ - private def tellStartMetadata(): Unit = { - val runtimeAttributesMetadata: Map[String, Any] = runtimeAttributes.asMap map { - case (key, value) => s"runtimeAttributes:$key" -> value - } + def makeRpr(jesParameters: Seq[JesParameter]) = Future.fromTry(Try { + createJesRunPipelineRequest(jesParameters) + }) - var fileMetadata: Map[String, Any] = jesCallPaths.metadataPaths - if (monitoringOutput.nonEmpty) { - // TODO: Move this to JesCallPaths - fileMetadata += JesMetadataKeys.MonitoringLog -> monitoringOutput.get.gcs + jobForResumption match { + case Some(job) => + val run = Run(job, initializationData.genomics) + Future.successful(PendingExecutionHandle(jobDescriptor, job, Option(run), previousStatus = None)) + case None => + for { + _ <- evaluateRuntimeAttributes + jesParameters <- generateJesParameters + _ <- uploadScriptFile + rpr <- makeRpr(jesParameters) + runId <- runPipeline(initializationData.genomics, rpr) + run = Run(runId, initializationData.genomics) + } yield PendingExecutionHandle(jobDescriptor, runId, Option(run), previousStatus = None) } - - val otherMetadata = Map( - JesMetadataKeys.GoogleProject -> jesAttributes.project, - JesMetadataKeys.ExecutionBucket -> jesAttributes.executionBucket, - JesMetadataKeys.EndpointUrl -> jesAttributes.endpointUrl, - "preemptible" -> preemptible, - "cache:allowResultReuse" -> true - ) - - val metadataKeyValues = runtimeAttributesMetadata ++ fileMetadata ++ otherMetadata - - tellMetadata(metadataKeyValues) } - /** - * Fire and forget info to 
the metadata service - */ - def tellMetadata(metadataKeyValues: Map[String, Any]): Unit = { - import cromwell.services.metadata.MetadataService.implicits.MetadataAutoPutter - serviceRegistryActor.putMetadata(jobDescriptor.workflowDescriptor.id, Option(jobDescriptor.key), metadataKeyValues) + override def pollStatusAsync(handle: JesPendingExecutionHandle): Future[RunStatus] = super[JesStatusRequestClient].pollStatus(handle.runInfo.get) + + override def customPollStatusFailure: PartialFunction[(ExecutionHandle, Exception), ExecutionHandle] = { + case (oldHandle: JesPendingExecutionHandle@unchecked, e: GoogleJsonResponseException) if e.getStatusCode == 404 => + jobLogger.error(s"JES Job ID ${oldHandle.runInfo.get.job} has not been found, failing call") + FailedNonRetryableExecutionHandle(e) } - private def customLookupFunction(alreadyGeneratedOutputs: Map[String, WdlValue])(toBeLookedUp: String): WdlValue = alreadyGeneratedOutputs.getOrElse(toBeLookedUp, lookup(toBeLookedUp)) + override lazy val startMetadataKeyValues: Map[String, Any] = super[JesJobCachingActorHelper].startMetadataKeyValues - private[jes] def wdlValueToGcsPath(jesOutputs: Seq[JesFileOutput])(value: WdlValue): WdlValue = { - def toGcsPath(wdlFile: WdlFile) = jesOutputs collectFirst { - case o if o.name == makeSafeJesReferenceName(wdlFile.valueString) => WdlFile(o.gcs) - } getOrElse value - value match { - case wdlArray: WdlArray => wdlArray map wdlValueToGcsPath(jesOutputs) - case wdlMap: WdlMap => wdlMap map { - case (k, v) => wdlValueToGcsPath(jesOutputs)(k) -> wdlValueToGcsPath(jesOutputs)(v) - } - case file: WdlFile => if (file.value.isGcsUrl) file else toGcsPath(file) - case other => other + override def getTerminalMetadata(runStatus: RunStatus): Map[String, Any] = { + runStatus match { + case terminalRunStatus: TerminalRunStatus => + Map( + JesMetadataKeys.MachineType -> terminalRunStatus.machineType.getOrElse("unknown"), + JesMetadataKeys.InstanceName -> 
terminalRunStatus.instanceName.getOrElse("unknown"), + JesMetadataKeys.Zone -> terminalRunStatus.zone.getOrElse("unknown") + ) + case unknown => throw new RuntimeException(s"Attempt to get terminal metadata from non terminal status: $unknown") } } - private def outputLookup(taskOutput: TaskOutput, currentList: Seq[AttemptedLookupResult]) = for { - /** - * This will evaluate the task output expression and coerces it to the task output's type. - * If the result is a WdlFile, then attempt to find the JesOutput with the same path and - * return a WdlFile that represents the GCS path and not the local path. For example, - * - *
-    * output {
-    *   File x = "out" + ".txt"
-    * }
-    * 
- * - * "out" + ".txt" is evaluated to WdlString("out.txt") and then coerced into a WdlFile("out.txt") - * Then, via wdlFileToGcsPath(), we attempt to find the JesOutput with .name == "out.txt". - * If it is found, then WdlFile("gs://some_bucket/out.txt") will be returned. - */ - wdlValue <- taskOutput.requiredExpression.evaluate(customLookupFunction(currentList.toLookupMap), callEngineFunctions) - coercedValue <- taskOutput.wdlType.coerceRawValue(wdlValue) - value = wdlValueToGcsPath(generateJesOutputs(jobDescriptor))(coercedValue) - } yield value + override def mapOutputWdlFile(wdlFile: WdlFile): WdlFile = { + wdlFileToGcsPath(generateJesOutputs(jobDescriptor))(wdlFile) + } + private[jes] def wdlFileToGcsPath(jesOutputs: Set[JesFileOutput])(wdlFile: WdlFile): WdlFile = { + jesOutputs collectFirst { + case jesOutput if jesOutput.name == makeSafeJesReferenceName(wdlFile.valueString) => WdlFile(jesOutput.gcs) + } getOrElse wdlFile + } - private def outputFoldingFunction: (Seq[AttemptedLookupResult], TaskOutput) => Seq[AttemptedLookupResult] = { - (currentList: Seq[AttemptedLookupResult], taskOutput: TaskOutput) => { - currentList ++ Seq(AttemptedLookupResult(taskOutput.name, outputLookup(taskOutput, currentList))) + override def isSuccess(runStatus: RunStatus): Boolean = { + runStatus match { + case _: RunStatus.Success => true + case _: RunStatus.UnsuccessfulRunStatus => false + case _ => throw new RuntimeException(s"Cromwell programmer blunder: isSuccess was called on an incomplete RunStatus ($runStatus).") } } - private def postProcess: Try[JobOutputs] = { - val outputs = call.task.outputs - val outputMappings = outputs.foldLeft(Seq.empty[AttemptedLookupResult])(outputFoldingFunction).map(_.toPair).toMap - TryUtil.sequenceMap(outputMappings) map { outputMap => - outputMap mapValues { v => JobOutput(v) } + override def getTerminalEvents(runStatus: RunStatus): Seq[ExecutionEvent] = { + runStatus match { + case successStatus: RunStatus.Success => 
successStatus.eventList + case unknown => + throw new RuntimeException(s"handleExecutionSuccess not called with RunStatus.Success. Instead got $unknown") } } - private def handleSuccess(outputMappings: Try[JobOutputs], returnCode: Int, jobDetritusFiles: Map[String, String], executionHandle: ExecutionHandle, events: Seq[ExecutionEvent]): ExecutionHandle = { - outputMappings match { - case Success(outputs) => SuccessfulExecutionHandle(outputs, returnCode, jobDetritusFiles, events) - case Failure(ex: CromwellAggregatedException) if ex.throwables collectFirst { case s: SocketTimeoutException => s } isDefined => - // Return the execution handle in this case to retry the operation - executionHandle - case Failure(ex) => FailedNonRetryableExecutionHandle(ex) + override def retryEvaluateOutputs(exception: Exception): Boolean = { + exception match { + case aggregated: CromwellAggregatedException => + aggregated.throwables.collectFirst { case s: SocketTimeoutException => s }.isDefined + case _ => false } } - private def extractErrorCodeFromErrorMessage(errorMessage: String): Int = { - errorMessage.substring(0, errorMessage.indexOf(':')).toInt + // If one exists, extract the JES error code (not the google RPC) from the error message + private[jes] def getJesErrorCode(errorMessage: String): Option[Int] = { + Try { errorMessage.substring(0, errorMessage.indexOf(':')).toInt } toOption } - private def preempted(errorCode: Int, errorMessage: Option[String]): Boolean = { - def isPreemptionCode(code: Int) = code == 13 || code == 14 + override def handleExecutionFailure(runStatus: RunStatus, + handle: StandardAsyncPendingExecutionHandle, + returnCode: Option[Int]): Future[ExecutionHandle] = { + // Inner function: Handles a 'Failed' runStatus (or Preempted if preemptible was false) + def handleFailedRunStatus(runStatus: RunStatus.UnsuccessfulRunStatus, + handle: StandardAsyncPendingExecutionHandle, + returnCode: Option[Int]): Future[ExecutionHandle] = { + (runStatus.errorCode, 
runStatus.jesCode) match { + case (GoogleCancelledRpc, None) => Future.successful(AbortedExecutionHandle) + case (GoogleNotFoundRpc, Some(JesFailedToDelocalize)) => Future.successful(FailedNonRetryableExecutionHandle(FailedToDelocalizeFailure(runStatus.prettyPrintedError, jobTag, Option(jobPaths.stderr)))) + case (GoogleAbortedRpc, Some(JesUnexpectedTermination)) => handleUnexpectedTermination(runStatus.errorCode, runStatus.prettyPrintedError, returnCode) + case _ => Future.successful(FailedNonRetryableExecutionHandle(StandardException(runStatus.errorCode, runStatus.prettyPrintedError, jobTag), returnCode)) + } + } - try { - errorCode == 10 && errorMessage.isDefined && isPreemptionCode(extractErrorCodeFromErrorMessage(errorMessage.get)) && preemptible - } catch { - case _: NumberFormatException | _: StringIndexOutOfBoundsException => - jobLogger.warn(s"Unable to parse JES error code from error message: {}, assuming this was not a preempted VM.", errorMessage.get) - false + runStatus match { + case preemptedStatus: RunStatus.Preempted if preemptible => handlePreemption(preemptedStatus, returnCode) + case failedStatus: RunStatus.UnsuccessfulRunStatus => handleFailedRunStatus(failedStatus, handle, returnCode) + case unknown => throw new RuntimeException(s"handleExecutionFailure not called with RunStatus.Failed or RunStatus.Preempted. 
Instead got $unknown") } } - private def handleFailure(errorCode: Int, errorMessage: Option[String]) = { - import lenthall.numeric.IntegerUtil._ + private def writeFuturePreemptedAndUnexpectedRetryCounts(p: Int, ur: Int): Future[Unit] = { + val updateRequests = Seq( + KvPut(KvPair(ScopedKey(workflowId, futureKvJobKey, JesBackendLifecycleActorFactory.unexpectedRetryCountKey), Option(ur.toString))), + KvPut(KvPair(ScopedKey(workflowId, futureKvJobKey, JesBackendLifecycleActorFactory.preemptionCountKey), Option(p.toString))) + ) - val taskName = s"${workflowDescriptor.id}:${call.unqualifiedName}" - val attempt = jobDescriptor.key.attempt + makeKvRequest(updateRequests).map(_ => ()) + } - if (errorMessage.exists(_.contains("Operation canceled at"))) { - AbortedExecutionHandle.future - } else if (preempted(errorCode, errorMessage)) { - val preemptedMsg = s"Task $taskName was preempted for the ${attempt.toOrdinal} time." + private def handleUnexpectedTermination(errorCode: Int, errorMessage: String, jobReturnCode: Option[Int]): Future[ExecutionHandle] = { - if (attempt < maxPreemption) { - val e = PreemptedException( - s"""$preemptedMsg The call will be restarted with another preemptible VM (max preemptible attempts number is $maxPreemption). - |Error code $errorCode. Message: $errorMessage""".stripMargin - ) - FailedRetryableExecutionHandle(e, None).future - } else { - val e = PreemptedException( - s"""$preemptedMsg The maximum number of preemptible attempts ($maxPreemption) has been reached. The call will be restarted with a non-preemptible VM. - |Error code $errorCode. Message: $errorMessage)""".stripMargin) - FailedRetryableExecutionHandle(e, None).future - } - } else { - val id = workflowDescriptor.id - val name = jobDescriptor.call.unqualifiedName - val message = errorMessage.getOrElse("null") - val exception = new RuntimeException(s"Task $id:$name failed: error code $errorCode. 
Message: $message") - FailedNonRetryableExecutionHandle(exception, None).future - } - } + val msg = s"Retrying. $errorMessage" - private[jes] def executionResult(status: TerminalRunStatus, handle: JesPendingExecutionHandle) - (implicit ec: ExecutionContext): Future[ExecutionHandle] = Future { - try { - lazy val stderrLength: Long = File(jesStderrFile).size - lazy val returnCode = returnCodeContents map { _.trim.toInt } - lazy val continueOnReturnCode = runtimeAttributes.continueOnReturnCode - - status match { - case _: RunStatus.Success if runtimeAttributes.failOnStderr && stderrLength.intValue > 0 => - // returnCode will be None if it couldn't be downloaded/parsed, which will yield a null in the DB - FailedNonRetryableExecutionHandle(new RuntimeException( - s"execution failed: stderr has length $stderrLength"), returnCode.toOption).future - case _: RunStatus.Success if returnCodeContents.isFailure => - val exception = returnCode.failed.get - jobLogger.warn(s"could not download return code file, retrying", exception) - // Return handle to try again. 
- handle.future - case _: RunStatus.Success if returnCode.isFailure => - FailedNonRetryableExecutionHandle(new RuntimeException( - s"execution failed: could not parse return code as integer: ${returnCodeContents.get}")).future - case _: RunStatus.Success if !continueOnReturnCode.continueFor(returnCode.get) => - val badReturnCodeMessage = s"Call ${call.fullyQualifiedName}: return code was ${returnCode.getOrElse("(none)")}" - FailedNonRetryableExecutionHandle(new RuntimeException(badReturnCodeMessage), returnCode.toOption).future - case success: RunStatus.Success => - handleSuccess(postProcess, returnCode.get, jesCallPaths.detritusPaths.mapValues(_.toString), handle, success.eventList).future - case RunStatus.Failed(errorCode, errorMessage, _, _, _, _) => handleFailure(errorCode, errorMessage) - } - } catch { - case e: Exception => - jobLogger.warn("Caught exception trying to download result, retrying", e) - // Return the original handle to try again. - handle.future + previousRetryReasons match { + case Valid(PreviousRetryReasons(p, ur)) => + val thisUnexpectedRetry = ur + 1 + if (thisUnexpectedRetry <= maxUnexpectedRetries) { + // Increment unexpected retry count and preemption count stays the same + writeFuturePreemptedAndUnexpectedRetryCounts(p, thisUnexpectedRetry).map { _ => + FailedRetryableExecutionHandle(StandardException(errorCode, msg, jobTag), jobReturnCode) + } + } + else { + Future.successful(FailedNonRetryableExecutionHandle(StandardException(errorCode, errorMessage, jobTag), jobReturnCode)) + } + case Invalid(_) => + Future.successful(FailedNonRetryableExecutionHandle(StandardException(errorCode, errorMessage, jobTag), jobReturnCode)) } - } flatten - - /** - * Takes a path in GCS and comes up with a local path which is unique for the given GCS path. - * - * Matches the path generated via relativeLocalizationPath and passed in as JesFileInput.local. 
- * - * @param mountPoint The mount point for inputs - * @param gcsPath The input path - * @return A path which is unique per input path - */ - private def localFilePathFromCloudStoragePath(mountPoint: Path, gcsPath: NioGcsPath): Path = { - mountPoint.resolve(gcsPath.bucket).resolve(gcsPath.objectName) } - /** - * Takes a single WdlValue and maps google cloud storage (GCS) paths into an appropriate local file path. - * If the input is not a WdlFile, or the WdlFile is not a GCS path, the mapping is a noop. - * - * @param wdlValue the value of the input - * @return a new FQN to WdlValue pair, with WdlFile paths modified if appropriate. - */ - private[jes] def gcsPathToLocal(wdlValue: WdlValue): WdlValue = { - wdlValue match { - case wdlFile: WdlFile => - Try(getPath(wdlFile.valueString)) match { - case Success(gcsPath: NioGcsPath) => - WdlFile(localFilePathFromCloudStoragePath(workingDisk.mountPoint, gcsPath).toString, wdlFile.isGlob) - case Success(otherPath) => wdlValue - case Failure(e) => wdlValue + private def handlePreemption(runStatus: RunStatus.Preempted, jobReturnCode: Option[Int]): Future[ExecutionHandle] = { + import lenthall.numeric.IntegerUtil._ + + val errorCode: Int = runStatus.errorCode + val prettyPrintedError: String = runStatus.prettyPrintedError + previousRetryReasons match { + case Valid(PreviousRetryReasons(p, ur)) => + val thisPreemption = p + 1 + val taskName = s"${workflowDescriptor.id}:${call.unqualifiedName}" + val baseMsg = s"Task $taskName was preempted for the ${thisPreemption.toOrdinal} time." + + writeFuturePreemptedAndUnexpectedRetryCounts(thisPreemption, ur).map { _ => + if (thisPreemption < maxPreemption) { + // Increment preemption count and unexpectedRetryCount stays the same + val msg = s"""$baseMsg The call will be restarted with another preemptible VM (max preemptible attempts number is $maxPreemption). 
Error code $errorCode.$prettyPrintedError""".stripMargin + FailedRetryableExecutionHandle(StandardException(errorCode, msg, jobTag), jobReturnCode) + } + else { + val msg = s"""$baseMsg The maximum number of preemptible attempts ($maxPreemption) has been reached. The call will be restarted with a non-preemptible VM. Error code $errorCode.$prettyPrintedError)""".stripMargin + FailedRetryableExecutionHandle(StandardException(errorCode, msg, jobTag), jobReturnCode) + } } - case wdlArray: WdlArray => wdlArray map gcsPathToLocal - case wdlMap: WdlMap => wdlMap map { case (k, v) => gcsPathToLocal(k) -> gcsPathToLocal(v) } - case _ => wdlValue + case Invalid(_) => + Future.successful(FailedNonRetryableExecutionHandle(StandardException(errorCode, prettyPrintedError, jobTag), jobReturnCode)) } } - protected implicit def ec: ExecutionContext = context.dispatcher + override def mapCommandLineWdlFile(wdlFile: WdlFile): WdlFile = { + getPath(wdlFile.valueString) match { + case Success(gcsPath: GcsPath) => + val localPath = workingDisk.mountPoint.resolve(gcsPath.pathWithoutScheme).pathAsString + WdlFile(localPath, wdlFile.isGlob) + case _ => wdlFile + } + } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala index fc10cff9b..73d681812 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala @@ -1,69 +1,118 @@ package cromwell.backend.impl.jes -import java.net.URL +import java.net.{URI, URL} -import com.typesafe.config.Config -import cromwell.backend.impl.jes.JesImplicits.GoogleAuthWorkflowOptions -import cromwell.core.{ErrorOr, WorkflowOptions} -import cromwell.filesystems.gcs.{GoogleAuthMode, GoogleConfiguration} -import lenthall.config.ScalaConfig._ -import lenthall.config.ValidatedConfig._ -import 
wdl4s.ExceptionWithErrors +import cats.data.Validated._ +import cats.syntax.cartesian._ +import cats.syntax.validated._ +import com.typesafe.config.{Config, ConfigValue} +import cromwell.backend.impl.jes.authentication.JesAuths +import cromwell.backend.impl.jes.callcaching.{CopyCachedOutputs, JesCacheHitDuplicationStrategy, UseOriginalCachedOutputs} +import cromwell.filesystems.gcs.GoogleConfiguration +import eu.timepit.refined.api.Refined +import eu.timepit.refined.numeric.Positive +import lenthall.exception.MessageAggregation +import lenthall.validation.ErrorOr._ +import lenthall.validation.Validation._ +import net.ceedubs.ficus.Ficus._ +import net.ceedubs.ficus.readers.{StringReader, ValueReader} +import org.slf4j.{Logger, LoggerFactory} -import scala.language.postfixOps -import scalaz.Scalaz._ -import scalaz.Validation.FlatMap._ -import scalaz._ +import scala.collection.JavaConverters._ case class JesAttributes(project: String, - genomicsAuth: GoogleAuthMode, - gcsFilesystemAuth: GoogleAuthMode, + computeServiceAccount: String, + auths: JesAuths, + restrictMetadataAccess: Boolean, executionBucket: String, endpointUrl: URL, - maxPollingInterval: Int) { - def genomicsCredential(options: WorkflowOptions) = genomicsAuth.credential(options.toGoogleAuthOptions) - def gcsCredential(options: WorkflowOptions) = gcsFilesystemAuth.credential(options.toGoogleAuthOptions) -} + maxPollingInterval: Int, + qps: Int Refined Positive, + duplicationStrategy: JesCacheHitDuplicationStrategy) object JesAttributes { + lazy val Logger = LoggerFactory.getLogger("JesAttributes") + + val GenomicsApiDefaultQps = 1000 private val jesKeys = Set( "project", "root", "maximum-polling-interval", + "genomics.compute-service-account", "dockerhub", + "dockerhub.account", + "dockerhub.token", "genomics", "filesystems", "genomics.auth", + "genomics.restrict-metadata-access", "genomics.endpoint-url", - "filesystems.gcs.auth" + "filesystems.gcs.auth", + "filesystems.gcs.caching.duplication-strategy", 
+ "genomics-api-queries-per-100-seconds" + ) + + private val deprecatedJesKeys: Map[String, String] = Map( + "genomics.default-zones" -> "default-runtime-attributes.zones" ) private val context = "Jes" + implicit val urlReader: ValueReader[URL] = StringReader.stringValueReader.map { URI.create(_).toURL } + def apply(googleConfig: GoogleConfiguration, backendConfig: Config): JesAttributes = { - backendConfig.warnNotRecognized(jesKeys, context) - - val project: ErrorOr[String] = backendConfig.validateString("project") - val executionBucket: ErrorOr[String] = backendConfig.validateString("root") - val endpointUrl: ErrorOr[URL] = backendConfig.validateURL("genomics.endpoint-url") - val maxPollingInterval: Int = backendConfig.getIntOption("maximum-polling-interval").getOrElse(600) - val genomicsAuthName: ErrorOr[String] = backendConfig.validateString("genomics.auth") - val gcsFilesystemAuthName: ErrorOr[String] = backendConfig.validateString("filesystems.gcs.auth") - - (project |@| executionBucket |@| endpointUrl |@| genomicsAuthName |@| gcsFilesystemAuthName) { - (_, _, _, _, _) - } flatMap { case (p, b, u, genomicsName, gcsName) => - (googleConfig.auth(genomicsName) |@| googleConfig.auth(gcsName)) { case (genomicsAuth, gcsAuth) => - JesAttributes(p, genomicsAuth, gcsAuth, b, u, maxPollingInterval) + val configKeys = backendConfig.entrySet().asScala.toSet map { entry: java.util.Map.Entry[String, ConfigValue] => entry.getKey } + warnNotRecognized(configKeys, jesKeys, context, Logger) + + def warnDeprecated(keys: Set[String], deprecated: Map[String, String], context: String, logger: Logger) = { + val deprecatedKeys = keys.intersect(deprecated.keySet) + deprecatedKeys foreach { key => logger.warn(s"Found deprecated configuration key $key, replaced with ${deprecated.get(key)}") } + } + + warnDeprecated(configKeys, deprecatedJesKeys, context, Logger) + + val project: ErrorOr[String] = validate { backendConfig.as[String]("project") } + val executionBucket: ErrorOr[String] = 
validate { backendConfig.as[String]("root") } + val endpointUrl: ErrorOr[URL] = validate { backendConfig.as[URL]("genomics.endpoint-url") } + val maxPollingInterval: Int = backendConfig.as[Option[Int]]("maximum-polling-interval").getOrElse(600) + val computeServiceAccount: String = backendConfig.as[Option[String]]("genomics.compute-service-account").getOrElse("default") + val genomicsAuthName: ErrorOr[String] = validate { backendConfig.as[String]("genomics.auth") } + val genomicsRestrictMetadataAccess: ErrorOr[Boolean] = validate { backendConfig.as[Option[Boolean]]("genomics.restrict-metadata-access").getOrElse(false) } + val gcsFilesystemAuthName: ErrorOr[String] = validate { backendConfig.as[String]("filesystems.gcs.auth") } + val qpsValidation = validateQps(backendConfig) + val duplicationStrategy = validate { backendConfig.as[Option[String]]("filesystems.gcs.caching.duplication-strategy").getOrElse("copy") match { + case "copy" => CopyCachedOutputs + case "reference" => UseOriginalCachedOutputs + case other => throw new IllegalArgumentException(s"Unrecognized caching duplication strategy: $other. Supported strategies are copy and reference. 
See reference.conf for more details.") + } } + + + (project |@| executionBucket |@| endpointUrl |@| genomicsAuthName |@| genomicsRestrictMetadataAccess |@| gcsFilesystemAuthName |@| + qpsValidation |@| duplicationStrategy).tupled flatMap { + case (p, b, u, genomicsName, restrictMetadata, gcsName, qps, cachingStrategy) => + (googleConfig.auth(genomicsName) |@| googleConfig.auth(gcsName)) map { case (genomicsAuth, gcsAuth) => + JesAttributes(p, computeServiceAccount, JesAuths(genomicsAuth, gcsAuth), restrictMetadata, b, u, maxPollingInterval, qps, cachingStrategy) } } match { - case Success(r) => r - case Failure(f) => - throw new IllegalArgumentException with ExceptionWithErrors { - override val message = "Jes Configuration is not valid: Errors" - override val errors = f + case Valid(r) => r + case Invalid(f) => + throw new IllegalArgumentException with MessageAggregation { + override val exceptionContext = "Jes Configuration is not valid: Errors" + override val errorMessages = f.toList } } } + + def validateQps(config: Config): ErrorOr[Int Refined Positive] = { + import eu.timepit.refined._ + + val qp100s = config.as[Option[Int]]("genomics-api-queries-per-100-seconds").getOrElse(GenomicsApiDefaultQps) + val qpsCandidate = qp100s / 100 + + refineV[Positive](qpsCandidate) match { + case Left(_) => s"Calculated QPS for Google Genomics API ($qpsCandidate/s) was not a positive integer (supplied value was $qp100s per 100s)".invalidNel + case Right(refined) => refined.validNel + } + } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendInitializationData.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendInitializationData.scala index 4ba914c34..8aeebe639 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendInitializationData.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendInitializationData.scala @@ -1,7 +1,14 @@ package 
cromwell.backend.impl.jes import com.google.api.services.genomics.Genomics -import cromwell.backend.BackendInitializationData +import com.google.auth.Credentials +import cromwell.backend.standard.{StandardInitializationData, StandardValidatedRuntimeAttributesBuilder} -case class JesBackendInitializationData(workflowPaths: JesWorkflowPaths, genomics: Genomics) - extends BackendInitializationData +case class JesBackendInitializationData +( + override val workflowPaths: JesWorkflowPaths, + override val runtimeAttributesBuilder: StandardValidatedRuntimeAttributesBuilder, + jesConfiguration: JesConfiguration, + gcsCredentials: Credentials, + genomics: Genomics +) extends StandardInitializationData(workflowPaths, runtimeAttributesBuilder, classOf[JesExpressionFunctions]) diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala index 4028166cc..7476e3c79 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala @@ -1,103 +1,69 @@ package cromwell.backend.impl.jes -import java.nio.file.Path - -import akka.actor.{ActorRef, Props} -import com.typesafe.config.Config +import akka.actor.ActorRef import cromwell.backend._ -import cromwell.backend.callcaching.FileHashingActor.FileHashingFunction -import cromwell.backend.impl.jes.callcaching.JesBackendFileHashing -import cromwell.backend.validation.RuntimeAttributesKeys -import cromwell.core.Dispatcher.BackendDispatcher -import cromwell.core.{ExecutionStore, OutputStore} -import wdl4s.Call -import wdl4s.expression.WdlStandardLibraryFunctions +import cromwell.backend.impl.jes.callcaching.{JesBackendCacheHitCopyingActor, JesBackendFileHashingActor} +import cromwell.backend.standard._ +import 
cromwell.backend.standard.callcaching.{StandardCacheHitCopyingActor, StandardFileHashingActor} +import cromwell.core.CallOutputs +import wdl4s.wdl.WdlTaskCall -import scala.language.postfixOps +import scala.util.{Success, Try} +import cromwell.backend.impl.jes.JesBackendLifecycleActorFactory._ -case class JesBackendLifecycleActorFactory(configurationDescriptor: BackendConfigurationDescriptor) - extends BackendLifecycleActorFactory { - import JesBackendLifecycleActorFactory._ +case class JesBackendLifecycleActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) + extends StandardLifecycleActorFactory { - val jesConfiguration = new JesConfiguration(configurationDescriptor) + override lazy val initializationActorClass: Class[_ <: StandardInitializationActor] = classOf[JesInitializationActor] - override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - serviceRegistryActor: ActorRef): Option[Props] = { - Option(JesInitializationActor.props(workflowDescriptor, calls, jesConfiguration, serviceRegistryActor).withDispatcher(BackendDispatcher)) - } + override lazy val asyncExecutionActorClass: Class[_ <: StandardAsyncExecutionActor] = + classOf[JesAsyncBackendJobExecutionActor] - override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, - initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef): Props = { - // The `JesInitializationActor` will only return a non-`Empty` `JesBackendInitializationData` from a successful `beforeAll` - // invocation, so the `get` here is safe. 
- JesJobExecutionActor.props(jobDescriptor, jesConfiguration, initializationData.toJes.get, serviceRegistryActor).withDispatcher(BackendDispatcher) - } + override lazy val finalizationActorClassOption: Option[Class[_ <: StandardFinalizationActor]] = + Option(classOf[JesFinalizationActor]) - override def cacheHitCopyingActorProps = Option(cacheHitCopyingActorInner _) + override lazy val jobIdKey: String = JesAsyncBackendJobExecutionActor.JesOperationIdKey - def cacheHitCopyingActorInner(jobDescriptor: BackendJobDescriptor, - initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef): Props = { - // The `JesInitializationActor` will only return a non-`Empty` `JesBackendInitializationData` from a successful `beforeAll` - // invocation, so the `get` here is safe. - JesCacheHitCopyingActor.props(jobDescriptor, jesConfiguration, initializationData.toJes.get, serviceRegistryActor).withDispatcher(BackendDispatcher) + val jesConfiguration = new JesConfiguration(configurationDescriptor) + + override def workflowInitializationActorParams(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, calls: Set[WdlTaskCall], + serviceRegistryActor: ActorRef, restart: Boolean): StandardInitializationActorParams = { + JesInitializationActorParams(workflowDescriptor, ioActor, calls, jesConfiguration, serviceRegistryActor, restart) } - override def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - executionStore: ExecutionStore, - outputStore: OutputStore, - initializationData: Option[BackendInitializationData]) = { + override def workflowFinalizationActorParams(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, calls: Set[WdlTaskCall], + jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, + initializationDataOption: Option[BackendInitializationData]): + StandardFinalizationActorParams = { // The `JesInitializationActor` will only return a non-`Empty` 
`JesBackendInitializationData` from a successful `beforeAll` // invocation. HOWEVER, the finalization actor is created regardless of whether workflow initialization was successful // or not. So the finalization actor must be able to handle an empty `JesBackendInitializationData` option, and there is no // `.get` on the initialization data as there is with the execution or cache hit copying actor methods. - Option(JesFinalizationActor.props(workflowDescriptor, calls, jesConfiguration, executionStore, outputStore, initializationData.toJes).withDispatcher(BackendDispatcher)) + JesFinalizationActorParams(workflowDescriptor, ioActor, calls, jesConfiguration, jobExecutionMap, workflowOutputs, + initializationDataOption) } - override def runtimeAttributeDefinitions(initializationDataOption: Option[BackendInitializationData]) = staticRuntimeAttributeDefinitions - - override def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, - jobKey: BackendJobDescriptorKey, - initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = { - - val jesCallPaths = initializationData.toJes.get.workflowPaths.toJesCallPaths(jobKey) - new JesExpressionFunctions(List(jesCallPaths.gcsFileSystem), jesCallPaths.callContext) + override lazy val cacheHitCopyingActorClassOption: Option[Class[_ <: StandardCacheHitCopyingActor]] = { + Option(classOf[JesBackendCacheHitCopyingActor]) } - override def getExecutionRootPath(workflowDescriptor: BackendWorkflowDescriptor, backendConfig: Config, - initializationData: Option[BackendInitializationData]): Path = { - initializationData.toJes.get.workflowPaths.rootPath + override def backendSingletonActorProps = Option(JesBackendSingletonActor.props(jesConfiguration.qps)) + + override lazy val fileHashingActorClassOption: Option[Class[_ <: StandardFileHashingActor]] = Option(classOf[JesBackendFileHashingActor]) + + override def dockerHashCredentials(initializationData: Option[BackendInitializationData]) = { + 
Try(BackendInitializationData.as[JesBackendInitializationData](initializationData)) match { + case Success(jesData) => + val maybeDockerHubCredentials = jesData.jesConfiguration.dockerCredentials + val googleCredentials = Option(jesData.gcsCredentials) + List(maybeDockerHubCredentials, googleCredentials).flatten + case _ => List.empty[Any] + } } - - override lazy val fileHashingFunction: Option[FileHashingFunction] = Option(FileHashingFunction(JesBackendFileHashing.getCrc32c)) + override val requestedKeyValueStoreKeys: Seq[String] = Seq(preemptionCountKey, unexpectedRetryCountKey) } object JesBackendLifecycleActorFactory { - implicit class Jessify(val genericInitializationData: Option[BackendInitializationData]) { - // This leaves the result in an `Option` as finalization will be called even if initialization has failed, and if - // initialization fails there won't be any initialization data. The various `.get`s that occur below are in instances - // where the workflow has successfully gotten past initialization and the JES initialization data is defined. 
- def toJes: Option[JesBackendInitializationData] = genericInitializationData collectFirst { case d: JesBackendInitializationData => d } - } - - val staticRuntimeAttributeDefinitions = { - import RuntimeAttributesKeys._ - import JesRuntimeAttributes._ - - Set( - RuntimeAttributeDefinition(DockerKey, None, usedInCallCaching = true), - RuntimeAttributeDefinition(ContinueOnReturnCodeKey, Option(staticDefaults(ContinueOnReturnCodeKey)), usedInCallCaching = true), - RuntimeAttributeDefinition(CpuKey, Option(staticDefaults(CpuKey)), usedInCallCaching = false), - RuntimeAttributeDefinition(FailOnStderrKey, Option(staticDefaults(FailOnStderrKey)), usedInCallCaching = true), - RuntimeAttributeDefinition(MemoryKey, Option(staticDefaults(MemoryKey)), usedInCallCaching = false), - RuntimeAttributeDefinition(DisksKey, Option(staticDefaults(DisksKey)), usedInCallCaching = false), - RuntimeAttributeDefinition(ZonesKey, Option(staticDefaults(ZonesKey)), usedInCallCaching = false), - RuntimeAttributeDefinition(PreemptibleKey, Option(staticDefaults(PreemptibleKey)), usedInCallCaching = false), - RuntimeAttributeDefinition(BootDiskSizeKey, Option(staticDefaults(BootDiskSizeKey)), usedInCallCaching = false), - RuntimeAttributeDefinition(NoAddressKey, Option(staticDefaults(NoAddressKey)), usedInCallCaching = false) - ) - } + val preemptionCountKey = "PreemptionCount" + val unexpectedRetryCountKey = "UnexpectedRetryCount" } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendSingletonActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendSingletonActor.scala new file mode 100644 index 000000000..f1a5eda4a --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendSingletonActor.scala @@ -0,0 +1,23 @@ +package cromwell.backend.impl.jes + +import akka.actor.{Actor, ActorLogging, Props} +import cromwell.core.Dispatcher.BackendDispatcher +import 
cromwell.backend.impl.jes.statuspolling.JesApiQueryManager +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.JesApiQueryManagerRequest +import eu.timepit.refined.api.Refined +import eu.timepit.refined.numeric.Positive + +final case class JesBackendSingletonActor(qps: Int Refined Positive) extends Actor with ActorLogging { + + val jesApiQueryManager = context.actorOf(JesApiQueryManager.props(qps)) + + override def receive = { + case apiQuery: JesApiQueryManagerRequest => + log.debug("Forwarding API query to JES API query manager actor") + jesApiQueryManager.forward(apiQuery) + } +} + +object JesBackendSingletonActor { + def props(qps: Int Refined Positive): Props = Props(JesBackendSingletonActor(qps)).withDispatcher(BackendDispatcher) +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCacheHitCopyingActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCacheHitCopyingActor.scala deleted file mode 100644 index e24d8627d..000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCacheHitCopyingActor.scala +++ /dev/null @@ -1,31 +0,0 @@ -package cromwell.backend.impl.jes - -import java.nio.file.Path - -import akka.actor.{ActorRef, Props} -import cromwell.backend.callcaching.CacheHitDuplicating -import cromwell.backend.{BackendCacheHitCopyingActor, BackendJobDescriptor} -import cromwell.core.PathCopier -import cromwell.core.logging.JobLogging - -case class JesCacheHitCopyingActor(override val jobDescriptor: BackendJobDescriptor, - jesConfiguration: JesConfiguration, - initializationData: JesBackendInitializationData, - serviceRegistryActor: ActorRef) - extends BackendCacheHitCopyingActor with CacheHitDuplicating with JesJobCachingActorHelper with JobLogging { - override protected def duplicate(source: Path, destination: Path) = PathCopier.copy(source, destination) - - override protected def destinationCallRootPath = jesCallPaths.callRootPath - - override protected def 
destinationJobDetritusPaths = jesCallPaths.detritusPaths -} - -object JesCacheHitCopyingActor { - - def props(jobDescriptor: BackendJobDescriptor, - jesConfiguration: JesConfiguration, - initializationData: JesBackendInitializationData, - serviceRegistryActor: ActorRef): Props = { - Props(new JesCacheHitCopyingActor(jobDescriptor, jesConfiguration, initializationData, serviceRegistryActor)) - } -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCallPaths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCallPaths.scala deleted file mode 100644 index f18daecaa..000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCallPaths.scala +++ /dev/null @@ -1,82 +0,0 @@ -package cromwell.backend.impl.jes - -import java.nio.file.Path - -import cromwell.backend.impl.jes.authentication.JesCredentials -import cromwell.backend.io.JobPaths -import cromwell.backend.io.JobPaths._ -import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} -import cromwell.core.CallContext -import cromwell.services.metadata.CallMetadataKeys - -import scala.concurrent.ExecutionContext - -object JesCallPaths { - def apply(jobKey: BackendJobDescriptorKey, workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: JesCredentials)(implicit ec: ExecutionContext): JesCallPaths = { - new JesCallPaths(jobKey, workflowDescriptor, jesConfiguration, credentials) - } - - val JesLogPathKey = "jesLog" - val GcsExecPathKey = "gcsExec" -} - -class JesCallPaths(jobKey: BackendJobDescriptorKey, workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: JesCredentials)(implicit ec: ExecutionContext) extends - JesWorkflowPaths(workflowDescriptor, jesConfiguration, credentials)(ec) { - - val jesLogBasename = { - val index = jobKey.index.map(s => s"-$s").getOrElse("") - s"${jobKey.scope.unqualifiedName}$index" - } - - val callRootPath: Path = { 
- val callName = jobKey.call.fullyQualifiedName.split('.').last - val call = s"$CallPrefix-$callName" - val shard = jobKey.index map { s => s"$ShardPrefix-$s" } getOrElse "" - val retry = if (jobKey.attempt > 1) s"$AttemptPrefix-${jobKey.attempt}" else "" - - List(call, shard, retry).foldLeft(workflowRootPath)((path, dir) => path.resolve(dir)) - } - - val returnCodeFilename: String = s"$jesLogBasename-rc.txt" - val stdoutFilename: String = s"$jesLogBasename-stdout.log" - val stderrFilename: String = s"$jesLogBasename-stderr.log" - val jesLogFilename: String = s"$jesLogBasename.log" - val gcsExecFilename: String = "exec.sh" - - lazy val returnCodePath: Path = callRootPath.resolve(returnCodeFilename) - lazy val stdoutPath: Path = callRootPath.resolve(stdoutFilename) - lazy val stderrPath: Path = callRootPath.resolve(stderrFilename) - lazy val jesLogPath: Path = callRootPath.resolve(jesLogFilename) - lazy val gcsExecPath: Path = callRootPath.resolve(gcsExecFilename) - lazy val callContext = CallContext(callRootPath, stdoutFilename, stderrFilename) - - /* - TODO: Move various monitoring files path generation here. - - "/cromwell_root" is a well known path, called in the regular JobPaths callDockerRoot. - This JesCallPaths should know about that root, and be able to create the monitoring file paths. - Instead of the AsyncActor creating the paths, the paths could then be shared with the CachingActor. - - Those monitoring paths could then be returned by metadataFiles and detritusFiles. 
- */ - - lazy val metadataPaths: Map[String, Path] = Map( - CallMetadataKeys.CallRoot -> callRootPath, - CallMetadataKeys.Stdout -> stdoutPath, - CallMetadataKeys.Stderr -> stderrPath, - CallMetadataKeys.BackendLogsPrefix + ":log" -> jesLogPath - ) - - lazy val detritusPaths: Map[String, Path] = Map( - JobPaths.CallRootPathKey -> callRootPath, - JesCallPaths.GcsExecPathKey -> gcsExecPath, - JesCallPaths.JesLogPathKey -> jesLogPath, - JobPaths.StdoutPathKey -> stdoutPath, - JobPaths.StdErrPathKey -> stderrPath, - JobPaths.ReturnCodePathKey -> returnCodePath - ) -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala index 6657250b0..f0e863da6 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala @@ -2,14 +2,21 @@ package cromwell.backend.impl.jes import cromwell.backend.BackendConfigurationDescriptor import cromwell.backend.impl.jes.authentication.JesDockerCredentials -import cromwell.core.DockerConfiguration -import cromwell.filesystems.gcs.GoogleConfiguration +import cromwell.core.BackendDockerConfiguration +import cromwell.filesystems.gcs.{GcsPathBuilderFactory, GoogleConfiguration} class JesConfiguration(val configurationDescriptor: BackendConfigurationDescriptor) { - val root = configurationDescriptor.backendConfig.getString("root") val googleConfig = GoogleConfiguration(configurationDescriptor.globalConfig) + + val root = configurationDescriptor.backendConfig.getString("root") + val runtimeConfig = configurationDescriptor.backendRuntimeConfig val jesAttributes = JesAttributes(googleConfig, configurationDescriptor.backendConfig) - val dockerCredentials = DockerConfiguration.build(configurationDescriptor.backendConfig).dockerCredentials map JesDockerCredentials.apply - val needAuthFileUpload = 
jesAttributes.gcsFilesystemAuth.requiresAuthFile || dockerCredentials.isDefined + val jesAuths = jesAttributes.auths + val jesComputeServiceAccount = jesAttributes.computeServiceAccount + val gcsPathBuilderFactory = GcsPathBuilderFactory(jesAuths.gcs, googleConfig.applicationName) + val genomicsFactory = GenomicsFactory(googleConfig.applicationName, jesAuths.genomics, jesAttributes.endpointUrl) + val dockerCredentials = BackendDockerConfiguration.build(configurationDescriptor.backendConfig).dockerCredentials map JesDockerCredentials.apply + val needAuthFileUpload = jesAuths.gcs.requiresAuthFile || dockerCredentials.isDefined || jesAttributes.restrictMetadataAccess + val qps = jesAttributes.qps } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala index 823108ca8..aed500f22 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala @@ -1,42 +1,17 @@ package cromwell.backend.impl.jes -import java.nio.file.{FileSystem, Path} - -import better.files._ -import cromwell.backend.wdl.{PureFunctions, ReadLikeFunctions, WriteFunctions} -import cromwell.backend.impl.jes.JesImplicits.PathString -import cromwell.core.CallContext -import cromwell.filesystems.gcs.GcsFileSystem -import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.values._ - -import scala.language.postfixOps -import scala.util.{Success, Try} - -class JesExpressionFunctions(override val fileSystems: List[FileSystem], - context: CallContext - ) extends WdlStandardLibraryFunctions with PureFunctions with ReadLikeFunctions with WriteFunctions { - import JesExpressionFunctions.EnhancedPath - - private def globDirectory(glob: String): String = s"glob-${glob.md5Sum}/" - - override def globPath(glob: String): String = 
context.root.resolve(globDirectory(glob)).toString - - override def glob(path: String, pattern: String): Seq[String] = { - File(path.toAbsolutePath(fileSystems).asDirectory). - glob("**/*") map { _.pathAsString } filterNot { _.toString == path } toSeq - } - - override def preMapping(str: String): String = if (!GcsFileSystem.isAbsoluteGcsPath(str)) context.root.resolve(str).toString else str - - override def stdout(params: Seq[Try[WdlValue]]) = Success(WdlFile(context.stdout)) - override def stderr(params: Seq[Try[WdlValue]]) = Success(WdlFile(context.stderr)) - - override val writeDirectory: Path = context.root -} - -object JesExpressionFunctions { - implicit class EnhancedPath(val path: Path) extends AnyVal { - def asDirectory = path.toString.toDirectory(path.getFileSystem) +import cromwell.backend.standard.{StandardExpressionFunctions, StandardExpressionFunctionsParams} +import cromwell.filesystems.gcs.GcsPathBuilder +import cromwell.filesystems.gcs.GcsPathBuilder.{InvalidGcsPath, PossiblyValidRelativeGcsPath, ValidFullGcsPath} + +class JesExpressionFunctions(standardParams: StandardExpressionFunctionsParams) + extends StandardExpressionFunctions(standardParams) { + + override def preMapping(str: String) = { + GcsPathBuilder.validateGcsPath(str) match { + case ValidFullGcsPath => str + case PossiblyValidRelativeGcsPath => callContext.root.resolve(str.stripPrefix("/")).pathAsString + case invalid: InvalidGcsPath => throw new IllegalArgumentException(invalid.errorMessage) + } } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala index 9b50bb8e2..dc03764ac 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala @@ -1,84 +1,49 @@ package cromwell.backend.impl.jes -import 
java.nio.file.Path - -import akka.actor.Props -import better.files._ -import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor, BackendWorkflowFinalizationActor} -import cromwell.core.Dispatcher.IoDispatcher -import cromwell.core.{ExecutionStore, OutputStore, PathCopier} -import wdl4s.Call +import akka.actor.ActorRef +import cromwell.backend._ +import cromwell.backend.standard.{StandardFinalizationActor, StandardFinalizationActorParams} +import cromwell.core.CallOutputs +import cromwell.core.io.AsyncIo +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder +import wdl4s.wdl.WdlTaskCall import scala.concurrent.Future -object JesFinalizationActor { - def props(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], jesConfiguration: JesConfiguration, - executionStore: ExecutionStore, outputStore: OutputStore, initializationData: Option[JesBackendInitializationData]) = { - Props(new JesFinalizationActor(workflowDescriptor, calls, jesConfiguration, executionStore, outputStore, initializationData)) - } +case class JesFinalizationActorParams +( + workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, + calls: Set[WdlTaskCall], + jesConfiguration: JesConfiguration, + jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, + initializationDataOption: Option[BackendInitializationData] +) extends StandardFinalizationActorParams { + override val configurationDescriptor: BackendConfigurationDescriptor = jesConfiguration.configurationDescriptor } -class JesFinalizationActor (override val workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], - jesConfiguration: JesConfiguration, executionStore: ExecutionStore, - outputStore: OutputStore, - initializationData: Option[JesBackendInitializationData]) extends BackendWorkflowFinalizationActor { - - override val configurationDescriptor = jesConfiguration.configurationDescriptor +class JesFinalizationActor(val jesParams: JesFinalizationActorParams) + 
extends StandardFinalizationActor(jesParams) with AsyncIo with GcsBatchCommandBuilder { - private val workflowPaths = initializationData.map { _.workflowPaths } - - private val iOExecutionContext = context.system.dispatchers.lookup(IoDispatcher) + lazy val jesConfiguration: JesConfiguration = jesParams.jesConfiguration + + override def receive = ioReceive orElse super.receive override def afterAll(): Future[Unit] = { for { // NOTE: These are currently in series, not in parallel. Not sure how many threads to throw at finalization _ <- deleteAuthenticationFile() - _ <- copyCallOutputs() + _ <- super.afterAll() } yield () } private def deleteAuthenticationFile(): Future[Unit] = { (jesConfiguration.needAuthFileUpload, workflowPaths) match { - case (true, Some(paths)) => Future(File(paths.gcsAuthFilePath).delete(false)) map { _ => () } - case _ => Future.successful(()) - } - } - - private def copyCallOutputs(): Future[Unit] = { - /* - NOTE: Only using one thread pool slot here to upload all the files for all the calls. - Using the io-dispatcher defined in application.conf because this might take a while. - One could also use Future.sequence to flood the dispatcher, or even create a separate jes final call specific thread - pool for parallel uploads. - - Measure and optimize as necessary. Will likely need retry code at some level as well. 
- */ - workflowPaths match { - case Some(paths) => Future(paths.finalCallLogsPath foreach copyCallOutputs)(iOExecutionContext) + case (true, Some(paths: JesWorkflowPaths)) => deleteAsync(paths.gcsAuthFilePath) case _ => Future.successful(()) } } - private def copyCallOutputs(callLogsPath: Path): Unit = { - copyLogs(callLogsPath, logPaths) - } - - private lazy val logPaths: Seq[Path] = { - val allCallPaths = executionStore.store.toSeq collect { - case (backendJobDescriptorKey: BackendJobDescriptorKey, _) => - initializationData map { _.workflowPaths.toJesCallPaths(backendJobDescriptorKey) } - } - - allCallPaths.flatten flatMap { callPaths => - Seq(callPaths.stdoutPath, callPaths.stderrPath, callPaths.jesLogPath) - } - } - - private def copyLogs(callLogsDirPath: Path, logPaths: Seq[Path]): Unit = { - workflowPaths match { - case Some(paths) => logPaths.foreach(PathCopier.copy(paths.rootPath, _, callLogsDirPath)) - case None => - } - } + override def ioActor: ActorRef = jesParams.ioActor } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesImplicits.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesImplicits.scala deleted file mode 100644 index 6c722c756..000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesImplicits.scala +++ /dev/null @@ -1,41 +0,0 @@ -package cromwell.backend.impl.jes - -import java.nio.file.{FileSystem, Path} - -import cromwell.core.{PathFactory, WorkflowOptions} -import cromwell.filesystems.gcs.GoogleAuthMode.GoogleAuthOptions -import cromwell.filesystems.gcs.{GcsFileSystem, GoogleAuthMode} - -import scala.util.Try - -object JesImplicits { - implicit class GoogleAuthWorkflowOptions(val workflowOptions: WorkflowOptions) extends AnyVal { - def toGoogleAuthOptions: GoogleAuthMode.GoogleAuthOptions = new GoogleAuthOptions { - override def get(key: String): Try[String] = workflowOptions.get(key) - } - } - - object PathBuilder extends PathFactory - - implicit class 
PathString(val str: String) extends AnyVal { - def isGcsUrl: Boolean = str.startsWith("gs://") - def isUriWithProtocol: Boolean = "^[a-z]+://".r.findFirstIn(str).nonEmpty - - def toPath(fss: List[FileSystem]): Path = PathBuilder.buildPath(str, fss) - def toPath(fs: FileSystem): Path = str.toPath(List(fs)) - - def toAbsolutePath(fss: List[FileSystem]): Path = str.toPath(fss).toAbsolutePath - def toAbsolutePath(fs: FileSystem): Path = str.toAbsolutePath(List(fs)) - - def toDirectory(fss: List[FileSystem]): Path = buildPathAsDirectory(str, fss) - def toDirectory(fs: FileSystem): Path = str.toDirectory(List(fs)) - - // TODO this needs to go away because it's gcs specific. Replacing gcs FS with google implementation (when available) will take care of it - private def buildPathAsDirectory(rawString: String, fileSystems: List[FileSystem]): Path = { - PathBuilder.findFileSystem(rawString, fileSystems, { - case fs: GcsFileSystem => Try(fs.getPathAsDirectory(rawString)) - case fs => Try(fs.getPath(rawString)) - }) - } - } -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala index 04ac8f70d..4586762b1 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala @@ -1,130 +1,134 @@ package cromwell.backend.impl.jes import java.io.IOException +import java.nio.file.StandardOpenOption -import akka.actor.{ActorRef, Props} +import akka.actor.ActorRef +import akka.http.scaladsl.model.StatusCodes import com.google.api.services.genomics.Genomics -import cromwell.backend.impl.jes.JesInitializationActor._ -import cromwell.backend.impl.jes.authentication.{GcsLocalizing, JesAuthInformation, JesCredentials} -import cromwell.backend.impl.jes.io._ -import 
cromwell.backend.validation.RuntimeAttributesDefault -import cromwell.backend.validation.RuntimeAttributesKeys._ -import cromwell.backend.{BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} -import cromwell.core.Dispatcher.IoDispatcher -import cromwell.core.WorkflowOptions -import cromwell.core.retry.Retry -import cromwell.filesystems.gcs.{ClientSecrets, GoogleAuthMode} -import spray.json.JsObject -import wdl4s.types.{WdlBooleanType, WdlFloatType, WdlIntegerType, WdlStringType} -import wdl4s.values.WdlValue -import wdl4s.{Call, WdlExpression} +import com.google.auth.Credentials +import com.google.cloud.storage.StorageException +import com.google.cloud.storage.contrib.nio.CloudStorageOptions +import cromwell.backend.impl.jes.authentication.{GcsLocalizing, JesAuthObject, JesDockerCredentials} +import cromwell.backend.standard.{StandardInitializationActor, StandardInitializationActorParams, StandardValidatedRuntimeAttributesBuilder} +import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor} +import cromwell.core.CromwellFatalException +import cromwell.core.io.AsyncIo +import cromwell.filesystems.gcs.auth.{ClientSecrets, GoogleAuthMode} +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder +import spray.json.{JsObject, JsTrue} +import wdl4s.wdl.WdlTaskCall import scala.concurrent.Future -import scala.util.Try -object JesInitializationActor { - val SupportedKeys = Set(CpuKey, MemoryKey, DockerKey, FailOnStderrKey, ContinueOnReturnCodeKey, JesRuntimeAttributes.ZonesKey, - JesRuntimeAttributes.PreemptibleKey, JesRuntimeAttributes.BootDiskSizeKey, JesRuntimeAttributes.DisksKey) - - def props(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - jesConfiguration: JesConfiguration, - serviceRegistryActor: ActorRef): Props = - Props(new JesInitializationActor(workflowDescriptor, calls, jesConfiguration, serviceRegistryActor: ActorRef)) +case class 
JesInitializationActorParams +( + workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, + calls: Set[WdlTaskCall], + jesConfiguration: JesConfiguration, + serviceRegistryActor: ActorRef, + restarting: Boolean +) extends StandardInitializationActorParams { + override val configurationDescriptor: BackendConfigurationDescriptor = jesConfiguration.configurationDescriptor } -class JesInitializationActor(override val workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], - private[jes] val jesConfiguration: JesConfiguration, - override val serviceRegistryActor: ActorRef) - extends BackendWorkflowInitializationActor { +class JesInitializationActor(jesParams: JesInitializationActorParams) + extends StandardInitializationActor(jesParams) with AsyncIo with GcsBatchCommandBuilder { - override protected def runtimeAttributeValidators: Map[String, (Option[WdlValue]) => Boolean] = Map( - CpuKey -> wdlTypePredicate(valueRequired = false, WdlIntegerType.isCoerceableFrom), - MemoryKey -> wdlTypePredicate(valueRequired = false, WdlStringType.isCoerceableFrom), - DockerKey -> wdlTypePredicate(valueRequired = true, WdlStringType.isCoerceableFrom), - FailOnStderrKey -> wdlTypePredicate(valueRequired = false, WdlBooleanType.isCoerceableFrom), - ContinueOnReturnCodeKey -> continueOnReturnCodePredicate(valueRequired = false), - JesRuntimeAttributes.PreemptibleKey -> wdlTypePredicate(valueRequired = false, WdlIntegerType.isCoerceableFrom), - JesRuntimeAttributes.BootDiskSizeKey -> wdlTypePredicate(valueRequired = false, WdlFloatType.isCoerceableFrom), + override lazy val ioActor = jesParams.ioActor + private val jesConfiguration = jesParams.jesConfiguration + private val workflowOptions = workflowDescriptor.workflowOptions + implicit private val system = context.system - // TODO (eventually): make these more appropriate pre-checks - JesRuntimeAttributes.ZonesKey -> wdlTypePredicate(valueRequired = false, WdlStringType.isCoerceableFrom), - 
JesRuntimeAttributes.DisksKey -> wdlTypePredicate(valueRequired = false, WdlStringType.isCoerceableFrom)) + context.become(ioReceive orElse receive) - override val configurationDescriptor = jesConfiguration.configurationDescriptor + override lazy val runtimeAttributesBuilder: StandardValidatedRuntimeAttributesBuilder = + JesRuntimeAttributes.runtimeAttributesBuilder(jesConfiguration) - private[jes] lazy val refreshTokenAuth: Option[JesAuthInformation] = { + // From the gcs auth and the workflow options, optionally builds a GcsLocalizing that contains + // the information (client Id/Secrets + refresh token) that will be uploaded to Gcs before the workflow start + private[jes] lazy val refreshTokenAuth: Option[JesAuthObject] = { for { - clientSecrets <- List(jesConfiguration.jesAttributes.gcsFilesystemAuth) collectFirst { case s: ClientSecrets => s } + clientSecrets <- List(jesConfiguration.jesAttributes.auths.gcs) collectFirst { case s: ClientSecrets => s } token <- workflowDescriptor.workflowOptions.get(GoogleAuthMode.RefreshTokenOptionKey).toOption } yield GcsLocalizing(clientSecrets, token) } - private val iOExecutionContext = context.system.dispatchers.lookup(IoDispatcher) + // Credentials object for the GCS API + private lazy val gcsCredentials: Future[Credentials] = + jesConfiguration.jesAttributes.auths.gcs.credential(workflowOptions) + // Credentials object for the Genomics API + private lazy val genomicsCredentials: Future[Credentials] = + jesConfiguration.jesAttributes.auths.genomics.credential(workflowOptions) - override protected def coerceDefaultRuntimeAttributes(options: WorkflowOptions): Try[Map[String, WdlValue]] = { - RuntimeAttributesDefault.workflowOptionsDefault(options, JesRuntimeAttributes.coercionMap) + // Genomics object to access the Genomics API + private lazy val genomics: Future[Genomics] = { + genomicsCredentials map jesConfiguration.genomicsFactory.fromCredentials } - /** - * A call which happens before anything else runs - */ - 
override def beforeAll(): Future[Option[BackendInitializationData]] = { + override lazy val workflowPaths: Future[JesWorkflowPaths] = for { + gcsCred <- gcsCredentials + genomicsCred <- genomicsCredentials + } yield new JesWorkflowPaths(workflowDescriptor, gcsCred, genomicsCred, jesConfiguration) - val genomicsCredential = jesConfiguration.jesAttributes.genomicsCredential(workflowDescriptor.workflowOptions) - val gcsCredential = jesConfiguration.jesAttributes.gcsCredential(workflowDescriptor.workflowOptions) + override lazy val initializationData: Future[JesBackendInitializationData] = for { + jesWorkflowPaths <- workflowPaths + gcsCreds <- gcsCredentials + genomicsFactory <- genomics + } yield JesBackendInitializationData(jesWorkflowPaths, runtimeAttributesBuilder, jesConfiguration, gcsCreds, genomicsFactory) - val jesCredentials = JesCredentials(genomicsCredential = genomicsCredential, gcsCredential = gcsCredential) - def buildGenomics: Future[Genomics] = Future { - GenomicsFactory(jesConfiguration.googleConfig.applicationName, genomicsCredential, jesConfiguration.jesAttributes.endpointUrl) + override def beforeAll(): Future[Option[BackendInitializationData]] = { + def fileUpload(paths: JesWorkflowPaths): Future[Unit] = { + writeAuthenticationFile(paths, jesConfiguration.jesAttributes.restrictMetadataAccess, jesConfiguration.dockerCredentials) recoverWith { + case CromwellFatalException(e: StorageException) if e.getCode == StatusCodes.PreconditionFailed.intValue && jesParams.restarting => + workflowLogger.debug(s"Authentication file already exists but this is a restart, proceeding.") + Future.successful(()) + case CromwellFatalException(e: StorageException) if e.getCode == StatusCodes.PreconditionFailed.intValue => + Future.failed(new IOException(s"Failed to upload authentication file:" + + " there was already a file at the same location and this workflow was not being restarted.")) + case failure => Future.failed(new IOException(s"Failed to upload authentication 
file", failure)) + } } for { - // generate single filesystem and genomics instances - genomics <- buildGenomics - workflowPaths = new JesWorkflowPaths(workflowDescriptor, jesConfiguration, jesCredentials)(iOExecutionContext) - _ <- if (jesConfiguration.needAuthFileUpload) writeAuthenticationFile(workflowPaths) else Future.successful(()) - _ = publishWorkflowRoot(workflowPaths.workflowRootPath.toString) - } yield Option(JesBackendInitializationData(workflowPaths, genomics)) + paths <- workflowPaths + _ = publishWorkflowRoot(paths.workflowRoot.pathAsString) + _ <- if (jesConfiguration.needAuthFileUpload) fileUpload(paths) else Future.successful(()) + data <- initializationData + } yield Option(data) } - private def writeAuthenticationFile(workflowPath: JesWorkflowPaths): Future[Unit] = { - generateAuthJson(jesConfiguration.dockerCredentials, refreshTokenAuth) map { content => + private def writeAuthenticationFile(workflowPath: JesWorkflowPaths, + restrictMetadataAccess: Boolean, + dockerCredentials: Option[JesDockerCredentials]): Future[Unit] = { + val authObjects = List(dockerCredentials, refreshTokenAuth).flatten + generateAuthJson(authObjects, restrictMetadataAccess) map { content => val path = workflowPath.gcsAuthFilePath - val upload = () => Future(path.writeAsJson(content)) - - workflowLogger.info(s"Creating authentication file for workflow ${workflowDescriptor.id} at \n ${path.toString}") - Retry.withRetry(upload, isFatal = isFatalJesException, isTransient = isTransientJesException)(context.system) map { _ => () } recoverWith { - case failure => Future.failed(new IOException("Failed to upload authentication file", failure)) - } + workflowLogger.info(s"Creating authentication file for workflow ${workflowDescriptor.id} at \n $path") + val openOptions = Seq( + CloudStorageOptions.withMimeType("application/json"), + // Will fail if the file already exists - + // In case of a restart this avoids rewriting the file as the JES job might be trying to localize it at 
the same time + StandardOpenOption.CREATE_NEW + ) + writeAsync(path, content, openOptions) } getOrElse Future.successful(()) } - def generateAuthJson(authInformation: Option[JesAuthInformation]*) = { - authInformation.flatten map { _.toMap } match { - case Nil => None - case jsons => - val authsValues = jsons.reduce(_ ++ _) mapValues JsObject.apply - Option(JsObject("auths" -> JsObject(authsValues)).prettyPrint) + def generateAuthJson(authObjects: List[JesAuthObject], restrictMetadataAccess: Boolean): Option[String] = { + def generateAuthObject(): Map[String, JsObject] = { + if (authObjects.nonEmpty) { + val authObjectMaps = authObjects map { _.toMap } + Map("auths" -> JsObject(authObjectMaps.reduce(_ ++ _) map { case (k, v) => k -> JsObject.apply(v) })) + } else Map.empty } - } - /** - * Validate that this WorkflowBackendActor can run all of the calls that it's been assigned - */ - override def validate(): Future[Unit] = { - Future { - calls foreach { call => - val runtimeAttributes = call.task.runtimeAttributes.attrs - val notSupportedAttributes = runtimeAttributes filterKeys { !SupportedKeys.contains(_) } - - if (notSupportedAttributes.nonEmpty) { - val notSupportedAttrString = notSupportedAttributes.keys mkString ", " - workflowLogger.warn(s"Key/s [$notSupportedAttrString] is/are not supported by JesBackend. 
Unsupported attributes will not be part of jobs executions.") - } - } - } + val authMap = generateAuthObject() + val jsonMap = if (restrictMetadataAccess) authMap ++ Map("restrictMetadataAccess" -> JsTrue) else authMap + + if (jsonMap.nonEmpty) Option(JsObject(jsonMap).prettyPrint) + else None } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala index acfa8c6b2..f913047be 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala @@ -1,84 +1,93 @@ package cromwell.backend.impl.jes -import java.nio.file.Path - -import akka.actor.{Actor, ActorRef} -import better.files._ -import cromwell.backend.callcaching.JobCachingActorHelper -import cromwell.backend.impl.jes.JesAsyncBackendJobExecutionActor.WorkflowOptionKeys +import akka.actor.Actor import cromwell.backend.impl.jes.io.{JesAttachedDisk, JesWorkingDisk} +import cromwell.backend.standard.StandardCachingActorHelper +import cromwell.core.labels.Labels import cromwell.core.logging.JobLogging +import cromwell.core.path.Path +import cromwell.services.metadata.CallMetadataKeys import scala.language.postfixOps -trait JesJobCachingActorHelper extends JobCachingActorHelper { +trait JesJobCachingActorHelper extends StandardCachingActorHelper { this: Actor with JobLogging => val ExecParamName = "exec" val MonitoringParamName = "monitoring" - val JesMonitoringScript = JesWorkingDisk.MountPoint.resolve("monitoring.sh") - val JesMonitoringLogFile = JesWorkingDisk.MountPoint.resolve("monitoring.log") - - def jesConfiguration: JesConfiguration - - def initializationData: JesBackendInitializationData - - def serviceRegistryActor: ActorRef + val JesMonitoringScript: Path = JesWorkingDisk.MountPoint.resolve("monitoring.sh") + val 
JesMonitoringLogFile: Path = JesWorkingDisk.MountPoint.resolve("monitoring.log") - def getPath(str: String) = jesCallPaths.gcsFileSystem.getPath(str) + lazy val initializationData: JesBackendInitializationData = { + backendInitializationDataAs[JesBackendInitializationData] + } - override lazy val configurationDescriptor = jesConfiguration.configurationDescriptor + lazy val jesConfiguration: JesConfiguration = initializationData.jesConfiguration - lazy val jesCallPaths = initializationData.workflowPaths.toJesCallPaths(jobDescriptor.key) + lazy val jesCallPaths: JesJobPaths = jobPaths.asInstanceOf[JesJobPaths] - lazy val runtimeAttributes = JesRuntimeAttributes(jobDescriptor.runtimeAttributes, jobLogger) + lazy val runtimeAttributes = JesRuntimeAttributes(validatedRuntimeAttributes, jesConfiguration.runtimeConfig) - lazy val retryable = jobDescriptor.key.attempt <= runtimeAttributes.preemptible lazy val workingDisk: JesAttachedDisk = runtimeAttributes.disks.find(_.name == JesWorkingDisk.Name).get - lazy val callRootPath: Path = jesCallPaths.callRootPath - lazy val returnCodeFilename = jesCallPaths.returnCodeFilename - lazy val returnCodeGcsPath = jesCallPaths.returnCodePath - lazy val jesStdoutFile = jesCallPaths.stdoutPath - lazy val jesStderrFile = jesCallPaths.stderrPath - lazy val jesLogFilename = jesCallPaths.jesLogFilename - lazy val defaultMonitoringOutputPath = callRootPath.resolve(JesMonitoringLogFile) + lazy val callRootPath: Path = jesCallPaths.callExecutionRoot + lazy val returnCodeFilename: String = jesCallPaths.returnCodeFilename + lazy val returnCodeGcsPath: Path = jesCallPaths.returnCode + lazy val jesStdoutFile: Path = jesCallPaths.stdout + lazy val jesStderrFile: Path = jesCallPaths.stderr + lazy val jesLogFilename: String = jesCallPaths.jesLogFilename + lazy val defaultMonitoringOutputPath: Path = callRootPath.resolve(JesMonitoringLogFile) - lazy val maxPreemption = runtimeAttributes.preemptible - lazy val preemptible: Boolean = 
jobDescriptor.key.attempt <= maxPreemption + lazy val maxPreemption: Int = runtimeAttributes.preemptible + def preemptible: Boolean - lazy val jesAttributes = jesConfiguration.jesAttributes - // TODO: Move monitoring paths to JesCallPaths + lazy val jesAttributes: JesAttributes = jesConfiguration.jesAttributes lazy val monitoringScript: Option[JesInput] = { - jobDescriptor.workflowDescriptor.workflowOptions.get(WorkflowOptionKeys.MonitoringScript) map { path => - JesFileInput(s"$MonitoringParamName-in", getPath(path).toString, + jesCallPaths.workflowPaths.monitoringPath map { path => + JesFileInput(s"$MonitoringParamName-in", path.pathAsString, JesWorkingDisk.MountPoint.resolve(JesMonitoringScript), workingDisk) - } toOption + } } - lazy val monitoringOutput = monitoringScript map { _ => JesFileOutput(s"$MonitoringParamName-out", - defaultMonitoringOutputPath.toString, File(JesMonitoringLogFile).path, workingDisk) + lazy val monitoringOutput: Option[JesFileOutput] = monitoringScript map { _ => JesFileOutput(s"$MonitoringParamName-out", + defaultMonitoringOutputPath.pathAsString, JesMonitoringLogFile, workingDisk) } - lazy val metadataKeyValues: Map[String, Any] = { - val runtimeAttributesMetadata: Map[String, Any] = runtimeAttributes.asMap map { - case (key, value) => s"runtimeAttributes:$key" -> value - } + lazy val defaultLabels: Labels = { + val workflow = jobDescriptor.workflowDescriptor + val call = jobDescriptor.call + val subWorkflow = workflow.workflow + val subWorkflowLabels = if (!subWorkflow.equals(workflow.rootWorkflow)) + Labels("cromwell-sub-workflow-name" -> subWorkflow.unqualifiedName) + else + Labels.empty + + val alias = call.unqualifiedName + val aliasLabels = if (!alias.equals(call.task.name)) + Labels("wdl-call-alias" -> alias) + else + Labels.empty + + Labels( + "cromwell-workflow-id" -> s"cromwell-${workflow.rootWorkflowId}", + "wdl-task-name" -> call.task.name + ) ++ subWorkflowLabels ++ aliasLabels + } - var fileMetadata: Map[String, Any] = 
jesCallPaths.metadataPaths - if (monitoringOutput.nonEmpty) { - // TODO: Move this to JesCallPaths - fileMetadata += JesMetadataKeys.MonitoringLog -> monitoringOutput.get.gcs - } + lazy val originalLabels: Labels = defaultLabels ++ workflowDescriptor.customLabels + + lazy val backendLabels: Labels = GoogleLabels.toLabels(originalLabels.asTuple :_*) - val otherMetadata = Map( + lazy val originalLabelEvents: Map[String, String] = originalLabels.value map { l => s"${CallMetadataKeys.Labels}:${l.key}" -> l.value } toMap + + lazy val backendLabelEvents: Map[String, String] = backendLabels.value map { l => s"${CallMetadataKeys.BackendLabels}:${l.key}" -> l.value } toMap + + override protected def nonStandardMetadata: Map[String, Any] = { + Map( JesMetadataKeys.GoogleProject -> jesAttributes.project, JesMetadataKeys.ExecutionBucket -> jesAttributes.executionBucket, JesMetadataKeys.EndpointUrl -> jesAttributes.endpointUrl, - "preemptible" -> preemptible, - "cache:allowResultReuse" -> true - ) - - runtimeAttributesMetadata ++ fileMetadata ++ otherMetadata + "preemptible" -> preemptible + ) ++ backendLabelEvents ++ originalLabelEvents } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobExecutionActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobExecutionActor.scala deleted file mode 100644 index 594b7f0c2..000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobExecutionActor.scala +++ /dev/null @@ -1,94 +0,0 @@ -package cromwell.backend.impl.jes - -import akka.actor.{ActorRef, Props} -import akka.event.LoggingReceive -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse} -import cromwell.backend.BackendLifecycleActor.AbortJobCommand -import cromwell.backend._ -import cromwell.backend.async.AsyncBackendJobExecutionActor.{Execute, Recover} -import cromwell.backend.impl.jes.JesAsyncBackendJobExecutionActor.JesJobId -import 
cromwell.backend.impl.jes.JesJobExecutionActor._ -import cromwell.services.keyvalue.KeyValueServiceActor._ -import org.slf4j.LoggerFactory - -import scala.concurrent.{Future, Promise} -import scala.language.postfixOps - -object JesJobExecutionActor { - val logger = LoggerFactory.getLogger("JesBackend") - - def props(jobDescriptor: BackendJobDescriptor, - jesWorkflowInfo: JesConfiguration, - initializationData: JesBackendInitializationData, - serviceRegistryActor: ActorRef): Props = { - Props(new JesJobExecutionActor(jobDescriptor, jesWorkflowInfo, initializationData, serviceRegistryActor)) - } - - val JesOperationIdKey = "__jes_operation_id" -} - -case class JesJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, - jesConfiguration: JesConfiguration, - initializationData: JesBackendInitializationData, - serviceRegistryActor: ActorRef) - extends BackendJobExecutionActor { - - private def jesReceiveBehavior: Receive = LoggingReceive { - case AbortJobCommand => - executor.foreach(_ ! AbortJobCommand) - case abortResponse: AbortedResponse => - context.parent ! abortResponse - context.stop(self) - case KvPair(key, id @ Some(operationId)) if key.key == JesOperationIdKey => - // Successful operation ID lookup during recover. - executor foreach { _ ! Recover(JesJobId(operationId))} - case KvKeyLookupFailed(_) => - // Missed operation ID lookup during recover, fall back to execute. - executor foreach { _ ! Execute } - case KvFailure(_, e) => - // Failed operation ID lookup during recover, crash and let the supervisor deal with it. 
- completionPromise.tryFailure(e) - throw new RuntimeException("Failure attempting to look up JES operation ID for key " + jobDescriptor.key, e) - } - - override def receive = jesReceiveBehavior orElse super.receive - - override val configurationDescriptor = jesConfiguration.configurationDescriptor - - private lazy val completionPromise = Promise[BackendJobExecutionResponse]() - - private var executor: Option[ActorRef] = None - - private def launchExecutor: Future[Unit] = Future { - val executionProps = JesAsyncBackendJobExecutionActor.props(jobDescriptor, - completionPromise, - jesConfiguration, - initializationData, - serviceRegistryActor) - val executorRef = context.actorOf(executionProps, "JesAsyncBackendJobExecutionActor") - executor = Option(executorRef) - () - } - - override def recover: Future[BackendJobExecutionResponse] = { - import JesJobExecutionActor._ - - for { - _ <- launchExecutor - _ = serviceRegistryActor ! KvGet(ScopedKey(jobDescriptor.workflowDescriptor.id, - KvJobKey(jobDescriptor.key.call.fullyQualifiedName, jobDescriptor.key.index, jobDescriptor.key.attempt), - JesOperationIdKey)) - c <- completionPromise.future - } yield c - } - - override def execute: Future[BackendJobExecutionResponse] = { - for { - _ <- launchExecutor - _ = executor foreach { _ ! 
Execute } - c <- completionPromise.future - } yield c - } - - override def abort(): Unit = {} -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala new file mode 100644 index 000000000..e3c349ec0 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala @@ -0,0 +1,51 @@ +package cromwell.backend.impl.jes + +import cromwell.backend.BackendJobDescriptorKey +import cromwell.backend.io.JobPaths +import cromwell.core.path.Path +import cromwell.services.metadata.CallMetadataKeys + +object JesJobPaths { + val JesLogPathKey = "jesLog" + val GcsExecPathKey = "gcsExec" +} + +final case class JesJobPaths(override val workflowPaths: JesWorkflowPaths, jobKey: BackendJobDescriptorKey) extends JobPaths { + + val jesLogBasename = { + val index = jobKey.index.map(s => s"-$s").getOrElse("") + s"${jobKey.scope.unqualifiedName}$index" + } + + override val returnCodeFilename: String = s"$jesLogBasename-rc.txt" + override val stdoutFilename: String = s"$jesLogBasename-stdout.log" + override val stderrFilename: String = s"$jesLogBasename-stderr.log" + override val scriptFilename: String = "exec.sh" + + val jesLogFilename: String = s"$jesLogBasename.log" + lazy val jesLogPath: Path = callExecutionRoot.resolve(jesLogFilename) + + /* + TODO: Move various monitoring files path generation here. + + "/cromwell_root" is a well known path, called in the regular JobPaths callDockerRoot. + This JesCallPaths should know about that root, and be able to create the monitoring file paths. + Instead of the AsyncActor creating the paths, the paths could then be shared with the CachingActor. + + Those monitoring paths could then be returned by metadataFiles and detritusFiles. 
+ */ + + override lazy val customMetadataPaths = Map( + CallMetadataKeys.BackendLogsPrefix + ":log" -> jesLogPath + ) ++ ( + workflowPaths.monitoringPath map { p => Map(JesMetadataKeys.MonitoringLog -> p) } getOrElse Map.empty + ) + + override lazy val customDetritusPaths: Map[String, Path] = Map( + JesJobPaths.JesLogPathKey -> jesLogPath + ) + + override lazy val customLogPaths: Map[String, Path] = Map( + JesJobPaths.JesLogPathKey -> jesLogPath + ) +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesParameters.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesParameters.scala index e814ea04a..6595e71a2 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesParameters.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesParameters.scala @@ -1,9 +1,8 @@ package cromwell.backend.impl.jes -import java.nio.file.Path - import com.google.api.services.genomics.model.{LocalCopy, PipelineParameter} import cromwell.backend.impl.jes.io.JesAttachedDisk +import cromwell.core.path.Path sealed trait JesParameter { def name: String @@ -16,7 +15,7 @@ sealed trait JesInput extends JesParameter final case class JesFileInput(name: String, gcs: String, local: Path, mount: JesAttachedDisk) extends JesInput { def toGooglePipelineParameter = { new PipelineParameter().setName(name).setLocalCopy( - new LocalCopy().setDisk(mount.name).setPath(local.toString) + new LocalCopy().setDisk(mount.name).setPath(local.pathAsString) ) } val toGoogleRunParameter: String = gcs @@ -31,7 +30,7 @@ final case class JesLiteralInput(name: String, value: String) extends JesInput { final case class JesFileOutput(name: String, gcs: String, local: Path, mount: JesAttachedDisk) extends JesParameter { def toGooglePipelineParameter = { new PipelineParameter().setName(name).setLocalCopy( - new LocalCopy().setDisk(mount.name).setPath(local.toString) + new LocalCopy().setDisk(mount.name).setPath(local.pathAsString) ) 
} val toGoogleRunParameter: String = gcs diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesPipelineInfo.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesPipelineInfo.scala index 063a374b8..ba052cd31 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesPipelineInfo.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesPipelineInfo.scala @@ -26,27 +26,18 @@ trait JesPipelineInfoBuilder { .set(Run.NoAddressFieldName, runtimeAttributes.noAddress) } - def build(commandLine: String, runtimeAttributes: JesRuntimeAttributes): JesPipelineInfo + def build(commandLine: String, runtimeAttributes: JesRuntimeAttributes, docker: String): JesPipelineInfo } object NonPreemptibleJesPipelineInfoBuilder extends JesPipelineInfoBuilder { - def build(commandLine: String, runtimeAttributes: JesRuntimeAttributes): JesPipelineInfo = { - /* - It should be impossible for docker to be None here. Enforcing that w/ ADTs seemed more trouble than - it was worth. 
If you're ever debugging a NoSuchElementException which leads you here, that means - the more trouble than worth calculation was incorrect and we should have separate RuntimeAttributes for - docker and no docker cases - */ - val dockerImage = runtimeAttributes.dockerImage.get + def build(commandLine: String, runtimeAttributes: JesRuntimeAttributes, dockerImage: String): JesPipelineInfo = { val resources = buildResources(runtimeAttributes).setPreemptible(false) new NonPreemptibleJesPipelineInfoBuilder(resources, buildDockerExecutor(commandLine, dockerImage)) } } object PreemptibleJesPipelineInfoBuilder extends JesPipelineInfoBuilder { - def build(commandLine: String, runtimeAttributes: JesRuntimeAttributes): JesPipelineInfo = { - // See comment above - val dockerImage = runtimeAttributes.dockerImage.get + def build(commandLine: String, runtimeAttributes: JesRuntimeAttributes, dockerImage: String): JesPipelineInfo = { val resources = buildResources(runtimeAttributes).setPreemptible(true) new PreemptibleJesPipelineInfoBuilder(resources, buildDockerExecutor(commandLine, dockerImage)) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesRuntimeAttributes.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesRuntimeAttributes.scala index 5e4a42ea5..05b9ca358 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesRuntimeAttributes.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesRuntimeAttributes.scala @@ -1,18 +1,17 @@ package cromwell.backend.impl.jes +import cats.syntax.validated._ +import cats.data.Validated._ +import cats.syntax.cartesian._ +import com.typesafe.config.Config import cromwell.backend.MemorySize -import cromwell.backend.impl.jes.io.{JesWorkingDisk, JesAttachedDisk} -import cromwell.backend.validation.RuntimeAttributesKeys._ -import cromwell.backend.validation.RuntimeAttributesValidation._ -import cromwell.backend.validation._ -import cromwell.core._ 
-import lenthall.exception.MessageAggregation -import org.slf4j.Logger -import wdl4s.types._ -import wdl4s.values._ -import cromwell.backend.validation.RuntimeAttributesDefault._ -import scalaz._ -import Scalaz._ +import cromwell.backend.impl.jes.io.{JesAttachedDisk, JesWorkingDisk} +import cromwell.backend.standard.StandardValidatedRuntimeAttributesBuilder +import cromwell.backend.validation.{BooleanRuntimeAttributesValidation, _} +import lenthall.validation.ErrorOr._ +import wdl4s.wdl.types._ +import wdl4s.wdl.values._ + case class JesRuntimeAttributes(cpu: Int, zones: Vector[String], @@ -20,148 +19,159 @@ case class JesRuntimeAttributes(cpu: Int, bootDiskSize: Int, memory: MemorySize, disks: Seq[JesAttachedDisk], - dockerImage: Option[String], + dockerImage: String, failOnStderr: Boolean, continueOnReturnCode: ContinueOnReturnCode, - noAddress: Boolean) { - import JesRuntimeAttributes._ - - lazy val asMap = Map[String, Any]( - CpuKey -> cpu.toString, - ZonesKey -> zones.mkString(","), - PreemptibleKey -> preemptible.toString, - BootDiskSizeKey -> bootDiskSize.toString, - MemoryKey -> memory.toString, - DisksKey -> disks.mkString(","), - DockerKey -> dockerImage.get, - FailOnStderrKey -> failOnStderr.toString, - ContinueOnReturnCodeKey -> continueOnReturnCode - ) -} + noAddress: Boolean) object JesRuntimeAttributes { - private val CpuDefaultValue = 1 - private val ContinueOnReturnCodeDefaultValue = 0 - private val MemoryDefaultValue = "2 GB" val ZonesKey = "zones" - private val ZoneDefaultValue = "us-central1-b" + private val ZonesDefaultValue = WdlString("us-central1-b") val PreemptibleKey = "preemptible" - private val PreemptibleDefaultValue = 0 + private val preemptibleValidationInstance = new IntRuntimeAttributesValidation(PreemptibleKey) + private val PreemptibleDefaultValue = WdlInteger(0) val BootDiskSizeKey = "bootDiskSizeGb" - private val BootDiskSizeDefaultValue = 10 + private val bootDiskValidationInstance = new 
IntRuntimeAttributesValidation(BootDiskSizeKey) + private val BootDiskDefaultValue = WdlInteger(10) val NoAddressKey = "noAddress" - private val NoAddressDefaultValue = false + private val noAddressValidationInstance = new BooleanRuntimeAttributesValidation(NoAddressKey) + private val NoAddressDefaultValue = WdlBoolean(false) val DisksKey = "disks" - private val DisksDefaultValue = s"${JesWorkingDisk.Name} 10 SSD" - - val staticDefaults = Map( - CpuKey -> WdlInteger(CpuDefaultValue), - DisksKey -> WdlString(DisksDefaultValue), - ZonesKey -> WdlString(ZoneDefaultValue), - ContinueOnReturnCodeKey -> WdlInteger(ContinueOnReturnCodeDefaultValue), - FailOnStderrKey -> WdlBoolean.False, - PreemptibleKey -> WdlInteger(PreemptibleDefaultValue), - MemoryKey -> WdlString(MemoryDefaultValue), - BootDiskSizeKey -> WdlInteger(BootDiskSizeDefaultValue), - NoAddressKey -> WdlBoolean(NoAddressDefaultValue) - ) - - private[jes] val coercionMap: Map[String, Set[WdlType]] = Map( - CpuKey -> Set(WdlIntegerType), - DisksKey -> Set(WdlStringType, WdlArrayType(WdlStringType)), - ZonesKey -> Set(WdlStringType, WdlArrayType(WdlStringType)), - ContinueOnReturnCodeKey -> ContinueOnReturnCode.validWdlTypes, - FailOnStderrKey -> Set(WdlBooleanType), - PreemptibleKey -> Set(WdlIntegerType), - MemoryKey -> Set(WdlStringType), - BootDiskSizeKey -> Set(WdlIntegerType), - NoAddressKey -> Set(WdlBooleanType), - DockerKey -> Set(WdlStringType) - ) - - def apply(attrs: Map[String, WdlValue], logger: Logger): JesRuntimeAttributes = { - warnUnrecognized(attrs.keySet, coercionMap.keySet, logger) - - val cpu = validateCpu(attrs.get(CpuKey), noValueFoundFor(CpuKey)) - val memory = validateMemory(attrs.get(MemoryKey), noValueFoundFor(MemoryKey)) - val docker = validateDocker(attrs.get(DockerKey), noValueFoundFor(DockerKey)) - val failOnStderr = validateFailOnStderr(attrs.get(FailOnStderrKey), noValueFoundFor(FailOnStderrKey)) - val continueOnReturnCode = 
validateContinueOnReturnCode(attrs.get(ContinueOnReturnCodeKey), noValueFoundFor(ContinueOnReturnCodeKey)) - - val zones = validateZone(attrs(ZonesKey)) - val preemptible = validatePreemptible(attrs(PreemptibleKey)) - val noAddress = validateNoAddress(attrs(NoAddressKey)) - val bootDiskSize = validateBootDisk(attrs(BootDiskSizeKey)) - val disks = validateLocalDisks(attrs(DisksKey)) - (cpu |@| zones |@| preemptible |@| bootDiskSize |@| memory |@| disks |@| docker |@| failOnStderr |@| continueOnReturnCode |@| noAddress) { - new JesRuntimeAttributes(_, _, _, _, _, _, _, _, _, _) - } match { - case Success(x) => x - case Failure(nel) => throw new RuntimeException with MessageAggregation { - override def exceptionContext: String = "Runtime attribute validation failed" - override def errorMessages: Traversable[String] = nel.list.toList - } - } + private val DisksDefaultValue = WdlString(s"${JesWorkingDisk.Name} 10 SSD") + + private val MemoryDefaultValue = "2 GB" + + private def cpuValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Int] = CpuValidation.instance + .withDefault(CpuValidation.configDefaultWdlValue(runtimeConfig) getOrElse CpuValidation.default) + + private def failOnStderrValidation(runtimeConfig: Option[Config]) = FailOnStderrValidation.default(runtimeConfig) + + private def continueOnReturnCodeValidation(runtimeConfig: Option[Config]) = ContinueOnReturnCodeValidation.default(runtimeConfig) + + private def disksValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Seq[JesAttachedDisk]] = DisksValidation + .withDefault(DisksValidation.configDefaultWdlValue(runtimeConfig) getOrElse DisksDefaultValue) + + private def zonesValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Vector[String]] = ZonesValidation + .withDefault(ZonesValidation.configDefaultWdlValue(runtimeConfig) getOrElse ZonesDefaultValue) + + private def preemptibleValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Int] = 
preemptibleValidationInstance + .withDefault(preemptibleValidationInstance.configDefaultWdlValue(runtimeConfig) getOrElse PreemptibleDefaultValue) + + private def memoryValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[MemorySize] = { + MemoryValidation.withDefaultMemory( + RuntimeAttributesKeys.MemoryKey, + MemoryValidation.configDefaultString(RuntimeAttributesKeys.MemoryKey, runtimeConfig) getOrElse MemoryDefaultValue) } - private def validateZone(zoneValue: WdlValue): ErrorOr[Vector[String]] = { - zoneValue match { - case WdlString(s) => s.split("\\s+").toVector.successNel - case WdlArray(wdlType, value) if wdlType.memberType == WdlStringType => - value.map(_.valueString).toVector.successNel - case _ => s"Expecting $ZonesKey runtime attribute to be either a whitespace separated String or an Array[String]".failureNel - } + private def bootDiskSizeValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Int] = bootDiskValidationInstance + .withDefault(bootDiskValidationInstance.configDefaultWdlValue(runtimeConfig) getOrElse BootDiskDefaultValue) + + private def noAddressValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Boolean] = noAddressValidationInstance + .withDefault(noAddressValidationInstance.configDefaultWdlValue(runtimeConfig) getOrElse NoAddressDefaultValue) + + private val dockerValidation: RuntimeAttributesValidation[String] = DockerValidation.instance + + def runtimeAttributesBuilder(jesConfiguration: JesConfiguration): StandardValidatedRuntimeAttributesBuilder = { + val runtimeConfig = jesConfiguration.runtimeConfig + StandardValidatedRuntimeAttributesBuilder.default(runtimeConfig).withValidation( + cpuValidation(runtimeConfig), + disksValidation(runtimeConfig), + zonesValidation(runtimeConfig), + preemptibleValidation(runtimeConfig), + memoryValidation(runtimeConfig), + bootDiskSizeValidation(runtimeConfig), + noAddressValidation(runtimeConfig), + dockerValidation + ) } - private def 
contextualizeFailure[T](validation: ErrorOr[T], key: String): ErrorOr[T] = { - validation.leftMap[String](errors => s"Failed to validate $key runtime attribute: " + errors.toList.mkString(",")).toValidationNel + def apply(validatedRuntimeAttributes: ValidatedRuntimeAttributes, runtimeAttrsConfig: Option[Config]): JesRuntimeAttributes = { + val cpu: Int = RuntimeAttributesValidation.extract(cpuValidation(runtimeAttrsConfig), validatedRuntimeAttributes) + val zones: Vector[String] = RuntimeAttributesValidation.extract(ZonesValidation, validatedRuntimeAttributes) + val preemptible: Int = RuntimeAttributesValidation.extract(preemptibleValidation(runtimeAttrsConfig), validatedRuntimeAttributes) + val bootDiskSize: Int = RuntimeAttributesValidation.extract(bootDiskSizeValidation(runtimeAttrsConfig), validatedRuntimeAttributes) + val memory: MemorySize = RuntimeAttributesValidation.extract(memoryValidation(runtimeAttrsConfig), validatedRuntimeAttributes) + val disks: Seq[JesAttachedDisk] = RuntimeAttributesValidation.extract(disksValidation(runtimeAttrsConfig), validatedRuntimeAttributes) + val docker: String = RuntimeAttributesValidation.extract(dockerValidation, validatedRuntimeAttributes) + val failOnStderr: Boolean = RuntimeAttributesValidation.extract(failOnStderrValidation(runtimeAttrsConfig), validatedRuntimeAttributes) + val continueOnReturnCode: ContinueOnReturnCode = RuntimeAttributesValidation.extract(continueOnReturnCodeValidation(runtimeAttrsConfig), validatedRuntimeAttributes) + val noAddress: Boolean = RuntimeAttributesValidation.extract(noAddressValidation(runtimeAttrsConfig), validatedRuntimeAttributes) + + new JesRuntimeAttributes( + cpu, + zones, + preemptible, + bootDiskSize, + memory, + disks, + docker, + failOnStderr, + continueOnReturnCode, + noAddress + ) } +} + +object ZonesValidation extends RuntimeAttributesValidation[Vector[String]] { + override def key: String = JesRuntimeAttributes.ZonesKey - private def validatePreemptible(preemptible: 
WdlValue): ErrorOr[Int] = { - contextualizeFailure(validateInt(preemptible), PreemptibleKey) + override def coercion: Traversable[WdlType] = Set(WdlStringType, WdlArrayType(WdlStringType)) + + override protected def validateValue: PartialFunction[WdlValue, ErrorOr[Vector[String]]] = { + case WdlString(s) => s.split("\\s+").toVector.validNel + case WdlArray(wdlType, value) if wdlType.memberType == WdlStringType => + value.map(_.valueString).toVector.validNel } - private def validateNoAddress(noAddress: WdlValue): ErrorOr[Boolean] = { - contextualizeFailure(validateBoolean(noAddress), NoAddressKey) + override protected def missingValueMessage: String = + s"Expecting $key runtime attribute to be either a whitespace separated String or an Array[String]" +} + +object DisksValidation extends RuntimeAttributesValidation[Seq[JesAttachedDisk]] { + override def key: String = JesRuntimeAttributes.DisksKey + + override def coercion: Traversable[WdlType] = Set(WdlStringType, WdlArrayType(WdlStringType)) + + override protected def validateValue: PartialFunction[WdlValue, ErrorOr[Seq[JesAttachedDisk]]] = { + case WdlString(value) => validateLocalDisks(value.split(",\\s*").toSeq) + case WdlArray(wdlType, values) if wdlType.memberType == WdlStringType => + validateLocalDisks(values.map(_.valueString)) } - private def validateBootDisk(diskSize: WdlValue): ErrorOr[Int] = diskSize match { - case x if WdlIntegerType.isCoerceableFrom(x.wdlType) => - WdlIntegerType.coerceRawValue(x) match { - case scala.util.Success(x: WdlInteger) => x.value.intValue.successNel - case scala.util.Success(unhandled) => s"Coercion was expected to create an Integer but instead got $unhandled".failureNel - case scala.util.Failure(t) => s"Expecting $BootDiskSizeKey runtime attribute to be an Integer".failureNel - } + private def validateLocalDisks(disks: Seq[String]): ErrorOr[Seq[JesAttachedDisk]] = { + val diskNels: Seq[ErrorOr[JesAttachedDisk]] = disks map validateLocalDisk + val sequenced: 
ErrorOr[Seq[JesAttachedDisk]] = sequenceNels(diskNels) + val defaulted: ErrorOr[Seq[JesAttachedDisk]] = addDefault(sequenced) + defaulted } - private def validateLocalDisks(value: WdlValue): ErrorOr[Seq[JesAttachedDisk]] = { - val nels = value match { - case WdlString(s) => s.split(",\\s*").toSeq.map(validateLocalDisk) - case WdlArray(wdlType, seq) if wdlType.memberType == WdlStringType => - seq.map(_.valueString).map(validateLocalDisk) - case _ => - Seq(s"Expecting $DisksKey runtime attribute to be a comma separated String or Array[String]".failureNel[JesAttachedDisk]) + private def validateLocalDisk(disk: String): ErrorOr[JesAttachedDisk] = { + JesAttachedDisk.parse(disk) match { + case scala.util.Success(attachedDisk) => attachedDisk.validNel + case scala.util.Failure(ex) => ex.getMessage.invalidNel } + } - val emptyDiskNel = Vector.empty[JesAttachedDisk].successNel[String] - val disksNel = nels.foldLeft(emptyDiskNel)((acc, v) => (acc |@| v) { (a, v) => a :+ v }) - - disksNel map { - case disks if disks.exists(_.name == JesWorkingDisk.Name) => disks - case disks => disks :+ JesAttachedDisk.parse(DisksDefaultValue).get + private def sequenceNels(nels: Seq[ErrorOr[JesAttachedDisk]]): ErrorOr[Seq[JesAttachedDisk]] = { + val emptyDiskNel = Vector.empty[JesAttachedDisk].validNel[String] + val disksNel: ErrorOr[Vector[JesAttachedDisk]] = nels.foldLeft(emptyDiskNel) { + (acc, v) => (acc |@| v) map { (a, v) => a :+ v } } + disksNel } - private def validateLocalDisk(disk: String): ErrorOr[JesAttachedDisk] = { - JesAttachedDisk.parse(disk) match { - case scala.util.Success(localDisk) => localDisk.successNel - case scala.util.Failure(ex) => ex.getMessage.failureNel + private def addDefault(disksNel: ErrorOr[Seq[JesAttachedDisk]]): ErrorOr[Seq[JesAttachedDisk]] = { + disksNel map { + case disks if disks.exists(_.name == JesWorkingDisk.Name) => disks + case disks => disks :+ JesWorkingDisk.Default } } + override protected def missingValueMessage: String = + s"Expecting $key 
runtime attribute to be a comma separated String or Array[String]" } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala index 9b39c869a..3d9d669ec 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala @@ -1,52 +1,69 @@ package cromwell.backend.impl.jes -import java.nio.file.Path - -import cromwell.backend.impl.jes.authentication.JesCredentials +import akka.actor.ActorSystem +import com.google.auth.Credentials +import com.typesafe.config.Config +import cromwell.backend.impl.jes.JesAsyncBackendJobExecutionActor.WorkflowOptionKeys +import cromwell.backend.io.WorkflowPaths import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} -import cromwell.core.WorkflowOptions.FinalCallLogsDir -import cromwell.filesystems.gcs.{GcsFileSystem, GcsFileSystemProvider, GoogleAuthMode} +import cromwell.core.WorkflowOptions +import cromwell.core.path.{Path, PathBuilder} +import cromwell.filesystems.gcs.{GcsPathBuilder, GcsPathBuilderFactory} -import scala.concurrent.ExecutionContext +import scala.language.postfixOps object JesWorkflowPaths { private val GcsRootOptionKey = "jes_gcs_root" private val AuthFilePathOptionKey = "auth_bucket" - - def apply(workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: JesCredentials)(implicit ec: ExecutionContext) = { - new JesWorkflowPaths(workflowDescriptor, jesConfiguration, credentials) - } } -class JesWorkflowPaths(workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: JesCredentials)(implicit ec: ExecutionContext) { +case class JesWorkflowPaths(workflowDescriptor: BackendWorkflowDescriptor, + gcsCredentials: Credentials, + genomicsCredentials: Credentials, + 
jesConfiguration: JesConfiguration)(implicit actorSystem: ActorSystem) extends WorkflowPaths { - private val gcsStorage = GoogleAuthMode.buildStorage(credentials.gcsCredential, jesConfiguration.googleConfig.applicationName) - val gcsFileSystemProvider: GcsFileSystemProvider = GcsFileSystemProvider(gcsStorage)(ec) - val gcsFileSystem = GcsFileSystem(gcsFileSystemProvider) + override lazy val executionRootString: String = + workflowDescriptor.workflowOptions.getOrElse(JesWorkflowPaths.GcsRootOptionKey, jesConfiguration.root) - val rootPath: Path = - gcsFileSystem.getPath(workflowDescriptor.workflowOptions.getOrElse(JesWorkflowPaths.GcsRootOptionKey, jesConfiguration.root)) + private val workflowOptions: WorkflowOptions = workflowDescriptor.workflowOptions - val workflowRootPath: Path = rootPath.resolve(workflowDescriptor.workflowNamespace.workflow.unqualifiedName) - .resolve(workflowDescriptor.id.toString) - - val finalCallLogsPath = workflowDescriptor.getWorkflowOption(FinalCallLogsDir) map { gcsFileSystem.getPath(_) } + private val gcsPathBuilder: GcsPathBuilder = jesConfiguration.gcsPathBuilderFactory.fromCredentials(workflowOptions, gcsCredentials) val gcsAuthFilePath: Path = { + // The default auth file bucket is always at the root of the root workflow + val defaultBucket = executionRoot.resolve(workflowDescriptor.rootWorkflow.unqualifiedName).resolve(workflowDescriptor.rootWorkflowId.toString) + val bucket = workflowDescriptor.workflowOptions.get(JesWorkflowPaths.AuthFilePathOptionKey) getOrElse defaultBucket.pathAsString + /* * This is an "exception". 
The filesystem used here is built from genomicsAuth * unlike everywhere else where the filesystem used is built from gcsFileSystemAuth */ - val genomicsStorage = GoogleAuthMode.buildStorage(credentials.genomicsCredential, jesConfiguration.googleConfig.applicationName) - val fileSystemWithGenomicsAuth = GcsFileSystem(GcsFileSystemProvider(genomicsStorage)(ec)) - val bucket = workflowDescriptor.workflowOptions.get(JesWorkflowPaths.AuthFilePathOptionKey) getOrElse workflowRootPath.toString + val pathBuilderWithGenomicsAuth = GcsPathBuilder.fromCredentials( + genomicsCredentials, + jesConfiguration.googleConfig.applicationName, + None, + GcsPathBuilderFactory.DefaultCloudStorageConfiguration, + workflowOptions + ) + + val authBucket = pathBuilderWithGenomicsAuth.build(bucket) recover { + case ex => throw new Exception(s"Invalid gcs auth_bucket path $bucket", ex) + } get - fileSystemWithGenomicsAuth.getPath(bucket).resolve(s"${workflowDescriptor.id}_auth.json") + authBucket.resolve(s"${workflowDescriptor.rootWorkflowId}_auth.json") } - def toJesCallPaths(jobKey: BackendJobDescriptorKey) = JesCallPaths(jobKey, workflowDescriptor, jesConfiguration, credentials)(ec) + val monitoringPath: Option[Path] = workflowOptions.get(WorkflowOptionKeys.MonitoringScript).toOption map { path => + // Fail here if the path exists but can't be built + getPath(path).get + } + + override def toJobPaths(workflowPaths: WorkflowPaths, jobKey: BackendJobDescriptorKey): JesJobPaths = { + new JesJobPaths(workflowPaths.asInstanceOf[JesWorkflowPaths], jobKey) + } + + override protected def withDescriptor(workflowDescriptor: BackendWorkflowDescriptor): WorkflowPaths = this.copy(workflowDescriptor = workflowDescriptor) + + override def config: Config = jesConfiguration.configurationDescriptor.backendConfig + override def pathBuilders: List[PathBuilder] = List(gcsPathBuilder) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/PreviousRetryReasons.scala 
b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/PreviousRetryReasons.scala new file mode 100644 index 000000000..1d3eba07e --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/PreviousRetryReasons.scala @@ -0,0 +1,45 @@ +package cromwell.backend.impl.jes + +import cromwell.services.keyvalue.KeyValueServiceActor._ +import lenthall.validation.ErrorOr.ErrorOr +import cats.syntax.validated._ +import cats.syntax.cartesian._ + +import scala.util.{Failure, Success, Try} +import JesBackendLifecycleActorFactory.preemptionCountKey +import JesBackendLifecycleActorFactory.unexpectedRetryCountKey + +case class PreviousRetryReasons(preempted: Int, unexpectedRetry: Int) + +object PreviousRetryReasons { + def tryApply(prefetchedKvEntries: Map[String, KvResponse], attemptNumber: Int): ErrorOr[PreviousRetryReasons] = { + val validatedPreemptionCount = validatedKvResponse(prefetchedKvEntries.get(preemptionCountKey), preemptionCountKey) + val validatedUnexpectedRetryCount = validatedKvResponse(prefetchedKvEntries.get(unexpectedRetryCountKey), unexpectedRetryCountKey) + + validatedPreemptionCount |@| validatedUnexpectedRetryCount map { case (p, uf) => PreviousRetryReasons(p, uf) } + } + + def apply(knownPreemptedCount: Int, knownUnexpectedRetryCount: Int, attempt: Int): PreviousRetryReasons = { + // If we have anything unaccounted for, we can top up the unexpected retry count. + // NB: 'attempt' is 1-indexed, so, magic number: + // NB2: for sanity's sake, I won't let this unaccounted for drop below 0, just in case... 
+ val unaccountedFor = Math.max(attempt - 1 - knownPreemptedCount - knownUnexpectedRetryCount, 0) + PreviousRetryReasons(knownPreemptedCount, knownUnexpectedRetryCount + unaccountedFor) + } + + private def validatedKvResponse(r: Option[KvResponse], fromKey: String): ErrorOr[Int] = r match { + case Some(KvPair(_, v)) => validatedIntOption(v, fromKey) + case Some(_: KvKeyLookupFailed) => 0.validNel + case Some(KvFailure(_, failure)) => s"Failed to get key $fromKey: ${failure.getMessage}".invalidNel + case Some(_: KvPutSuccess) => s"Programmer Error: Got a KvPutSuccess from a Get request...".invalidNel + case None => s"Programmer Error: Engine made no effort to prefetch $fromKey".invalidNel + } + + private def validatedIntOption(s: Option[String], fromKey: String): ErrorOr[Int] = validatedInt(s.getOrElse(""), fromKey) + private def validatedInt(s: String, fromKey: String): ErrorOr[Int] = { + Try(s.toInt) match { + case Success(i) => i.validNel + case Failure(_) => s"Unexpected value found in the KV store: $fromKey='$s'".invalidNel + } + } +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala index caf87e024..d3b014316 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala @@ -1,21 +1,13 @@ package cromwell.backend.impl.jes -import java.time.OffsetDateTime -import java.util.{ArrayList => JArrayList} - -import com.google.api.client.util.{ArrayMap => GArrayMap} import com.google.api.services.genomics.Genomics import com.google.api.services.genomics.model._ -import com.typesafe.config.ConfigFactory import cromwell.backend.BackendJobDescriptor -import cromwell.backend.impl.jes.RunStatus.{Failed, Initializing, Running, Success} -import cromwell.core.ExecutionEvent -import cromwell.core.logging.JobLogger +import cromwell.backend.standard.StandardAsyncJob 
+import cromwell.core.labels.Labels import org.slf4j.LoggerFactory import scala.collection.JavaConverters._ -import scala.concurrent.duration._ -import scala.language.postfixOps object Run { private val GenomicsScopes = List( @@ -23,140 +15,69 @@ object Run { "https://www.googleapis.com/auth/compute" ).asJava - private val JesServiceAccount = new ServiceAccount().setEmail("default").setScopes(GenomicsScopes) - private val AcceptableEvents = Set("start", "pulling-image", "localizing-files", "running-docker", "delocalizing-files", "ok", "fail", "start-shutdown", "preempted") - val NoAddressFieldName = "noAddress" val slf4jLogger = LoggerFactory.getLogger(Run.getClass) - def apply(runIdForResumption: Option[String], - jobDescriptor: BackendJobDescriptor, - runtimeAttributes: JesRuntimeAttributes, - callRootPath: String, - commandLine: String, - logFileName: String, - jesParameters: Seq[JesParameter], - projectId: String, - preemptible: Boolean, - genomicsInterface: Genomics): Run = { - val logger = new JobLogger("JesRun", jobDescriptor.workflowDescriptor.id, jobDescriptor.key.tag, None, Set(slf4jLogger)) + def makeRunPipelineRequest(jobDescriptor: BackendJobDescriptor, + runtimeAttributes: JesRuntimeAttributes, + dockerImage: String, + callRootPath: String, + commandLine: String, + logFileName: String, + jesParameters: Seq[JesParameter], + projectId: String, + computeServiceAccount: String, + labels: Labels, + preemptible: Boolean, + genomicsInterface: Genomics): RunPipelineRequest = { lazy val workflow = jobDescriptor.workflowDescriptor val pipelineInfoBuilder = if (preemptible) PreemptibleJesPipelineInfoBuilder else NonPreemptibleJesPipelineInfoBuilder - val pipelineInfo = pipelineInfoBuilder.build(commandLine, runtimeAttributes) + val pipelineInfo = pipelineInfoBuilder.build(commandLine, runtimeAttributes, dockerImage) val pipeline = new Pipeline() - .setProjectId(projectId) - .setDocker(pipelineInfo.docker) - .setResources(pipelineInfo.resources) - 
.setName(workflow.workflowNamespace.workflow.unqualifiedName) - .setInputParameters(jesParameters.collect({ case i: JesInput => i.toGooglePipelineParameter }).toVector.asJava) - .setOutputParameters(jesParameters.collect({ case i: JesFileOutput => i.toGooglePipelineParameter }).toVector.asJava) - - def runPipeline: String = { - val runtimeResources = new PipelineResources().set(NoAddressFieldName, runtimeAttributes.noAddress) - val rpargs = new RunPipelineArgs().setProjectId(projectId).setServiceAccount(JesServiceAccount).setResources(runtimeResources) - - rpargs.setInputs(jesParameters.collect({ case i: JesInput => i.name -> i.toGoogleRunParameter }).toMap.asJava) - logger.debug(s"Inputs:\n${stringifyMap(rpargs.getInputs.asScala.toMap)}") - - rpargs.setOutputs(jesParameters.collect({ case i: JesFileOutput => i.name -> i.toGoogleRunParameter }).toMap.asJava) - logger.debug(s"Outputs:\n${stringifyMap(rpargs.getOutputs.asScala.toMap)}") - - val rpr = new RunPipelineRequest().setEphemeralPipeline(pipeline).setPipelineArgs(rpargs) - - val logging = new LoggingOptions() - logging.setGcsPath(s"$callRootPath/$logFileName") - rpargs.setLogging(logging) - - val runId = genomicsInterface.pipelines().run(rpr).execute().getName - logger.info(s"JES Run ID is $runId") - runId - } - - // If runIdForResumption is defined use that, otherwise we'll create a new Run with an ephemeral pipeline. 
- val runId = runIdForResumption getOrElse runPipeline - if (runIdForResumption.isDefined) { logger.info(s"JES Run is resuming with Run ID: ${runIdForResumption.get.toString}") } - new Run(runId, genomicsInterface) - } - - private def stringifyMap(m: Map[String, String]): String = m map { case(k, v) => s" $k -> $v"} mkString "\n" - - implicit class RunOperationExtension(val operation: Operation) extends AnyVal { - def hasStarted = operation.getMetadata.asScala.get("startTime") isDefined - } -} - -case class Run(runId: String, genomicsInterface: Genomics) { - import Run._ - - def status(): RunStatus = { - val op = genomicsInterface.operations().get(runId).execute - if (op.getDone) { - val eventList = getEventList(op) - val ceInfo = op.getMetadata.get ("runtimeMetadata").asInstanceOf[GArrayMap[String,Object]].get("computeEngine").asInstanceOf[GArrayMap[String, String]] - val machineType = Option(ceInfo.get("machineType")) - val instanceName = Option(ceInfo.get("instanceName")) - val zone = Option(ceInfo.get("zone")) - - // If there's an error, generate a Failed status. Otherwise, we were successful! 
- Option(op.getError) match { - case None => Success(eventList, machineType, zone, instanceName) - case Some(error) => Failed(error.getCode, Option(error.getMessage), eventList, machineType, zone, instanceName) + .setProjectId(projectId) + .setDocker(pipelineInfo.docker) + .setResources(pipelineInfo.resources) + .setName(workflow.workflow.unqualifiedName) + .setInputParameters(jesParameters.collect({ case i: JesInput => i.toGooglePipelineParameter }).toVector.asJava) + .setOutputParameters(jesParameters.collect({ case i: JesFileOutput => i.toGooglePipelineParameter }).toVector.asJava) + + // disks cannot have mount points at runtime, so set them null + val runtimePipelineResources = { + val resources = pipelineInfoBuilder.build(commandLine, runtimeAttributes, dockerImage).resources + val disksWithoutMountPoint = resources.getDisks.asScala map { + _.setMountPoint(null) } - } else if (op.hasStarted) { - Running - } else { - Initializing + resources.setDisks(disksWithoutMountPoint.asJava) } - } - def getEventList(op: Operation): Seq[ExecutionEvent] = { - val metadata = op.getMetadata.asScala.toMap + val svcAccount = new ServiceAccount().setEmail(computeServiceAccount).setScopes(GenomicsScopes) + val rpargs = new RunPipelineArgs().setProjectId(projectId).setServiceAccount(svcAccount).setResources(runtimePipelineResources) - val starterEvents: Seq[ExecutionEvent] = Seq( - eventIfExists("createTime", metadata, "waiting for quota"), - eventIfExists("startTime", metadata, "initializing VM")).flatten + rpargs.setInputs(jesParameters.collect({ case i: JesInput => i.name -> i.toGoogleRunParameter }).toMap.asJava) + rpargs.setOutputs(jesParameters.collect({ case i: JesFileOutput => i.name -> i.toGoogleRunParameter }).toMap.asJava) - val eventsList = for { - events <- metadata.get("events").toSeq - entry <- events.asInstanceOf[JArrayList[GArrayMap[String, String]]].asScala - } yield ExecutionEvent(entry.get("description"), OffsetDateTime.parse(entry.get("startTime"))) + 
rpargs.setLabels(labels.asJavaMap) - val filteredEventsList: Seq[ExecutionEvent] = eventsList filter { i => AcceptableEvents.contains(i.name) } + val rpr = new RunPipelineRequest().setEphemeralPipeline(pipeline).setPipelineArgs(rpargs) - // A little bit ugly... the endTime of the jes operation can actually be before the final "event" time, due to differences - // in the reported precision. As a result, we have to make sure it all lines up nicely: - val finalEvent = getCromwellPollIntervalEvent(metadata, filteredEventsList) + val logging = new LoggingOptions() + logging.setGcsPath(s"$callRootPath/$logFileName") + rpargs.setLogging(logging) - starterEvents ++ filteredEventsList :+ finalEvent + rpr } +} - private def getCromwellPollIntervalEvent(metadata: Map[String, AnyRef], eventsList: Seq[ExecutionEvent]) = { - { - val jesReportedEndTime = eventIfExists("endTime", metadata, "cromwell poll interval") - val finalEventsListTime = if (eventsList.nonEmpty) Some(eventsList.last.offsetDateTime) else None - - (jesReportedEndTime, finalEventsListTime) match { - case (Some(jesEndTime), Some(finalEventTime)) => - if (jesEndTime.offsetDateTime isAfter finalEventTime) jesEndTime else jesEndTime.copy(offsetDateTime = finalEventTime) - case (Some(jesEndTime), None) => jesEndTime - case (None, Some(finalEventTime)) => ExecutionEvent("cromwell poll interval", finalEventTime) - case (None, None) => - throw new IllegalArgumentException("Both jesReportedEndTime and finalEventsListTime were None.") - } - } - } +final case class Run(job: StandardAsyncJob, genomicsInterface: Genomics) { - private def eventIfExists(name: String, metadata: Map[String, AnyRef], eventName: String): Option[ExecutionEvent] = { - metadata.get(name) map { - case time => ExecutionEvent(eventName, OffsetDateTime.parse(time.toString)) - } - } + def getOperationCommand = genomicsInterface.operations().get(job.jobId) def abort(): Unit = { val cancellationRequest: CancelOperationRequest = new CancelOperationRequest() 
- genomicsInterface.operations().cancel(runId, cancellationRequest).execute + genomicsInterface.operations().cancel(job.jobId, cancellationRequest).execute + () } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/RunStatus.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/RunStatus.scala index 7a2e31b77..7cbe16038 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/RunStatus.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/RunStatus.scala @@ -2,6 +2,8 @@ package cromwell.backend.impl.jes import cromwell.core.ExecutionEvent +import scala.util.Try + sealed trait RunStatus { import RunStatus._ @@ -23,13 +25,57 @@ object RunStatus { def instanceName: Option[String] } - case class Success(eventList: Seq[ExecutionEvent], machineType: Option[String], zone: Option[String], instanceName: Option[String]) extends TerminalRunStatus { + sealed trait UnsuccessfulRunStatus extends TerminalRunStatus { + val errorMessage: Option[String] + lazy val prettyPrintedError: String = errorMessage map { e => s" Message: $e" } getOrElse "" + val errorCode: Int + + /** + * If one exists, the JES error code (not the google RPC) (extracted from the error message) + */ + val jesCode: Option[Int] + } + + case class Success(eventList: Seq[ExecutionEvent], + machineType: Option[String], + zone: Option[String], + instanceName: Option[String]) extends TerminalRunStatus { override def toString = "Success" } - final case class Failed(errorCode: Int, errorMessage: Option[String], eventList: Seq[ExecutionEvent], machineType: Option[String], zone: Option[String], instanceName: Option[String]) - extends TerminalRunStatus { - // Don't want to include errorMessage or code in the snappy status toString: + object UnsuccessfulRunStatus { + def apply(errorCode: Int, + errorMessage: Option[String], + eventList: Seq[ExecutionEvent], + machineType: Option[String], + zone: Option[String], + instanceName: 
Option[String]): UnsuccessfulRunStatus = { + val jesCode: Option[Int] = errorMessage flatMap { em => Try(em.substring(0, em.indexOf(':')).toInt).toOption } + if (errorCode == JesAsyncBackendJobExecutionActor.GoogleAbortedRpc && jesCode.contains(JesAsyncBackendJobExecutionActor.JesPreemption)) { + Preempted(errorCode, jesCode, errorMessage, eventList, machineType, zone, instanceName) + } else { + Failed(errorCode, jesCode, errorMessage, eventList, machineType, zone, instanceName) + } + } + } + + final case class Failed(errorCode: Int, + jesCode: Option[Int], + errorMessage: Option[String], + eventList: Seq[ExecutionEvent], + machineType: Option[String], + zone: Option[String], + instanceName: Option[String]) extends UnsuccessfulRunStatus { override def toString = "Failed" } + + final case class Preempted(errorCode: Int, + jesCode: Option[Int], + errorMessage: Option[String], + eventList: Seq[ExecutionEvent], + machineType: Option[String], + zone: Option[String], + instanceName: Option[String]) extends UnsuccessfulRunStatus { + override def toString = "Preempted" + } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesAuths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesAuths.scala new file mode 100644 index 000000000..bb6b048c4 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesAuths.scala @@ -0,0 +1,5 @@ +package cromwell.backend.impl.jes.authentication + +import cromwell.filesystems.gcs.auth.GoogleAuthMode + +case class JesAuths(genomics: GoogleAuthMode, gcs: GoogleAuthMode) diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesCredentials.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesCredentials.scala deleted file mode 100644 index b4316fde3..000000000 --- 
a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesCredentials.scala +++ /dev/null @@ -1,5 +0,0 @@ -package cromwell.backend.impl.jes.authentication - -import com.google.api.client.auth.oauth2.Credential - -case class JesCredentials(genomicsCredential: Credential, gcsCredential: Credential) diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala index 67ddd1df3..0f6a259a3 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala @@ -1,14 +1,14 @@ package cromwell.backend.impl.jes.authentication import cromwell.core.DockerCredentials -import cromwell.filesystems.gcs.ClientSecrets +import cromwell.filesystems.gcs.auth.ClientSecrets import spray.json.{JsString, JsValue} /** - * Interface for Authentication information that can be included in the json file uploaded to GCS + * Interface for Authentication information that can be included as a json object in the file uploaded to GCS * upon workflow creation and used in the VM. */ -sealed trait JesAuthInformation { +sealed trait JesAuthObject { def context: String def map: Map[String, JsValue] @@ -18,7 +18,7 @@ sealed trait JesAuthInformation { /** * Authentication information for data (de)localization as the user. */ -case class GcsLocalizing(clientSecrets: ClientSecrets, token: String) extends JesAuthInformation { +case class GcsLocalizing(clientSecrets: ClientSecrets, token: String) extends JesAuthObject { override val context = "boto" override val map = Map( "client_id" -> JsString(clientSecrets.clientId), @@ -35,7 +35,7 @@ object JesDockerCredentials { /** * Authentication information to pull docker images as the user. 
*/ -class JesDockerCredentials(account: String, token: String) extends DockerCredentials(account, token) with JesAuthInformation { +class JesDockerCredentials(account: String, token: String) extends DockerCredentials(account, token) with JesAuthObject { override val context = "docker" override val map = Map( "account" -> JsString(account), diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendCacheHitCopyingActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendCacheHitCopyingActor.scala new file mode 100644 index 000000000..d7ded8fc1 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendCacheHitCopyingActor.scala @@ -0,0 +1,65 @@ +package cromwell.backend.impl.jes.callcaching + +import com.google.cloud.storage.contrib.nio.CloudStorageOptions +import cromwell.backend.BackendInitializationData +import cromwell.backend.impl.jes.JesBackendInitializationData +import cromwell.backend.io.JobPaths +import cromwell.backend.standard.callcaching.{StandardCacheHitCopyingActor, StandardCacheHitCopyingActorParams} +import cromwell.core.CallOutputs +import cromwell.core.io.{IoCommand, IoTouchCommand} +import cromwell.core.path.Path +import cromwell.core.simpleton.{WdlValueBuilder, WdlValueSimpleton} +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder +import lenthall.util.TryUtil +import wdl4s.wdl.values.WdlFile + +import scala.language.postfixOps +import scala.util.Try + +class JesBackendCacheHitCopyingActor(standardParams: StandardCacheHitCopyingActorParams) extends StandardCacheHitCopyingActor(standardParams) with GcsBatchCommandBuilder { + + private val cachingStrategy = BackendInitializationData + .as[JesBackendInitializationData](standardParams.backendInitializationDataOption) + .jesConfiguration.jesAttributes.duplicationStrategy + + override def processSimpletons(wdlValueSimpletons: Seq[WdlValueSimpleton], sourceCallRootPath: 
Path) = cachingStrategy match { + case CopyCachedOutputs => super.processSimpletons(wdlValueSimpletons, sourceCallRootPath) + case UseOriginalCachedOutputs => + val touchCommands: Seq[Try[IoTouchCommand]] = wdlValueSimpletons collect { + case WdlValueSimpleton(_, wdlFile: WdlFile) => getPath(wdlFile.value) map touchCommand + } + + TryUtil.sequence(touchCommands) map { + WdlValueBuilder.toJobOutputs(jobDescriptor.call.task.outputs, wdlValueSimpletons) -> _.toSet + } + } + + override def processDetritus(sourceJobDetritusFiles: Map[String, String]) = cachingStrategy match { + case CopyCachedOutputs => super.processDetritus(sourceJobDetritusFiles) + case UseOriginalCachedOutputs => + // apply getPath on each detritus string file + val detritusAsPaths = detritusFileKeys(sourceJobDetritusFiles).toSeq map { key => + key -> getPath(sourceJobDetritusFiles(key)) + } toMap + + // Don't forget to re-add the CallRootPathKey that has been filtered out by detritusFileKeys + TryUtil.sequenceMap(detritusAsPaths, "Failed to make paths out of job detritus") map { newDetritus => + (newDetritus + (JobPaths.CallRootPathKey -> destinationCallRootPath)) -> newDetritus.values.map(touchCommand).toSet + } + } + + override protected def additionalIoCommands(sourceCallRootPath: Path, + originalSimpletons: Seq[WdlValueSimpleton], + newOutputs: CallOutputs, + originalDetritus: Map[String, String], + newDetritus: Map[String, Path]): List[Set[IoCommand[_]]] = { + val content = + s""" + |This directory does not contain any output files because this job matched an identical job that was previously run, thus it was a cache-hit. + |Cromwell is configured to not copy outputs during call caching. To change this, edit the filesystems.gcs.caching.duplication-strategy field in your backend configuration. 
+ |The original outputs can be found at this location: ${sourceCallRootPath.pathAsString} + """.stripMargin + + List(Set(writeCommand(jobPaths.callExecutionRoot / "call_caching_placeholder.txt", content, Seq(CloudStorageOptions.withMimeType("text/plain"))))) + } +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala deleted file mode 100644 index 5c7cf4a8a..000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala +++ /dev/null @@ -1,21 +0,0 @@ -package cromwell.backend.impl.jes.callcaching - -import akka.event.LoggingAdapter -import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest -import cromwell.backend.impl.jes.JesBackendInitializationData - -import scala.util.{Failure, Try} - -private[jes] object JesBackendFileHashing { - def getCrc32c(singleFileHashRequest: SingleFileHashRequest, log: LoggingAdapter): Try[String] = { - def usingJesInitData(jesInitData: JesBackendInitializationData) = for { - path <- Try(jesInitData.workflowPaths.gcsFileSystem.getPath(singleFileHashRequest.file.valueString)) - crc32c <- Try(jesInitData.workflowPaths.gcsFileSystemProvider.crc32cHash(path)) - } yield crc32c - - singleFileHashRequest.initializationData match { - case Some(jesInitData: JesBackendInitializationData) => usingJesInitData(jesInitData) - case _ => Failure(new IllegalArgumentException("Need JesBackendInitializationData to generate a GCS CRC32C hash")) - } - } -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashingActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashingActor.scala new file mode 100644 index 000000000..af65a07d8 --- /dev/null +++ 
b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashingActor.scala @@ -0,0 +1,6 @@ +package cromwell.backend.impl.jes.callcaching + +import cromwell.backend.standard.callcaching.{StandardFileHashingActor, StandardFileHashingActorParams} +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder + +class JesBackendFileHashingActor(standardParams: StandardFileHashingActorParams) extends StandardFileHashingActor(standardParams) with GcsBatchCommandBuilder diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesCacheHitDuplicationStrategy.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesCacheHitDuplicationStrategy.scala new file mode 100644 index 000000000..9713e9a04 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesCacheHitDuplicationStrategy.scala @@ -0,0 +1,6 @@ +package cromwell.backend.impl.jes.callcaching + +sealed trait JesCacheHitDuplicationStrategy + +case object CopyCachedOutputs extends JesCacheHitDuplicationStrategy +case object UseOriginalCachedOutputs extends JesCacheHitDuplicationStrategy diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/errors/JesKnownJobFailure.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/errors/JesKnownJobFailure.scala new file mode 100644 index 000000000..dca8b2595 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/errors/JesKnownJobFailure.scala @@ -0,0 +1,24 @@ +package cromwell.backend.impl.jes.errors + +import cromwell.backend.async.KnownJobFailureException +import cromwell.core.path.Path + +sealed trait JesKnownJobFailure extends KnownJobFailureException + +case class FailedToDelocalizeFailure(message: String, jobTag: String, stderrPath: Option[Path]) + extends JesKnownJobFailure { + lazy val stderrMessage = stderrPath map { p => + s"3) Look into the stderr (${p.pathAsString}) file 
for evidence that some of the output files the command is expected to create were not created." + } getOrElse "" + + lazy val missingFilesMessage = if (message.contains("No URLs matched")) { + s"""It appears that some of the expected output files for task $jobTag did not exist when the command exited. + |A few things to try + |1) Check that the output section in your WDL is correct. Remember that all output files declared in a task must exist when the command exits. + |2) Check that the return code is available and is valid with respect to your command expected exit code + |$stderrMessage + """.stripMargin + } else "" + + override def getMessage = missingFilesMessage + message +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/JesAttachedDisk.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/JesAttachedDisk.scala index 32b558b53..d4d1f2397 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/JesAttachedDisk.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/JesAttachedDisk.scala @@ -1,60 +1,62 @@ package cromwell.backend.impl.jes.io -import java.nio.file.{Path, Paths} - +import cats.data.Validated._ +import cats.syntax.cartesian._ +import cats.syntax.validated._ import com.google.api.services.genomics.model.Disk -import cromwell.core.ErrorOr -import wdl4s.ExceptionWithErrors -import wdl4s.values._ +import cromwell.core.path.{DefaultPathBuilder, Path} +import lenthall.exception.MessageAggregation +import lenthall.validation.ErrorOr._ +import wdl4s.wdl.values._ import scala.util.Try -import scalaz.Scalaz._ -import scalaz._ +import scala.util.matching.Regex + object JesAttachedDisk { val Identifier = "[a-zA-Z0-9-_]+" val Directory = """/[^\s]+""" val Integer = "[1-9][0-9]*" - val WorkingDiskPattern = s"""${JesWorkingDisk.Name}\\s+($Integer)\\s+($Identifier)""".r - val MountedDiskPattern = s"""($Directory)\\s+($Integer)\\s+($Identifier)""".r + val 
WorkingDiskPattern: Regex = s"""${JesWorkingDisk.Name}\\s+($Integer)\\s+($Identifier)""".r + val MountedDiskPattern: Regex = s"""($Directory)\\s+($Integer)\\s+($Identifier)""".r def parse(s: String): Try[JesAttachedDisk] = { - val validation = s match { + val validation: ErrorOr[JesAttachedDisk] = s match { case WorkingDiskPattern(sizeGb, diskType) => - (validateLong(sizeGb) |@| validateDiskType(diskType)) { (s, dt) => + (validateLong(sizeGb) |@| validateDiskType(diskType)) map { (s, dt) => JesWorkingDisk(dt, s.toInt) } case MountedDiskPattern(mountPoint, sizeGb, diskType) => - (validateLong(sizeGb) |@| validateDiskType(diskType)) { (s, dt) => - JesEmptyMountedDisk(dt, s.toInt, Paths.get(mountPoint)) + (validateLong(sizeGb) |@| validateDiskType(diskType)) map { (s, dt) => + JesEmptyMountedDisk(dt, s.toInt, DefaultPathBuilder.get(mountPoint)) } - case _ => s"Disk strings should be of the format 'local-disk SIZE TYPE' or '/mount/point SIZE TYPE'".failureNel + case _ => s"Disk strings should be of the format 'local-disk SIZE TYPE' or '/mount/point SIZE TYPE'".invalidNel } Try(validation match { - case Success(localDisk) => localDisk - case Failure(nels) => - throw new UnsupportedOperationException with ExceptionWithErrors { - val message = "" - val errors = nels + case Valid(localDisk) => localDisk + case Invalid(nels) => + throw new UnsupportedOperationException with MessageAggregation { + val exceptionContext = "" + val errorMessages: List[String] = nels.toList } }) } private def validateDiskType(diskTypeName: String): ErrorOr[DiskType] = { DiskType.values().find(_.diskTypeName == diskTypeName) match { - case Some(diskType) => diskType.successNel[String] + case Some(diskType) => diskType.validNel case None => val diskTypeNames = DiskType.values.map(_.diskTypeName).mkString(", ") - s"Disk TYPE $diskTypeName should be one of $diskTypeNames".failureNel + s"Disk TYPE $diskTypeName should be one of $diskTypeNames".invalidNel } } private def validateLong(value: String): 
ErrorOr[Long] = { try { - value.toLong.successNel + value.toLong.validNel } catch { - case _: IllegalArgumentException => s"$value not convertible to a Long".failureNel[Long] + case _: IllegalArgumentException => s"$value not convertible to a Long".invalidNel } } } @@ -69,18 +71,19 @@ trait JesAttachedDisk { .setType(diskType.googleTypeName) .setAutoDelete(true) .setSizeGb(sizeGb) - .setMountPoint(mountPoint.toAbsolutePath.toString) + .setMountPoint(mountPoint.toAbsolutePath.pathAsString) } } case class JesEmptyMountedDisk(diskType: DiskType, sizeGb: Int, mountPoint: Path) extends JesAttachedDisk { - val name = s"d-${mountPoint.toString.md5Sum}" + val name = s"d-${mountPoint.pathAsString.md5Sum}" override def toString: String = s"$mountPoint $sizeGb ${diskType.diskTypeName}" } object JesWorkingDisk { - val MountPoint = Paths.get("/cromwell_root") + val MountPoint: Path = DefaultPathBuilder.get("/cromwell_root") val Name = "local-disk" + val Default = JesWorkingDisk(DiskType.SSD, 10) } case class JesWorkingDisk(diskType: DiskType, sizeGb: Int) extends JesAttachedDisk { diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala index a4a2b0ba8..592add17c 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala @@ -1,23 +1,18 @@ package cromwell.backend.impl.jes -import java.nio.file.{Files, Path} - import com.google.api.client.http.HttpResponseException -import cromwell.backend.BackendWorkflowDescriptor -import cromwell.backend.impl.jes.JesImplicits.GoogleAuthWorkflowOptions -import cromwell.filesystems.gcs._ +import com.google.cloud.storage.contrib.nio.CloudStorageOptions +import cromwell.core.path.Path package object io { implicit class PathEnhanced(val path: Path) extends AnyVal { - import better.files._ - def hash = path match { 
- case gcs: NioGcsPath => gcs.getFileSystem.provider().asInstanceOf[GcsFileSystemProvider].crc32cHash(gcs) - case _ => File(path).md5 + def writeAsJson(content: String): Path = { + path.writeBytes(content.getBytes.toIterator)(Seq(CloudStorageOptions.withMimeType("application/json"))) } - def writeAsJson(content: String): File = { - Files.write(path, content.getBytes, ContentTypeOption.Json) + def writeAsText(content: String): Path = { + path.writeBytes(content.getBytes.toIterator)(Seq(CloudStorageOptions.withMimeType("text/plain"))) } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManager.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManager.scala new file mode 100644 index 000000000..dd65d8225 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManager.scala @@ -0,0 +1,183 @@ +package cromwell.backend.impl.jes.statuspolling + +import java.io.IOException + +import akka.actor.{Actor, ActorLogging, ActorRef, Props, Terminated} +import cats.data.NonEmptyList +import com.google.api.client.googleapis.json.GoogleJsonError +import com.google.api.client.http.HttpHeaders +import com.google.api.services.genomics.Genomics +import com.google.api.services.genomics.model.RunPipelineRequest +import cromwell.backend.impl.jes.Run +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager._ +import cromwell.core.CromwellFatalExceptionMarker +import cromwell.core.Dispatcher.BackendDispatcher +import cromwell.core.retry.{Backoff, SimpleExponentialBackoff} +import cromwell.util.StopAndLogSupervisor +import eu.timepit.refined.api.Refined +import eu.timepit.refined.numeric._ + +import scala.concurrent.duration._ +import scala.collection.immutable.Queue + +/** + * Holds a set of JES API requests until a JesQueryActor pulls the work. 
+ */ +class JesApiQueryManager(val qps: Int Refined Positive) extends Actor with ActorLogging with StopAndLogSupervisor { + + private implicit val ec = context.dispatcher + private val maxRetries = 10 + + // workQueue is protected for the unit tests, not intended to be generally overridden + protected[statuspolling] var workQueue: Queue[JesApiQuery] = Queue.empty + private var workInProgress: Map[ActorRef, JesPollingWorkBatch] = Map.empty + + private def statusPollerProps = JesPollingActor.props(self, qps) + + // statusPoller is protected for the unit tests, not intended to be generally overridden + protected[statuspolling] var statusPoller: ActorRef = _ + + resetWorker() + + override def receive = { + case DoPoll(run) => workQueue :+= JesStatusPollQuery(sender, run) + case DoCreateRun(genomics, rpr) => + workQueue :+= JesRunCreationQuery(sender, genomics, rpr) + case q: JesApiQuery => workQueue :+= q + case RequestJesPollingWork(maxBatchSize) => + log.debug("Request for JES Polling Work received (max batch: {}, current queue size is {})", maxBatchSize, workQueue.size) + handleJesPollingRequest(sender, maxBatchSize) + case failure: JesApiQueryFailed => handleQueryFailure(failure) + case Terminated(actorRef) => handleTerminated(actorRef) + case other => log.error(s"Unexpected message to JesPollingManager: $other") + } + + private def handleQueryFailure(failure: JesApiQueryFailed) = if (failure.query.failedAttempts < maxRetries) { + val nextRequest = failure.query.withFailedAttempt + val delay = nextRequest.backoff.backoffMillis.millis + context.system.scheduler.scheduleOnce(delay, self, nextRequest) + () + } else { + failure.query.requester ! 
failure + } + + private def handleJesPollingRequest(workPullingJesPollingActor: ActorRef, maxBatchSize: Int) = { + workInProgress -= workPullingJesPollingActor + val beheaded = beheadWorkQueue(maxBatchSize) + beheaded.workToDo match { + case Some(work) => + log.debug("Sending work to JES API query manager.") + val workBatch = JesPollingWorkBatch(work) + workPullingJesPollingActor ! workBatch + workInProgress += (workPullingJesPollingActor -> workBatch) + case None => + log.debug("No work for JES poller") + workPullingJesPollingActor ! NoWorkToDo + } + + workQueue = beheaded.newWorkQueue + } + + // Intentionally not final, this runs afoul of SI-4440 (I believe) + private case class BeheadedWorkQueue(workToDo: Option[NonEmptyList[JesApiQuery]], newWorkQueue: Queue[JesApiQuery]) + private def beheadWorkQueue(maxBatchSize: Int): BeheadedWorkQueue = { + + val head = workQueue.take(maxBatchSize).toList + val tail = workQueue.drop(maxBatchSize) + + head match { + case h :: t => BeheadedWorkQueue(Option(NonEmptyList(h, t)), tail) + case Nil => BeheadedWorkQueue(None, Queue.empty) + } + } + + private def handleTerminated(terminee: ActorRef) = { + val cause = getFailureCause(terminee).getOrElse(new RuntimeException("No failure reason recorded")) + // We assume this is a polling actor. Might change in a future update: + workInProgress.get(terminee) match { + case Some(work) => + // Most likely due to an unexpected HTTP error, push the work back on the queue and keep going + log.error(s"The JES API worker actor $terminee unexpectedly terminated while conducting ${work.workBatch.tail.size + 1} polls. Making a new one...") + workInProgress -= terminee + work.workBatch.toList.foreach { + case statusQuery: JesStatusPollQuery => + self ! JesApiStatusQueryFailed(statusQuery, new JesApiException(cause)) + case runCreationQuery: JesRunCreationQuery => + self ! 
JesApiRunCreationQueryFailed(runCreationQuery, new JesApiException(cause)) + } + case None => + // It managed to die while doing absolutely nothing...!? + // Maybe it deserves an entry in https://en.wikipedia.org/wiki/List_of_unusual_deaths + // Oh well, in the mean time don't do anything, just start a new one + log.error(cause, s"The JES API worker actor managed to unexpectedly terminate whilst doing absolutely nothing (${cause.getMessage}). This is probably a programming error. Making a new one...") + } + + resetWorker() + } + + private def resetWorker() = { + statusPoller = makeWorkerActor() + context.watch(statusPoller) + log.info(s"watching $statusPoller") + } + + // Separate method to allow overriding in tests: + private[statuspolling] def makeWorkerActor(): ActorRef = context.actorOf(statusPollerProps) +} + +object JesApiQueryManager { + + def props(qps: Int Refined Positive): Props = Props(new JesApiQueryManager(qps)).withDispatcher(BackendDispatcher) + + sealed trait JesApiQueryManagerRequest + + /** + * Poll the job represented by the Run. + */ + final case class DoPoll(run: Run) extends JesApiQueryManagerRequest + + /** + * Create an ephemeral pipeline and run it in JES. + */ + final case class DoCreateRun(genomicsInterface: Genomics, rpr: RunPipelineRequest) extends JesApiQueryManagerRequest + + private[statuspolling] trait JesApiQuery { + val failedAttempts: Int + def requester: ActorRef + def genomicsInterface: Genomics + def withFailedAttempt: JesApiQuery + def backoff: Backoff + } + private object JesApiQuery { + // This must be a def, we want a new one each time (they're mutable! Boo!) 
+ def backoff: Backoff = SimpleExponentialBackoff(1.second, 1000.seconds, 1.5d) + } + private[statuspolling] final case class JesStatusPollQuery(requester: ActorRef, run: Run, failedAttempts: Int = 0, backoff: Backoff = JesApiQuery.backoff) extends JesApiQuery { + override val genomicsInterface = run.genomicsInterface + override def withFailedAttempt = this.copy(failedAttempts = failedAttempts + 1, backoff = backoff.next) + } + private[statuspolling] final case class JesRunCreationQuery(requester: ActorRef, genomicsInterface: Genomics, rpr: RunPipelineRequest, failedAttempts: Int = 0, backoff: Backoff = JesApiQuery.backoff) extends JesApiQuery { + override def withFailedAttempt = this.copy(failedAttempts = failedAttempts + 1, backoff = backoff.next) + } + + trait JesApiQueryFailed { + val query: JesApiQuery + val cause: JesApiException + } + + final case class JesApiStatusQueryFailed(query: JesApiQuery, cause: JesApiException) extends JesApiQueryFailed + final case class JesApiRunCreationQueryFailed(query: JesApiQuery, cause: JesApiException) extends JesApiQueryFailed + + private[statuspolling] final case class JesPollingWorkBatch(workBatch: NonEmptyList[JesApiQuery]) + private[statuspolling] case object NoWorkToDo + + private[statuspolling] final case class RequestJesPollingWork(maxBatchSize: Int) + + final case class GoogleJsonException(e: GoogleJsonError, responseHeaders: HttpHeaders) extends IOException with CromwellFatalExceptionMarker { + override def getMessage: String = e.getMessage + } + + final class JesApiException(e: Throwable) extends RuntimeException(e) with CromwellFatalExceptionMarker { + override def getMessage: String = "Unable to complete JES Api Request" + } +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActor.scala new file mode 100644 index 000000000..0d953d251 --- /dev/null +++ 
b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActor.scala @@ -0,0 +1,116 @@ +package cromwell.backend.impl.jes.statuspolling + +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import cats.data.NonEmptyList +import com.google.api.client.googleapis.batch.BatchRequest +import com.google.api.client.googleapis.json.GoogleJsonError +import com.google.api.services.genomics.Genomics +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager._ +import cromwell.backend.impl.jes.statuspolling.JesPollingActor._ +import cromwell.core.Dispatcher.BackendDispatcher +import eu.timepit.refined.api.Refined +import eu.timepit.refined.numeric._ + +import scala.concurrent.{ExecutionContext, Future} +import scala.util.{Failure, Success, Try} +import scala.concurrent.duration._ +import scala.collection.JavaConverters._ + +/** + * Sends batched requests to JES as a worker to the JesApiQueryManager + */ +class JesPollingActor(val pollingManager: ActorRef, val qps: Int Refined Positive) extends Actor with ActorLogging + with StatusPolling with RunCreation { + // The interval to delay between submitting each batch + lazy val batchInterval = determineBatchInterval(qps) + log.debug("JES batch polling interval is {}", batchInterval) + + self ! NoWorkToDo // Starts the check-for-work cycle when the actor is fully initialized. 
+ + implicit val ec: ExecutionContext = context.dispatcher + + override def receive = { + + case JesPollingWorkBatch(workBatch) => + log.debug(s"Got a polling batch with ${workBatch.tail.size + 1} requests.") + handleBatch(workBatch).andThen(interstitialRecombobulation) + () + case NoWorkToDo => + scheduleCheckForWork() + } + + private def handleBatch(workBatch: NonEmptyList[JesApiQuery]): Future[List[Try[Unit]]] = { + // Assume that the auth for the first element is also good enough for everything else: + val batch: BatchRequest = createBatch(workBatch.head.genomicsInterface) + + // Create the batch: + // WARNING: These call change 'batch' as a side effect. Things might go awry if map runs items in parallel? + val batchFutures = workBatch map { + case pollingRequest: JesStatusPollQuery => enqueueStatusPollInBatch(pollingRequest, batch) + case runCreationRequest: JesRunCreationQuery => enqueueRunCreationInBatch(runCreationRequest, batch) + + // We do the "successful Failure" thing so that the Future.sequence doesn't short-out immediately when the first one fails. + case other => Future.successful(Failure(new RuntimeException(s"Cannot handle ${other.getClass.getSimpleName} requests"))) + } + + // Execute the batch and return the map: + runBatch(batch) + Future.sequence(batchFutures.toList) + } + + // These are separate functions so that the tests can hook in and replace the JES-side stuff + private[statuspolling] def createBatch(genomicsInterface: Genomics): BatchRequest = genomicsInterface.batch() + private[statuspolling] def runBatch(batch: BatchRequest) = batch.execute() + + // TODO: FSMify this actor? 
+ private def interstitialRecombobulation: PartialFunction[Try[List[Try[Unit]]], Unit] = { + case Success(allSuccesses) if allSuccesses.forall(_.isSuccess) => + log.debug(s"All status polls completed successfully.") + scheduleCheckForWork() + case Success(someFailures) => + val errors = someFailures collect { case Failure(t) => t.getMessage } + if (log.isDebugEnabled) { + log.warning("{} failures (from {} requests) fetching JES statuses", errors.size, someFailures.size) + } else { + log.warning("{} failures (from {} requests) fetching JES statuses: {}", errors.size, someFailures.size, errors.mkString(", ")) + } + scheduleCheckForWork() + case Failure(t) => + // NB: Should be impossible since we only ever do completionPromise.trySuccess() + val msg = "Programmer Error: Completion promise unexpectedly set to Failure: {}. Don't do this, otherwise the Future.sequence is short-circuited on the first failure" + log.error(msg, t.getMessage) + scheduleCheckForWork() + } + + /** + * Schedules a check for work. + * Warning: Only use this from inside a receive method. + */ + private def scheduleCheckForWork(): Unit = { + context.system.scheduler.scheduleOnce(batchInterval) { pollingManager ! JesApiQueryManager.RequestJesPollingWork(MaxBatchSize) } + () + } + + private[statuspolling] def mkErrorString(e: GoogleJsonError) = e.getErrors.asScala.toList.mkString(", ") +} + +object JesPollingActor { + def props(pollingManager: ActorRef, qps: Int Refined Positive) = Props(new JesPollingActor(pollingManager, qps)).withDispatcher(BackendDispatcher) + + // The Batch API limits us to 100 at a time + val MaxBatchSize = 100 + + /** + * Given the Genomics API queries per 100 seconds and given MaxBatchSize will determine a batch interval which + * is at 90% of the quota. The (still crude) delta is to provide some room at the edges for things like new + * calls, etc. 
+ * + * Forcing the minimum value to be 1 second, for now it seems unlikely to matter and it makes testing a bit + * easier + */ + def determineBatchInterval(qps: Int Refined Positive): FiniteDuration = { + val maxInterval = MaxBatchSize / qps.value + val interval = Math.max(maxInterval / 0.9, 1).toInt + interval.seconds + } +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesRunCreationClient.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesRunCreationClient.scala new file mode 100644 index 000000000..89785a74c --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesRunCreationClient.scala @@ -0,0 +1,46 @@ +package cromwell.backend.impl.jes.statuspolling + +import akka.actor.{Actor, ActorLogging, ActorRef} +import com.google.api.services.genomics.Genomics +import com.google.api.services.genomics.model.RunPipelineRequest +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.{JesApiRunCreationQueryFailed} +import cromwell.backend.standard.StandardAsyncJob + +import scala.concurrent.{Future, Promise} +import scala.util.{Failure, Success, Try} + +/** + * I'm putting this stuff in a mixin to avoid polluting the main class. + * + * Be sure to make the main class's receive look like: + * override def receive = runCreationClientReceive orElse { ... 
} + */ +trait JesRunCreationClient { this: Actor with ActorLogging => + + private var runCreationClientPromise: Option[Promise[StandardAsyncJob]] = None + + val pollingActor: ActorRef + + def runCreationClientReceive: Actor.Receive = { + case job: StandardAsyncJob => + completePromise(Success(job)) + case JesApiRunCreationQueryFailed(_, e) => completePromise(Failure(e)) + } + + private def completePromise(job: Try[StandardAsyncJob]) = { + runCreationClientPromise foreach { _.complete(job) } + runCreationClientPromise = None + } + + def runPipeline(genomicsInterface: Genomics, rpr: RunPipelineRequest): Future[StandardAsyncJob] = { + runCreationClientPromise match { + case Some(p) => + p.future + case None => + pollingActor ! JesApiQueryManager.DoCreateRun(genomicsInterface, rpr) + val newPromise = Promise[StandardAsyncJob]() + runCreationClientPromise = Option(newPromise) + newPromise.future + } + } +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesStatusRequestClient.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesStatusRequestClient.scala new file mode 100644 index 000000000..3ddff56ea --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesStatusRequestClient.scala @@ -0,0 +1,46 @@ +package cromwell.backend.impl.jes.statuspolling + +import akka.actor.{Actor, ActorLogging, ActorRef} +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.{JesApiStatusQueryFailed} +import cromwell.backend.impl.jes.{Run, RunStatus} + +import scala.concurrent.{Future, Promise} +import scala.util.{Failure, Success, Try} + +/** + * I'm putting this stuff in a mixin to avoid polluting the main class. + * + * Be sure to make the main class's receive look like: + * override def receive = pollingActorClientReceive orElse { ... 
} + */ +trait JesStatusRequestClient { this: Actor with ActorLogging => + + private var pollingActorClientPromise: Option[Promise[RunStatus]] = None + + val pollingActor: ActorRef + + def pollingActorClientReceive: Actor.Receive = { + case r: RunStatus => + log.debug(s"Polled status received: $r") + completePromise(Success(r)) + case JesApiStatusQueryFailed(_, e) => + log.debug("JES poll failed!") + completePromise(Failure(e)) + } + + private def completePromise(runStatus: Try[RunStatus]) = { + pollingActorClientPromise foreach { _.complete(runStatus) } + pollingActorClientPromise = None + } + + def pollStatus(run: Run): Future[RunStatus] = { + pollingActorClientPromise match { + case Some(p) => p.future + case None => + pollingActor ! JesApiQueryManager.DoPoll(run) + val newPromise = Promise[RunStatus]() + pollingActorClientPromise = Option(newPromise) + newPromise.future + } + } +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/RunCreation.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/RunCreation.scala new file mode 100644 index 000000000..54759590b --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/RunCreation.scala @@ -0,0 +1,42 @@ +package cromwell.backend.impl.jes.statuspolling + +import com.google.api.client.googleapis.batch.BatchRequest +import com.google.api.client.googleapis.batch.json.JsonBatchCallback +import com.google.api.client.googleapis.json.GoogleJsonError +import com.google.api.client.http.HttpHeaders +import com.google.api.services.genomics.Genomics +import com.google.api.services.genomics.model.{Operation, RunPipelineRequest} +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager._ +import cromwell.backend.standard.StandardAsyncJob + +import scala.concurrent.{Future, Promise} +import scala.util.{Failure, Success, Try} + +private[statuspolling] trait RunCreation { this: JesPollingActor => + + private def 
runCreationResultHandler(originalRequest: JesApiQuery, completionPromise: Promise[Try[Unit]]) = new JsonBatchCallback[Operation] { + override def onSuccess(operation: Operation, responseHeaders: HttpHeaders): Unit = { + originalRequest.requester ! getJob(operation) + completionPromise.trySuccess(Success(())) + () + } + + override def onFailure(e: GoogleJsonError, responseHeaders: HttpHeaders): Unit = { + pollingManager ! JesApiRunCreationQueryFailed(originalRequest, new JesApiException(GoogleJsonException(e, responseHeaders))) + completionPromise.trySuccess(Failure(new Exception(mkErrorString(e)))) + () + } + } + + def enqueueRunCreationInBatch(runCreationQuery: JesRunCreationQuery, batch: BatchRequest): Future[Try[Unit]] = { + val completionPromise = Promise[Try[Unit]]() + val resultHandler = runCreationResultHandler(runCreationQuery, completionPromise) + addRunCreationToBatch(runCreationQuery.rpr, runCreationQuery.genomicsInterface, batch, resultHandler) + completionPromise.future + } + + private def addRunCreationToBatch(rpr: RunPipelineRequest, genomicsInterface: Genomics, batch: BatchRequest, resultHandler: JsonBatchCallback[Operation]) = + genomicsInterface.pipelines().run(rpr).queue(batch, resultHandler) + + private def getJob(operation: Operation) = StandardAsyncJob(operation.getName) +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/StatusPolling.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/StatusPolling.scala new file mode 100644 index 000000000..80291334f --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/StatusPolling.scala @@ -0,0 +1,129 @@ +package cromwell.backend.impl.jes.statuspolling + +import java.time.OffsetDateTime +import java.util.{ArrayList => JArrayList} + +import com.google.api.client.util.{ArrayMap => GArrayMap} +import com.google.api.client.googleapis.batch.BatchRequest +import 
com.google.api.client.googleapis.batch.json.JsonBatchCallback +import com.google.api.client.googleapis.json.GoogleJsonError +import com.google.api.client.http.HttpHeaders +import com.google.api.services.genomics.model.Operation +import cromwell.backend.impl.jes.RunStatus._ +import cromwell.backend.impl.jes.{Run, RunStatus} +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager._ +import cromwell.core.ExecutionEvent + +import scala.language.postfixOps +import scala.collection.JavaConverters._ +import scala.concurrent.{Future, Promise} +import scala.util.{Failure, Try, Success => TrySuccess} + +private[statuspolling] trait StatusPolling { this: JesPollingActor => + + private def statusPollResultHandler(originalRequest: JesStatusPollQuery, completionPromise: Promise[Try[Unit]]) = new JsonBatchCallback[Operation] { + override def onSuccess(operation: Operation, responseHeaders: HttpHeaders): Unit = { + originalRequest.requester ! interpretOperationStatus(operation) + completionPromise.trySuccess(TrySuccess(())) + () + } + + override def onFailure(e: GoogleJsonError, responseHeaders: HttpHeaders): Unit = { + pollingManager ! 
JesApiStatusQueryFailed(originalRequest, new JesApiException(GoogleJsonException(e, responseHeaders))) + completionPromise.trySuccess(Failure(new Exception(mkErrorString(e)))) + () + } + } + + def enqueueStatusPollInBatch(pollingRequest: JesStatusPollQuery, batch: BatchRequest): Future[Try[Unit]] = { + val completionPromise = Promise[Try[Unit]]() + val resultHandler = statusPollResultHandler(pollingRequest, completionPromise) + addStatusPollToBatch(pollingRequest.run, batch, resultHandler) + completionPromise.future + } + + // Done so that the specs can override this behavior + def interpretOperationStatus(op: Operation) = StatusPolling.interpretOperationStatus(op) + + def addStatusPollToBatch(run: Run, batch: BatchRequest, resultHandler: JsonBatchCallback[Operation]) = + run.getOperationCommand.queue(batch, resultHandler) +} + +private[statuspolling] object StatusPolling { + + private val AcceptableEvents = Set("start", "pulling-image", "localizing-files", "running-docker", "delocalizing-files", "ok", "fail", "start-shutdown", "preempted") + + implicit class RunOperationExtension(val operation: Operation) extends AnyVal { + def hasStarted = operation.getMetadata.asScala.get("startTime") isDefined + } + + def interpretOperationStatus(op: Operation): RunStatus = { + require(op != null, "Operation must not be null.") + try { + if (op.getDone) { + lazy val eventList = getEventList(op) + lazy val computeEngineOption = for { + runtimeMetadata <- op.getMetadata.asScala.get("runtimeMetadata") + computeEngine <- runtimeMetadata.asInstanceOf[GArrayMap[String, Object]].asScala.get("computeEngine") + } yield computeEngine.asInstanceOf[GArrayMap[String, String]].asScala + lazy val machineType = computeEngineOption.flatMap(_.get("machineType")) + lazy val instanceName = computeEngineOption.flatMap(_.get("instanceName")) + lazy val zone = computeEngineOption.flatMap(_.get("zone")) + + // If there's an error, generate an unsuccessful status. Otherwise, we were successful! 
+ Option(op.getError) match { + case Some(error) => UnsuccessfulRunStatus(error.getCode, Option(error.getMessage), eventList, machineType, zone, instanceName) + case None => Success(eventList, machineType, zone, instanceName) + } + } else if (op.hasStarted) { + Running + } else { + Initializing + } + } catch { + case npe: NullPointerException => + throw new RuntimeException(s"Caught NPE while processing operation ${op.getName}: $op", npe) + } + } + + def getEventList(op: Operation): Seq[ExecutionEvent] = { + val metadata = op.getMetadata.asScala.toMap + + val starterEvents: Seq[ExecutionEvent] = Seq( + eventIfExists("createTime", metadata, "waiting for quota"), + eventIfExists("startTime", metadata, "initializing VM")).flatten + + val eventsList = for { + events <- metadata.get("events").toSeq + entry <- events.asInstanceOf[JArrayList[GArrayMap[String, String]]].asScala + } yield ExecutionEvent(entry.get("description"), OffsetDateTime.parse(entry.get("startTime"))) + + val filteredEventsList: Seq[ExecutionEvent] = eventsList filter { i => AcceptableEvents.contains(i.name) } + + // A little bit ugly... the endTime of the jes operation can actually be before the final "event" time, due to differences + // in the reported precision. 
As a result, we have to make sure it all lines up nicely: + val finalEvent = getCromwellPollIntervalEvent(metadata, filteredEventsList) + + starterEvents ++ filteredEventsList :+ finalEvent + } + + private def getCromwellPollIntervalEvent(metadata: Map[String, AnyRef], eventsList: Seq[ExecutionEvent]) = { + { + val jesReportedEndTime = eventIfExists("endTime", metadata, "cromwell poll interval") + val finalEventsListTime = if (eventsList.nonEmpty) Some(eventsList.last.offsetDateTime) else None + + (jesReportedEndTime, finalEventsListTime) match { + case (Some(jesEndTime), Some(finalEventTime)) => + if (jesEndTime.offsetDateTime isAfter finalEventTime) jesEndTime else jesEndTime.copy(offsetDateTime = finalEventTime) + case (Some(jesEndTime), None) => jesEndTime + case (None, Some(finalEventTime)) => ExecutionEvent("cromwell poll interval", finalEventTime) + case (None, None) => + throw new IllegalArgumentException("Both jesReportedEndTime and finalEventsListTime were None.") + } + } + } + + private def eventIfExists(name: String, metadata: Map[String, AnyRef], eventName: String): Option[ExecutionEvent] = { + metadata.get(name) map { time => ExecutionEvent(eventName, OffsetDateTime.parse(time.toString)) } + } +} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/GoogleLabelsSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/GoogleLabelsSpec.scala new file mode 100644 index 000000000..c2fb14444 --- /dev/null +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/GoogleLabelsSpec.scala @@ -0,0 +1,37 @@ +package cromwell.backend.impl.jes + +import cats.data.Validated.{Invalid, Valid} +import cromwell.core.labels.Label +import org.scalatest.{FlatSpec, Matchers} + +class GoogleLabelsSpec extends FlatSpec with Matchers { + + behavior of "GoogleLabels" + + /** + * In the format 'to validate', 'expected result' + */ + val googleLabelConversions = List( + "11f2468c-39d6-4be3-85c8-32735c01e66b" -> 
"x--11f2468c-39d6-4be3-85c8-32735c01e66b", + "0-cromwell-root-workflow-id" -> "x--0-cromwell-root-workflow-id", + "cromwell-root-workflow-id-" -> "cromwell-root-workflow-id---x", + "0-cromwell-root-workflow-id-" -> "x--0-cromwell-root-workflow-id---x", + "Cromwell-root-workflow-id" -> "cromwell-root-workflow-id", + "cromwell_root_workflow_id" -> "cromwell-root-workflow-id", + "too-long-too-long-too-long-too-long-too-long-too-long-too-long-t" -> "too-long-too-long-too-long-too---g-too-long-too-long-too-long-t", + "0-too-long-and-invalid-too-long-and-invalid-too-long-and-invali+" -> "x--0-too-long-and-invalid-too----nvalid-too-long-and-invali---x" + ) + + googleLabelConversions foreach { case (label: String, conversion: String) => + it should s"not validate the bad label string '$label'" in { + Label.validateLabelRegex(label, GoogleLabels.GoogleLabelsRegexPattern.r) match { + case Invalid(_) => // Good! + case Valid(_) => fail(s"Label validation succeeded but should have failed.") + } + } + + it should s"convert the bad label string '$label' into the safe label string '$conversion'" in { + GoogleLabels.safeGoogleName(label) should be(conversion) + } + } +} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala index d2527df6e..4848e8546 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala @@ -1,51 +1,61 @@ package cromwell.backend.impl.jes -import java.nio.file.Paths import java.util.UUID import akka.actor.{ActorRef, Props} -import akka.event.LoggingAdapter -import akka.testkit.{ImplicitSender, TestActorRef, TestDuration} -import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionResponse +import 
akka.testkit.{ImplicitSender, TestActorRef, TestDuration, TestProbe} +import com.google.cloud.NoCredentials +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse, JobFailedRetryableResponse} +import cromwell.backend._ import cromwell.backend.async.AsyncBackendJobExecutionActor.{Execute, ExecutionMode} import cromwell.backend.async.{AbortedExecutionHandle, ExecutionHandle, FailedNonRetryableExecutionHandle, FailedRetryableExecutionHandle} import cromwell.backend.impl.jes.JesAsyncBackendJobExecutionActor.JesPendingExecutionHandle -import cromwell.backend.impl.jes.RunStatus.Failed +import cromwell.backend.impl.jes.RunStatus.UnsuccessfulRunStatus import cromwell.backend.impl.jes.io.{DiskType, JesWorkingDisk} -import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobDescriptorKey, BackendWorkflowDescriptor, PreemptedException, RuntimeAttributeDefinition} +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.DoPoll +import cromwell.backend.standard.{DefaultStandardAsyncExecutionActorParams, StandardAsyncExecutionActorParams, StandardAsyncJob, StandardExpressionFunctionsParams} +import cromwell.backend.wdl.WdlFileMapper import cromwell.core._ -import cromwell.core.logging.LoggerWrapper -import cromwell.filesystems.gcs._ +import cromwell.core.callcaching.NoDocker +import cromwell.core.labels.Labels +import cromwell.core.logging.JobLogger +import cromwell.core.path.{DefaultPathBuilder, PathBuilder} +import cromwell.filesystems.gcs.{GcsPath, GcsPathBuilder, GcsPathBuilderFactory} +import cromwell.services.keyvalue.InMemoryKvServiceActor +import cromwell.services.keyvalue.KeyValueServiceActor._ import cromwell.util.SampleWdl import org.scalatest._ -import cromwell.core.{WorkflowId, WorkflowOptions} -import cromwell.filesystems.gcs.GoogleAuthMode.GoogleAuthOptions import org.scalatest.prop.Tables.Table import org.slf4j.Logger import org.specs2.mock.Mockito import 
spray.json.{JsObject, JsValue} -import wdl4s.types.{WdlArrayType, WdlFileType, WdlMapType, WdlStringType} -import wdl4s.values.{WdlArray, WdlFile, WdlMap, WdlString, WdlValue} -import wdl4s.{Call, LocallyQualifiedName, NamespaceWithWorkflow} +import wdl4s.wdl._ +import wdl4s.wdl.types.{WdlArrayType, WdlFileType, WdlMapType, WdlStringType} +import wdl4s.wdl.values.{WdlArray, WdlFile, WdlMap, WdlString, WdlValue} import scala.concurrent.duration._ import scala.concurrent.{Await, ExecutionContext, Future, Promise} +import scala.language.postfixOps import scala.util.{Success, Try} -import cromwell.backend.impl.jes.MockObjects._ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackendJobExecutionActorSpec") - with FlatSpecLike with Matchers with ImplicitSender with Mockito { + with FlatSpecLike with Matchers with ImplicitSender with Mockito with BackendSpec with BeforeAndAfter { + + val mockPathBuilder: GcsPathBuilder = GcsPathBuilder.fromCredentials(NoCredentials.getInstance(), + "test-cromwell", None, GcsPathBuilderFactory.DefaultCloudStorageConfiguration, WorkflowOptions.empty) + + var kvService: ActorRef = system.actorOf(Props(new InMemoryKvServiceActor)) import JesTestConfig._ - implicit val Timeout = 5.seconds.dilated + implicit val Timeout: FiniteDuration = 25.seconds.dilated - val YoSup = - """ + val YoSup: String = + s""" |task sup { | String addressee | command { - | echo "yo sup ${addressee}!" + | echo "yo sup $${addressee}!" 
| } | output { | String salutation = read_string(stdout()) @@ -56,7 +66,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend | } |} | - |workflow sup { + |workflow wf_sup { | call sup |} """.stripMargin @@ -65,49 +75,61 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val NoOptions = WorkflowOptions(JsObject(Map.empty[String, JsValue])) - val TestableCallContext = CallContext(MockGcsFileSystemBuilder.mockGcsFileSystem.getPath("gs://root"), "out", "err") + lazy val TestableCallContext = CallContext(mockPathBuilder.build("gs://root").get, "out", "err") - val TestableJesExpressionFunctions = { - new JesExpressionFunctions(List(MockGcsFileSystemBuilder.mockGcsFileSystem), TestableCallContext) + lazy val TestableStandardExpressionFunctionsParams = new StandardExpressionFunctionsParams { + override lazy val pathBuilders: List[PathBuilder] = List(mockPathBuilder) + override lazy val callContext: CallContext = TestableCallContext } - private def buildInitializationData(jobDescriptor: BackendJobDescriptor, configuration: JesConfiguration) = { - def gcsFileSystem = { - val authOptions = new GoogleAuthOptions { - override def get(key: String): Try[String] = Try(throw new RuntimeException(s"key '$key' not found")) - } - - val storage = jesConfiguration.jesAttributes.gcsFilesystemAuth.buildStorage(authOptions, "appName") - GcsFileSystem(GcsFileSystemProvider(storage)(scala.concurrent.ExecutionContext.global)) - } - - val workflowPaths = JesWorkflowPaths(jobDescriptor.workflowDescriptor, - configuration, - mockCredentials)(scala.concurrent.ExecutionContext.global) - JesBackendInitializationData(workflowPaths, null) + lazy val TestableJesExpressionFunctions: JesExpressionFunctions = { + new JesExpressionFunctions(TestableStandardExpressionFunctionsParams) } - class TestableJesJobExecutionActor(jobDescriptor: BackendJobDescriptor, - promise: Promise[BackendJobExecutionResponse], - jesConfiguration: 
JesConfiguration, - functions: JesExpressionFunctions = TestableJesExpressionFunctions) - extends JesAsyncBackendJobExecutionActor(jobDescriptor, promise, jesConfiguration, buildInitializationData(jobDescriptor, jesConfiguration), emptyActor) { + private def buildInitializationData(jobDescriptor: BackendJobDescriptor, configuration: JesConfiguration) = { + val workflowPaths = JesWorkflowPaths(jobDescriptor.workflowDescriptor, NoCredentials.getInstance(), NoCredentials.getInstance(), configuration)(system) + val runtimeAttributesBuilder = JesRuntimeAttributes.runtimeAttributesBuilder(configuration) + JesBackendInitializationData(workflowPaths, runtimeAttributesBuilder, configuration, null, null) + } - override lazy val jobLogger = new LoggerWrapper { - override def akkaLogger: Option[LoggingAdapter] = Option(log) + class TestableJesJobExecutionActor(params: StandardAsyncExecutionActorParams, functions: JesExpressionFunctions) + extends JesAsyncBackendJobExecutionActor(params) { + + def this(jobDescriptor: BackendJobDescriptor, + promise: Promise[BackendJobExecutionResponse], + jesConfiguration: JesConfiguration, + functions: JesExpressionFunctions = TestableJesExpressionFunctions, + jesSingletonActor: ActorRef = emptyActor, + ioActor: ActorRef = mockIoActor) = { + + this( + DefaultStandardAsyncExecutionActorParams( + jobIdKey = JesAsyncBackendJobExecutionActor.JesOperationIdKey, + serviceRegistryActor = kvService, + ioActor = ioActor, + jobDescriptor = jobDescriptor, + configurationDescriptor = jesConfiguration.configurationDescriptor, + backendInitializationDataOption = Option(buildInitializationData(jobDescriptor, jesConfiguration)), + backendSingletonActorOption = Option(jesSingletonActor), + completionPromise = promise + ), + functions + ) + } + override lazy val jobLogger = new JobLogger("TestLogger", workflowId, jobTag, akkaLogger = Option(log)) { override def tag: String = s"$name [UUID(${workflowId.shortString})$jobTag]" - - override def slf4jLoggers: 
Set[Logger] = Set.empty + override val slf4jLoggers: Set[Logger] = Set.empty } - override lazy val callEngineFunctions = functions + override lazy val backendEngineFunctions: JesExpressionFunctions = functions } private val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) + private val runtimeAttributesBuilder = JesRuntimeAttributes.runtimeAttributesBuilder(jesConfiguration) private val workingDisk = JesWorkingDisk(DiskType.SSD, 200) - val DockerAndDiskRuntime = + val DockerAndDiskRuntime: String = """ |runtime { | docker: "ubuntu:latest" @@ -115,86 +137,118 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend |} """.stripMargin - private def buildPreemptibleJobDescriptor(attempt: Int, preemptible: Int): BackendJobDescriptor = { + private def buildPreemptibleJobDescriptor(preemptible: Int, previousPreemptions: Int, previousUnexpectedRetries: Int): BackendJobDescriptor = { + val attempt = previousPreemptions + previousUnexpectedRetries + 1 val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", s"preemptible: $preemptible")), + WdlNamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", s"preemptible: $preemptible"), Seq.empty[ImportResolver]).get.workflow, Inputs, - NoOptions + NoOptions, + Labels.empty ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(job, None, attempt) val runtimeAttributes = makeRuntimeAttributes(job) - BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Inputs) + val prefetchedKvEntries = Map( + JesBackendLifecycleActorFactory.preemptionCountKey -> KvPair(ScopedKey(workflowDescriptor.id, KvJobKey(key), JesBackendLifecycleActorFactory.preemptionCountKey), Some(previousPreemptions.toString)), + JesBackendLifecycleActorFactory.unexpectedRetryCountKey -> 
KvPair(ScopedKey(workflowDescriptor.id, KvJobKey(key), JesBackendLifecycleActorFactory.unexpectedRetryCountKey), Some(previousUnexpectedRetries.toString))) + BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(Inputs), NoDocker, prefetchedKvEntries) } private def executionActor(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor, promise: Promise[BackendJobExecutionResponse], - errorCode: Int, - innerErrorCode: Int): ActorRef = { + jesSingletonActor: ActorRef, + shouldBePreemptible: Boolean): ActorRef = { - // Mock/stub out the bits that would reach out to JES. - val run = mock[Run] - run.status() returns Failed(errorCode, Option(s"$innerErrorCode: I seen some things man"), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) + val job = StandardAsyncJob(UUID.randomUUID().toString) + val run = Run(job, null) + val handle = new JesPendingExecutionHandle(jobDescriptor, run.job, Option(run), None) - val handle = JesPendingExecutionHandle(jobDescriptor, Seq.empty, run, None) - - class ExecuteOrRecoverActor extends TestableJesJobExecutionActor(jobDescriptor, promise, jesConfiguration) { - override def executeOrRecover(mode: ExecutionMode)(implicit ec: ExecutionContext): Future[ExecutionHandle] = Future.successful(handle) + class ExecuteOrRecoverActor extends TestableJesJobExecutionActor(jobDescriptor, promise, jesConfiguration, jesSingletonActor = jesSingletonActor) { + override def executeOrRecover(mode: ExecutionMode)(implicit ec: ExecutionContext): Future[ExecutionHandle] = { + if(preemptible == shouldBePreemptible) Future.successful(handle) + else Future.failed(new Exception(s"Test expected preemptible to be $shouldBePreemptible but got $preemptible")) + } } system.actorOf(Props(new ExecuteOrRecoverActor), "ExecuteOrRecoverActor-" + UUID.randomUUID) } - private def run(attempt: Int, preemptible: Int, errorCode: Int, innerErrorCode: Int): BackendJobExecutionResponse 
= { - within(Timeout) { - val promise = Promise[BackendJobExecutionResponse]() - val jobDescriptor = buildPreemptibleJobDescriptor(attempt, preemptible) - val backend = executionActor(jobDescriptor, JesBackendConfigurationDescriptor, promise, errorCode, innerErrorCode) - backend ! Execute - Await.result(promise.future, Timeout) + private def runAndFail(previousPreemptions: Int, previousUnexpectedRetries: Int, preemptible: Int, errorCode: Int, innerErrorCode: Int, expectPreemptible: Boolean): BackendJobExecutionResponse = { + + val runStatus = UnsuccessfulRunStatus(errorCode, Option(s"$innerErrorCode: I seen some things man"), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) + val statusPoller = TestProbe() + + val promise = Promise[BackendJobExecutionResponse]() + val jobDescriptor = buildPreemptibleJobDescriptor(preemptible, previousPreemptions, previousUnexpectedRetries) + + // TODO: Use this to check the new KV entries are there! + //val kvProbe = TestProbe() + + val backend = executionActor(jobDescriptor, JesBackendConfigurationDescriptor, promise, statusPoller.ref, expectPreemptible) + backend ! Execute + statusPoller.expectMsgPF(max = Timeout, hint = "awaiting status poll") { + case DoPoll(_) => backend ! 
runStatus } + + Await.result(promise.future, Timeout) } def buildPreemptibleTestActorRef(attempt: Int, preemptible: Int): TestActorRef[TestableJesJobExecutionActor] = { - val jobDescriptor = buildPreemptibleJobDescriptor(attempt, preemptible) - val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) + // For this test we say that all previous attempts were preempted: + val jobDescriptor = buildPreemptibleJobDescriptor(preemptible, attempt - 1, 0) + val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), + jesConfiguration, + TestableJesExpressionFunctions, + emptyActor, + failIoActor)) TestActorRef(props, s"TestableJesJobExecutionActor-${jobDescriptor.workflowDescriptor.id}") } behavior of "JesAsyncBackendJobExecutionActor" + + val timeout = 25 seconds - { // Set of "handle call failures appropriately with respect to preemption" tests + { // Set of "handle call failures appropriately with respect to preemption and failure" tests val expectations = Table( - ("attempt", "preemptible", "errorCode", "innerErrorCode", "shouldRetry"), - // No preemptible attempts allowed, nothing should be retryable. - (1, 0, 10, 13, false), - (1, 0, 10, 14, false), - (1, 0, 10, 15, false), - (1, 0, 11, 13, false), - (1, 0, 11, 14, false), + ("previous_preemptions", "previous_unexpectedRetries", "preemptible", "errorCode", "innerErrorCode", "shouldRunAsPreemptible", "shouldRetry"), + // No preemptible attempts allowed, but standard failures should be retried. + (0, 0, 0, 10, 13, false, true), // This is the new "unexpected failure" mode, which is now retried + (0, 1, 0, 10, 13, false, true), + (0, 2, 0, 10, 13, false, false), // The third unexpected failure is a real failure. + (0, 0, 0, 10, 14, false, false), // Usually means "preempted', but this wasn't a preemptible VM, so this should just be a failure. 
+ (0, 0, 0, 10, 15, false, false), + (0, 0, 0, 11, 13, false, false), + (0, 0, 0, 11, 14, false, false), // 1 preemptible attempt allowed, but not all failures represent preemptions. - (1, 1, 10, 13, true), - (1, 1, 10, 14, true), - (1, 1, 10, 15, false), - (1, 1, 11, 13, false), - (1, 1, 11, 14, false), - // 1 preemptible attempt allowed, but now on the second attempt nothing should be retryable. - (2, 1, 10, 13, false), - (2, 1, 10, 14, false), - (2, 1, 10, 15, false), - (2, 1, 11, 13, false), - (2, 1, 11, 14, false) + (0, 0, 1, 10, 13, true, true), + (0, 1, 1, 10, 13, true, true), + (0, 2, 1, 10, 13, true, false), + (0, 0, 1, 10, 14, true, true), + (0, 0, 1, 10, 15, true, false), + (0, 0, 1, 11, 13, true, false), + (0, 0, 1, 11, 14, true, false), + // 1 preemptible attempt allowed, but since we're now on the second preemption attempt only 13s should be retryable. + (1, 0, 1, 10, 13, false, true), + (1, 1, 1, 10, 13, false, true), + (1, 2, 1, 10, 13, false, false), + (1, 0, 1, 10, 14, false, false), + (1, 0, 1, 10, 15, false, false), + (1, 0, 1, 11, 13, false, false), + (1, 0, 1, 11, 14, false, false) ) - expectations foreach { case (attempt, preemptible, errorCode, innerErrorCode, shouldRetry) => - it should s"handle call failures appropriately with respect to preemption (attempt=$attempt, preemptible=$preemptible, errorCode=$errorCode, innerErrorCode=$innerErrorCode)" in { - run(attempt, preemptible, errorCode, innerErrorCode).getClass.getSimpleName match { - case "FailedNonRetryableResponse" => false shouldBe shouldRetry - case "FailedRetryableResponse" => true shouldBe shouldRetry - case huh => fail(s"Unexpected response class name: '$huh'") + expectations foreach { case (previousPreemptions, previousUnexpectedRetries, preemptible, errorCode, innerErrorCode, shouldBePreemptible, shouldRetry) => + val descriptor = s"previousPreemptions=$previousPreemptions, previousUnexpectedRetries=$previousUnexpectedRetries preemptible=$preemptible, errorCode=$errorCode, 
innerErrorCode=$innerErrorCode" + it should s"handle call failures appropriately with respect to preemption and failure ($descriptor)" in { + runAndFail(previousPreemptions, previousUnexpectedRetries, preemptible, errorCode, innerErrorCode, shouldBePreemptible) match { + case response: JobFailedNonRetryableResponse => + if(shouldRetry) fail(s"A should-be-retried job ($descriptor) was sent back to the engine with: $response") + case response: JobFailedRetryableResponse => + if(!shouldRetry) fail(s"A shouldn't-be-retried job ($descriptor) was sent back to the engine with $response") + case huh => fail(s"Unexpected response: $huh") } } } @@ -203,72 +257,66 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "not restart 2 of 1 unexpected shutdowns without another preemptible VM" in { val actorRef = buildPreemptibleTestActorRef(2, 1) val jesBackend = actorRef.underlyingActor - val handle = mock[JesPendingExecutionHandle] - implicit val ec = system.dispatcher - - val failedStatus = Failed(10, Some("14: VM XXX shut down unexpectedly."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) - val executionResult = Await.result(jesBackend.executionResult(failedStatus, handle), 2.seconds) - executionResult.isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true - val failedHandle = executionResult.asInstanceOf[FailedNonRetryableExecutionHandle] + val runId = StandardAsyncJob(UUID.randomUUID().toString) + val handle = new JesPendingExecutionHandle(null, runId, None, None) + + val failedStatus = UnsuccessfulRunStatus(10, Option("14: VM XXX shut down unexpectedly."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) + val executionResult = jesBackend.handleExecutionResult(failedStatus, handle) + val result = Await.result(executionResult, timeout) + result.isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true + val failedHandle = 
result.asInstanceOf[FailedNonRetryableExecutionHandle] failedHandle.returnCode shouldBe None } it should "restart 1 of 1 unexpected shutdowns without another preemptible VM" in { val actorRef = buildPreemptibleTestActorRef(1, 1) val jesBackend = actorRef.underlyingActor - val handle = mock[JesPendingExecutionHandle] - implicit val ec = system.dispatcher - - val failedStatus = Failed(10, Some("14: VM XXX shut down unexpectedly."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) - val executionResult = Await.result(jesBackend.executionResult(failedStatus, handle), 2.seconds) - executionResult.isInstanceOf[FailedRetryableExecutionHandle] shouldBe true - val retryableHandle = executionResult.asInstanceOf[FailedRetryableExecutionHandle] - retryableHandle.throwable.isInstanceOf[PreemptedException] shouldBe true + val runId = StandardAsyncJob(UUID.randomUUID().toString) + val handle = new JesPendingExecutionHandle(null, runId, None, None) + + val failedStatus = UnsuccessfulRunStatus(10, Option("14: VM XXX shut down unexpectedly."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) + val executionResult = jesBackend.handleExecutionResult(failedStatus, handle) + val result = Await.result(executionResult, timeout) + result.isInstanceOf[FailedRetryableExecutionHandle] shouldBe true + val retryableHandle = result.asInstanceOf[FailedRetryableExecutionHandle] retryableHandle.returnCode shouldBe None - val preemptedException = retryableHandle.throwable.asInstanceOf[PreemptedException] - preemptedException.getMessage should include("will be restarted with a non-preemptible VM") + retryableHandle.throwable.getMessage should include("will be restarted with a non-preemptible VM") } it should "restart 1 of 2 unexpected shutdowns with another preemptible VM" in { val actorRef = buildPreemptibleTestActorRef(1, 2) val jesBackend = actorRef.underlyingActor - val handle = mock[JesPendingExecutionHandle] - implicit val ec = 
system.dispatcher - - val failedStatus = Failed(10, Some("14: VM XXX shut down unexpectedly."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) - val executionResult = Await.result(jesBackend.executionResult(failedStatus, handle), 2.seconds) - executionResult.isInstanceOf[FailedRetryableExecutionHandle] shouldBe true - val retryableHandle = executionResult.asInstanceOf[FailedRetryableExecutionHandle] - retryableHandle.throwable.isInstanceOf[PreemptedException] shouldBe true + val runId = StandardAsyncJob(UUID.randomUUID().toString) + val handle = new JesPendingExecutionHandle(null, runId, None, None) + + val failedStatus = UnsuccessfulRunStatus(10, Option("14: VM XXX shut down unexpectedly."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) + val executionResult = jesBackend.handleExecutionResult(failedStatus, handle) + val result = Await.result(executionResult, timeout) + result.isInstanceOf[FailedRetryableExecutionHandle] shouldBe true + val retryableHandle = result.asInstanceOf[FailedRetryableExecutionHandle] retryableHandle.returnCode shouldBe None - val preemptedException2 = retryableHandle.throwable.asInstanceOf[PreemptedException] - preemptedException2.getMessage should include("will be restarted with another preemptible VM") + retryableHandle.throwable.getMessage should include("will be restarted with another preemptible VM") } it should "handle Failure Status for various errors" in { val actorRef = buildPreemptibleTestActorRef(1, 1) val jesBackend = actorRef.underlyingActor - val handle = mock[JesPendingExecutionHandle] - implicit val ec = system.dispatcher - - Await.result(jesBackend.executionResult( - Failed(10, Some("15: Other type of error."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")), handle), 2.seconds - ).isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true - Await.result(jesBackend.executionResult( - Failed(11, Some("14: Wrong errorCode."), 
Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")), handle), 2.seconds - ).isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true - Await.result(jesBackend.executionResult( - Failed(10, Some("Weird error message."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")), handle), 2.seconds - ).isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true - Await.result(jesBackend.executionResult( - Failed(10, Some("UnparsableInt: Even weirder error message."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")), handle), 2.seconds - ).isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true - Await.result(jesBackend.executionResult( - Failed(10, None, Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")), handle), 2.seconds - ).isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true - Await.result(jesBackend.executionResult( - Failed(10, Some("Operation canceled at"), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")), handle), 2.seconds - ) shouldBe AbortedExecutionHandle + val runId = StandardAsyncJob(UUID.randomUUID().toString) + val handle = new JesPendingExecutionHandle(null, runId, None, None) + + def checkFailedResult(errorCode: Int, errorMessage: Option[String]): ExecutionHandle = { + val failed = UnsuccessfulRunStatus(errorCode, errorMessage, Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) + Await.result(jesBackend.handleExecutionResult(failed, handle), timeout) + } + + checkFailedResult(10, Option("15: Other type of error.")) + .isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true + checkFailedResult(11, Option("14: Wrong errorCode.")).isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true + checkFailedResult(10, Option("Weird error message.")).isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true + checkFailedResult(10, Option("UnparsableInt: Even weirder 
error message.")) + .isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true + checkFailedResult(10, None).isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true + checkFailedResult(1, Option("Operation canceled at")) shouldBe AbortedExecutionHandle actorRef.stop() } @@ -289,21 +337,27 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", "")), + WdlNamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", ""), Seq.empty[ImportResolver]).get.workflow, inputs, - NoOptions + NoOptions, + Labels.empty ) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs), NoDocker, Map.empty) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( props, s"TestableJesJobExecutionActor-${jobDescriptor.workflowDescriptor.id}") - val mappedInputs = jobDescriptor.inputs mapValues testActorRef.underlyingActor.gcsPathToLocal + + def gcsPathToLocal(wdlValue: WdlValue): WdlValue = { + WdlFileMapper.mapWdlFiles(testActorRef.underlyingActor.mapCommandLineWdlFile)(wdlValue).get + } + + val mappedInputs = jobDescriptor.fullyQualifiedInputs mapValues gcsPathToLocal mappedInputs(stringKey) match { case WdlString(v) => assert(v.equalsIgnoreCase(stringVal.value)) @@ -343,15 +397,16 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - 
NamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).workflowSource, Seq.empty[ImportResolver]).get.workflow, inputs, - NoOptions + NoOptions, + Labels.empty ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs), NoDocker, Map.empty) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -360,21 +415,21 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val jesInputs = testActorRef.underlyingActor.generateJesInputs(jobDescriptor) jesInputs should have size 8 jesInputs should contain(JesFileInput( - "stringToFileMap-0", "gs://path/to/stringTofile1", Paths.get("path/to/stringTofile1"), workingDisk)) + "stringToFileMap-0", "gs://path/to/stringTofile1", DefaultPathBuilder.get("path/to/stringTofile1"), workingDisk)) jesInputs should contain(JesFileInput( - "stringToFileMap-1", "gs://path/to/stringTofile2", Paths.get("path/to/stringTofile2"), workingDisk)) + "stringToFileMap-1", "gs://path/to/stringTofile2", DefaultPathBuilder.get("path/to/stringTofile2"), workingDisk)) jesInputs should contain(JesFileInput( - "fileToStringMap-0", "gs://path/to/fileToString1", Paths.get("path/to/fileToString1"), workingDisk)) + "fileToStringMap-0", "gs://path/to/fileToString1", DefaultPathBuilder.get("path/to/fileToString1"), workingDisk)) jesInputs should contain(JesFileInput( - "fileToStringMap-1", 
"gs://path/to/fileToString2", Paths.get("path/to/fileToString2"), workingDisk)) + "fileToStringMap-1", "gs://path/to/fileToString2", DefaultPathBuilder.get("path/to/fileToString2"), workingDisk)) jesInputs should contain(JesFileInput( - "fileToFileMap-0", "gs://path/to/fileToFile1Key", Paths.get("path/to/fileToFile1Key"), workingDisk)) + "fileToFileMap-0", "gs://path/to/fileToFile1Key", DefaultPathBuilder.get("path/to/fileToFile1Key"), workingDisk)) jesInputs should contain(JesFileInput( - "fileToFileMap-1", "gs://path/to/fileToFile1Value", Paths.get("path/to/fileToFile1Value"), workingDisk)) + "fileToFileMap-1", "gs://path/to/fileToFile1Value", DefaultPathBuilder.get("path/to/fileToFile1Value"), workingDisk)) jesInputs should contain(JesFileInput( - "fileToFileMap-2", "gs://path/to/fileToFile2Key", Paths.get("path/to/fileToFile2Key"), workingDisk)) + "fileToFileMap-2", "gs://path/to/fileToFile2Key", DefaultPathBuilder.get("path/to/fileToFile2Key"), workingDisk)) jesInputs should contain(JesFileInput( - "fileToFileMap-3", "gs://path/to/fileToFile2Value", Paths.get("path/to/fileToFile2Value"), workingDisk)) + "fileToFileMap-3", "gs://path/to/fileToFile2Value", DefaultPathBuilder.get("path/to/fileToFile2Value"), workingDisk)) } def makeJesActorRef(sampleWdl: SampleWdl, callName: LocallyQualifiedName, inputs: Map[FullyQualifiedName, WdlValue], @@ -382,15 +437,16 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend TestActorRef[TestableJesJobExecutionActor] = { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(sampleWdl.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(sampleWdl.asWorkflowSources(DockerAndDiskRuntime).workflowSource, Seq.empty[ImportResolver]).get.workflow, inputs, - NoOptions + NoOptions, + Labels.empty ) - val call = workflowDescriptor.workflowNamespace.workflow.findCallByName(callName).get + val call = 
workflowDescriptor.workflow.findCallByName(callName).get.asInstanceOf[WdlTaskCall] val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs), NoDocker, Map.empty) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration, functions)) TestActorRef[TestableJesJobExecutionActor](props, s"TestableJesJobExecutionActor-${jobDescriptor.workflowDescriptor.id}") @@ -398,18 +454,18 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "generate correct JesOutputs" in { val inputs = Map( - "in" -> WdlFile("gs://a/b/c.txt") + "in" -> WdlFile("gs://blah/b/c.txt") ) val jesBackend = makeJesActorRef(SampleWdl.FilePassingWorkflow, "a", inputs).underlyingActor val jobDescriptor = jesBackend.jobDescriptor val workflowId = jesBackend.workflowId val jesInputs = jesBackend.generateJesInputs(jobDescriptor) jesInputs should have size 1 - jesInputs should contain(JesFileInput("in-0", "gs://a/b/c.txt", Paths.get("a/b/c.txt"), workingDisk)) + jesInputs should contain(JesFileInput("in-0", "gs://blah/b/c.txt", DefaultPathBuilder.get("blah/b/c.txt"), workingDisk)) val jesOutputs = jesBackend.generateJesOutputs(jobDescriptor) jesOutputs should have size 1 jesOutputs should contain(JesFileOutput("out", - s"gs://my-cromwell-workflows-bucket/file_passing/$workflowId/call-a/out", Paths.get("out"), workingDisk)) + s"gs://my-cromwell-workflows-bucket/file_passing/$workflowId/call-a/out", DefaultPathBuilder.get("out"), workingDisk)) } it should "generate correct JesInputs when a command line contains a write_lines call in it" in { @@ -417,8 +473,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend "strs" -> 
WdlArray(WdlArrayType(WdlStringType), Seq("A", "B", "C").map(WdlString)) ) - class TestJesExpressionFunctions extends JesExpressionFunctions( - List(MockGcsFileSystemBuilder.mockGcsFileSystem), TestableCallContext) { + class TestJesExpressionFunctions extends JesExpressionFunctions(TestableStandardExpressionFunctionsParams) { override def write_lines(params: Seq[Try[WdlValue]]): Try[WdlFile] = { Success(WdlFile(s"gs://some/path/file.txt")) } @@ -430,7 +485,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val jesInputs = jesBackend.generateJesInputs(jobDescriptor) jesInputs should have size 1 jesInputs should contain(JesFileInput( - "c6fd5c91-0", "gs://some/path/file.txt", Paths.get("some/path/file.txt"), workingDisk)) + "c6fd5c91-0", "gs://some/path/file.txt", DefaultPathBuilder.get("some/path/file.txt"), workingDisk)) val jesOutputs = jesBackend.generateJesOutputs(jobDescriptor) jesOutputs should have size 0 } @@ -443,15 +498,16 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).workflowSource, Seq.empty[ImportResolver]).get.workflow, inputs, - NoOptions + NoOptions, + Labels.empty ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs), NoDocker, Map.empty) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), 
jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -459,8 +515,8 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val jesInputs = testActorRef.underlyingActor.generateJesInputs(jobDescriptor) jesInputs should have size 2 - jesInputs should contain(JesFileInput("fileArray-0", "gs://path/to/file1", Paths.get("path/to/file1"), workingDisk)) - jesInputs should contain(JesFileInput("fileArray-1", "gs://path/to/file2", Paths.get("path/to/file2"), workingDisk)) + jesInputs should contain(JesFileInput("fileArray-0", "gs://path/to/file1", DefaultPathBuilder.get("path/to/file1"), workingDisk)) + jesInputs should contain(JesFileInput("fileArray-1", "gs://path/to/file2", DefaultPathBuilder.get("path/to/file2"), workingDisk)) } it should "generate correct JesFileInputs from a WdlFile" in { @@ -471,15 +527,16 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).workflowSource, Seq.empty[ImportResolver]).get.workflow, inputs, - NoOptions + NoOptions, + Labels.empty ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs), NoDocker, Map.empty) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -487,22 +544,22 @@ class 
JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val jesInputs = testActorRef.underlyingActor.generateJesInputs(jobDescriptor) jesInputs should have size 2 - jesInputs should contain(JesFileInput("file1-0", "gs://path/to/file1", Paths.get("path/to/file1"), workingDisk)) - jesInputs should contain(JesFileInput("file2-0", "gs://path/to/file2", Paths.get("path/to/file2"), workingDisk)) + jesInputs should contain(JesFileInput("file1-0", "gs://path/to/file1", DefaultPathBuilder.get("path/to/file1"), workingDisk)) + jesInputs should contain(JesFileInput("file2-0", "gs://path/to/file2", DefaultPathBuilder.get("path/to/file2"), workingDisk)) } it should "convert local Paths back to corresponding GCS paths in JesOutputs" in { - val jesOutputs = Seq( + val jesOutputs = Set( JesFileOutput("/cromwell_root/path/to/file1", "gs://path/to/file1", - Paths.get("/cromwell_root/path/to/file1"), workingDisk), + DefaultPathBuilder.get("/cromwell_root/path/to/file1"), workingDisk), JesFileOutput("/cromwell_root/path/to/file2", "gs://path/to/file2", - Paths.get("/cromwell_root/path/to/file2"), workingDisk), + DefaultPathBuilder.get("/cromwell_root/path/to/file2"), workingDisk), JesFileOutput("/cromwell_root/path/to/file3", "gs://path/to/file3", - Paths.get("/cromwell_root/path/to/file3"), workingDisk), + DefaultPathBuilder.get("/cromwell_root/path/to/file3"), workingDisk), JesFileOutput("/cromwell_root/path/to/file4", "gs://path/to/file4", - Paths.get("/cromwell_root/path/to/file4"), workingDisk), + DefaultPathBuilder.get("/cromwell_root/path/to/file4"), workingDisk), JesFileOutput("/cromwell_root/path/to/file5", "gs://path/to/file5", - Paths.get("/cromwell_root/path/to/file5"), workingDisk) + DefaultPathBuilder.get("/cromwell_root/path/to/file5"), workingDisk) ) val outputValues = Seq( WdlFile("/cromwell_root/path/to/file1"), @@ -515,21 +572,26 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = 
BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).workflowSource, Seq.empty[ImportResolver]).get.workflow, Map.empty, - NoOptions + NoOptions, + Labels.empty ) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty, NoDocker, Map.empty) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( props, s"TestableJesJobExecutionActor-${jobDescriptor.workflowDescriptor.id}") - val result = outputValues map testActorRef.underlyingActor.wdlValueToGcsPath(jesOutputs) + def wdlValueToGcsPath(jesOutputs: Set[JesFileOutput])(wdlValue: WdlValue): WdlValue = { + WdlFileMapper.mapWdlFiles(testActorRef.underlyingActor.wdlFileToGcsPath(jesOutputs))(wdlValue).get + } + + val result = outputValues map wdlValueToGcsPath(jesOutputs) result should have size 3 result should contain(WdlFile("gs://path/to/file1")) result should contain(WdlArray(WdlArrayType(WdlFileType), @@ -543,36 +605,38 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "create a JesFileInput for the monitoring script, when specified" in { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).workflowSource, 
Seq.empty[ImportResolver]).get.workflow, Map.empty, - WorkflowOptions.fromJsonString("""{"monitoring_script": "gs://path/to/script"}""").get + WorkflowOptions.fromJsonString("""{"monitoring_script": "gs://path/to/script"}""").get, + Labels.empty ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty, NoDocker, Map.empty) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( props, s"TestableJesJobExecutionActor-${jobDescriptor.workflowDescriptor.id}") testActorRef.underlyingActor.monitoringScript shouldBe - Some(JesFileInput("monitoring-in", "gs://path/to/script", Paths.get("/cromwell_root/monitoring.sh"), workingDisk)) + Some(JesFileInput("monitoring-in", "gs://path/to/script", DefaultPathBuilder.get("/cromwell_root/monitoring.sh"), workingDisk)) } it should "not create a JesFileInput for the monitoring script, when not specified" in { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).workflowSource, Seq.empty[ImportResolver]).get.workflow, Map.empty, - NoOptions + NoOptions, + Labels.empty ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(job, None, 1) val runtimeAttributes = makeRuntimeAttributes(job) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, 
runtimeAttributes, Map.empty) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty, NoDocker, Map.empty) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -584,16 +648,17 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "return JES log paths for non-scattered call" in { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId(UUID.fromString("e6236763-c518-41d0-9688-432549a8bf7c")), - NamespaceWithWorkflow.load( - SampleWdl.HelloWorld.asWorkflowSources(""" runtime {docker: "ubuntu:latest"} """).wdlSource), + WdlNamespaceWithWorkflow.load( + SampleWdl.HelloWorld.asWorkflowSources(""" runtime {docker: "ubuntu:latest"} """).workflowSource, Seq.empty[ImportResolver]).get.workflow, Map.empty, - WorkflowOptions.fromJsonString(""" {"jes_gcs_root": "gs://path/to/gcs_root"} """).get + WorkflowOptions.fromJsonString(""" {"jes_gcs_root": "gs://path/to/gcs_root"} """).get, + Labels.empty ) - val call = workflowDescriptor.workflowNamespace.workflow.findCallByName("hello").get + val call = workflowDescriptor.workflow.findCallByName("hello").get.asInstanceOf[WdlTaskCall] val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty, NoDocker, Map.empty) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -601,31 +666,31 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val jesBackend = testActorRef.underlyingActor - // TODO: NioGcsPath.equals not implemented, so use toString instead - 
jesBackend.jesCallPaths.stdoutPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stdoutPath.toString shouldBe - "gs://path/to/gcs_root/hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stdout.log" - jesBackend.jesCallPaths.stderrPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stderrPath.toString shouldBe - "gs://path/to/gcs_root/hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stderr.log" - jesBackend.jesCallPaths.jesLogPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.jesLogPath.toString shouldBe - "gs://path/to/gcs_root/hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello.log" + jesBackend.jesCallPaths.stdout should be(a[GcsPath]) + jesBackend.jesCallPaths.stdout.pathAsString shouldBe + "gs://path/to/gcs_root/wf_hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stdout.log" + jesBackend.jesCallPaths.stderr should be(a[GcsPath]) + jesBackend.jesCallPaths.stderr.pathAsString shouldBe + "gs://path/to/gcs_root/wf_hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stderr.log" + jesBackend.jesCallPaths.jesLogPath should be(a[GcsPath]) + jesBackend.jesCallPaths.jesLogPath.pathAsString shouldBe + "gs://path/to/gcs_root/wf_hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello.log" } it should "return JES log paths for scattered call" in { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId(UUID.fromString("e6236763-c518-41d0-9688-432549a8bf7d")), - NamespaceWithWorkflow.load( - new SampleWdl.ScatterWdl().asWorkflowSources(""" runtime {docker: "ubuntu:latest"} """).wdlSource), + WdlNamespaceWithWorkflow.load( + new SampleWdl.ScatterWdl().asWorkflowSources(""" runtime {docker: "ubuntu:latest"} """).workflowSource, Seq.empty[ImportResolver]).get.workflow, Map.empty, - WorkflowOptions.fromJsonString(""" {"jes_gcs_root": "gs://path/to/gcs_root"} """).get + WorkflowOptions.fromJsonString(""" {"jes_gcs_root": "gs://path/to/gcs_root"} """).get, + Labels.empty ) - val call = 
workflowDescriptor.workflowNamespace.workflow.findCallByName("B").get + val call = workflowDescriptor.workflow.findCallByName("B").get.asInstanceOf[WdlTaskCall] val key = BackendJobDescriptorKey(call, Option(2), 1) val runtimeAttributes = makeRuntimeAttributes(call) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty, NoDocker, Map.empty) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -633,14 +698,14 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val jesBackend = testActorRef.underlyingActor - jesBackend.jesCallPaths.stdoutPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stdoutPath.toString shouldBe + jesBackend.jesCallPaths.stdout should be(a[GcsPath]) + jesBackend.jesCallPaths.stdout.pathAsString shouldBe "gs://path/to/gcs_root/w/e6236763-c518-41d0-9688-432549a8bf7d/call-B/shard-2/B-2-stdout.log" - jesBackend.jesCallPaths.stderrPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stderrPath.toString shouldBe + jesBackend.jesCallPaths.stderr should be(a[GcsPath]) + jesBackend.jesCallPaths.stderr.pathAsString shouldBe "gs://path/to/gcs_root/w/e6236763-c518-41d0-9688-432549a8bf7d/call-B/shard-2/B-2-stderr.log" - jesBackend.jesCallPaths.jesLogPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.jesLogPath.toString shouldBe + jesBackend.jesCallPaths.jesLogPath should be(a[GcsPath]) + jesBackend.jesCallPaths.jesLogPath.pathAsString shouldBe "gs://path/to/gcs_root/w/e6236763-c518-41d0-9688-432549a8bf7d/call-B/shard-2/B-2.log" } @@ -667,8 +732,9 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend descriptorWithMax2AndKey2.preemptible shouldBe true } - private def makeRuntimeAttributes(job: Call) = { + private def 
makeRuntimeAttributes(job: WdlTaskCall) = { val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(job.task.runtimeAttributes, TestableJesExpressionFunctions, Map.empty) - RuntimeAttributeDefinition.addDefaultsToAttributes(JesBackendLifecycleActorFactory.staticRuntimeAttributeDefinitions, NoOptions)(evaluatedAttributes.get) // Fine to throw the exception if this "get" fails. This is a test after all! + RuntimeAttributeDefinition.addDefaultsToAttributes( + runtimeAttributesBuilder.definitions.toSet, NoOptions)(evaluatedAttributes.get) } } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttachedDiskSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttachedDiskSpec.scala index 3e0f19943..37eb1b14c 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttachedDiskSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttachedDiskSpec.scala @@ -1,9 +1,8 @@ package cromwell.backend.impl.jes -import java.nio.file.Paths - import com.google.api.services.genomics.model.Disk import cromwell.backend.impl.jes.io.{DiskType, JesAttachedDisk, JesEmptyMountedDisk, JesWorkingDisk} +import cromwell.core.path.DefaultPathBuilder import org.scalatest.prop.TableDrivenPropertyChecks._ import org.scalatest.prop.Tables.Table import org.scalatest.{FlatSpec, Matchers, TryValues} @@ -13,8 +12,8 @@ import scala.util.Failure class JesAttachedDiskSpec extends FlatSpec with Matchers with TryValues { val validTable = Table( ("unparsed", "parsed"), - ("/mnt 3 SSD", JesEmptyMountedDisk(DiskType.SSD, 3, Paths.get("/mnt"))), - ("/mnt/my_path 10 HDD", JesEmptyMountedDisk(DiskType.HDD, 10, Paths.get("/mnt/my_path"))), + ("/mnt 3 SSD", JesEmptyMountedDisk(DiskType.SSD, 3, DefaultPathBuilder.get("/mnt"))), + ("/mnt/my_path 10 HDD", JesEmptyMountedDisk(DiskType.HDD, 10, DefaultPathBuilder.get("/mnt/my_path"))), ("local-disk 100 SSD", JesWorkingDisk(DiskType.SSD, 100)), 
("local-disk 100 LOCAL", JesWorkingDisk(DiskType.LOCAL, 100)) ) diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala index 020be850b..695305781 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala @@ -5,8 +5,8 @@ import java.net.URL import com.typesafe.config.ConfigFactory import cromwell.core.Tags._ import cromwell.filesystems.gcs.GoogleConfiguration +import lenthall.exception.MessageAggregation import org.scalatest.{FlatSpec, Matchers} -import wdl4s.ExceptionWithErrors class JesAttributesSpec extends FlatSpec with Matchers { @@ -14,20 +14,24 @@ class JesAttributesSpec extends FlatSpec with Matchers { behavior of "JesAttributes" + val googleConfig = GoogleConfiguration(JesGlobalConfig) + val runtimeConfig = ConfigFactory.load() + it should "parse correct JES config" taggedAs IntegrationTest in { - val googleConfig = GoogleConfiguration(JesGlobalConfig) - val backendConfig = ConfigFactory.parseString(configString.replace("[PREEMPTIBLE]", "")) + + val backendConfig = ConfigFactory.parseString(configString()) val jesAttributes = JesAttributes(googleConfig, backendConfig) jesAttributes.endpointUrl should be(new URL("http://myEndpoint")) jesAttributes.project should be("myProject") jesAttributes.executionBucket should be("gs://myBucket") jesAttributes.maxPollingInterval should be(600) + jesAttributes.computeServiceAccount should be("default") + jesAttributes.restrictMetadataAccess should be(false) } it should "parse correct preemptible config" taggedAs IntegrationTest in { - val googleConfig = GoogleConfiguration(JesGlobalConfig) - val backendConfig = ConfigFactory.parseString(configString.replace("[PREEMPTIBLE]", "preemptible = 3")) + val backendConfig = 
ConfigFactory.parseString(configString(preemptible = "preemptible = 3")) val jesAttributes = JesAttributes(googleConfig, backendConfig) jesAttributes.endpointUrl should be(new URL("http://myEndpoint")) @@ -36,6 +40,21 @@ class JesAttributesSpec extends FlatSpec with Matchers { jesAttributes.maxPollingInterval should be(600) } + it should "parse compute service account" taggedAs IntegrationTest in { + val backendConfig = ConfigFactory.parseString(configString(genomics = """compute-service-account = "testing" """)) + + val jesAttributes = JesAttributes(googleConfig, backendConfig) + jesAttributes.computeServiceAccount should be("testing") + } + + it should "parse restrict-metadata-access" taggedAs IntegrationTest in { + val backendConfig = ConfigFactory.parseString(configString(genomics = "restrict-metadata-access = true")) + + val jesAttributes = JesAttributes(googleConfig, backendConfig) + jesAttributes.restrictMetadataAccess should be(true) + + } + it should "not parse invalid config" taggedAs IntegrationTest in { val nakedConfig = ConfigFactory.parseString( @@ -47,30 +66,29 @@ class JesAttributesSpec extends FlatSpec with Matchers { |} """.stripMargin) - val googleConfig = GoogleConfiguration(JesGlobalConfig) - - val exception = intercept[IllegalArgumentException with ExceptionWithErrors] { + val exception = intercept[IllegalArgumentException with MessageAggregation] { JesAttributes(googleConfig, nakedConfig) } - val errorsList = exception.errors.list.toList - errorsList should contain("Could not find key: project") - errorsList should contain("Could not find key: root") - errorsList should contain("Could not find key: genomics.auth") - errorsList should contain("Could not find key: filesystems.gcs.auth") - errorsList should contain("no protocol: myEndpoint") + val errorsList = exception.errorMessages.toList + errorsList should contain("No configuration setting found for key 'project'") + errorsList should contain("No configuration setting found for key 'root'") 
+ errorsList should contain("No configuration setting found for key 'genomics.auth'") + errorsList should contain("No configuration setting found for key 'filesystems'") + errorsList should contain("URI is not absolute") } - val configString = - """ + def configString(preemptible: String = "", genomics: String = ""): String = + s""" |{ | project = "myProject" | root = "gs://myBucket" | maximum-polling-interval = 600 - | [PREEMPTIBLE] + | $preemptible | genomics { | // A reference to an auth defined in the `google` stanza at the top. This auth is used to create | // Pipelines and manipulate auth JSONs. | auth = "application-default" + | $genomics | endpoint-url = "http://myEndpoint" | } | diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala index d27328706..73a1cf85f 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala @@ -1,13 +1,14 @@ package cromwell.backend.impl.jes +import com.google.cloud.NoCredentials import cromwell.backend.BackendSpec +import cromwell.core.TestKitSuite +import cromwell.filesystems.gcs.auth.GoogleAuthModeSpec import cromwell.util.SampleWdl -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.{FlatSpecLike, Matchers} import org.specs2.mock.Mockito -import scala.concurrent.ExecutionContext.Implicits.global -import cromwell.backend.impl.jes.MockObjects._ -class JesCallPathsSpec extends FlatSpec with Matchers with Mockito { +class JesCallPathsSpec extends TestKitSuite with FlatSpecLike with Matchers with Mockito { import BackendSpec._ import JesTestConfig._ @@ -15,12 +16,15 @@ class JesCallPathsSpec extends FlatSpec with Matchers with Mockito { behavior of "JesCallPaths" it should "map the correct filenames" in { - val workflowDescriptor = 
buildWorkflowDescriptor(SampleWdl.HelloWorld.wdlSource()) + GoogleAuthModeSpec.assumeHasApplicationDefaultCredentials() + + val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.workflowSource()) val jobDescriptorKey = firstJobDescriptorKey(workflowDescriptor) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) - - val callPaths = JesCallPaths(jobDescriptorKey, workflowDescriptor, - jesConfiguration, mockCredentials) + val workflowPaths = JesWorkflowPaths(workflowDescriptor, NoCredentials.getInstance(), NoCredentials.getInstance(), jesConfiguration) + + val callPaths = JesJobPaths(workflowPaths, jobDescriptorKey) + callPaths.returnCodeFilename should be("hello-rc.txt") callPaths.stderrFilename should be("hello-stderr.log") callPaths.stdoutFilename should be("hello-stdout.log") @@ -28,33 +32,41 @@ class JesCallPathsSpec extends FlatSpec with Matchers with Mockito { } it should "map the correct paths" in { - val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.wdlSource()) + GoogleAuthModeSpec.assumeHasApplicationDefaultCredentials() + + val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.workflowSource()) val jobDescriptorKey = firstJobDescriptorKey(workflowDescriptor) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) + val workflowPaths = JesWorkflowPaths(workflowDescriptor, NoCredentials.getInstance(), NoCredentials.getInstance(), jesConfiguration) - val callPaths = JesCallPaths(jobDescriptorKey, workflowDescriptor, jesConfiguration, - mockCredentials) - callPaths.returnCodePath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello-rc.txt") - callPaths.stdoutPath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello-stdout.log") - callPaths.stderrPath.toString should - 
be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello-stderr.log") - callPaths.jesLogPath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello.log") + val callPaths = JesJobPaths(workflowPaths, jobDescriptorKey) + + callPaths.returnCode.pathAsString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello-rc.txt") + callPaths.stdout.pathAsString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello-stdout.log") + callPaths.stderr.pathAsString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello-stderr.log") + callPaths.jesLogPath.pathAsString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello.log") } it should "map the correct call context" in { - val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.wdlSource()) + GoogleAuthModeSpec.assumeHasApplicationDefaultCredentials() + + val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.workflowSource()) val jobDescriptorKey = firstJobDescriptorKey(workflowDescriptor) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) + val workflowPaths = JesWorkflowPaths(workflowDescriptor, NoCredentials.getInstance(), NoCredentials.getInstance(), jesConfiguration) - val callPaths = JesCallPaths(jobDescriptorKey, workflowDescriptor, jesConfiguration, - mockCredentials) - callPaths.callContext.root.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello") - callPaths.callContext.stdout should be("hello-stdout.log") - callPaths.callContext.stderr should be("hello-stderr.log") + val callPaths = JesJobPaths(workflowPaths, jobDescriptorKey) + + callPaths.callContext.root.pathAsString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello") + 
callPaths.callContext.stdout should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello-stdout.log") + callPaths.callContext.stderr should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello-stderr.log") } } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala index c2b77d38c..2ead86d6c 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala @@ -1,16 +1,24 @@ package cromwell.backend.impl.jes -import com.typesafe.config.{ConfigValueFactory, ConfigFactory} +import com.typesafe.config.{ConfigFactory, ConfigValueFactory} import cromwell.backend.BackendConfigurationDescriptor +import cromwell.core.path.DefaultPathBuilder import org.scalatest.prop.TableDrivenPropertyChecks -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} -class JesConfigurationSpec extends FlatSpec with Matchers with TableDrivenPropertyChecks { +class JesConfigurationSpec extends FlatSpec with Matchers with TableDrivenPropertyChecks with BeforeAndAfterAll { behavior of "JesConfigurationSpec" + val mockFile = DefaultPathBuilder.createTempFile() + + override def afterAll(): Unit = { + mockFile.delete(swallowIOExceptions = true) + () + } + val globalConfig = ConfigFactory.parseString( - """ + s""" |google { | | application-name = "cromwell" @@ -24,16 +32,17 @@ class JesConfigurationSpec extends FlatSpec with Matchers with TableDrivenProper | name = "user-via-refresh" | scheme = "refresh_token" | client-id = "secret_id" - | client-secret = "secret_secret" + | client-secret = "${mockFile.pathAsString}" | }, | { | name = "service-account" | scheme = "service_account" | service-account-id = 
"my-service-account" - | pem-file = "/path/to/file.pem" + | pem-file = "${mockFile.pathAsString}" | } | ] |} + | """.stripMargin) val backendConfig = ConfigFactory.parseString( @@ -56,6 +65,18 @@ class JesConfigurationSpec extends FlatSpec with Matchers with TableDrivenProper | endpoint-url = "https://genomics.googleapis.com/" | } | + | default-runtime-attributes { + | failOnStderr: false + | continueOnReturnCode: 0 + | cpu: 1 + | memory: "2 GB" + | bootDiskSizeGb: 10 + | disks: "local-disk 10 SSD" + | noAddress: false + | preemptible: 3 + | zones:["us-central1-a", "us-central1-b"] + | } + | | dockerhub { | account = "dockerAccount" | token = "dockerToken" @@ -67,6 +88,7 @@ class JesConfigurationSpec extends FlatSpec with Matchers with TableDrivenProper | auth = "application-default" | } | } + | """.stripMargin) it should "fail to instantiate if any required configuration is missing" in { diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala index 2669552b9..121d4603f 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala @@ -2,34 +2,40 @@ package cromwell.backend.impl.jes import java.util.UUID +import akka.actor.Props import akka.testkit._ import com.typesafe.config.{Config, ConfigFactory} import cromwell.backend.BackendWorkflowInitializationActor.{InitializationFailed, InitializationSuccess, Initialize} -import cromwell.backend.impl.jes.authentication.GcsLocalizing +import cromwell.backend.async.RuntimeAttributeValidationFailures +import cromwell.backend.impl.jes.authentication.{GcsLocalizing, JesAuthObject} import cromwell.backend.{BackendConfigurationDescriptor, BackendSpec, BackendWorkflowDescriptor} +import cromwell.core.Dispatcher.BackendDispatcher 
+import cromwell.core.Tags.IntegrationTest import cromwell.core.logging.LoggingTest._ import cromwell.core.{TestKitSuite, WorkflowOptions} -import cromwell.filesystems.gcs.{RefreshTokenMode, SimpleClientSecrets} +import cromwell.filesystems.gcs.GoogleConfiguration +import cromwell.filesystems.gcs.auth.{GoogleAuthModeSpec, RefreshTokenMode, SimpleClientSecrets} import cromwell.util.{EncryptionSpec, SampleWdl} import org.scalatest.{FlatSpecLike, Matchers} import org.specs2.mock.Mockito import spray.json._ -import wdl4s.Call +import wdl4s.wdl.WdlTaskCall import scala.concurrent.duration._ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpec") with FlatSpecLike with Matchers with ImplicitSender with Mockito { - val Timeout = 5.second.dilated + val Timeout: FiniteDuration = 5.second.dilated import BackendSpec._ + import JesInitializationActorSpec._ - val HelloWorld = - """ + val HelloWorld: String = + s""" |task hello { | String addressee = "you" | command { - | echo "Hello ${addressee}!" + | echo "Hello $${addressee}!" 
| } | output { | String salutation = read_string(stdout()) @@ -38,12 +44,12 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin - val globalConfig = ConfigFactory.parseString( + val globalConfig: Config = ConfigFactory.parseString( """ |google { | @@ -62,9 +68,9 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe | } | ] |} - | """.stripMargin) + |""".stripMargin) - val backendConfigTemplate = + val backendConfigTemplate: String = """ | // Google project | project = "my-cromwell-workflows" @@ -84,6 +90,19 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe | endpoint-url = "https://genomics.googleapis.com/" | } | + | default-runtime-attributes { + | cpu: 1 + | failOnStderr: false + | # Allowed to be a boolean, or a list of Ints, or an Int + | continueOnReturnCode: 0 + | memory: "2 GB" + | bootDiskSizeGb: 10 + | # Allowed to be a String, or a list of Strings + | disks: "local-disk 10 SSD" + | noAddress: false + | preemptible: 0 + | zones: ["us-central1-a", "us-central1-b"] + | } | filesystems { | gcs { | // A reference to a potentially different auth for manipulating files via engine functions. 
@@ -94,7 +113,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe |[DOCKERHUBCONFIG] |""".stripMargin - val refreshTokenConfigTemplate = + val refreshTokenConfigTemplate: String = """ | // Google project | project = "my-cromwell-workflows" @@ -114,6 +133,20 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe | endpoint-url = "https://genomics.googleapis.com/" | } | + | default-runtime-attributes { + | cpu: 1 + | failOnStderr: false + | # Allowed to be a boolean, or a list of Ints, or an Int + | continueOnReturnCode: 0 + | memory: "2 GB" + | bootDiskSizeGb: 10 + | # Allowed to be a String, or a list of Strings + | disks: "local-disk 10 SSD" + | noAddress: false + | preemptible: 0 + | zones: ["us-central1-a", "us-central1-b"] + | } + | | filesystems { | gcs { | // A reference to a potentially different auth for manipulating files via engine functions. @@ -122,9 +155,9 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe | } |""".stripMargin - val backendConfig = ConfigFactory.parseString(backendConfigTemplate.replace("[DOCKERHUBCONFIG]", "")) + val backendConfig: Config = ConfigFactory.parseString(backendConfigTemplate.replace("[DOCKERHUBCONFIG]", "")) - val dockerBackendConfig = ConfigFactory.parseString(backendConfigTemplate.replace("[DOCKERHUBCONFIG]", + val dockerBackendConfig: Config = ConfigFactory.parseString(backendConfigTemplate.replace("[DOCKERHUBCONFIG]", """ |dockerhub { | account = "my@docker.account" @@ -134,22 +167,33 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe val defaultBackendConfig = BackendConfigurationDescriptor(backendConfig, globalConfig) - val refreshTokenConfig = ConfigFactory.parseString(refreshTokenConfigTemplate) + val refreshTokenConfig: Config = ConfigFactory.parseString(refreshTokenConfigTemplate) - private def getJesBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], conf: 
BackendConfigurationDescriptor) = { - system.actorOf(JesInitializationActor.props(workflowDescriptor, calls, new JesConfiguration(conf), emptyActor)) + private def getJesBackendProps(workflowDescriptor: BackendWorkflowDescriptor, + calls: Set[WdlTaskCall], + jesConfiguration: JesConfiguration): Props = { + val ioActor = mockIoActor + val params = JesInitializationActorParams(workflowDescriptor, ioActor, calls, jesConfiguration, emptyActor, restarting = false) + Props(new JesInitializationActor(params)).withDispatcher(BackendDispatcher) + } + + private def getJesBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[WdlTaskCall], conf: BackendConfigurationDescriptor) = { + val props = getJesBackendProps(workflowDescriptor, calls, new JesConfiguration(conf)) + system.actorOf(props, "TestableJesInitializationActor-" + UUID.randomUUID) } behavior of "JesInitializationActor" - it should "log a warning message when there are unsupported runtime attributes" in { + it should "log a warning message when there are unsupported runtime attributes" taggedAs IntegrationTest in { + GoogleAuthModeSpec.assumeHasApplicationDefaultCredentials() + within(Timeout) { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { docker: "ubuntu/latest" test: true }""") - val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, + val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflow.taskCalls, defaultBackendConfig) val eventPattern = - "Key/s [test] is/are not supported by JesBackend. Unsupported attributes will not be part of jobs executions." + "Key/s [test] is/are not supported by backend. Unsupported attributes will not be part of job executions." EventFilter.warning(pattern = escapePattern(eventPattern), occurrences = 1) intercept { backend ! 
Initialize } @@ -163,15 +207,15 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe it should "return InitializationFailed when docker runtime attribute key is not present" in { within(Timeout) { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { }""") - val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, + val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflow.taskCalls, defaultBackendConfig) backend ! Initialize expectMsgPF() { case InitializationFailed(failure) => failure match { - case exception: IllegalArgumentException => - if (!exception.getMessage.equals("Task hello has an invalid runtime attribute docker = !! NOT FOUND !!")) - fail("Exception message does not contains 'Runtime attribute validation failed'.") + case exception: RuntimeAttributeValidationFailures => + if (!exception.getMessage.equals("Runtime validation failed:\nTask hello has an invalid runtime attribute docker = !! NOT FOUND !!")) + fail("Exception message is not equal to 'Runtime validation failed:\nTask hello has an invalid runtime attribute docker = !! 
NOT FOUND !!'.") } } } @@ -181,13 +225,13 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe private def buildJesInitializationTestingBits(backendConfig: Config = dockerBackendConfig): TestingBits = { val workflowOptions = WorkflowOptions.fromMap(Map("refresh_token" -> "mytoken")).get - val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.wdlSource(), options = workflowOptions) - val calls = workflowDescriptor.workflowNamespace.workflow.calls + val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.workflowSource(), options = workflowOptions) + val calls = workflowDescriptor.workflow.taskCalls val backendConfigurationDescriptor = BackendConfigurationDescriptor(backendConfig, globalConfig) val jesConfiguration = new JesConfiguration(backendConfigurationDescriptor) val actorRef = TestActorRef[JesInitializationActor]( - JesInitializationActor.props(workflowDescriptor, calls, jesConfiguration, emptyActor), + getJesBackendProps(workflowDescriptor, calls, jesConfiguration), "TestableJesInitializationActor-" + UUID.randomUUID) TestingBits(actorRef, jesConfiguration) } @@ -197,7 +241,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe val TestingBits(actorRef, _) = buildJesInitializationTestingBits(refreshTokenConfig) val actor = actorRef.underlyingActor - actor.refreshTokenAuth should be(Some(GcsLocalizing(RefreshTokenMode("user-via-refresh", "secret_id", "secret_secret"), "mytoken"))) + actor.refreshTokenAuth should be(Some(GcsLocalizing(RefreshTokenMode("user-via-refresh", "secret_id", "secret_secret", GoogleConfiguration.GoogleScopes), "mytoken"))) } it should "generate the correct json content for no docker token and no refresh token" in { @@ -206,9 +250,9 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe val TestingBits(actorRef, _) = buildJesInitializationTestingBits() val actor = actorRef.underlyingActor - actor.generateAuthJson(None, 
None) should be(empty) + actor.generateAuthJson(flattenAuthOptions(None, None), false) should be(empty) - val authJsonOption = actor.generateAuthJson(None, None) + val authJsonOption = actor.generateAuthJson(flattenAuthOptions(None, None), false) authJsonOption should be(empty) actorRef.stop() @@ -220,7 +264,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe val TestingBits(actorRef, jesConfiguration) = buildJesInitializationTestingBits() val actor = actorRef.underlyingActor - val authJsonOption = actor.generateAuthJson(jesConfiguration.dockerCredentials, None) + val authJsonOption = actor.generateAuthJson(flattenAuthOptions(jesConfiguration.dockerCredentials, None), false) authJsonOption shouldNot be(empty) authJsonOption.get should be( normalize( @@ -246,7 +290,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe val actor = actorRef.underlyingActor val gcsUserAuth = Option(GcsLocalizing(SimpleClientSecrets("myclientid", "myclientsecret"), "mytoken")) - val authJsonOption = actor.generateAuthJson(None, gcsUserAuth) + val authJsonOption = actor.generateAuthJson(flattenAuthOptions(None, gcsUserAuth), false) authJsonOption shouldNot be(empty) authJsonOption.get should be( normalize( @@ -273,7 +317,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe val actor = actorRef.underlyingActor val gcsUserAuth = Option(GcsLocalizing(SimpleClientSecrets("myclientid", "myclientsecret"), "mytoken")) - val authJsonOption = actor.generateAuthJson(jesConfiguration.dockerCredentials, gcsUserAuth) + val authJsonOption = actor.generateAuthJson(flattenAuthOptions(jesConfiguration.dockerCredentials, gcsUserAuth), false) authJsonOption shouldNot be(empty) authJsonOption.get should be( normalize( @@ -297,7 +341,65 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe actorRef.stop() } - private def normalize(str: String) = { + it should "generate the correct 
json content for a docker token, a refresh token, and restrictMetadataAccess" in { + EncryptionSpec.assumeAes256Cbc() + + val TestingBits(actorRef, jesConfiguration) = buildJesInitializationTestingBits() + val actor = actorRef.underlyingActor + + val gcsUserAuth = Option(GcsLocalizing(SimpleClientSecrets("myclientid", "myclientsecret"), "mytoken")) + val authJsonOption = actor.generateAuthJson(flattenAuthOptions(jesConfiguration.dockerCredentials, gcsUserAuth), true) + authJsonOption shouldNot be(empty) + authJsonOption.get should be( + normalize( + """ + |{ + | "auths": { + | "docker": { + | "account": "my@docker.account", + | "token": "mydockertoken" + | }, + | "boto": { + | "client_id": "myclientid", + | "client_secret": "myclientsecret", + | "refresh_token": "mytoken" + | } + | }, + | "restrictMetadataAccess": true + |} + """.stripMargin) + ) + + actorRef.stop() + } + + it should "generate the correct json content for just restrictMetadataAccess" in { + EncryptionSpec.assumeAes256Cbc() + + val TestingBits(actorRef, _) = buildJesInitializationTestingBits() + val actor = actorRef.underlyingActor + + val authJsonOption = actor.generateAuthJson(flattenAuthOptions(None, None), true) + authJsonOption shouldNot be(empty) + authJsonOption.get should be( + normalize( + """ + |{ + | "restrictMetadataAccess": true + |} + """.stripMargin) + ) + + actorRef.stop() + } +} + +object JesInitializationActorSpec { + def normalize(str: String) = { str.parseJson.prettyPrint } + + def flattenAuthOptions(options: Option[JesAuthObject]*): List[JesAuthObject] = { + options.toList.flatten + } } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala new file mode 100644 index 000000000..2ff644e47 --- /dev/null +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala @@ -0,0 +1,126 @@ +package 
cromwell.backend.impl.jes + +import akka.actor.{Actor, ActorRef, Props} +import akka.testkit._ +import cromwell.backend.BackendJobDescriptor +import cromwell.backend.BackendJobExecutionActor.{ExecuteJobCommand, JobFailedNonRetryableResponse} +import cromwell.backend.impl.jes.ControllableFailingJabjea.JabjeaExplode +import cromwell.backend.standard.{DefaultStandardSyncExecutionActorParams, StandardSyncExecutionActor, StandardSyncExecutionActorParams} +import cromwell.core.TestKitSuite +import org.scalatest.{FlatSpecLike, Matchers} +import org.specs2.mock.Mockito + +import scala.concurrent.duration._ +import scala.concurrent.{ExecutionContext, Promise} +import scala.util.{Failure, Success} + +class JesJobExecutionActorSpec extends TestKitSuite("JesJobExecutionActorSpec") with FlatSpecLike with Matchers with Mockito { + + behavior of "JesJobExecutionActor" + + private val AwaitAlmostNothing = 100.milliseconds.dilated + private val TimeoutDuration = 10.seconds.dilated + implicit val ec: ExecutionContext = system.dispatcher + + it should "catch failures in JABJEA initialization and fail the job accordingly" in { + val jobDescriptor = mock[BackendJobDescriptor] + val jesWorkflowInfo = mock[JesConfiguration] + val initializationData = mock[JesBackendInitializationData] + val serviceRegistryActor = system.actorOf(Props.empty) + val ioActor = system.actorOf(Props.empty) + val jesBackendSingletonActor = Option(system.actorOf(Props.empty)) + + initializationData.jesConfiguration returns jesWorkflowInfo + + val parent = TestProbe() + val deathwatch = TestProbe() + val params = DefaultStandardSyncExecutionActorParams(JesAsyncBackendJobExecutionActor.JesOperationIdKey, serviceRegistryActor, ioActor, + jobDescriptor, null, Option(initializationData), jesBackendSingletonActor, + classOf[JesAsyncBackendJobExecutionActor]) + val testJJEA = TestActorRef[TestJesJobExecutionActor]( + props = Props(new TestJesJobExecutionActor(params, Props(new ConstructorFailingJABJEA))), + supervisor 
= parent.ref) + deathwatch watch testJJEA + + // Nothing happens: + parent.expectNoMsg(max = AwaitAlmostNothing) + deathwatch.expectNoMsg(max = AwaitAlmostNothing) + + testJJEA.tell(msg = ExecuteJobCommand, sender = parent.ref) + + parent.expectMsgPF(max = TimeoutDuration) { + case JobFailedNonRetryableResponse(_, throwable, _) => + throwable.getMessage should be("JesAsyncBackendJobExecutionActor failed and didn't catch its exception.") + } + } + + it should "catch failures at a random point during JABJEA processing and fail the job accordingly" in { + val jobDescriptor = mock[BackendJobDescriptor] + val jesWorkflowInfo = mock[JesConfiguration] + val initializationData = mock[JesBackendInitializationData] + val serviceRegistryActor = system.actorOf(Props.empty) + val ioActor = system.actorOf(Props.empty) + val jesBackendSingletonActor = Option(system.actorOf(Props.empty)) + + initializationData.jesConfiguration returns jesWorkflowInfo + + val parent = TestProbe() + val deathwatch = TestProbe() + val jabjeaConstructionPromise = Promise[ActorRef]() + val params = DefaultStandardSyncExecutionActorParams(JesAsyncBackendJobExecutionActor.JesOperationIdKey, serviceRegistryActor, ioActor, + jobDescriptor, null, Option(initializationData), jesBackendSingletonActor, + classOf[JesAsyncBackendJobExecutionActor]) + val testJJEA = TestActorRef[TestJesJobExecutionActor]( + props = Props(new TestJesJobExecutionActor(params, Props(new ControllableFailingJabjea(jabjeaConstructionPromise)))), + supervisor = parent.ref) + deathwatch watch testJJEA + + // Nothing happens: + parent.expectNoMsg(max = AwaitAlmostNothing) + deathwatch.expectNoMsg(max = AwaitAlmostNothing) + + testJJEA.tell(msg = ExecuteJobCommand, sender = parent.ref) + + // Wait for the JABJEA to be spawned. Then kill it: + parent.expectNoMsg(max = AwaitAlmostNothing) + deathwatch.expectNoMsg(max = AwaitAlmostNothing) + jabjeaConstructionPromise.future onComplete { + case Success(jabjea) => + jabjea ! 
JabjeaExplode + case Failure(throwable) => + val exception = new RuntimeException("Error creating jabjea for test!", throwable) + exception.printStackTrace() + throw exception + } + + parent.expectMsgPF(max = TimeoutDuration) { + case JobFailedNonRetryableResponse(_, throwable, _) => + throwable.getMessage should be("JesAsyncBackendJobExecutionActor failed and didn't catch its exception.") + } + } +} + +class TestJesJobExecutionActor(params: StandardSyncExecutionActorParams, + fakeJabjeaProps: Props) extends StandardSyncExecutionActor(params) { + override def createAsyncProps(): Props = fakeJabjeaProps +} + +class ConstructorFailingJABJEA extends ControllableFailingJabjea(Promise[ActorRef]()) { + // Explode immediately in the constructor: + explode() +} + +class ControllableFailingJabjea(constructionPromise: Promise[ActorRef]) extends Actor { + def explode(): Unit = { + val boom = 1 == 1 + if (boom) throw new RuntimeException("Test Exception! Don't panic if this appears during a test run!") + } + constructionPromise.trySuccess(self) + override def receive: Receive = { + case JabjeaExplode => explode() + } +} + +object ControllableFailingJabjea { + case object JabjeaExplode +} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesRuntimeAttributesSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesRuntimeAttributesSpec.scala index efbcb53e6..1de774fa6 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesRuntimeAttributesSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesRuntimeAttributesSpec.scala @@ -1,27 +1,29 @@ package cromwell.backend.impl.jes +import cats.data.NonEmptyList import cromwell.backend.impl.jes.io.{DiskType, JesAttachedDisk, JesWorkingDisk} -import cromwell.backend.validation.ContinueOnReturnCodeSet -import cromwell.backend.{BackendSpec, MemorySize, RuntimeAttributeDefinition} +import cromwell.backend.validation.{ContinueOnReturnCodeFlag, 
ContinueOnReturnCodeSet} +import cromwell.backend.{MemorySize, RuntimeAttributeDefinition} import cromwell.core.WorkflowOptions import org.scalatest.{Matchers, WordSpecLike} import org.slf4j.helpers.NOPLogger import org.specs2.mock.Mockito import spray.json._ import wdl4s.parser.MemoryUnit -import wdl4s.types.{WdlArrayType, WdlIntegerType, WdlStringType} -import wdl4s.values.{WdlArray, WdlBoolean, WdlInteger, WdlString, WdlValue} +import wdl4s.wdl.types.{WdlArrayType, WdlIntegerType, WdlStringType} +import wdl4s.wdl.values.{WdlArray, WdlBoolean, WdlInteger, WdlString, WdlValue} class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { - def workflowOptionsWithDefaultRA(defaults: Map[String, JsValue]) = { + def workflowOptionsWithDefaultRA(defaults: Map[String, JsValue]): WorkflowOptions = { WorkflowOptions(JsObject(Map( "default_runtime_attributes" -> JsObject(defaults) ))) } - val expectedDefaults = new JesRuntimeAttributes(1, Vector("us-central1-b"), 0, 10, MemorySize(2, MemoryUnit.GB), Seq(JesWorkingDisk(DiskType.SSD, 10)), None, false, ContinueOnReturnCodeSet(Set(0)), false) - val expectedDefaultsPlusUbuntuDocker = expectedDefaults.copy(dockerImage = Some("ubuntu:latest")) + val expectedDefaults = new JesRuntimeAttributes(1, Vector("us-central1-b", "us-central1-a"), 0, 10, + MemorySize(2, MemoryUnit.GB), Vector(JesWorkingDisk(DiskType.SSD, 10)), "ubuntu:latest", false, + ContinueOnReturnCodeSet(Set(0)), false) "JesRuntimeAttributes" should { @@ -30,9 +32,15 @@ class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { assertJesRuntimeAttributesFailedCreation(runtimeAttributes, "Can't find an attribute value for key docker") } + "use hardcoded defaults if not declared in task, workflow options, or config (except for docker)" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest")) + val expectedRuntimeAttributes = expectedDefaults + assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, 
expectedRuntimeAttributes, jesConfiguration = noDefaultsJesConfiguration) + } + "validate a valid Docker entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest")) - val expectedRuntimeAttributes = expectedDefaults.copy(dockerImage = Option("ubuntu:latest")) + val expectedRuntimeAttributes = expectedDefaults assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } @@ -43,7 +51,7 @@ class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { "validate a valid failOnStderr entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "failOnStderr" -> WdlBoolean(true)) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(failOnStderr = true) + val expectedRuntimeAttributes = expectedDefaults.copy(failOnStderr = true) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } @@ -52,21 +60,27 @@ class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { assertJesRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting failOnStderr runtime attribute to be a Boolean or a String with values of 'true' or 'false'") } - "validate a valid continueOnReturnCode entry" in { + "validate a valid continueOnReturnCode integer entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "continueOnReturnCode" -> WdlInteger(1)) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(continueOnReturnCode = ContinueOnReturnCodeSet(Set(1))) + val expectedRuntimeAttributes = expectedDefaults.copy(continueOnReturnCode = ContinueOnReturnCodeSet(Set(1))) + assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) + } + + "validate a valid continueOnReturnCode boolean entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "continueOnReturnCode" -> WdlBoolean(false)) + val expectedRuntimeAttributes = 
expectedDefaults.copy(continueOnReturnCode = ContinueOnReturnCodeFlag(false)) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } "validate a valid continueOnReturnCode array entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "continueOnReturnCode" -> WdlArray(WdlArrayType(WdlIntegerType), Array(WdlInteger(1), WdlInteger(2)))) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(continueOnReturnCode = ContinueOnReturnCodeSet(Set(1, 2))) + val expectedRuntimeAttributes = expectedDefaults.copy(continueOnReturnCode = ContinueOnReturnCodeSet(Set(1, 2))) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } "coerce then validate a valid continueOnReturnCode array entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "continueOnReturnCode" -> WdlArray(WdlArrayType(WdlStringType), Array(WdlString("1"), WdlString("2")))) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(continueOnReturnCode = ContinueOnReturnCodeSet(Set(1, 2))) + val expectedRuntimeAttributes = expectedDefaults.copy(continueOnReturnCode = ContinueOnReturnCodeSet(Set(1, 2))) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } @@ -77,13 +91,13 @@ class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { "validate a valid cpu entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "cpu" -> WdlInteger(2)) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(cpu = 2) + val expectedRuntimeAttributes = expectedDefaults.copy(cpu = 2) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } "validate a valid cpu string entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "cpu" -> WdlString("2")) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(cpu = 2) 
+ val expectedRuntimeAttributes = expectedDefaults.copy(cpu = 2) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } @@ -94,7 +108,7 @@ class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { "validate a valid zones entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "zones" -> WdlString("us-central-z")) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(zones = Vector("us-central-z")) + val expectedRuntimeAttributes = expectedDefaults.copy(zones = Vector("us-central-z")) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } @@ -105,7 +119,7 @@ class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { "validate a valid array zones entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "zones" -> WdlArray(WdlArrayType(WdlStringType), Array(WdlString("us-central1-y"), WdlString("us-central1-z")))) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(zones = Vector("us-central1-y", "us-central1-z")) + val expectedRuntimeAttributes = expectedDefaults.copy(zones = Vector("us-central1-y", "us-central1-z")) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } @@ -116,18 +130,19 @@ class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { "validate a valid preemptible entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "preemptible" -> WdlInteger(3)) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(preemptible = 3) + val expectedRuntimeAttributes = expectedDefaults.copy(preemptible = 3) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } "fail to validate an invalid preemptible entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "preemptible" -> WdlString("value")) - 
assertJesRuntimeAttributesFailedCreation(runtimeAttributes, "Failed to validate preemptible runtime attribute: Could not coerce value into an integer") + assertJesRuntimeAttributesFailedCreation(runtimeAttributes, + "Expecting preemptible runtime attribute to be an Integer") } "validate a valid bootDiskSizeGb entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "bootDiskSizeGb" -> WdlInteger(4)) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(bootDiskSize = 4) + val expectedRuntimeAttributes = expectedDefaults.copy(bootDiskSize = 4) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } @@ -138,7 +153,7 @@ class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { "validate a valid disks entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "disks" -> WdlString("local-disk 20 SSD")) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(disks = Seq(JesAttachedDisk.parse("local-disk 20 SSD").get)) + val expectedRuntimeAttributes = expectedDefaults.copy(disks = Seq(JesAttachedDisk.parse("local-disk 20 SSD").get)) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } @@ -149,7 +164,7 @@ class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { "validate a valid disks array entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "disks" -> WdlArray(WdlArrayType(WdlStringType), Array(WdlString("local-disk 20 SSD"), WdlString("local-disk 30 SSD")))) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(disks = Seq(JesAttachedDisk.parse("local-disk 20 SSD").get, JesAttachedDisk.parse("local-disk 30 SSD").get)) + val expectedRuntimeAttributes = expectedDefaults.copy(disks = Seq(JesAttachedDisk.parse("local-disk 20 SSD").get, JesAttachedDisk.parse("local-disk 30 SSD").get)) 
assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } @@ -160,7 +175,7 @@ class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { "validate a valid memory entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "memory" -> WdlString("1 GB")) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(memory = MemorySize.parse("1 GB").get) + val expectedRuntimeAttributes = expectedDefaults.copy(memory = MemorySize.parse("1 GB").get) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } @@ -171,41 +186,99 @@ class JesRuntimeAttributesSpec extends WordSpecLike with Matchers with Mockito { "validate a valid noAddress entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "noAddress" -> WdlBoolean(true)) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(noAddress = true) + val expectedRuntimeAttributes = expectedDefaults.copy(noAddress = true) assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) } "fail to validate an invalid noAddress entry" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "noAddress" -> WdlInteger(1)) - assertJesRuntimeAttributesFailedCreation(runtimeAttributes, "Failed to validate noAddress runtime attribute: Could not coerce 1 into a boolean") + assertJesRuntimeAttributesFailedCreation(runtimeAttributes, + "Expecting noAddress runtime attribute to be a Boolean") } - "use reasonable default values" in { + "override config default attributes with default attributes declared in workflow options" in { val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest")) - val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker - assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) + + val workflowOptionsJson = + """{ + | "default_runtime_attributes": { "cpu": 2 } + 
|} + """.stripMargin.parseJson.asInstanceOf[JsObject] + + val workflowOptions = WorkflowOptions.fromJsonObject(workflowOptionsJson).get + val expectedRuntimeAttributes = expectedDefaults.copy(cpu = 2) + assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes, workflowOptions) + } + + "override config default runtime attributes with task runtime attributes" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "cpu" -> WdlInteger(4)) + + val workflowOptionsJson = + """{ + | "default_runtime_attributes": { "cpu": 2 } + |} + """.stripMargin.parseJson.asInstanceOf[JsObject] + + val workflowOptions = WorkflowOptions.fromJsonObject(workflowOptionsJson).get + val expectedRuntimeAttributes = expectedDefaults.copy(cpu = 4) + assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes, workflowOptions) + } + + "override invalid config default attributes with task runtime attributes" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "cpu" -> WdlInteger(4)) + + val workflowOptionsJson = + """{ + | "default_runtime_attributes": { "cpu": 2.2 } + |} + """.stripMargin.parseJson.asInstanceOf[JsObject] + + val workflowOptions = WorkflowOptions.fromJsonObject(workflowOptionsJson).get + val expectedRuntimeAttributes = expectedDefaults.copy(cpu = 4) + assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes, workflowOptions) } } - private def assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes: Map[String, WdlValue], expectedRuntimeAttributes: JesRuntimeAttributes, workflowOptions: WorkflowOptions = emptyWorkflowOptions): Unit = { - val withDefaults = RuntimeAttributeDefinition.addDefaultsToAttributes(JesBackendLifecycleActorFactory.staticRuntimeAttributeDefinitions, workflowOptions) _ + private def assertJesRuntimeAttributesSuccessfulCreation(runtimeAttributes: Map[String, WdlValue], + expectedRuntimeAttributes: JesRuntimeAttributes, 
+ workflowOptions: WorkflowOptions = emptyWorkflowOptions, + defaultZones: NonEmptyList[String] = defaultZones, + jesConfiguration: JesConfiguration = jesConfiguration): Unit = { try { - - assert(JesRuntimeAttributes(withDefaults(runtimeAttributes), NOPLogger.NOP_LOGGER) == expectedRuntimeAttributes) + val actualRuntimeAttributes = toJesRuntimeAttributes(runtimeAttributes, workflowOptions, jesConfiguration) + assert(actualRuntimeAttributes == expectedRuntimeAttributes) } catch { case ex: RuntimeException => fail(s"Exception was not expected but received: ${ex.getMessage}") } + () } - private def assertJesRuntimeAttributesFailedCreation(runtimeAttributes: Map[String, WdlValue], exMsg: String, workflowOptions: WorkflowOptions = emptyWorkflowOptions): Unit = { - val withDefaults = RuntimeAttributeDefinition.addDefaultsToAttributes(JesBackendLifecycleActorFactory.staticRuntimeAttributeDefinitions, workflowOptions) _ + private def assertJesRuntimeAttributesFailedCreation(runtimeAttributes: Map[String, WdlValue], + exMsg: String, + workflowOptions: WorkflowOptions = emptyWorkflowOptions): Unit = { try { - JesRuntimeAttributes(withDefaults(runtimeAttributes), NOPLogger.NOP_LOGGER) - fail("A RuntimeException was expected.") + toJesRuntimeAttributes(runtimeAttributes, workflowOptions, jesConfiguration) + fail(s"A RuntimeException was expected with message: $exMsg") } catch { case ex: RuntimeException => assert(ex.getMessage.contains(exMsg)) } + () + } + + private def toJesRuntimeAttributes(runtimeAttributes: Map[String, WdlValue], + workflowOptions: WorkflowOptions, + jesConfiguration: JesConfiguration): JesRuntimeAttributes = { + val runtimeAttributesBuilder = JesRuntimeAttributes.runtimeAttributesBuilder(jesConfiguration) + val defaultedAttributes = RuntimeAttributeDefinition.addDefaultsToAttributes( + staticRuntimeAttributeDefinitions, workflowOptions)(runtimeAttributes) + val validatedRuntimeAttributes = runtimeAttributesBuilder.build(defaultedAttributes, 
NOPLogger.NOP_LOGGER) + JesRuntimeAttributes(validatedRuntimeAttributes, jesConfiguration.runtimeConfig) } private val emptyWorkflowOptions = WorkflowOptions.fromMap(Map.empty).get + private val defaultZones = NonEmptyList.of("us-central1-b", "us-central1-a") + private val jesConfiguration = new JesConfiguration(JesTestConfig.JesBackendConfigurationDescriptor) + private val noDefaultsJesConfiguration = new JesConfiguration(JesTestConfig.NoDefaultsConfigurationDescriptor) + private val staticRuntimeAttributeDefinitions: Set[RuntimeAttributeDefinition] = + JesRuntimeAttributes.runtimeAttributesBuilder(jesConfiguration).definitions.toSet } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala index 03f17d65b..3423a8b71 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala @@ -4,6 +4,7 @@ import com.typesafe.config.ConfigFactory import cromwell.backend.BackendConfigurationDescriptor object JesTestConfig { + private val JesBackendConfigString = """ |project = "my-cromwell-workflows" @@ -19,6 +20,37 @@ object JesTestConfig { | auth = "application-default" | } |} + | + |default-runtime-attributes { + | cpu: 1 + | failOnStderr: false + | continueOnReturnCode: 0 + | docker: "ubuntu:latest" + | memory: "2 GB" + | bootDiskSizeGb: 10 + | disks: "local-disk 10 SSD" + | noAddress: false + | preemptible: 0 + | zones:["us-central1-b", "us-central1-a"] + |} + | + |""".stripMargin + + private val NoDefaultsConfigString = + """ + |project = "my-cromwell-workflows" + |root = "gs://my-cromwell-workflows-bucket" + | + |genomics { + | auth = "application-default" + | endpoint-url = "https://genomics.googleapis.com/" + |} + | + |filesystems { + | gcs { + | auth = "application-default" + | } + |} |""".stripMargin private val 
JesGlobalConfigString = @@ -44,9 +76,12 @@ object JesTestConfig { | } | } |} + | |""".stripMargin val JesBackendConfig = ConfigFactory.parseString(JesBackendConfigString) val JesGlobalConfig = ConfigFactory.parseString(JesGlobalConfigString) - val JesBackendConfigurationDescriptor = new BackendConfigurationDescriptor(JesBackendConfig, JesGlobalConfig) + val JesBackendNoDefaultConfig = ConfigFactory.parseString(NoDefaultsConfigString) + val JesBackendConfigurationDescriptor = BackendConfigurationDescriptor(JesBackendConfig, JesGlobalConfig) + val NoDefaultsConfigurationDescriptor = BackendConfigurationDescriptor(JesBackendNoDefaultConfig, JesGlobalConfig) } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala index 3f1dea365..e8faa6951 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala @@ -1,26 +1,30 @@ package cromwell.backend.impl.jes +import com.google.cloud.NoCredentials import cromwell.backend.BackendSpec +import cromwell.core.TestKitSuite +import cromwell.filesystems.gcs.auth.GoogleAuthModeSpec import cromwell.util.SampleWdl -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.{FlatSpecLike, Matchers} import org.specs2.mock.Mockito -import cromwell.backend.impl.jes.MockObjects._ -class JesWorkflowPathsSpec extends FlatSpec with Matchers with Mockito { +class JesWorkflowPathsSpec extends TestKitSuite with FlatSpecLike with Matchers with Mockito { import BackendSpec._ import JesTestConfig._ behavior of "JesWorkflowPaths" it should "map the correct paths" in { - val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.wdlSource()) + GoogleAuthModeSpec.assumeHasApplicationDefaultCredentials() + + val workflowDescriptor = 
buildWorkflowDescriptor(SampleWdl.HelloWorld.workflowSource()) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) - val workflowPaths = JesWorkflowPaths(workflowDescriptor, jesConfiguration, mockCredentials)(scala.concurrent.ExecutionContext.global) - workflowPaths.rootPath.toString should be("gs://my-cromwell-workflows-bucket") - workflowPaths.workflowRootPath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}") - workflowPaths.gcsAuthFilePath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/${workflowDescriptor.id}_auth.json") + val workflowPaths = JesWorkflowPaths(workflowDescriptor, NoCredentials.getInstance(), NoCredentials.getInstance(), jesConfiguration)(system) + workflowPaths.executionRoot.pathAsString should be("gs://my-cromwell-workflows-bucket/") + workflowPaths.workflowRoot.pathAsString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/") + workflowPaths.gcsAuthFilePath.pathAsString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/${workflowDescriptor.id}_auth.json") } } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/MockObjects.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/MockObjects.scala deleted file mode 100644 index 1cde38c47..000000000 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/MockObjects.scala +++ /dev/null @@ -1,9 +0,0 @@ -package cromwell.backend.impl.jes - -import com.google.api.client.googleapis.testing.auth.oauth2.MockGoogleCredential -import cromwell.backend.impl.jes.authentication.JesCredentials - -object MockObjects { - val mockCredential = new MockGoogleCredential.Builder().build() - val mockCredentials = JesCredentials(mockCredential, mockCredential) -} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/RunSpec.scala 
b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/RunSpec.scala deleted file mode 100644 index 0486d9ec5..000000000 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/RunSpec.scala +++ /dev/null @@ -1,51 +0,0 @@ -package cromwell.backend.impl.jes - -import java.time.OffsetDateTime -import java.util - -import com.google.api.client.googleapis.testing.auth.oauth2.MockGoogleCredential -import com.google.api.client.util.ArrayMap -import com.google.api.services.genomics.Genomics -import com.google.api.services.genomics.model.Operation -import org.scalatest.{FlatSpec, Matchers} -import org.specs2.mock.{Mockito => MockitoTrait} -import cromwell.core.ExecutionEvent - -import scala.collection.JavaConverters._ - -class RunSpec extends FlatSpec with Matchers with MockitoTrait { - "JES Run" should "parse events from Operation metadata" in { - val op: Operation = new Operation() - - val event1: ArrayMap[String, String] = ArrayMap.create(2) - event1.add("description", "start") - event1.add("startTime", "2015-12-05T00:00:01+00:00") - - val event2: ArrayMap[String, String] = ArrayMap.create(2) - event2.add("description", "blah") - event2.add("startTime", "2015-12-05T00:01:00+00:00") - - val events = new util.ArrayList(Seq(event1, event2).asJava) - - val metadata: Map[String, AnyRef] = Map( - "createTime" -> "2015-12-05T00:00:00+00:00", - "startTime" -> "2015-12-05T00:00:01+00:00", - "endTime" -> "2015-12-05T11:00:00+00:00", - "events" -> events - ) - - op.setMetadata(metadata.asJava) - - val mockedCredentials = new MockGoogleCredential.Builder().build() - val genomics = new Genomics(mockedCredentials.getTransport, mockedCredentials.getJsonFactory, mockedCredentials) - val run = new Run("runId", genomics) - val list = run.getEventList(op) - list should contain theSameElementsAs List( - ExecutionEvent("waiting for quota", OffsetDateTime.parse("2015-12-05T00:00:00+00:00")), - ExecutionEvent("initializing VM", 
OffsetDateTime.parse("2015-12-05T00:00:01+00:00")), - ExecutionEvent("start", OffsetDateTime.parse("2015-12-05T00:00:01+00:00")), - ExecutionEvent("cromwell poll interval", OffsetDateTime.parse("2015-12-05T11:00:00+00:00")) - ) - - } -} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManagerSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManagerSpec.scala new file mode 100644 index 000000000..2564dfdfd --- /dev/null +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManagerSpec.scala @@ -0,0 +1,149 @@ +package cromwell.backend.impl.jes.statuspolling + +import akka.actor.{ActorRef, Props} +import akka.testkit.{TestActorRef, TestProbe} +import cromwell.backend.impl.jes.{JesConfiguration, Run} +import cromwell.core.TestKitSuite +import org.scalatest.{FlatSpecLike, Matchers} + +import scala.concurrent.duration._ +import akka.testkit._ +import JesApiQueryManagerSpec._ +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.JesStatusPollQuery +import cromwell.backend.standard.StandardAsyncJob +import cromwell.util.AkkaTestUtil +import eu.timepit.refined.api.Refined +import eu.timepit.refined.numeric._ +import org.scalatest.concurrent.Eventually + +import scala.collection.immutable.Queue + +class JesApiQueryManagerSpec extends TestKitSuite("JesApiQueryManagerSpec") with FlatSpecLike with Matchers with Eventually { + + behavior of "JesApiQueryManagerSpec" + + implicit val TestExecutionTimeout = 10.seconds.dilated + implicit val DefaultPatienceConfig = PatienceConfig(TestExecutionTimeout) + val AwaitAlmostNothing = 30.milliseconds.dilated + val BatchSize = 5 + + it should "queue up and dispense status poll requests, in order" in { + val statusPoller = TestProbe(name = "StatusPoller") + val jaqmActor: TestActorRef[TestJesApiQueryManager] = TestActorRef(TestJesApiQueryManager.props(statusPoller.ref)) + + var 
statusRequesters = ((0 until BatchSize * 2) map { i => i -> TestProbe(name = s"StatusRequester_$i") }).toMap + + // Initially, we should have no work: + jaqmActor.tell(msg = JesApiQueryManager.RequestJesPollingWork(BatchSize), sender = statusPoller.ref) + statusPoller.expectMsg(max = TestExecutionTimeout, obj = JesApiQueryManager.NoWorkToDo) + + // Send a few status poll requests: + statusRequesters foreach { case (index, probe) => + jaqmActor.tell(msg = JesApiQueryManager.DoPoll(Run(StandardAsyncJob(index.toString), null)), sender = probe.ref) + } + + // Should have no messages to the actual statusPoller yet: + statusPoller.expectNoMsg(max = AwaitAlmostNothing) + + // Verify batches: + 2 times { + jaqmActor.tell(msg = JesApiQueryManager.RequestJesPollingWork(BatchSize), sender = statusPoller.ref) + statusPoller.expectMsgPF(max = TestExecutionTimeout) { + case JesApiQueryManager.JesPollingWorkBatch(workBatch) => + val requesters = statusRequesters.take(BatchSize) + statusRequesters = statusRequesters.drop(BatchSize) + + val zippedWithRequesters = workBatch.toList.zip(requesters) + zippedWithRequesters foreach { case (pollQuery, (index, testProbe)) => + pollQuery.requester should be(testProbe.ref) + pollQuery.asInstanceOf[JesStatusPollQuery].run.job should be(StandardAsyncJob(index.toString)) + } + case other => fail(s"Unexpected message: $other") + } + } + + // Finally, we should have no work: + jaqmActor.tell(msg = JesApiQueryManager.RequestJesPollingWork(BatchSize), sender = statusPoller.ref) + statusPoller.expectMsg(max = TestExecutionTimeout, obj = JesApiQueryManager.NoWorkToDo) + + jaqmActor.underlyingActor.testPollerCreations should be(1) + } + + AkkaTestUtil.actorDeathMethods(system) foreach { case (name, stopMethod) => + /* + This test creates two statusPoller ActorRefs which are handed to the TestJesApiQueryManager. Work is added to that query + manager and then the first statusPoller requests work and is subsequently killed. 
The expectation is that: + + - The work will return to the workQueue of the query manager + - The query manager will have registered a new statusPoller + - That statusPoller is the second ActorRef (and artifact of TestJesApiQueryManager) + */ + it should s"catch polling actors if they $name, recreate them and add work back to the queue" in { + val statusPoller1 = TestActorRef(Props(new AkkaTestUtil.DeathTestActor()), TestActorRef(new AkkaTestUtil.StoppingSupervisor())) + val statusPoller2 = TestActorRef(Props(new AkkaTestUtil.DeathTestActor()), TestActorRef(new AkkaTestUtil.StoppingSupervisor())) + val jaqmActor: TestActorRef[TestJesApiQueryManager] = TestActorRef(TestJesApiQueryManager.props(statusPoller1, statusPoller2)) + + val emptyActor = system.actorOf(Props.empty) + + // Send a few status poll requests: + BatchSize indexedTimes { index => + jaqmActor.tell(msg = JesApiQueryManager.DoPoll(Run(StandardAsyncJob(index.toString), null)), sender = emptyActor) + } + + jaqmActor.tell(msg = JesApiQueryManager.RequestJesPollingWork(BatchSize), sender = statusPoller1) + + stopMethod(statusPoller1) + + eventually { + jaqmActor.underlyingActor.testPollerCreations should be (2) + jaqmActor.underlyingActor.queueSize should be (BatchSize) + jaqmActor.underlyingActor.statusPollerEquals(statusPoller2) should be (true) + } + } + } +} + +object JesApiQueryManagerSpec { + implicit class intWithTimes(n: Int) { + def times(f: => Unit) = 1 to n foreach { _ => f } + def indexedTimes(f: Int => Unit) = 0 until n foreach { i => f(i) } + } +} + +/** + * This test class allows us to hook into the JesApiQueryManager's makeStatusPoller and provide our own TestProbes instead + */ +class TestJesApiQueryManager(qps: Int Refined Positive, statusPollerProbes: ActorRef*) extends JesApiQueryManager(qps) { + var testProbes: Queue[ActorRef] = _ + var testPollerCreations: Int = _ + + private def init() = { + testProbes = Queue(statusPollerProbes: _*) + testPollerCreations = 0 + } + + override 
private[statuspolling] def makeWorkerActor(): ActorRef = { + // Initialize the queue, if necessary: + if (testProbes == null) { + init() + } + + // Register that the creation was requested: + testPollerCreations += 1 + + // Pop the queue to get the next test probe: + val (probe, newQueue) = testProbes.dequeue + testProbes = newQueue + probe + } + + def queueSize = workQueue.size + def statusPollerEquals(otherStatusPoller: ActorRef) = statusPoller == otherStatusPoller +} + +object TestJesApiQueryManager { + import cromwell.backend.impl.jes.JesTestConfig.JesBackendConfigurationDescriptor + val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) + + def props(statusPollers: ActorRef*): Props = Props(new TestJesApiQueryManager(jesConfiguration.qps, statusPollers: _*)) +} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorSpec.scala new file mode 100644 index 000000000..c04564fc0 --- /dev/null +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorSpec.scala @@ -0,0 +1,141 @@ +package cromwell.backend.impl.jes.statuspolling + +import akka.actor.{ActorRef, Props} +import akka.testkit.{TestActorRef, TestProbe} +import cromwell.core.{ExecutionEvent, TestKitSuite} +import org.scalatest.{BeforeAndAfter, FlatSpecLike, Matchers} +import org.scalatest.concurrent.Eventually + +import scala.concurrent.duration._ +import akka.testkit._ +import cats.data.NonEmptyList +import com.google.api.client.googleapis.batch.BatchRequest +import com.google.api.client.googleapis.batch.json.JsonBatchCallback +import com.google.api.client.googleapis.json.GoogleJsonError +import com.google.api.services.genomics.Genomics +import com.google.api.services.genomics.model.Operation +import cromwell.backend.impl.jes.{JesConfiguration, Run, RunStatus} +import 
cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.{JesApiException, JesApiQueryFailed, JesStatusPollQuery, RequestJesPollingWork} +import cromwell.backend.impl.jes.statuspolling.TestJesPollingActor.{CallbackFailure, CallbackSuccess, JesBatchCallbackResponse} +import eu.timepit.refined.api.Refined +import eu.timepit.refined.numeric.Positive +import org.specs2.mock.Mockito + +import scala.collection.immutable.Queue + +class JesPollingActorSpec extends TestKitSuite("JesPollingActor") with FlatSpecLike with Matchers with Eventually with BeforeAndAfter with Mockito { + + behavior of "JesPollingActor" + + implicit val TestExecutionTimeout = 10.seconds.dilated + implicit val DefaultPatienceConfig = PatienceConfig(TestExecutionTimeout) + val AwaitAlmostNothing = 30.milliseconds.dilated + + import cromwell.backend.impl.jes.JesTestConfig.JesBackendConfigurationDescriptor + val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) + + var managerProbe: TestProbe = _ + var jpActor: TestActorRef[TestJesPollingActor] = _ + + it should "correctly calculate batch intervals" in { + import eu.timepit.refined.auto._ + JesPollingActor.determineBatchInterval(10) should be(11.seconds) + JesPollingActor.determineBatchInterval(100000) shouldBe 1.seconds + } + + it should "query for work and wait for a reply" in { + managerProbe.expectMsgClass(max = TestExecutionTimeout, c = classOf[JesApiQueryManager.RequestJesPollingWork]) + managerProbe.expectNoMsg(max = AwaitAlmostNothing) + } + + it should "respond correctly with various run statuses" in { + managerProbe.expectMsgClass(max = TestExecutionTimeout, c = classOf[JesApiQueryManager.RequestJesPollingWork]) + + val requester1 = TestProbe() + val query1 = JesStatusPollQuery(requester1.ref, Run(null, null)) + val requester2 = TestProbe() + val query2 = JesStatusPollQuery(requester2.ref, Run(null, null)) + val requester3 = TestProbe() + val query3 = JesStatusPollQuery(requester3.ref, Run(null, null)) + + // For two 
requests the callback succeeds (first with RunStatus.Success, then RunStatus.Failed). The third callback fails (simulating a network timeout, for example): + jpActor.underlyingActor.callbackResponses :+= CallbackSuccess + jpActor.underlyingActor.callbackResponses :+= CallbackSuccess + jpActor.underlyingActor.callbackResponses :+= CallbackFailure + + val successStatus = RunStatus.Success(Seq.empty[ExecutionEvent], None, None, None) + val failureStatus = RunStatus.UnsuccessfulRunStatus(-1, Option.empty[String], Seq.empty[ExecutionEvent], None, None, None) + jpActor.underlyingActor.operationStatusResponses :+= successStatus + jpActor.underlyingActor.operationStatusResponses :+= failureStatus + + jpActor.tell(msg = JesApiQueryManager.JesPollingWorkBatch(NonEmptyList(query1, List(query2, query3))), sender = managerProbe.ref) + eventually { jpActor.underlyingActor.resultHandlers.size should be(3) } + eventually { jpActor.underlyingActor.runBatchRequested should be(true) } + + // The manager shouldn't have been asked for more work yet: + managerProbe.expectNoMsg(max = AwaitAlmostNothing) + + // Ok, let's trigger the callbacks: + jpActor.underlyingActor.executeBatch() + + requester1.expectMsg(successStatus) + requester2.expectMsg(failureStatus) + requester3.expectNoMsg(max = AwaitAlmostNothing) + + // Requester3 expected nothing... 
Instead, the manager expects an API failure notification and then a request for more work: + managerProbe.expectMsgPF(TestExecutionTimeout) { + case failure: JesApiQueryFailed => + if (!failure.cause.isInstanceOf[JesApiException]) fail("Unexpected failure cause class: " + failure.cause.getClass.getSimpleName) + if (failure.query != query2 && failure.query != query3) fail("Unexpected query caused failure: " + failure.query) + } + managerProbe.expectMsg(RequestJesPollingWork(JesPollingActor.MaxBatchSize)) + managerProbe.expectNoMsg(max = AwaitAlmostNothing) + } + + before { + managerProbe = TestProbe() + jpActor = TestActorRef(TestJesPollingActor.props(managerProbe.ref, jesConfiguration), managerProbe.ref) + } +} + +/** + * Testable JES polling actor. + * - Mocks out the methods which actually call out to JES, and allows the callbacks to be triggered in a testable way + * - Also waits a **lot** less time before polls! + */ +class TestJesPollingActor(manager: ActorRef, qps: Int Refined Positive) extends JesPollingActor(manager, qps) with Mockito { + + override lazy val batchInterval = 10.milliseconds + + var operationStatusResponses: Queue[RunStatus] = Queue.empty + var resultHandlers: Queue[JsonBatchCallback[Operation]] = Queue.empty + var callbackResponses: Queue[JesBatchCallbackResponse] = Queue.empty + var runBatchRequested: Boolean = false + + override private[statuspolling] def createBatch(genomicsInterface: Genomics): BatchRequest = null + override private[statuspolling] def runBatch(batch: BatchRequest): Unit = runBatchRequested = true + + def executeBatch(): Unit = { + resultHandlers.zip(callbackResponses) foreach { case (handler, response) => response match { + case CallbackSuccess => handler.onSuccess(null, null) + case CallbackFailure => + val error: GoogleJsonError = new GoogleJsonError() + handler.onFailure(error, null) + }} + } + override def addStatusPollToBatch(run: Run, batch: BatchRequest, resultHandler: JsonBatchCallback[Operation]): Unit = 
resultHandlers :+= resultHandler + override def interpretOperationStatus(operation: Operation): RunStatus = { + val (status, newQueue) = operationStatusResponses.dequeue + operationStatusResponses = newQueue + status + } + override def mkErrorString(e: GoogleJsonError) = "NA" +} + +object TestJesPollingActor { + def props(manager: ActorRef, jesConfiguration: JesConfiguration) = Props(new TestJesPollingActor(manager, jesConfiguration.qps)) + + sealed trait JesBatchCallbackResponse + case object CallbackSuccess extends JesBatchCallbackResponse + case object CallbackFailure extends JesBatchCallbackResponse +} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/StatusPollingSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/StatusPollingSpec.scala new file mode 100644 index 000000000..ffd7baf04 --- /dev/null +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/StatusPollingSpec.scala @@ -0,0 +1,184 @@ +package cromwell.backend.impl.jes.statuspolling + +import java.time.OffsetDateTime +import java.util + +import com.google.api.client.util.ArrayMap +import com.google.api.services.genomics.model.Operation +import cromwell.backend.impl.jes.RunStatus.Success +import cromwell.core.ExecutionEvent +import org.scalatest.{FlatSpec, Matchers} +import org.specs2.mock.{Mockito => MockitoTrait} + +import scala.collection.JavaConverters._ + +class StatusPollingSpec extends FlatSpec with Matchers with MockitoTrait { + behavior of "JES Status polling" + + it should "parse events from Operation metadata" in { + val op: Operation = new Operation() + op.setMetadata(eventsMetadata.asJava) + + val list = StatusPolling.getEventList(op) + list should contain theSameElementsAs eventsExpected + } + + it should "require operation be non-null" in { + val exception = intercept[RuntimeException](StatusPolling.interpretOperationStatus(null)) + exception.getMessage should be("requirement failed: 
Operation must not be null.") + } + + it should "catch and wrap null pointer exceptions in an empty operation" in { + val op = new Operation() + + val exception = intercept[RuntimeException](StatusPolling.interpretOperationStatus(op)) + exception.getMessage should be("Caught NPE while processing operation null: {}") + } + + it should "catch and wrap null pointer exceptions in a name only operation" in { + val op = new Operation() + op.setName("my/customName") + + val exception = intercept[RuntimeException](StatusPolling.interpretOperationStatus(op)) + exception.getMessage should be("Caught NPE while processing operation my/customName: {name=my/customName}") + } + + it should "parse an operation without machine information" in { + val op = new Operation() + op.setName("my/customName") + op.setDone(true) + op.setMetadata(eventsMetadata.asJava) + + val runStatus = StatusPolling.interpretOperationStatus(op) + + runStatus should be(a[Success]) + + val success = runStatus.asInstanceOf[Success] + success.instanceName should be(None) + success.machineType should be(None) + success.zone should be(None) + success.eventList should contain theSameElementsAs eventsExpected + } + + it should "parse an operation with empty runtime metadata" in { + val op = new Operation() + op.setName("my/customName") + op.setDone(true) + + val runtimeMetadata = ArrayMap.create[String, Object]() + + val metadata = eventsMetadata ++ Map("runtimeMetadata" -> runtimeMetadata) + op.setMetadata(metadata.asJava) + + val runStatus = StatusPolling.interpretOperationStatus(op) + runStatus should be(a[Success]) + + val success = runStatus.asInstanceOf[Success] + success.instanceName should be(None) + success.machineType should be(None) + success.zone should be(None) + success.eventList should contain theSameElementsAs eventsExpected + } + + it should "parse an operation with empty compute engine information" in { + val op = new Operation() + op.setName("my/customName") + op.setDone(true) + + val 
computeEngine = ArrayMap.create[String, String]() + + val runtimeMetadata = ArrayMap.create[String, Object]() + runtimeMetadata.add("computeEngine", computeEngine) + + val metadata = eventsMetadata ++ Map("runtimeMetadata" -> runtimeMetadata) + op.setMetadata(metadata.asJava) + + val runStatus = StatusPolling.interpretOperationStatus(op) + runStatus should be(a[Success]) + + val success = runStatus.asInstanceOf[Success] + success.instanceName should be(None) + success.machineType should be(None) + success.zone should be(None) + success.eventList should contain theSameElementsAs eventsExpected + } + + it should "parse an operation with partially filled compute engine" in { + val op = new Operation() + op.setName("my/customName") + op.setDone(true) + + val computeEngine = ArrayMap.create[String, String]() + computeEngine.add("zone", "us-central1-b") + + val runtimeMetadata = ArrayMap.create[String, Object]() + runtimeMetadata.add("computeEngine", computeEngine) + + val metadata = eventsMetadata ++ Map("runtimeMetadata" -> runtimeMetadata) + op.setMetadata(metadata.asJava) + + val runStatus = StatusPolling.interpretOperationStatus(op) + + runStatus should be(a[Success]) + + val success = runStatus.asInstanceOf[Success] + success.instanceName should be(None) + success.machineType should be(None) + success.zone should be(Option("us-central1-b")) + success.eventList should contain theSameElementsAs eventsExpected + } + + it should "parse an operation with filled compute engine" in { + val op = new Operation() + op.setName("my/customName") + op.setDone(true) + + val computeEngine = ArrayMap.create[String, String]() + computeEngine.add("instanceName", "ggp-12345678901234567890") + computeEngine.add("machineType", "us-central1-b/g1-small") + computeEngine.add("zone", "us-central1-b") + + val runtimeMetadata = ArrayMap.create[String, Object]() + runtimeMetadata.add("computeEngine", computeEngine) + + val metadata = eventsMetadata ++ Map("runtimeMetadata" -> runtimeMetadata) 
+ op.setMetadata(metadata.asJava) + + val runStatus = StatusPolling.interpretOperationStatus(op) + + runStatus should be(a[Success]) + + val success = runStatus.asInstanceOf[Success] + success.instanceName should be(Option("ggp-12345678901234567890")) + success.machineType should be(Option("us-central1-b/g1-small")) + success.zone should be(Option("us-central1-b")) + success.eventList should contain theSameElementsAs eventsExpected + } + + private lazy val eventsMetadata: Map[String, AnyRef] = { + val event1: ArrayMap[String, String] = ArrayMap.create(2) + event1.add("description", "start") + event1.add("startTime", "2015-12-05T00:00:01+00:00") + + val event2: ArrayMap[String, String] = ArrayMap.create(2) + event2.add("description", "blah") + event2.add("startTime", "2015-12-05T00:01:00+00:00") + + val events = new util.ArrayList(Seq(event1, event2).asJava) + + Map( + "createTime" -> "2015-12-05T00:00:00+00:00", + "startTime" -> "2015-12-05T00:00:01+00:00", + "endTime" -> "2015-12-05T11:00:00+00:00", + "events" -> events + ) + } + + private lazy val eventsExpected = + List( + ExecutionEvent("waiting for quota", OffsetDateTime.parse("2015-12-05T00:00:00+00:00")), + ExecutionEvent("initializing VM", OffsetDateTime.parse("2015-12-05T00:00:01+00:00")), + ExecutionEvent("start", OffsetDateTime.parse("2015-12-05T00:00:01+00:00")), + ExecutionEvent("cromwell poll interval", OffsetDateTime.parse("2015-12-05T11:00:00+00:00")) + ) +} diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala index 014faa1b0..8c90e2049 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala @@ -1,14 +1,13 @@ package cromwell.backend.impl.sfs.config -import 
java.nio.file.Path - -import better.files._ import cromwell.backend.impl.sfs.config.ConfigConstants._ -import cromwell.backend.sfs.SharedFileSystem._ import cromwell.backend.sfs._ -import wdl4s._ -import wdl4s.expression.NoFunctions -import wdl4s.values.WdlString +import cromwell.backend.standard.{StandardAsyncExecutionActorParams, StandardAsyncJob} +import cromwell.backend.validation.DockerValidation +import cromwell.core.path.Path +import wdl4s.wdl._ +import wdl4s.wdl.expression.NoFunctions +import wdl4s.wdl.values.WdlString /** * Base ConfigAsyncJobExecutionActor that reads the config and generates an outer script to submit an inner script @@ -19,10 +18,7 @@ import wdl4s.values.WdlString */ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecutionActor { - lazy val configInitializationData: ConfigInitializationData = params.backendInitializationDataOption match { - case Some(data: ConfigInitializationData) => data - case other => throw new RuntimeException(s"Unable to get config initialization data from $other") - } + lazy val configInitializationData: ConfigInitializationData = backendInitializationDataAs[ConfigInitializationData] /** * Returns the arguments for submitting the job, either with or without docker. @@ -33,8 +29,8 @@ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecut * will grab the job id from the stdout. 
*/ override lazy val processArgs: SharedFileSystemCommand = { - val submitScript = pathPlusSuffix(jobPaths.script, "submit") - val submitInputs = standardInputs ++ dockerInputs ++ runtimeAttributeInputs + val submitScript = jobPaths.script.plusExt("submit") + val submitInputs = standardInputs ++ dockerizedInputs ++ runtimeAttributeInputs val submitTaskName = if (isDockerRun) SubmitDockerTask else SubmitTask writeTaskScript(submitScript, submitTaskName, submitInputs) SharedFileSystemCommand("/bin/bash", submitScript) @@ -48,41 +44,49 @@ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecut * @param taskName The name of the task to retrieve from the precomputed wdl namespace. * @param inputs The customized inputs to this task. */ - def writeTaskScript(script: Path, taskName: String, inputs: CallInputs): Unit = { + def writeTaskScript(script: Path, taskName: String, inputs: WorkflowCoercedInputs): Unit = { val task = configInitializationData.wdlNamespace.findTask(taskName). getOrElse(throw new RuntimeException(s"Unable to find task $taskName")) - val command = task.instantiateCommand(inputs, NoFunctions).get + val inputsWithFqns = inputs map { case (k, v) => s"$taskName.$k" -> v } + val command = task.instantiateCommand(task.inputsFromMap(inputsWithFqns), NoFunctions).get jobLogger.info(s"executing: $command") - File(script).write( + val scriptBody = s"""|#!/bin/bash - |$command - |""".stripMargin) + |SCRIPT_COMMAND + |""".stripMargin.replace("SCRIPT_COMMAND", command) + script.write(scriptBody) + () } /** * The inputs that are not specified by the config, that will be passed into a command for both submit and * submit-docker. 
*/ - private lazy val standardInputs: CallInputs = { + private lazy val standardInputs: WorkflowCoercedInputs = { Map( JobNameInput -> WdlString(jobName), - CwdInput -> WdlString(jobPaths.callRoot.toString), - StdoutInput -> WdlString(jobPaths.stdout.toString), - StderrInput -> WdlString(jobPaths.stderr.toString), - ScriptInput -> WdlString(jobPaths.script.toString) + CwdInput -> WdlString(jobPaths.callRoot.pathAsString) ) } /** - * Extra arguments if this is a submit-docker command, or Map.empty. + * The inputs that are not specified by the config, that will be passed into a command for either submit or + * submit-docker. */ - private lazy val dockerInputs: CallInputs = { + private lazy val dockerizedInputs: WorkflowCoercedInputs = { if (isDockerRun) { Map( - DockerCwdInput -> WdlString(jobPaths.callDockerRoot.toString) + DockerCwdInput -> WdlString(jobPathsWithDocker.callDockerRoot.pathAsString), + StdoutInput -> WdlString(jobPathsWithDocker.toDockerPath(jobPaths.stdout).pathAsString), + StderrInput -> WdlString(jobPathsWithDocker.toDockerPath(jobPaths.stderr).pathAsString), + ScriptInput -> WdlString(jobPathsWithDocker.toDockerPath(jobPaths.script).pathAsString) ) } else { - Map.empty + Map( + StdoutInput -> WdlString(jobPaths.stdout.pathAsString), + StderrInput -> WdlString(jobPaths.stderr.pathAsString), + ScriptInput -> WdlString(jobPaths.script.pathAsString) + ) } } @@ -90,32 +94,40 @@ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecut * The arguments generated from the backend config's list of attributes. These will include things like CPU, memory, * and other custom arguments like "backend_queue_name", "backend_billing_project", etc. 
*/ - private lazy val runtimeAttributeInputs: CallInputs = { + private lazy val runtimeAttributeInputs: WorkflowCoercedInputs = { val declarationValidations = configInitializationData.declarationValidations - val inputOptions = declarationValidations map { declarationValidation => - declarationValidation.extractWdlValueOption(validatedRuntimeAttributes) map { wdlValue => - declarationValidation.key -> wdlValue - } + val inputOptions = declarationValidations map { + // Is it always the right thing to pass the Docker hash to a config backend? What if it can't use hashes? + case declarationValidation if declarationValidation.key == DockerValidation.instance.key && jobDescriptor.maybeCallCachingEligible.dockerHash.isDefined => + val dockerHash = jobDescriptor.maybeCallCachingEligible.dockerHash.get + Option(declarationValidation.key -> WdlString(dockerHash)) + case declarationValidation => + declarationValidation.extractWdlValueOption(validatedRuntimeAttributes) map { wdlValue => + declarationValidation.key -> wdlValue + } } inputOptions.flatten.toMap } + + // `runtimeAttributeInputs` has already adjusted for the case of a `JobDescriptor` with `DockerWithHash`. + override lazy val dockerImageUsed: Option[String] = runtimeAttributeInputs.get(DockerValidation.instance.key).map(_.valueString) } /** * Submits a job and sends it to the background via "&". Saves the unix PID for status or killing later. * - * @param params Params for running a shared file system job. + * @param standardParams Params for running a shared file system job. */ -class BackgroundConfigAsyncJobExecutionActor(override val params: SharedFileSystemAsyncJobExecutionActorParams) +class BackgroundConfigAsyncJobExecutionActor(override val standardParams: StandardAsyncExecutionActorParams) extends ConfigAsyncJobExecutionActor with BackgroundAsyncJobExecutionActor /** * Submits a job and returns relatively quickly. The job-id-regex is then used to read the job id for status or killing * later. 
* - * @param params Params for running a shared file system job. + * @param standardParams Params for running a shared file system job. */ -class DispatchedConfigAsyncJobExecutionActor(override val params: SharedFileSystemAsyncJobExecutionActorParams) +class DispatchedConfigAsyncJobExecutionActor(override val standardParams: StandardAsyncExecutionActorParams) extends ConfigAsyncJobExecutionActor { /** @@ -126,11 +138,11 @@ class DispatchedConfigAsyncJobExecutionActor(override val params: SharedFileSyst * @param stderr The stderr from dispatching the job. * @return The wrapped job id. */ - override def getJob(exitValue: Int, stdout: Path, stderr: Path): SharedFileSystemJob = { + override def getJob(exitValue: Int, stdout: Path, stderr: Path): StandardAsyncJob = { val jobIdRegex = configurationDescriptor.backendConfig.getString(JobIdRegexConfig).r - val output = File(stdout).contentAsString.stripLineEnd + val output = stdout.contentAsString.stripLineEnd output match { - case jobIdRegex(jobId) => SharedFileSystemJob(jobId) + case jobIdRegex(jobId) => StandardAsyncJob(jobId) case _ => throw new RuntimeException("Could not find job ID from stdout file. " + s"Check the stderr file for possible errors: $stderr") @@ -143,7 +155,7 @@ class DispatchedConfigAsyncJobExecutionActor(override val params: SharedFileSyst * @param job The job to check. * @return A command that checks if the job is alive. */ - override def checkAliveArgs(job: SharedFileSystemJob): SharedFileSystemCommand = { + override def checkAliveArgs(job: StandardAsyncJob): SharedFileSystemCommand = { jobScriptArgs(job, "check", CheckAliveTask) } @@ -153,7 +165,7 @@ class DispatchedConfigAsyncJobExecutionActor(override val params: SharedFileSyst * @param job The job id to kill. * @return A command that may be used to kill the job. 
*/ - override def killArgs(job: SharedFileSystemJob): SharedFileSystemCommand = { + override def killArgs(job: StandardAsyncJob): SharedFileSystemCommand = { jobScriptArgs(job, "kill", KillTask) } @@ -165,8 +177,8 @@ class DispatchedConfigAsyncJobExecutionActor(override val params: SharedFileSyst * @param task The config task that defines the command. * @return A runnable command. */ - private def jobScriptArgs(job: SharedFileSystemJob, suffix: String, task: String): SharedFileSystemCommand = { - val script = pathPlusSuffix(jobPaths.script, suffix) + private def jobScriptArgs(job: StandardAsyncJob, suffix: String, task: String): SharedFileSystemCommand = { + val script = jobPaths.script.plusExt(suffix) writeTaskScript(script, task, Map(JobIdInput -> WdlString(job.jobId))) SharedFileSystemCommand("/bin/bash", script) } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala index 0fc377aac..867647988 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala @@ -1,15 +1,22 @@ package cromwell.backend.impl.sfs.config import akka.event.LoggingAdapter -import better.files._ -import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest +import cromwell.backend.standard.callcaching.StandardFileHashingActor.SingleFileHashRequest +import cromwell.core.path.DefaultPathBuilder import cromwell.util.TryWithResource._ +import scala.language.postfixOps import scala.util.Try private[config] object ConfigBackendFileHashing { - def getMd5Result(request: SingleFileHashRequest, log: LoggingAdapter): Try[String] = - tryWithResource(() => File(request.file.valueString).newInputStream) { inputStream => + def getMd5Result(request: SingleFileHashRequest, 
log: LoggingAdapter): Try[String] ={ + val path = DefaultPathBuilder.build(request.file.valueString) recover { + case failure => throw new RuntimeException("Failed to construct path to hash", failure) + } get + + tryWithResource(() => path.newInputStream) { inputStream => org.apache.commons.codec.digest.DigestUtils.md5Hex(inputStream) } + } + } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashingActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashingActor.scala new file mode 100644 index 000000000..270a577be --- /dev/null +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashingActor.scala @@ -0,0 +1,26 @@ +package cromwell.backend.impl.sfs.config + +import akka.actor.Props +import com.typesafe.config.Config +import cromwell.backend.standard.callcaching.StandardFileHashingActor.SingleFileHashRequest +import cromwell.backend.standard.callcaching.{StandardFileHashingActor, StandardFileHashingActorParams} +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder +import net.ceedubs.ficus.Ficus._ + +import scala.util.Try + +object ConfigBackendFileHashingActor { + def props(standardParams: StandardFileHashingActorParams) = Props(new ConfigBackendFileHashingActor(standardParams)) +} + +class ConfigBackendFileHashingActor(standardParams: StandardFileHashingActorParams) extends StandardFileHashingActor(standardParams) with GcsBatchCommandBuilder { + + lazy val hashingStrategy: ConfigHashingStrategy = { + configurationDescriptor.backendConfig.as[Option[Config]]("filesystems.local.caching") map ConfigHashingStrategy.apply getOrElse ConfigHashingStrategy.defaultStrategy + } + + override def customHashStrategy(fileRequest: SingleFileHashRequest): Option[Try[String]] = { + log.debug(hashingStrategy.toString) + Option(hashingStrategy.getHash(fileRequest, log)) + } +} diff --git 
a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendLifecycleActorFactory.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendLifecycleActorFactory.scala index e37d9416a..c30fb1d6e 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendLifecycleActorFactory.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendLifecycleActorFactory.scala @@ -1,37 +1,41 @@ package cromwell.backend.impl.sfs.config -import cromwell.backend.callcaching.FileHashingActor.FileHashingFunction +import com.typesafe.config.Config +import cromwell.backend.BackendConfigurationDescriptor import cromwell.backend.impl.sfs.config.ConfigConstants._ import cromwell.backend.sfs._ -import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, RuntimeAttributeDefinition} -import lenthall.config.ScalaConfig._ +import cromwell.backend.standard.callcaching.StandardFileHashingActor +import cromwell.core.JobExecutionToken.JobExecutionTokenType +import net.ceedubs.ficus.Ficus._ +import org.slf4j.{Logger, LoggerFactory} /** * Builds a backend by reading the job control from the config. * * @param configurationDescriptor The config information. 
*/ -class ConfigBackendLifecycleActorFactory(val configurationDescriptor: BackendConfigurationDescriptor) +class ConfigBackendLifecycleActorFactory(name: String, val configurationDescriptor: BackendConfigurationDescriptor) extends SharedFileSystemBackendLifecycleActorFactory { - override def initializationActorClass = classOf[ConfigInitializationActor] + lazy val logger: Logger = LoggerFactory.getLogger(getClass) + lazy val hashingStrategy: ConfigHashingStrategy = { + configurationDescriptor.backendConfig.as[Option[Config]]("filesystems.local.caching") map ConfigHashingStrategy.apply getOrElse ConfigHashingStrategy.defaultStrategy + } + + override lazy val initializationActorClass: Class[ConfigInitializationActor] = classOf[ConfigInitializationActor] - override def asyncJobExecutionActorClass: Class[_ <: ConfigAsyncJobExecutionActor] = { - val runInBackground = configurationDescriptor.backendConfig.getBooleanOr(RunInBackgroundConfig, default = false) + override lazy val asyncExecutionActorClass: Class[_ <: ConfigAsyncJobExecutionActor] = { + val runInBackground = configurationDescriptor.backendConfig.as[Option[Boolean]](RunInBackgroundConfig).getOrElse(false) if (runInBackground) classOf[BackgroundConfigAsyncJobExecutionActor] else classOf[DispatchedConfigAsyncJobExecutionActor] } - override def runtimeAttributeDefinitions(initializationDataOption: Option[BackendInitializationData]): - Set[RuntimeAttributeDefinition] = { - val initializationData = BackendInitializationData. 
- as[SharedFileSystemBackendInitializationData](initializationDataOption) + override lazy val fileHashingActorClassOption: Option[Class[_ <: StandardFileHashingActor]] = Option(classOf[ConfigBackendFileHashingActor]) - initializationData.runtimeAttributesBuilder.definitions.toSet + override val jobExecutionTokenType: JobExecutionTokenType = { + val concurrentJobLimit = configurationDescriptor.backendConfig.as[Option[Int]]("concurrent-job-limit") + JobExecutionTokenType(name, concurrentJobLimit) } - - override lazy val fileHashingFunction: Option[FileHashingFunction] = Option(FileHashingFunction(ConfigBackendFileHashing.getMd5Result)) - override lazy val fileHashingActorCount: Int = 5 } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigConstants.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigConstants.scala index 3a951be9b..682b794f4 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigConstants.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigConstants.scala @@ -25,7 +25,8 @@ object ConfigConstants { val MemoryRuntimeAttribute = "memory" // See: MemoryDeclarationValidation val MemoryRuntimeAttributePrefix = "memory_" - + val DiskRuntimeAttribute = "disk" + val DiskRuntimeAttributePrefix = "disk_" /* List of task names used internally. 
NOTE: underscore separated diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala new file mode 100644 index 000000000..a497ee98b --- /dev/null +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala @@ -0,0 +1,86 @@ +package cromwell.backend.impl.sfs.config + +import java.io.FileNotFoundException + +import akka.event.LoggingAdapter +import com.typesafe.config.Config +import cromwell.backend.standard.StandardInitializationData +import cromwell.backend.standard.callcaching.StandardFileHashingActor.SingleFileHashRequest +import cromwell.core.path.{Path, PathFactory} +import cromwell.util.TryWithResource._ +import net.ceedubs.ficus.Ficus._ +import org.apache.commons.codec.digest.DigestUtils +import org.slf4j.{Logger, LoggerFactory} + +import scala.util.{Failure, Try} + +object ConfigHashingStrategy { + val logger: Logger = LoggerFactory.getLogger(getClass) + val defaultStrategy = HashFileStrategy(false) + + def apply(hashingConfig: Config): ConfigHashingStrategy = { + val checkSiblingMd5 = hashingConfig.as[Option[Boolean]]("check-sibling-md5").getOrElse(false) + + hashingConfig.as[Option[String]]("hashing-strategy").getOrElse("file") match { + case "path" => HashPathStrategy(checkSiblingMd5) + case "file" => HashFileStrategy(checkSiblingMd5) + case what => + logger.warn(s"Unrecognized hashing strategy $what.") + HashPathStrategy(checkSiblingMd5) + } + } +} + +abstract class ConfigHashingStrategy { + def checkSiblingMd5: Boolean + protected def hash(file: Path): Try[String] + protected def description: String + + protected lazy val checkSiblingMessage: String = + if (checkSiblingMd5) "Check first for sibling md5 and if not found " else "" + + def getHash(request: SingleFileHashRequest, log: LoggingAdapter): Try[String] = { + def usingStandardInitData(initData: 
StandardInitializationData) = { + val pathBuilders = initData.workflowPaths.pathBuilders + val file = PathFactory.buildPath(request.file.valueString, pathBuilders).followSymbolicLinks + if (!file.exists) Failure(new FileNotFoundException(s"Cannot hash file $file because it can't be found")) else { + if (checkSiblingMd5) { + precomputedMd5(file) match { + case Some(md5) => Try(md5.contentAsString) + case None => hash(file) + } + } else hash(file) + } + } + + request.initializationData match { + case Some(initData: StandardInitializationData) => usingStandardInitData(initData) + case _ => Failure(new IllegalArgumentException("Need SharedFileSystemBackendInitializationData to calculate hash.")) + } + } + + private def precomputedMd5(file: Path): Option[Path] = { + val md5 = file.sibling(s"${file.name}.md5") + if (md5.exists) Option(md5) else None + } + + override def toString: String = { + s"Call caching hashing strategy: $checkSiblingMessage$description." + } +} + +final case class HashPathStrategy(checkSiblingMd5: Boolean) extends ConfigHashingStrategy { + override def hash(file: Path): Try[String] = { + Try(DigestUtils.md5Hex(file.toAbsolutePath.pathAsString)) + } + + override val description = "hash file path" +} + +final case class HashFileStrategy(checkSiblingMd5: Boolean) extends ConfigHashingStrategy { + override protected def hash(file: Path): Try[String] = { + tryWithResource(() => file.newInputStream) { DigestUtils.md5Hex } + } + + override val description = "hash file content" +} diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigInitializationActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigInitializationActor.scala index f9f5a882f..e5538b323 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigInitializationActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigInitializationActor.scala @@ -2,11 +2,14 
@@ package cromwell.backend.impl.sfs.config import cromwell.backend.io.WorkflowPaths import cromwell.backend.sfs._ -import wdl4s.WdlNamespace +import cromwell.backend.standard.{StandardInitializationActorParams, StandardInitializationData, StandardValidatedRuntimeAttributesBuilder} +import wdl4s.wdl.WdlNamespace + +import scala.concurrent.Future /** * Extension of the SharedFileSystemBackendInitializationData with declarations of extra runtime attributes, and a - * wdl namespace containing various tasks for submiting, killing, etc. + * wdl namespace containing various tasks for submitting, killing, etc. * * @param workflowPaths The paths for the workflow. * @param runtimeAttributesBuilder The customized runtime attributes builder with extra validations for the @@ -17,10 +20,11 @@ import wdl4s.WdlNamespace class ConfigInitializationData ( workflowPaths: WorkflowPaths, - runtimeAttributesBuilder: SharedFileSystemValidatedRuntimeAttributesBuilder, + runtimeAttributesBuilder: StandardValidatedRuntimeAttributesBuilder, val declarationValidations: Seq[DeclarationValidation], val wdlNamespace: WdlNamespace) - extends SharedFileSystemBackendInitializationData(workflowPaths, runtimeAttributesBuilder) + extends StandardInitializationData(workflowPaths, runtimeAttributesBuilder, + classOf[SharedFileSystemExpressionFunctions]) /** * Extends the SharedFileSystemInitializationActor to create an instance of the ConfigInitializationData. @@ -29,7 +33,7 @@ class ConfigInitializationData * * @param params Parameters to create an initialization actor. 
*/ -class ConfigInitializationActor(params: SharedFileSystemInitializationActorParams) +class ConfigInitializationActor(params: StandardInitializationActorParams) extends SharedFileSystemInitializationActor(params) { lazy val configWdlNamespace = new ConfigWdlNamespace(params.configurationDescriptor.backendConfig) @@ -38,12 +42,14 @@ class ConfigInitializationActor(params: SharedFileSystemInitializationActorParam DeclarationValidation.fromDeclarations(configWdlNamespace.runtimeDeclarations) } - override lazy val initializationData = { + override lazy val initializationData: Future[ConfigInitializationData] = { val wdlNamespace = configWdlNamespace.wdlNamespace - new ConfigInitializationData(workflowPaths, runtimeAttributesBuilder, declarationValidations, wdlNamespace) + workflowPaths map { + new ConfigInitializationData(_, runtimeAttributesBuilder, declarationValidations, wdlNamespace) + } } - override lazy val runtimeAttributesBuilder = { + override lazy val runtimeAttributesBuilder: StandardValidatedRuntimeAttributesBuilder = { val declared = declarationValidations.map(_.makeValidation()) super.runtimeAttributesBuilder.withValidation(declared: _*) } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala index 7b6909edb..1e434de56 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala @@ -2,8 +2,10 @@ package cromwell.backend.impl.sfs.config import com.typesafe.config.Config import cromwell.backend.impl.sfs.config.ConfigConstants._ -import lenthall.config.ScalaConfig._ -import wdl4s._ +import net.ceedubs.ficus.Ficus._ +import wdl4s.wdl._ + +import scala.util.{Failure, Success} /** * Builds a wdl namespace from the config. 
@@ -14,24 +16,24 @@ class ConfigWdlNamespace(backendConfig: Config) { import ConfigWdlNamespace._ - private val configRuntimeAttributes = backendConfig.getStringOr(RuntimeAttributesConfig) + private val configRuntimeAttributes = backendConfig.as[Option[String]](RuntimeAttributesConfig).getOrElse("") - private val submitCommandOption = backendConfig.getStringOption(SubmitConfig) + private val submitCommandOption = backendConfig.as[Option[String]](SubmitConfig) private val submitSourceOption = submitCommandOption.map(makeWdlSource( SubmitTask, _, submitRuntimeAttributes + configRuntimeAttributes)) - private val submitDockerCommandOption = backendConfig.getStringOption(SubmitDockerConfig) + private val submitDockerCommandOption = backendConfig.as[Option[String]](SubmitDockerConfig) private val submitDockerSourceOption = submitDockerCommandOption.map(makeWdlSource( SubmitDockerTask, _, submitRuntimeAttributes + submitDockerRuntimeAttributes + configRuntimeAttributes)) - private val killCommandOption = backendConfig.getStringOption(KillConfig) + private val killCommandOption = backendConfig.as[Option[String]](KillConfig) private val killSourceOption = killCommandOption.map(makeWdlSource(KillTask, _, jobIdRuntimeAttributes)) - private val checkAliveCommandOption = backendConfig.getStringOption(CheckAliveConfig) + private val checkAliveCommandOption = backendConfig.as[Option[String]](CheckAliveConfig) private val checkAliveSourceOption = checkAliveCommandOption.map(makeWdlSource( CheckAliveTask, _, jobIdRuntimeAttributes)) - private val wdlSource = + private val workflowSource = s""" |${submitSourceOption getOrElse ""} |${submitDockerSourceOption getOrElse ""} @@ -43,11 +45,9 @@ class ConfigWdlNamespace(backendConfig: Config) { * The wdl namespace containing the submit, kill, and check alive tasks. 
*/ val wdlNamespace = { - try { - WdlNamespace.load(wdlSource) - } catch { - case exception: Exception => - throw new RuntimeException(s"Error parsing generated wdl:\n$wdlSource".stripMargin, exception) + WdlNamespace.loadUsingSource(workflowSource, None, None) match { + case Success(ns) => ns + case Failure(f) => throw new RuntimeException(s"Error parsing generated wdl:\n$workflowSource".stripMargin, f) } } @@ -61,7 +61,7 @@ class ConfigWdlNamespace(backendConfig: Config) { } object ConfigWdlNamespace { - private def makeWdlSource(taskName: String, command: String, declarations: String): WdlSource = { + private def makeWdlSource(taskName: String, command: String, declarations: String): WorkflowSource = { s""" |task $taskName { |$declarations @@ -72,9 +72,9 @@ object ConfigWdlNamespace { |""".stripMargin } - private def makeTask(taskName: String, command: String, declarations: String): Task = { - val wdlSource = makeWdlSource(taskName, command, declarations) - val wdlNamespace = WdlNamespace.load(wdlSource) + private def makeTask(taskName: String, command: String, declarations: String): WdlTask = { + val workflowSource = makeWdlSource(taskName, command, declarations) + val wdlNamespace = WdlNamespace.loadUsingSource(workflowSource, None, None).get wdlNamespace.findTask(taskName).getOrElse(throw new RuntimeException(s"Couldn't find task $taskName")) } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala index ec3aecbf7..5627be9c0 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala @@ -1,13 +1,11 @@ package cromwell.backend.impl.sfs.config -import cromwell.backend.MemorySize -import cromwell.backend.impl.sfs.config.ConfigConstants._ import 
cromwell.backend.validation._ -import wdl4s.expression.NoFunctions -import wdl4s.parser.MemoryUnit -import wdl4s.types._ -import wdl4s.values.{WdlFloat, WdlInteger, WdlValue} -import wdl4s.{Declaration, NoLookup, WdlExpression} +import wdl4s.wdl.expression.NoFunctions +import wdl4s.wdl.types._ +import wdl4s.wdl.values.WdlValue +import wdl4s.wdl.{Declaration, NoLookup, WdlExpression} +import cromwell.backend.impl.sfs.config.ConfigConstants._ /** * Creates instances of runtime attribute validations from WDL declarations. @@ -24,23 +22,31 @@ object DeclarationValidation { * @return The DeclarationValidation object for the declaration. */ def fromDeclaration(declaration: Declaration): DeclarationValidation = { - declaration.name match { + declaration.unqualifiedName match { // Docker and CPU are special keys understood by cromwell. - case DockerValidation.key => new DeclarationValidation(declaration, DockerValidation.instance) - case CpuValidation.key => new DeclarationValidation(declaration, CpuValidation.default) + case name if name == DockerValidation.instance.key => + new DeclarationValidation(declaration, DockerValidation.instance) + case name if name == CpuValidation.instance.key => new DeclarationValidation(declaration, CpuValidation.instance) // See MemoryDeclarationValidation for more info - case name if MemoryDeclarationValidation.isMemoryDeclaration(name) => - new MemoryDeclarationValidation(declaration) + case name if MemoryDeclarationValidation.isMemoryDeclaration(name, MemoryRuntimeAttribute, MemoryRuntimeAttributePrefix) => + new MemoryDeclarationValidation(declaration, MemoryRuntimeAttribute, MemoryRuntimeAttributePrefix) + case name if MemoryDeclarationValidation.isMemoryDeclaration(name, DiskRuntimeAttribute, DiskRuntimeAttributePrefix) => + new MemoryDeclarationValidation(declaration, DiskRuntimeAttribute, DiskRuntimeAttributePrefix) // All other declarations must be a Boolean, Float, Integer, or String. 
case _ => - val validator: PrimitiveRuntimeAttributesValidation[_] = declaration.wdlType match { - case WdlBooleanType => new BooleanRuntimeAttributesValidation(declaration.name) - case WdlFloatType => new FloatRuntimeAttributesValidation(declaration.name) - case WdlIntegerType => new IntRuntimeAttributesValidation(declaration.name) - case WdlStringType => new StringRuntimeAttributesValidation(declaration.name) - case other => throw new RuntimeException(s"Unsupported config runtime attribute $other ${declaration.name}") - } - new DeclarationValidation(declaration, validator) + val validatedRuntimeAttr = validator(declaration.wdlType, declaration.unqualifiedName) + new DeclarationValidation(declaration, validatedRuntimeAttr) + } + } + + private def validator(wdlType: WdlType, unqualifiedName: String): PrimitiveRuntimeAttributesValidation[_, _] = { + wdlType match { + case WdlBooleanType => new BooleanRuntimeAttributesValidation(unqualifiedName) + case WdlFloatType => new FloatRuntimeAttributesValidation(unqualifiedName) + case WdlIntegerType => new IntRuntimeAttributesValidation(unqualifiedName) + case WdlStringType => new StringRuntimeAttributesValidation(unqualifiedName) + case WdlOptionalType(x) => validator(x, unqualifiedName) + case other => throw new RuntimeException(s"Unsupported config runtime attribute $other $unqualifiedName") } } } @@ -52,7 +58,7 @@ object DeclarationValidation { * @param instanceValidation A basic instance validation for the declaration. 
*/ class DeclarationValidation(declaration: Declaration, instanceValidation: RuntimeAttributesValidation[_]) { - val key = declaration.name + val key: String = declaration.unqualifiedName /** * Creates a validation, by adding on defaults if they're specified in the declaration, and then making the @@ -74,7 +80,7 @@ class DeclarationValidation(declaration: Declaration, instanceValidation: Runtim val validationDefault = if (declaration.expression.isDefined) default(instanceValidation, declaration.expression.get) else instanceValidation - if (declaration.postfixQuantifier.contains("?")) validationDefault.optional else validationDefault + if (declaration.wdlType.isInstanceOf[WdlOptionalType]) validationDefault.optional else validationDefault } /** @@ -101,98 +107,3 @@ class DeclarationValidation(declaration: Declaration, instanceValidation: Runtim } } } - -/** - * Maps declarations of memory in WDL runtime attributes to the commands used to submit. - * - * The wdl runtime attributes for memory specified as strings such as: - * - * {{{ - * runtime { - * memory: "500 MB" - * } - * }}} - * - * However, the backend configuration only supports specifying memory in amounts in Float or Int. To specify the unit - * of the amount, the string "memory_" is suffixed with the unit, for example "memory_mb" or "memory_gb", or even - * "memory_ki". - * - * This class and companion object will do the conversion. The backend configuration should use the runtime attribute - * "Float? memory_gb", meaning that "memory" is now an optional runtime attribute, and will be converted to GB. - * - * Just like the runtime attribute, when no units are specified the config, the default unit is bytes. - * - * @param declaration The declaration used to create this memory validation. 
- */ -class MemoryDeclarationValidation(declaration: Declaration) - extends DeclarationValidation(declaration, MemoryValidation.instance) { - - import MemoryDeclarationValidation._ - - /** - * Converts the validation to a version with the default from the memory expression. - * - * If the backend configuration contains a runtime attribute such as "Float memory_gb = 1.0", then the default will - * be set to 1 GB of memory when the attribute is not set. - * - * @param validation The validation to set the default for. - * @param wdlExpression The declaration expression to retrieve the default. - * @return The new validation. - */ - override protected def default(validation: RuntimeAttributesValidation[_], wdlExpression: WdlExpression) = { - val wdlValue = declaration.expression.get.evaluate(NoLookup, NoFunctions).get - val amount: Double = wdlValue match { - case WdlInteger(value) => value.toDouble - case WdlFloat(value) => value - case other => throw new RuntimeException(s"Unsupported memory default: $other") - } - val memorySize = MemorySize(amount, declarationMemoryUnit) - validation.withDefault(WdlInteger(memorySize.bytes.toInt)) - } - - private lazy val declarationMemoryUnit: MemoryUnit = { - val suffix = memoryUnitSuffix(declaration.name) - val memoryUnitOption = MemoryUnit.values.find(_.suffixes.map(_.toLowerCase).contains(suffix.toLowerCase)) - memoryUnitOption match { - case Some(memoryUnit) => memoryUnit - case None => throw new IllegalArgumentException(s"MemoryUnit with suffix $suffix was not found.") - } - } - - /** - * Converts the memory value from a `MemorySize` to a `Float` or `Int` based on the units. - * - * @param validatedRuntimeAttributes The validated attributes. - * @return The value from the collection wrapped in `Some`, or `None` if the value wasn't found. 
- */ - override def extractWdlValueOption(validatedRuntimeAttributes: ValidatedRuntimeAttributes): Option[WdlValue] = { - RuntimeAttributesValidation.extractOption(MemoryValidation.instance, validatedRuntimeAttributes) map { value => - declaration.wdlType match { - case WdlIntegerType => WdlInteger(value.to(declarationMemoryUnit).amount.toInt) - case WdlFloatType => WdlFloat(value.to(declarationMemoryUnit).amount) - case other => throw new RuntimeException(s"Unsupported wdl type for memory: $other") - } - } - } -} - -object MemoryDeclarationValidation { - def isMemoryDeclaration(name: String): Boolean = { - name match { - case MemoryRuntimeAttribute => true - case prefixed if prefixed.startsWith(MemoryRuntimeAttributePrefix) => - val suffix = memoryUnitSuffix(name) - MemoryUnit.values exists { - _.suffixes.map(_.toLowerCase).contains(suffix) - } - case _ => false - } - } - - private def memoryUnitSuffix(name: String) = { - if (name == MemoryRuntimeAttribute) - MemoryUnit.Bytes.suffixes.head - else - name.substring(MemoryRuntimeAttributePrefix.length) - } -} diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/MemoryDeclarationValidation.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/MemoryDeclarationValidation.scala new file mode 100644 index 000000000..9ea3f9735 --- /dev/null +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/MemoryDeclarationValidation.scala @@ -0,0 +1,114 @@ +package cromwell.backend.impl.sfs.config + +import cromwell.backend.MemorySize +import cromwell.backend.validation._ +import wdl4s.wdl.expression.NoFunctions +import wdl4s.parser.MemoryUnit +import wdl4s.wdl.types._ +import wdl4s.wdl.values.{WdlFloat, WdlInteger, WdlOptionalValue, WdlValue} +import wdl4s.wdl.{Declaration, NoLookup, WdlExpression} + +/** + * Maps declarations of memory in WDL runtime attributes to the commands used to submit. 
+ * + * The wdl runtime attributes for memory specified as strings such as: + * + * {{{ + * runtime { + * memory: "500 MB" + * } + * }}} + * + * However, the backend configuration only supports specifying memory in amounts in Float or Int. To specify the unit + * of the amount, the string "memory_" is suffixed with the unit, for example "memory_mb" or "memory_gb", or even + * "memory_ki". + * + * This class and companion object will do the conversion. The backend configuration should use the runtime attribute + * "Float? memory_gb", meaning that "memory" is now an optional runtime attribute, and will be converted to GB. + * + * Just like the runtime attribute, when no units are specified the config, the default unit is bytes. + * + * @param declaration The declaration used to create this memory validation. + */ +class MemoryDeclarationValidation(declaration: Declaration, attributeName: String, attributeNamePrefix: String) + extends DeclarationValidation(declaration, MemoryValidation.instance(attributeName)) { + + import MemoryDeclarationValidation._ + + /** + * Converts the validation to a version with the default from the memory expression. + * + * If the backend configuration contains a runtime attribute such as "Float memory_gb = 1.0", then the default will + * be set to 1 GB of memory when the attribute is not set. + * + * @param validation The validation to set the default for. + * @param wdlExpression The declaration expression to retrieve the default. + * @return The new validation. 
+ */ + override protected def default(validation: RuntimeAttributesValidation[_], + wdlExpression: WdlExpression): RuntimeAttributesValidation[_] = { + val wdlValue = declaration.expression.get.evaluate(NoLookup, NoFunctions).get + val amount: Double = defaultAmount(wdlValue) + val memorySize = MemorySize(amount, declarationMemoryUnit) + validation.withDefault(WdlInteger(memorySize.bytes.toInt)) + } + + private def defaultAmount(wdlValue: WdlValue): Double = { + wdlValue match { + case WdlInteger(value) => value.toDouble + case WdlFloat(value) => value + case WdlOptionalValue(_, Some(optionalWdlValue)) => defaultAmount(optionalWdlValue) + case other => throw new RuntimeException(s"Unsupported memory default: $other") + } + } + + private lazy val declarationMemoryUnit: MemoryUnit = { + val suffix = memoryUnitSuffix(declaration.unqualifiedName, attributeName, attributeNamePrefix) + val memoryUnitOption = MemoryUnit.values.find(_.suffixes.map(_.toLowerCase).contains(suffix.toLowerCase)) + memoryUnitOption match { + case Some(memoryUnit) => memoryUnit + case None => throw new IllegalArgumentException(s"MemoryUnit with suffix $suffix was not found.") + } + } + + /** + * Converts the memory value from a `MemorySize` to a `Float` or `Int` based on the units. + * + * @param validatedRuntimeAttributes The validated attributes. + * @return The value from the collection wrapped in `Some`, or `None` if the value wasn't found. 
+ */ + override def extractWdlValueOption(validatedRuntimeAttributes: ValidatedRuntimeAttributes): Option[WdlValue] = { + RuntimeAttributesValidation.extractOption(MemoryValidation.instance(attributeName), validatedRuntimeAttributes) map + coerceMemorySize(declaration.wdlType) + } + + private def coerceMemorySize(wdlType: WdlType)(value: MemorySize): WdlValue = { + wdlType match { + case WdlIntegerType => WdlInteger(value.to(declarationMemoryUnit).amount.toInt) + case WdlFloatType => WdlFloat(value.to(declarationMemoryUnit).amount) + case WdlOptionalType(optionalType) => coerceMemorySize(optionalType)(value) + case other => throw new RuntimeException(s"Unsupported wdl type for memory: $other") + } + } +} + +object MemoryDeclarationValidation { + def isMemoryDeclaration(name: String, attributeName: String, attributeNamePrefix: String): Boolean = { + name match { + case `attributeName` => true + case prefixed if prefixed.startsWith(attributeNamePrefix) => + val suffix = memoryUnitSuffix(name, attributeName, attributeNamePrefix) + MemoryUnit.values exists { + _.suffixes.map(_.toLowerCase).contains(suffix) + } + case _ => false + } + } + + private def memoryUnitSuffix(name: String, attributeName: String, attributeNamePrefix: String) = { + if (name == attributeName) + MemoryUnit.Bytes.suffixes.head + else + name.substring(attributeNamePrefix.length) + } +} diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/PrimitiveRuntimeAttributesValidation.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/PrimitiveRuntimeAttributesValidation.scala deleted file mode 100644 index deec1bcf9..000000000 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/PrimitiveRuntimeAttributesValidation.scala +++ /dev/null @@ -1,52 +0,0 @@ -package cromwell.backend.impl.sfs.config - -import cromwell.backend.validation.RuntimeAttributesValidation -import wdl4s.types._ -import wdl4s.values.{WdlBoolean, WdlFloat, 
WdlInteger, WdlString} - -import scalaz.Scalaz._ - -/** - * Validates one of the wdl primitive types: Boolean, Float, Integer, or String. WdlFile is not supported. - * - * @tparam A The type of validated runtime attribute. - */ -sealed trait PrimitiveRuntimeAttributesValidation[A] extends RuntimeAttributesValidation[A] { - val wdlType: WdlPrimitiveType - - override def coercion = Seq(wdlType) -} - -class BooleanRuntimeAttributesValidation(override val key: String) extends - PrimitiveRuntimeAttributesValidation[Boolean] { - - override val wdlType = WdlBooleanType - - override protected def validateValue = { - case WdlBoolean(value) => value.successNel - } -} - -class FloatRuntimeAttributesValidation(override val key: String) extends PrimitiveRuntimeAttributesValidation[Double] { - override val wdlType = WdlFloatType - - override protected def validateValue = { - case WdlFloat(value) => value.successNel - } -} - -class IntRuntimeAttributesValidation(override val key: String) extends PrimitiveRuntimeAttributesValidation[Int] { - override val wdlType = WdlIntegerType - - override protected def validateValue = { - case WdlInteger(value) => value.toInt.successNel - } -} - -class StringRuntimeAttributesValidation(override val key: String) extends PrimitiveRuntimeAttributesValidation[String] { - override val wdlType = WdlStringType - - override protected def validateValue = { - case WdlString(value) => value.successNel - } -} diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/BackgroundAsyncJobExecutionActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/BackgroundAsyncJobExecutionActor.scala index 41173d811..44c584187 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/BackgroundAsyncJobExecutionActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/BackgroundAsyncJobExecutionActor.scala @@ -1,73 +1,58 @@ package cromwell.backend.sfs -import java.nio.file.Path - -import better.files._ -import 
cromwell.backend.sfs.SharedFileSystem._ +import cromwell.backend.standard.StandardAsyncJob +import cromwell.core.path.Path trait BackgroundAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecutionActor { - override def makeProcessRunner(): ProcessRunner = { - val backgroundScript = pathPlusSuffix(jobPaths.script, "background") - writeBackgroundScript(backgroundScript, processArgs.argv.mkString("'", "' '", "'")) - val stdout = pathPlusSuffix(jobPaths.stdout, "background") - val stderr = pathPlusSuffix(jobPaths.stderr, "background") - val argv = Seq("/bin/bash", backgroundScript) - new ProcessRunner(argv, stdout, stderr) - } - - private def writeBackgroundScript(backgroundScript: Path, backgroundCommand: String): Unit = { - /* - Run the `backgroundCommand` in the background. Redirect the stdout and stderr to the appropriate files. While not - necessary, mark the job as not receiving any stdin by pointing it at /dev/null. - - If the `backgroundCommand` errors for some reason, put a "-1" into the rc file. + lazy val backgroundScript = jobPaths.script.plusExt("background") - Finally, run all of the above in the bash background, and return the PID of the backgrounded command. + override def writeScriptContents(): Unit = { + super.writeScriptContents() + writeBackgroundScriptContents() + } - bashism | english - --------|-------------------------------------------------------------------------- - > | redirect stdout to - 2> | redirect stderr to - < | redirect stdin from - || | if the previous command fails, then run the following command - > | redirect stdout to - & | send the entire compound command, including the || to the background - $! | a variable containing the previous background command's process id (PID) - */ - File(backgroundScript).write( + /** + * Run the command via bash in the background, and echo the PID. 
+ */ + private def writeBackgroundScriptContents(): Unit = { + val backgroundCommand = redirectOutputs(processArgs.argv.mkString("'", "' '", "'")) + // $! contains the previous background command's process id (PID) + backgroundScript.write( s"""|#!/bin/bash - |$backgroundCommand \\ - | > ${jobPaths.stdout} \\ - | 2> ${jobPaths.stderr} \\ - | < /dev/null \\ - | || echo -1 \\ - | > ${jobPaths.returnCode} \\ - | & + |BACKGROUND_COMMAND & |echo $$! - |""".stripMargin) + |""".stripMargin.replace("BACKGROUND_COMMAND", backgroundCommand)) + () + } + + override def makeProcessRunner(): ProcessRunner = { + val stdout = jobPaths.stdout.plusExt("background") + val stderr = jobPaths.stderr.plusExt("background") + val argv = Seq("/bin/bash", backgroundScript) + new ProcessRunner(argv, stdout, stderr) } - override def getJob(exitValue: Int, stdout: Path, stderr: Path) = { - val pid = File(stdout).contentAsString.stripLineEnd - SharedFileSystemJob(pid) + override def getJob(exitValue: Int, stdout: Path, stderr: Path): StandardAsyncJob = { + val pid = stdout.contentAsString.stripLineEnd + StandardAsyncJob(pid) } - override def checkAliveArgs(job: SharedFileSystemJob) = { + override def checkAliveArgs(job: StandardAsyncJob): SharedFileSystemCommand = { SharedFileSystemCommand("ps", job.jobId) } - override def killArgs(job: SharedFileSystemJob) = { - val killScript = pathPlusSuffix(jobPaths.script, "kill") + override def killArgs(job: StandardAsyncJob): SharedFileSystemCommand = { + val killScript = jobPaths.script.plusExt("kill") writeKillScript(killScript, job) SharedFileSystemCommand("/bin/bash", killScript) } - private def writeKillScript(killScript: Path, job: SharedFileSystemJob): Unit = { + private def writeKillScript(killScript: Path, job: StandardAsyncJob): Unit = { /* Use pgrep to find the children of a process, and recursively kill the children before killing the parent. 
*/ - File(killScript).write( + killScript.write( s"""|#!/bin/bash |kill_children() { | local pid=$$1 @@ -80,5 +65,6 @@ trait BackgroundAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecution | |kill_children ${job.jobId} |""".stripMargin) + () } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/GcsWorkflowFileSystemProvider.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/GcsWorkflowFileSystemProvider.scala deleted file mode 100644 index 35cbd97d6..000000000 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/GcsWorkflowFileSystemProvider.scala +++ /dev/null @@ -1,35 +0,0 @@ -package cromwell.backend.sfs - -import cromwell.backend.wfs.{WorkflowFileSystemProvider, WorkflowFileSystemProviderParams} -import cromwell.filesystems.gcs.GoogleAuthMode.GoogleAuthOptions -import cromwell.filesystems.gcs.{GcsFileSystem, GcsFileSystemProvider, GoogleConfiguration} -import lenthall.config.ScalaConfig._ -import wdl4s.ValidationException - -import scala.util.Try - -object GcsWorkflowFileSystemProvider extends WorkflowFileSystemProvider { - override def fileSystemOption(params: WorkflowFileSystemProviderParams): Option[GcsFileSystem] = { - params.fileSystemConfig.getStringOption("gcs.auth") map gcsFileSystem(params) - } - - private def gcsFileSystem(params: WorkflowFileSystemProviderParams)(gcsAuthName: String): GcsFileSystem = { - val workflowOptions = params.workflowOptions - val globalConfig = params.globalConfig - val googleConfig = GoogleConfiguration(globalConfig) - val googleAuthModeValidation = googleConfig.auth(gcsAuthName) - - val gcsAuthMode = googleAuthModeValidation match { - case scalaz.Success(googleAuthMode) => googleAuthMode - case scalaz.Failure(errors) => - throw new ValidationException("Could not create gcs filesystem from configuration", errors) - } - - val authOptions = new GoogleAuthOptions { - override def get(key: String): Try[String] = workflowOptions.get(key) - } - - val storage = 
gcsAuthMode.buildStorage(authOptions, googleConfig.applicationName) - GcsFileSystem(GcsFileSystemProvider(storage)(params.fileSystemExecutionContext)) - } -} diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/ProcessRunner.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/ProcessRunner.scala index 509ec01ba..64401db64 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/ProcessRunner.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/ProcessRunner.scala @@ -1,6 +1,6 @@ package cromwell.backend.sfs -import java.nio.file.Path +import cromwell.core.path.Path /** * Runs a process and sends the stdout and stderr to a file path. @@ -17,7 +17,7 @@ class ProcessRunner(val argv: Seq[Any], val stdoutPath: Path, val stderrPath: Pa processBuilder.command(argv.map(_.toString): _*) processBuilder.redirectOutput(stdoutPath.toFile) processBuilder.redirectError(stderrPath.toFile) - val proccess = processBuilder.start() - proccess.waitFor() + val process = processBuilder.start() + process.waitFor() } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala index 1ac20f284..bc50ec13a 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala @@ -1,24 +1,27 @@ package cromwell.backend.sfs -import java.nio.file.{FileSystem, Files, Path, Paths} +import java.io.{FileNotFoundException, IOException} +import cats.instances.try_._ +import cats.syntax.functor._ import com.typesafe.config.Config +import com.typesafe.scalalogging.StrictLogging import cromwell.backend.io.JobPaths -import cromwell.core._ -import wdl4s.CallInputs -import wdl4s.types.{WdlArrayType, WdlMapType} -import wdl4s.util.TryUtil -import wdl4s.values._ +import cromwell.backend.wdl.WdlFileMapper +import 
cromwell.core.CromwellFatalExceptionMarker +import cromwell.core.path.{DefaultPath, DefaultPathBuilder, Path, PathFactory} +import lenthall.util.TryUtil +import wdl4s.wdl.EvaluatedTaskInputs +import wdl4s.wdl.values._ import scala.collection.JavaConverters._ import scala.language.postfixOps import scala.util.{Failure, Success, Try} -object SharedFileSystem { - import better.files._ +object SharedFileSystem extends StrictLogging { final case class AttemptedLookupResult(name: String, value: Try[WdlValue]) { - def toPair = name -> value + def toPair: (String, Try[WdlValue]) = name -> value } object AttemptedLookupResult { @@ -29,67 +32,71 @@ object SharedFileSystem { } } - type PathsPair = (Path, Path) + case class PairOfFiles(src: Path, dst: Path) type DuplicationStrategy = (Path, Path) => Try[Unit] /** * Return a `Success` result if the file has already been localized, otherwise `Failure`. */ private def localizePathAlreadyLocalized(originalPath: Path, executionPath: Path): Try[Unit] = { - if (File(executionPath).exists) Success(Unit) else Failure(new RuntimeException(s"$originalPath doesn't exists")) + if (executionPath.exists) Success(()) else Failure(new RuntimeException(s"$originalPath doesn't exists")) } private def localizePathViaCopy(originalPath: Path, executionPath: Path): Try[Unit] = { - File(executionPath).parent.createDirectories() - val executionTmpPath = pathPlusSuffix(executionPath, ".tmp") - Try(File(originalPath).copyTo(executionTmpPath, overwrite = true).moveTo(executionPath, overwrite = true)) + val action = Try { + executionPath.parent.createPermissionedDirectories() + val executionTmpPath = executionPath.plusExt("tmp") + originalPath.copyTo(executionTmpPath, overwrite = true).moveTo(executionPath, overwrite = true) + }.void + logOnFailure(action, "copy") } private def localizePathViaHardLink(originalPath: Path, executionPath: Path): Try[Unit] = { - File(executionPath).parent.createDirectories() - Try(Files.createLink(executionPath, 
originalPath)) + val action = Try { + executionPath.parent.createPermissionedDirectories() + originalPath.linkTo(executionPath) + }.void + logOnFailure(action, "hard link") } - /** - * TODO: The 'call' parameter here represents the call statement in WDL that references this path. - * We're supposed to not use symbolic links if the call uses Docker. However, this is currently a - * bit incorrect because multiple calls can reference the same path if that path is in a declaration. - * - * The symbolic link will only fail in the Docker case if a Call uses the file directly and not - * indirectly through one of its input expressions - */ - private def localizePathViaSymbolicLink(originalPath: Path, executionPath: Path): Try[Unit] = { - if (File(originalPath).isDirectory) Failure(new UnsupportedOperationException("Cannot localize directory with symbolic links")) + if (originalPath.isDirectory) Failure(new UnsupportedOperationException("Cannot localize directory with symbolic links")) + else if (!originalPath.exists) Failure(new FileNotFoundException(originalPath.pathAsString)) else { - File(executionPath).parent.createDirectories() - Try(Files.createSymbolicLink(executionPath, originalPath.toAbsolutePath)) + val action = Try { + executionPath.parent.createPermissionedDirectories() + executionPath.linkTo(originalPath, symbolic = true) + }.void + logOnFailure(action, "symbolic link") } } - private def duplicate(description: String, source: Path, dest: Path, strategies: Stream[DuplicationStrategy]) = { - strategies.map(_ (source, dest)).find(_.isSuccess) getOrElse { - Failure(new UnsupportedOperationException(s"Could not $description $source -> $dest")) - } + private def logOnFailure(action: Try[Unit], actionLabel: String): Try[Unit] = { + if (action.isFailure) logger.warn(s"Localization via $actionLabel has failed: ${action.failed.get.getMessage}") + action } - def pathPlusSuffix(path: Path, suffix: String) = path.resolveSibling(s"${File(path).name}.$suffix") + private def 
duplicate(description: String, source: Path, dest: Path, strategies: Stream[DuplicationStrategy]): Try[Unit] = { + val attempts: Stream[Try[Unit]] = strategies.map(_ (source.followSymbolicLinks, dest)) + attempts.find(_.isSuccess) getOrElse { + TryUtil.sequence(attempts, s"Could not $description $source -> $dest").void + } + } } trait SharedFileSystem extends PathFactory { import SharedFileSystem._ - import better.files._ def sharedFileSystemConfig: Config lazy val DefaultStrategies = Seq("hard-link", "soft-link", "copy") - lazy val LocalizationStrategies = getConfigStrategies("localization") - lazy val Localizers = createStrategies(LocalizationStrategies, docker = false) - lazy val DockerLocalizers = createStrategies(LocalizationStrategies, docker = true) + lazy val LocalizationStrategies: Seq[String] = getConfigStrategies("localization") + lazy val Localizers: Seq[DuplicationStrategy] = createStrategies(LocalizationStrategies, docker = false) + lazy val DockerLocalizers: Seq[DuplicationStrategy] = createStrategies(LocalizationStrategies, docker = true) - lazy val CachingStrategies = getConfigStrategies("caching") - lazy val Cachers = createStrategies(CachingStrategies, docker = false) + lazy val CachingStrategies: Seq[String] = getConfigStrategies("caching.duplication-strategy") + lazy val Cachers: Seq[DuplicationStrategy] = createStrategies(CachingStrategies, docker = false) private def getConfigStrategies(configPath: String): Seq[String] = { if (sharedFileSystemConfig.hasPath(configPath)) { @@ -118,21 +125,24 @@ trait SharedFileSystem extends PathFactory { localizePathAlreadyLocalized _ +: mappedDuplicationStrategies } - private def hostAbsoluteFilePath(callRoot: Path, pathString: String): File = { - val wdlPath = Paths.get(pathString) - callRoot.resolve(wdlPath).toAbsolutePath + private def hostAbsoluteFilePath(callRoot: Path, pathString: String): Path = { + val wdlPath = PathFactory.buildPath(pathString, pathBuilders) + wdlPath match { + case _: DefaultPath 
if !wdlPath.isAbsolute => callRoot.resolve(wdlPath).toAbsolutePath + case _ => wdlPath + } } def outputMapper(job: JobPaths)(wdlValue: WdlValue): Try[WdlValue] = { - wdlValue match { + WdlFileMapper.mapWdlFiles(mapJobWdlFile(job))(wdlValue) + } + + def mapJobWdlFile(job: JobPaths)(wdlFile: WdlFile): WdlFile = { + wdlFile match { case fileNotFound: WdlFile if !hostAbsoluteFilePath(job.callExecutionRoot, fileNotFound.valueString).exists => - Failure(new RuntimeException("Could not process output, file not found: " + - s"${hostAbsoluteFilePath(job.callExecutionRoot, fileNotFound.valueString).pathAsString}")) - case file: WdlFile => Try(WdlFile(hostAbsoluteFilePath(job.callExecutionRoot, file.valueString).pathAsString)) - case array: WdlArray => - val mappedArray = array.value map outputMapper(job) - TryUtil.sequence(mappedArray) map { WdlArray(array.wdlType, _) } - case other => Success(other) + throw new RuntimeException("Could not process output, file not found: " + + s"${hostAbsoluteFilePath(job.callExecutionRoot, fileNotFound.valueString).pathAsString}") + case _ => WdlFile(hostAbsoluteFilePath(job.callExecutionRoot, wdlFile.valueString).pathAsString) } } @@ -143,88 +153,58 @@ trait SharedFileSystem extends PathFactory { /** * Return a possibly altered copy of inputs reflecting any localization of input file paths that might have * been performed for this `Backend` implementation. - * NOTE: This ends up being a backdoor implementation of Backend.adjustInputPaths as both LocalBackend and SgeBackend - * end up with this implementation and thus use it to satisfy their contract with Backend. 
- * This is yuck-tastic and I consider this a FIXME, but not for this refactor */ - def localizeInputs(inputsRoot: Path, docker: Boolean, filesystems: List[FileSystem], inputs: CallInputs): Try[CallInputs] = { - val strategies = if (docker) DockerLocalizers else Localizers + def localizeInputs(inputsRoot: Path, docker: Boolean)(inputs: EvaluatedTaskInputs): Try[EvaluatedTaskInputs] = { + TryUtil.sequenceMap( + inputs mapValues WdlFileMapper.mapWdlFiles(localizeWdlFile(inputsRoot, docker)), + "Failures during localization" + ) recoverWith { + case e => Failure(new IOException(e.getMessage) with CromwellFatalExceptionMarker) + } + } - // Use URI to identify protocol scheme and strip it out - def stripProtocolScheme(path: Path): Path = { - val uri = path.toUri - val host = Option(uri.getHost) - val uriPath = uri.getPath + def localizeWdlFile(inputsRoot: Path, docker: Boolean)(value: WdlFile): WdlFile = { + val strategies = if (docker) DockerLocalizers else Localizers - host map { h => Paths.get(h, uriPath) } getOrElse Paths.get(uriPath) - } + // Strip the protocol scheme + def stripProtocolScheme(path: Path): Path = DefaultPathBuilder.get(path.pathWithoutScheme) - /** + /* * Transform an original input path to a path in the call directory. * The new path matches the original path, it only "moves" the root to be the call directory. 
*/ - def toCallPath(path: String): Try[PathsPair] = Try { - val src = buildPath(path, filesystems) + + def toCallPath(path: String): Try[PairOfFiles] = Try { + val src = buildPath(path) // Strip out potential prefix protocol val localInputPath = stripProtocolScheme(src) - val dest = if (File(inputsRoot).isParentOf(localInputPath)) localInputPath + val dest = if (inputsRoot.isParentOf(localInputPath)) localInputPath else { // Concatenate call directory with absolute input path - Paths.get(inputsRoot.toString, localInputPath.toString) + DefaultPathBuilder.get(inputsRoot.pathAsString, localInputPath.pathAsString) } - (src, dest) + PairOfFiles(src, dest) } // Optional function to adjust the path to "docker path" if the call runs in docker - val localizeFunction = localizeWdlValue(toCallPath, strategies.toStream) _ - val localizedValues = inputs.toSeq map { - case (name, value) => localizeFunction(value) map { name -> _ } - } - - TryUtil.sequence(localizedValues, "Failures during localization").map(_.toMap) recover { - case e => throw new CromwellFatalException(e) - } + localizeWdlFile(toCallPath _, strategies.toStream)(value) } /** - * Try to localize a WdlValue if it is or contains a WdlFile. 
- * - * @param toDestPath function specifying how to generate the destination path from the source path - * @param strategies strategies to use for localization - * @param wdlValue WdlValue to localize - * @return localized wdlValue - */ - private def localizeWdlValue(toDestPath: (String => Try[PathsPair]), strategies: Stream[DuplicationStrategy]) - (wdlValue: WdlValue): Try[WdlValue] = { - - def adjustArray(t: WdlArrayType, inputArray: Seq[WdlValue]): Try[WdlArray] = { - val tryAdjust = inputArray map localizeWdlValue(toDestPath, strategies) - - TryUtil.sequence(tryAdjust, s"Failed to localize files in input Array ${wdlValue.valueString}") map { adjusted => - new WdlArray(t, adjusted) - } - } - - def adjustMap(t: WdlMapType, inputMap: Map[WdlValue, WdlValue]): Try[WdlMap] = { - val tryAdjust = inputMap mapValues { localizeWdlValue(toDestPath, strategies) } - - TryUtil.sequenceMap(tryAdjust, s"Failed to localize files in input Map ${wdlValue.valueString}") map { adjusted => - new WdlMap(t, adjusted) - } - } - - def adjustFile(path: String) = { - toDestPath(path) flatMap { - case (src, dst) => duplicate("localize", src, dst, strategies) map { _ => WdlFile(dst.toString) } - } - } - - wdlValue match { - case wdlFile: WdlFile => adjustFile(wdlFile.value) - case WdlArray(t, values) => adjustArray(t, values) - case WdlMap(t, values) => adjustMap(t, values) - case x => Success(x) + * Try to localize a WdlFile. 
+ * + * @param toDestPath function specifying how to generate the destination path from the source path + * @param strategies strategies to use for localization + * @param wdlFile WdlFile to localize + * @return localized wdl file + */ + private def localizeWdlFile(toDestPath: (String => Try[PairOfFiles]), strategies: Stream[DuplicationStrategy]) + (wdlFile: WdlFile): WdlFile = { + val path = wdlFile.value + val result = toDestPath(path) flatMap { + case PairOfFiles(src, dst) => duplicate("localize", src, dst, strategies) map { _ => WdlFile(dst.pathAsString) } } + result.get } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala index 3a925aad9..cdc79b279 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala @@ -1,45 +1,25 @@ package cromwell.backend.sfs -import java.nio.file.{FileAlreadyExistsException, Path, Paths} +import java.nio.file.FileAlreadyExistsException -import akka.actor.{Actor, ActorLogging, ActorRef} -import akka.event.LoggingReceive -import better.files._ -import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionResponse -import cromwell.backend.BackendLifecycleActor.AbortJobCommand -import cromwell.backend.async.AsyncBackendJobExecutionActor._ -import cromwell.backend.async.{AbortedExecutionHandle, AsyncBackendJobExecutionActor, ExecutionHandle, FailedNonRetryableExecutionHandle, NonRetryableExecution, SuccessfulExecutionHandle} -import cromwell.backend.io.WorkflowPathsBackendInitializationData -import cromwell.backend.sfs.SharedFileSystem._ +import cromwell.backend._ +import cromwell.backend.async.{ExecutionHandle, FailedNonRetryableExecutionHandle, PendingExecutionHandle} +import 
cromwell.backend.io.JobPathsWithDocker +import cromwell.backend.standard.{StandardAsyncExecutionActor, StandardAsyncJob} import cromwell.backend.validation._ -import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor, OutputEvaluator} -import cromwell.core.JobOutputs -import cromwell.core.logging.JobLogging +import cromwell.core.path.{DefaultPathBuilder, Path} import cromwell.core.retry.SimpleExponentialBackoff -import cromwell.services.keyvalue.KeyValueServiceActor._ -import wdl4s.values.{WdlArray, WdlFile, WdlMap, WdlValue} +import wdl4s.wdl.values.WdlFile import scala.concurrent.duration._ -import scala.concurrent.{ExecutionContext, Future, Promise} -import scala.util.{Failure, Success, Try} -object SharedFileSystemJob { - val JobIdKey = "sfs_job_id" +case class SharedFileSystemRunStatus(returnCodeFileExists: Boolean) { + override def toString: String = if (returnCodeFileExists) "Done" else "WaitingForReturnCodeFile" } -/** - * A generic job that runs and tracks some string identifier for the job. - */ -case class SharedFileSystemJob(jobId: String) extends JobId - -case class SharedFileSystemAsyncJobExecutionActorParams -( - serviceRegistryActor: ActorRef, - jobDescriptor: BackendJobDescriptor, - configurationDescriptor: BackendConfigurationDescriptor, - completionPromise: Promise[BackendJobExecutionResponse], - backendInitializationDataOption: Option[BackendInitializationData] -) +object SharedFileSystemAsyncJobExecutionActor { + val JobIdKey = "sfs_job_id" +} /** * Runs a job on a shared backend, with the ability to (abstractly) submit asynchronously, then poll, kill, etc. @@ -66,29 +46,16 @@ case class SharedFileSystemAsyncJobExecutionActorParams * messages. 
*/ trait SharedFileSystemAsyncJobExecutionActor - extends Actor with ActorLogging with AsyncBackendJobExecutionActor with SharedFileSystemJobCachingActorHelper - with JobLogging { - - case class SharedFileSystemPendingExecutionHandle(jobDescriptor: BackendJobDescriptor, - run: SharedFileSystemJob) extends ExecutionHandle { - override val isDone = false - override val result = NonRetryableExecution(new IllegalStateException( - "SharedFileSystemPendingExecutionHandle cannot yield a result")) - } + extends BackendJobLifecycleActor with StandardAsyncExecutionActor with SharedFileSystemJobCachingActorHelper { - context.become(sharedReceive(None) orElse super.receive) + override type StandardAsyncRunInfo = Any - val SIGTERM = 143 - val SIGINT = 130 + override type StandardAsyncRunStatus = SharedFileSystemRunStatus override lazy val pollBackOff = SimpleExponentialBackoff(1.second, 5.minutes, 1.1) override lazy val executeOrRecoverBackOff = SimpleExponentialBackoff(3.seconds, 30.seconds, 1.1) - override protected implicit def ec = context.dispatcher - - val params: SharedFileSystemAsyncJobExecutionActorParams - /** * Returns the command for running the job. The returned command may or may not run the job asynchronously in the * background. If the command does not run the script asynchronously in the background or on some job scheduler, the @@ -107,7 +74,7 @@ trait SharedFileSystemAsyncJobExecutionActor * @param stderr The stderr of the submit. * @return The job id wrapped in a SharedFileSystemJob. */ - def getJob(exitValue: Int, stdout: Path, stderr: Path): SharedFileSystemJob + def getJob(exitValue: Int, stdout: Path, stderr: Path): StandardAsyncJob /** * Returns the command for checking if a job is alive, returing non-zero if the job cannot be found or has errored. @@ -115,7 +82,7 @@ trait SharedFileSystemAsyncJobExecutionActor * @param job The job to check. * @return The command for checking if a job is alive. 
*/ - def checkAliveArgs(job: SharedFileSystemJob): SharedFileSystemCommand + def checkAliveArgs(job: StandardAsyncJob): SharedFileSystemCommand /** * Returns the command for killing a job. @@ -123,91 +90,37 @@ trait SharedFileSystemAsyncJobExecutionActor * @param job The job to kill. * @return The command for killing a job. */ - def killArgs(job: SharedFileSystemJob): SharedFileSystemCommand - - override lazy val jobDescriptor = params.jobDescriptor - - override lazy val completionPromise = params.completionPromise + def killArgs(job: StandardAsyncJob): SharedFileSystemCommand - override lazy val serviceRegistryActor = params.serviceRegistryActor - - override lazy val configurationDescriptor = params.configurationDescriptor - - override lazy val backendInitializationDataOption = params.backendInitializationDataOption - - def toDockerPath(path: WdlValue): WdlValue = { - path match { - case file: WdlFile => WdlFile(jobPaths.toDockerPath(Paths.get(path.valueString)).toString) - case array: WdlArray => WdlArray(array.wdlType, array.value map toDockerPath) - case map: WdlMap => WdlMap(map.wdlType, map.value mapValues toDockerPath) - case wdlValue => wdlValue - } - } + lazy val jobPathsWithDocker: JobPathsWithDocker = jobPaths.asInstanceOf[JobPathsWithDocker] def jobName: String = s"cromwell_${jobDescriptor.workflowDescriptor.id.shortString}_${jobDescriptor.call.unqualifiedName}" - override def retryable = false - - lazy val workflowDescriptor = jobDescriptor.workflowDescriptor - lazy val call = jobDescriptor.key.call - lazy val fileSystems = WorkflowPathsBackendInitializationData.fileSystems(backendInitializationDataOption) - lazy val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, fileSystems) - override lazy val workflowId = jobDescriptor.workflowDescriptor.id - override lazy val jobTag = jobDescriptor.key.tag - - lazy val isDockerRun = RuntimeAttributesValidation.extractOption( + lazy val isDockerRun: Boolean = 
RuntimeAttributesValidation.extractOption( DockerValidation.instance, validatedRuntimeAttributes).isDefined - def sharedReceive(jobOption: Option[SharedFileSystemJob]): Receive = LoggingReceive { - case AbortJobCommand => - jobOption foreach tryKill - case KvPutSuccess(_) => // expected after the KvPut in tellKvJobId - } - - def instantiatedScript: String = { - val pathTransformFunction: WdlValue => WdlValue = if (isDockerRun) toDockerPath else identity - val tryCommand = sharedFileSystem.localizeInputs(jobPaths.callInputsRoot, - isDockerRun, fileSystems, jobDescriptor.inputs) flatMap { localizedInputs => - call.task.instantiateCommand(localizedInputs, callEngineFunction, pathTransformFunction) - } - tryCommand.get - } - - override def executeOrRecover(mode: ExecutionMode)(implicit ec: ExecutionContext) = { - // Run now in receive, not in yet another Runnable. - Future.fromTry(Try { - mode match { - case Execute => - tellMetadata(metadataKeyValues) - executeScript() - case Recover(recoveryId) => - recoveryId match { - case job: SharedFileSystemJob => recoverScript(job) - case other => throw new RuntimeException(s"Unable to recover $other") - } - } - } recoverWith { - case exception: Exception => - jobLogger.error(s"Error attempting to $mode the script", exception) - Failure(exception) - }) + /** + * Localizes the file, run outside of docker. + */ + override def preProcessWdlFile(wdlFile: WdlFile): WdlFile = { + sharedFileSystem.localizeWdlFile(jobPathsWithDocker.callInputsRoot, isDockerRun)(wdlFile) } /** - * Fire and forget info to the metadata service + * Returns the paths to the file, inside of docker. 
*/ - def tellMetadata(metadataKeyValues: Map[String, Any]): Unit = { - import cromwell.services.metadata.MetadataService.implicits.MetadataAutoPutter - serviceRegistryActor.putMetadata(jobDescriptor.workflowDescriptor.id, Option(jobDescriptor.key), metadataKeyValues) + override def mapCommandLineWdlFile(wdlFile: WdlFile): WdlFile = { + val cleanPath = DefaultPathBuilder.build(wdlFile.valueString).get + WdlFile(if (isDockerRun) jobPathsWithDocker.toDockerPath(cleanPath).pathAsString else cleanPath.pathAsString) } - def executeScript(): ExecutionHandle = { - val script = instantiatedScript - jobLogger.info(s"`$script`") - File(jobPaths.callExecutionRoot).createDirectories() - val cwd = if (isDockerRun) jobPaths.callExecutionDockerRoot else jobPaths.callExecutionRoot - writeScript(script, cwd) - jobLogger.info(s"command: $processArgs") + override lazy val commandDirectory: Path = { + if (isDockerRun) jobPathsWithDocker.callExecutionDockerRoot else jobPaths.callExecutionRoot + } + + override def execute(): ExecutionHandle = { + jobPaths.callExecutionRoot.createPermissionedDirectories() + writeScriptContents() val runner = makeProcessRunner() val exitValue = runner.run() if (exitValue != 0) { @@ -215,14 +128,15 @@ trait SharedFileSystemAsyncJobExecutionActor s"Check the stderr file for possible errors: ${runner.stderrPath}")) } else { val runningJob = getJob(exitValue, runner.stdoutPath, runner.stderrPath) - context.become(sharedReceive(Option(runningJob)) orElse super.receive) - tellKvJobId(runningJob) - jobLogger.info(s"job id: ${runningJob.jobId}") - tellMetadata(Map("jobId" -> runningJob.jobId)) - SharedFileSystemPendingExecutionHandle(jobDescriptor, runningJob) + PendingExecutionHandle(jobDescriptor, runningJob, None, None) } } + def writeScriptContents(): Unit = { + jobPaths.script.write(commandScriptContents) + () + } + /** * Creates a script to submit the script for asynchronous processing. 
The default implementation assumes the * processArgs already runs the script asynchronously. If not, mix in the `BackgroundAsyncJobExecutionActor` that @@ -231,50 +145,18 @@ trait SharedFileSystemAsyncJobExecutionActor * @return A process runner that will relatively quickly submit the script asynchronously. */ def makeProcessRunner(): ProcessRunner = { - new ProcessRunner(processArgs.argv, jobPaths.stdout, jobPaths.stderr) - } - - /** - * Writes the script file containing the user's command from the WDL as well - * as some extra shell code for monitoring jobs - */ - private def writeScript(instantiatedCommand: String, cwd: Path) = { - val rcPath = if (isDockerRun) jobPaths.toDockerPath(jobPaths.returnCode) else jobPaths.returnCode - val rcTmpPath = s"$rcPath.tmp" - - File(jobPaths.script).write( - s"""#!/bin/sh - |( - | cd $cwd - | $instantiatedCommand - |) - |echo $$? > $rcTmpPath - |mv $rcTmpPath $rcPath""".stripMargin) + val stdout = jobPaths.stdout.plusExt("submit") + val stderr = jobPaths.stderr.plusExt("submit") + new ProcessRunner(processArgs.argv, stdout, stderr) } - /** - * Send the job id of the running job to the key value store. - * - * @param runningJob The running job. - */ - private def tellKvJobId(runningJob: SharedFileSystemJob): Unit = { - val kvJobKey = - KvJobKey(jobDescriptor.key.call.fullyQualifiedName, jobDescriptor.key.index, jobDescriptor.key.attempt) - val scopedKey = ScopedKey(jobDescriptor.workflowDescriptor.id, kvJobKey, SharedFileSystemJob.JobIdKey) - val kvValue = Option(runningJob.jobId) - val kvPair = KvPair(scopedKey, kvValue) - val kvPut = KvPut(kvPair) - serviceRegistryActor ! kvPut - } - - def recoverScript(job: SharedFileSystemJob): ExecutionHandle = { - context.become(sharedReceive(Option(job)) orElse super.receive) + override def recover(job: StandardAsyncJob): ExecutionHandle = { // To avoid race conditions, check for the rc file after checking if the job is alive. 
- if (isAlive(job) || File(jobPaths.returnCode).exists) { + if (isAlive(job) || jobPaths.returnCode.exists) { // If we're done, we'll get to the rc during the next poll. // Or if we're still running, return pending also. jobLogger.info(s"Recovering using job id: ${job.jobId}") - SharedFileSystemPendingExecutionHandle(jobDescriptor, job) + PendingExecutionHandle(jobDescriptor, job, None, None) } else { // Could start executeScript(), but for now fail because we shouldn't be in this state. FailedNonRetryableExecutionHandle(new RuntimeException( @@ -282,16 +164,18 @@ trait SharedFileSystemAsyncJobExecutionActor } } - def isAlive(job: SharedFileSystemJob): Boolean = { + def isAlive(job: StandardAsyncJob): Boolean = { val argv = checkAliveArgs(job).argv - val stdout = pathPlusSuffix(jobPaths.stdout, "check") - val stderr = pathPlusSuffix(jobPaths.stderr, "check") + val stdout = jobPaths.stdout.plusExt("check") + val stderr = jobPaths.stderr.plusExt("check") val checkAlive = new ProcessRunner(argv, stdout, stderr) checkAlive.run() == 0 } - def tryKill(job: SharedFileSystemJob): Unit = { - val returnCodeTmp = File(pathPlusSuffix(jobPaths.returnCode, "kill")) + override def requestsAbortAndDiesImmediately: Boolean = false + + override def tryAbort(job: StandardAsyncJob): Unit = { + val returnCodeTmp = jobPaths.returnCode.plusExt("kill") returnCodeTmp.write(s"$SIGTERM\n") try { returnCodeTmp.moveTo(jobPaths.returnCode) @@ -300,75 +184,32 @@ trait SharedFileSystemAsyncJobExecutionActor // If the process has already completed, there will be an existing rc file. returnCodeTmp.delete(true) } + val stderrTmp = jobPaths.stderr.plusExt("kill") + stderrTmp.touch() + try { + stderrTmp.moveTo(jobPaths.stderr) + } catch { + case _: FileAlreadyExistsException => + // If the process has already started, there will be an existing stderr file. 
+ stderrTmp.delete(true) + } val argv = killArgs(job).argv - val stdout = pathPlusSuffix(jobPaths.stdout, "kill") - val stderr = pathPlusSuffix(jobPaths.stderr, "kill") + val stdout = jobPaths.stdout.plusExt("kill") + val stderr = jobPaths.stderr.plusExt("kill") val killer = new ProcessRunner(argv, stdout, stderr) killer.run() + () } - def processReturnCode()(implicit ec: ExecutionContext): Future[ExecutionHandle] = { - val returnCodeTry = Try(File(jobPaths.returnCode).contentAsString.stripLineEnd.toInt) - - lazy val badReturnCodeMessage = s"Call ${call.fullyQualifiedName}: return code was ${returnCodeTry.getOrElse("(none)")}" - - lazy val badReturnCodeResponse = Future.successful( - FailedNonRetryableExecutionHandle(new Exception(badReturnCodeMessage), returnCodeTry.toOption)) - - lazy val abortResponse = Future.successful(AbortedExecutionHandle) - - def processSuccess(returnCode: Int) = { - val successfulFuture = for { - outputs <- Future.fromTry(processOutputs()) - } yield SuccessfulExecutionHandle(outputs, returnCode, jobPaths.detritusPaths.mapValues(_.toString), Seq.empty) - - successfulFuture recover { - case failed: Throwable => - FailedNonRetryableExecutionHandle(failed, Option(returnCode)) - } - } - - def stopFor(returnCode: Int) = { - val continueOnReturnCode = RuntimeAttributesValidation.extract( - ContinueOnReturnCodeValidation.instance, validatedRuntimeAttributes) - !continueOnReturnCode.continueFor(returnCode) - } - - def failForStderr = { - val failOnStderr = RuntimeAttributesValidation.extract( - FailOnStderrValidation.instance, validatedRuntimeAttributes) - failOnStderr && File(jobPaths.stderr).size > 0 - } - - returnCodeTry match { - case Success(SIGTERM) => abortResponse // Special case to check for SIGTERM exit code - implying abort - case Success(SIGINT) => abortResponse // Special case to check for SIGINT exit code - implying abort - case Success(returnCode) if stopFor(returnCode) => badReturnCodeResponse - case Success(returnCode) if 
failForStderr => badReturnCodeResponse - case Success(returnCode) => processSuccess(returnCode) - case Failure(e) => badReturnCodeResponse - } + override def pollStatus(handle: StandardAsyncPendingExecutionHandle): SharedFileSystemRunStatus = { + SharedFileSystemRunStatus(jobPaths.returnCode.exists) } - override def poll(previous: ExecutionHandle)(implicit ec: ExecutionContext) = { - previous match { - case handle: SharedFileSystemPendingExecutionHandle => - val runId = handle.run - jobLogger.debug(s"Polling Job $runId") - File(jobPaths.returnCode).exists match { - case true => - processReturnCode() - case false => - jobLogger.debug(s"'${jobPaths.returnCode}' file does not exist yet") - Future.successful(previous) - } - case failed: FailedNonRetryableExecutionHandle => Future.successful(failed) - case successful: SuccessfulExecutionHandle => Future.successful(successful) - case bad => Future.failed(new IllegalArgumentException(s"Unexpected execution handle: $bad")) - } + override def isTerminal(runStatus: StandardAsyncRunStatus): Boolean = { + runStatus.returnCodeFileExists } - private def processOutputs(): Try[JobOutputs] = { - OutputEvaluator.evaluateOutputs(jobDescriptor, callEngineFunction, sharedFileSystem.outputMapper(jobPaths)) + override def mapOutputWdlFile(wdlFile: WdlFile): WdlFile = { + sharedFileSystem.mapJobWdlFile(jobPaths)(wdlFile) } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala index d812b1c8e..7e991a6ec 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala @@ -1,82 +1,18 @@ package cromwell.backend.sfs -import akka.actor.{ActorRef, Props} -import 
cromwell.backend.BackendJobExecutionActor.BackendJobExecutionResponse -import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey, BackendLifecycleActorFactory, BackendWorkflowDescriptor} -import cromwell.core.Dispatcher -import cromwell.core.Dispatcher._ -import wdl4s.Call -import wdl4s.expression.WdlStandardLibraryFunctions - -import scala.concurrent.Promise +import cromwell.backend.standard._ +import cromwell.backend.standard.callcaching.StandardCacheHitCopyingActor /** * A factory that can be extended for any shared file system implementation. * * See the SharedFileSystemAsyncJobExecutionActor for more info. */ -trait SharedFileSystemBackendLifecycleActorFactory extends BackendLifecycleActorFactory { - - /** - * Config values for the backend, and a pointer to the global config. - * - * This is the single parameter passed into each factory during creation. - * - * @return The backend configuration. - */ - def configurationDescriptor: BackendConfigurationDescriptor - - /** - * Returns the initialization class, or by default uses the `SharedFileSystemInitializationActor`. - * - * @return the initialization class. - */ - def initializationActorClass: Class[_ <: SharedFileSystemInitializationActor] = - classOf[SharedFileSystemInitializationActor] - - /** - * Returns the main engine for async execution. - * - * @return the main engine for async execution. 
- */ - def asyncJobExecutionActorClass: Class[_ <: SharedFileSystemAsyncJobExecutionActor] - - override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], - serviceRegistryActor: ActorRef) = { - val params = SharedFileSystemInitializationActorParams(serviceRegistryActor, workflowDescriptor, - configurationDescriptor, calls) - Option(Props(initializationActorClass, params).withDispatcher(Dispatcher.BackendDispatcher)) - } +trait SharedFileSystemBackendLifecycleActorFactory extends StandardLifecycleActorFactory { - override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, - initializationDataOption: Option[BackendInitializationData], - serviceRegistryActor: ActorRef) = { - def propsCreator(completionPromise: Promise[BackendJobExecutionResponse]): Props = { - val params = SharedFileSystemAsyncJobExecutionActorParams(serviceRegistryActor, jobDescriptor, - configurationDescriptor, completionPromise, initializationDataOption) - Props(asyncJobExecutionActorClass, params).withDispatcher(Dispatcher.BackendDispatcher) - } - - Props(new SharedFileSystemJobExecutionActor( - jobDescriptor, configurationDescriptor, serviceRegistryActor, propsCreator) - ).withDispatcher(Dispatcher.BackendDispatcher) - } - - override def cacheHitCopyingActorProps = Option(cacheHitCopyingActorInner _) - - def cacheHitCopyingActorInner(jobDescriptor: BackendJobDescriptor, - initializationDataOption: Option[BackendInitializationData], - serviceRegistryActor: ActorRef): Props = { - Props( - new SharedFileSystemCacheHitCopyingActor( - jobDescriptor, configurationDescriptor, initializationDataOption, serviceRegistryActor) - ).withDispatcher(BackendDispatcher) - } + override def jobIdKey: String = SharedFileSystemAsyncJobExecutionActor.JobIdKey - override def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, - jobKey: BackendJobDescriptorKey, - initializationData: Option[BackendInitializationData]): - 
WdlStandardLibraryFunctions = { - SharedFileSystemExpressionFunctions(workflowDescriptor, configurationDescriptor, jobKey, initializationData) + override lazy val cacheHitCopyingActorClassOption: Option[Class[_ <: StandardCacheHitCopyingActor]] = { + Option(classOf[SharedFileSystemCacheHitCopyingActor]) } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala index be4ab0ef9..b29816600 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala @@ -1,23 +1,32 @@ package cromwell.backend.sfs -import java.nio.file.{Path, Paths} +import cromwell.backend.standard.callcaching.StandardCacheHitCopyingActor.PathPair +import cromwell.backend.standard.callcaching.{StandardCacheHitCopyingActor, StandardCacheHitCopyingActorParams} +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder +import lenthall.util.TryUtil +import cats.instances.try_._ +import cats.syntax.functor._ -import akka.actor.ActorRef -import cromwell.backend.callcaching.CacheHitDuplicating -import cromwell.backend.{BackendCacheHitCopyingActor, BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor} +import scala.util.{Failure, Try} -class SharedFileSystemCacheHitCopyingActor(override val jobDescriptor: BackendJobDescriptor, - override val configurationDescriptor: BackendConfigurationDescriptor, - override val backendInitializationDataOption: - Option[BackendInitializationData], - override val serviceRegistryActor: ActorRef) - extends SharedFileSystemJobCachingActorHelper with BackendCacheHitCopyingActor with CacheHitDuplicating { - - override lazy val destinationCallRootPath = jobPaths.callRoot - - override lazy val destinationJobDetritusPaths = 
jobPaths.detritusPaths - - override protected def getPath(file: String) = Paths.get(file) - - override protected def duplicate(source: Path, destination: Path) = sharedFileSystem.cacheCopy(source, destination) +class SharedFileSystemCacheHitCopyingActor(standardParams: StandardCacheHitCopyingActorParams) + extends StandardCacheHitCopyingActor(standardParams) with SharedFileSystemJobCachingActorHelper with GcsBatchCommandBuilder { + override protected def duplicate(copyPairs: Set[PathPair]): Option[Try[Unit]] = Option { + val copies = copyPairs map { + case (source, destination) => + sharedFileSystem.cacheCopy(source, destination) + } + + TryUtil.sequence(copies.toList).void recoverWith { + case failure => + // If one or more of the copies failed, we want to delete all the files that were successfully copied + // before that. Especially if they've been symlinked, leaving them could lead to rewriting the original + // files when the job gets re-run + // TODO: this could be done more generally in the StandardCacheHitCopyingActor + copyPairs foreach { + case (_, dst) => dst.delete(swallowIOExceptions = true) + } + Failure(failure) + } + } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala index 1f73cac38..754c9549b 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala @@ -1,74 +1,27 @@ package cromwell.backend.sfs -import java.nio.file.{FileSystem, Path} - -import cromwell.backend.io.{JobPaths, WorkflowPathsBackendInitializationData} -import cromwell.backend.wdl._ -import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptorKey, BackendWorkflowDescriptor} +import cromwell.backend.io._ +import 
cromwell.backend.standard.{DefaultStandardExpressionFunctionsParams, StandardExpressionFunctions, StandardExpressionFunctionsParams} import cromwell.core.CallContext -import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.values.{WdlFile, WdlValue} - -import scala.language.postfixOps -import scala.util.{Success, Try} +import cromwell.core.path.{DefaultPath, Path, PathBuilder} object SharedFileSystemExpressionFunctions { - private val LocalFileSystemScheme = "file" - - def isLocalPath(path: Path) = path.toUri.getScheme == SharedFileSystemExpressionFunctions.LocalFileSystemScheme - - def apply(workflowDescriptor: BackendWorkflowDescriptor, - jobKey: BackendJobDescriptorKey, - configurationDescriptor: BackendConfigurationDescriptor, - fileSystems: List[FileSystem]): SharedFileSystemExpressionFunctions = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobKey) - val callContext = CallContext( - jobPaths.callExecutionRoot, - jobPaths.stdout.toString, - jobPaths.stderr.toString - ) - new SharedFileSystemExpressionFunctions(fileSystems, callContext) - } - - def apply(jobPaths: JobPaths, fileSystems: List[FileSystem]): SharedFileSystemExpressionFunctions = { - val callContext = CallContext( - jobPaths.callExecutionRoot, - jobPaths.stdout.toString, - jobPaths.stderr.toString - ) - new SharedFileSystemExpressionFunctions(fileSystems, callContext) - } - - def apply(workflowDescriptor: BackendWorkflowDescriptor, - configurationDescriptor: BackendConfigurationDescriptor, - jobKey: BackendJobDescriptorKey, - initializationData: Option[BackendInitializationData]) = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobKey) - val callContext = CallContext( - jobPaths.callExecutionRoot, - jobPaths.stdout.toString, - jobPaths.stderr.toString - ) - - new SharedFileSystemExpressionFunctions(WorkflowPathsBackendInitializationData.fileSystems(initializationData), callContext) + def 
apply(jobPaths: JobPaths, pathBuilders: List[PathBuilder]): SharedFileSystemExpressionFunctions = { + new SharedFileSystemExpressionFunctions(pathBuilders, jobPaths.callContext) } } -class SharedFileSystemExpressionFunctions(override val fileSystems: List[FileSystem], - context: CallContext - ) extends WdlStandardLibraryFunctions with PureFunctions with ReadLikeFunctions with WriteFunctions { - import SharedFileSystemExpressionFunctions._ - import better.files._ +class SharedFileSystemExpressionFunctions(standardParams: StandardExpressionFunctionsParams) + extends StandardExpressionFunctions(standardParams) { - override def globPath(glob: String) = context.root.toString - override def glob(path: String, pattern: String): Seq[String] = { - File(toPath(path)).glob(s"**/$pattern") map { _.pathAsString } toSeq + def this(pathBuilders: List[PathBuilder], callContext: CallContext) = { + this(DefaultStandardExpressionFunctionsParams(pathBuilders, callContext)) } - override val writeDirectory = context.root - - override def stdout(params: Seq[Try[WdlValue]]) = Success(WdlFile(context.stdout)) - override def stderr(params: Seq[Try[WdlValue]]) = Success(WdlFile(context.stderr)) - - override def postMapping(path: Path) = if (!path.isAbsolute && isLocalPath(path)) context.root.resolve(path) else path + override def postMapping(path: Path) = { + path match { + case _: DefaultPath if !path.isAbsolute => callContext.root.resolve(path) + case _ => path + } + } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala index 36b010547..dbbca727c 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala @@ -1,92 +1,57 @@ package cromwell.backend.sfs -import 
akka.actor.ActorRef -import better.files._ -import cromwell.backend.io.{WorkflowPaths, WorkflowPathsBackendInitializationData} -import cromwell.backend.validation.RuntimeAttributesDefault -import cromwell.backend.wfs.{DefaultWorkflowFileSystemProvider, WorkflowFileSystemProvider} -import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} -import cromwell.core.{Dispatcher, WorkflowOptions} -import wdl4s.values.WdlValue -import wdl4s.{Call, WdlExpression} +import cats.data.Validated.{Invalid, Valid} +import cats.instances.future._ +import cats.instances.list._ +import cats.syntax.traverse._ +import cromwell.backend.BackendInitializationData +import cromwell.backend.io.WorkflowPaths +import cromwell.backend.standard.{StandardExpressionFunctions, StandardInitializationActor, StandardInitializationActorParams} +import cromwell.backend.wfs.WorkflowPathBuilder +import cromwell.core.path.{DefaultPathBuilder, PathBuilder} +import cromwell.filesystems.gcs.{GcsPathBuilderFactory, GoogleConfiguration} +import lenthall.exception.MessageAggregation +import net.ceedubs.ficus.Ficus._ import scala.concurrent.Future -import scala.util.Try -case class SharedFileSystemInitializationActorParams -( - serviceRegistryActor: ActorRef, - workflowDescriptor: BackendWorkflowDescriptor, - configurationDescriptor: BackendConfigurationDescriptor, - calls: Seq[Call] -) +class SharedFileSystemInitializationActor(standardParams: StandardInitializationActorParams) + extends StandardInitializationActor(standardParams) { -class SharedFileSystemBackendInitializationData -( - val workflowPaths: WorkflowPaths, - val runtimeAttributesBuilder: SharedFileSystemValidatedRuntimeAttributesBuilder) - extends WorkflowPathsBackendInitializationData - -/** - * Initializes a shared file system actor factory and creates initialization data to pass to the execution actors. - * - * @param params Initialization parameters. 
- */ -class SharedFileSystemInitializationActor(params: SharedFileSystemInitializationActorParams) - extends BackendWorkflowInitializationActor { - - override lazy val workflowDescriptor: BackendWorkflowDescriptor = params.workflowDescriptor - override lazy val configurationDescriptor: BackendConfigurationDescriptor = params.configurationDescriptor - override lazy val calls: Seq[Call] = params.calls - override lazy val serviceRegistryActor: ActorRef = params.serviceRegistryActor - - def runtimeAttributesBuilder: SharedFileSystemValidatedRuntimeAttributesBuilder = - SharedFileSystemValidatedRuntimeAttributesBuilder.default - - override protected def runtimeAttributeValidators: Map[String, (Option[WdlValue]) => Boolean] = { - runtimeAttributesBuilder.validations.map(validation => - validation.key -> validation.validateOptionalExpression _ - ).toMap - } - - val providers = Seq(GcsWorkflowFileSystemProvider, DefaultWorkflowFileSystemProvider) - val ioDispatcher = context.system.dispatchers.lookup(Dispatcher.IoDispatcher) - - val workflowPaths = WorkflowFileSystemProvider.workflowPaths(configurationDescriptor, workflowDescriptor, - providers, ioDispatcher) - - override def beforeAll(): Future[Option[BackendInitializationData]] = { - Future.fromTry(Try { - publishWorkflowRoot(workflowPaths.workflowRoot.toString) - File(workflowPaths.workflowRoot).createDirectories() - Option(initializationData) - }) - } - - def initializationData: SharedFileSystemBackendInitializationData = { - new SharedFileSystemBackendInitializationData(workflowPaths, runtimeAttributesBuilder) - } + private implicit val system = context.system /** - * Log a warning if there are non-supported runtime attributes defined for the call. + * If the backend sets a gcs authentication mode, try to create a PathBuilderFactory with it. 
*/ - override def validate(): Future[Unit] = { - Future.fromTry(Try { - calls foreach { call => - val runtimeAttributeKeys = call.task.runtimeAttributes.attrs.keys.toList - val notSupportedAttributes = runtimeAttributesBuilder.unsupportedKeys(runtimeAttributeKeys).toList - - if (notSupportedAttributes.nonEmpty) { - val notSupportedAttrString = notSupportedAttributes mkString ", " - workflowLogger.warn( - s"Key/s [$notSupportedAttrString] is/are not supported by backend. " + - s"Unsupported attributes will not be part of jobs executions.") + lazy val gcsPathBuilderFactory: Option[GcsPathBuilderFactory] = { + configurationDescriptor.backendConfig.as[Option[String]]("filesystems.gcs.auth") map { configAuth => + val googleConfiguration = GoogleConfiguration(configurationDescriptor.globalConfig) + googleConfiguration.auth(configAuth) match { + case Valid(auth) => GcsPathBuilderFactory(auth, googleConfiguration.applicationName) + case Invalid(error) => throw new MessageAggregation { + override def exceptionContext: String = "Failed to parse gcs auth configuration" + + override def errorMessages: Traversable[String] = error.toList } } - }) + } } - override protected def coerceDefaultRuntimeAttributes(options: WorkflowOptions): Try[Map[String, WdlValue]] = { - RuntimeAttributesDefault.workflowOptionsDefault(options, runtimeAttributesBuilder.validations.map(v => v.key -> v.coercion).toMap) + override lazy val pathBuilders: Future[List[PathBuilder]] = + gcsPathBuilderFactory.toList.traverse(_.withOptions(workflowDescriptor.workflowOptions)).map(_ ++ Option(DefaultPathBuilder)) + + override lazy val workflowPaths: Future[WorkflowPaths] = pathBuilders map { + WorkflowPathBuilder.workflowPaths(configurationDescriptor, workflowDescriptor, _) + } + + override lazy val expressionFunctions: Class[_ <: StandardExpressionFunctions] = + classOf[SharedFileSystemExpressionFunctions] + + override def beforeAll(): Future[Option[BackendInitializationData]] = { + initializationData map { 
data => + publishWorkflowRoot(data.workflowPaths.workflowRoot.pathAsString) + data.workflowPaths.workflowRoot.createPermissionedDirectories() + Option(data) + } } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala index a423d6cb7..e43a6bf41 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala @@ -1,43 +1,20 @@ package cromwell.backend.sfs -import akka.actor.{Actor, ActorRef} -import cromwell.backend.BackendInitializationData -import cromwell.backend.callcaching.JobCachingActorHelper -import cromwell.backend.io.JobPaths -import cromwell.backend.validation.{RuntimeAttributesValidation, ValidatedRuntimeAttributes} +import akka.actor.Actor +import com.typesafe.config.{Config, ConfigFactory} +import cromwell.backend.standard.StandardCachingActorHelper import cromwell.core.logging.JobLogging -import lenthall.config.ScalaConfig._ +import cromwell.core.path.PathBuilder +import net.ceedubs.ficus.Ficus._ -trait SharedFileSystemJobCachingActorHelper extends JobCachingActorHelper { +trait SharedFileSystemJobCachingActorHelper extends StandardCachingActorHelper { this: Actor with JobLogging => - def backendInitializationDataOption: Option[BackendInitializationData] - - def serviceRegistryActor: ActorRef - - lazy val jobPaths = - new JobPaths(jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig, jobDescriptor.key) - - lazy val initializationData = BackendInitializationData. 
- as[SharedFileSystemBackendInitializationData](backendInitializationDataOption) - - lazy val validatedRuntimeAttributes: ValidatedRuntimeAttributes = { - val builder = initializationData.runtimeAttributesBuilder - builder.build(jobDescriptor.runtimeAttributes, jobLogger) - } - - lazy val metadataKeyValues: Map[String, Any] = { - val runtimeAttributesMetadata = RuntimeAttributesValidation.extract(validatedRuntimeAttributes) map { - case (key, value) => (s"runtimeAttributes:$key", value) - } - val fileMetadata = jobPaths.metadataPaths - val otherMetadata = Map("cache:allowResultReuse" -> true) - runtimeAttributesMetadata ++ fileMetadata ++ otherMetadata - } - lazy val sharedFileSystem = new SharedFileSystem { - override lazy val sharedFileSystemConfig = { - configurationDescriptor.backendConfig.getConfigOr("filesystems.local") + override lazy val pathBuilders: List[PathBuilder] = standardInitializationData.workflowPaths.pathBuilders + + override lazy val sharedFileSystemConfig: Config = { + configurationDescriptor.backendConfig.as[Option[Config]]("filesystems.local").getOrElse(ConfigFactory.empty()) } } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActor.scala deleted file mode 100644 index beac805dd..000000000 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActor.scala +++ /dev/null @@ -1,85 +0,0 @@ -package cromwell.backend.sfs - -import akka.actor.{ActorRef, Props} -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse} -import cromwell.backend.BackendLifecycleActor.AbortJobCommand -import cromwell.backend.async.AsyncBackendJobExecutionActor.{Execute, Recover} -import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobExecutionActor} -import cromwell.services.keyvalue.KeyValueServiceActor._ - 
-import scala.concurrent.{Future, Promise} - -/** - * Facade to the asynchronous execution actor. - * - * Creates the asynchronous execution actor, then relays messages to that actor. - * - * NOTE: Although some methods return futures due to the (current) contract in BJEA/ABJEA, this actor only executes - * during the receive, and does not launch new runnables/futures from inside "receive". - * - * Thus there are no vars, and the context switches during "receive", once the asynchronous actor has been created. - * - * @param jobDescriptor The job to execute. - * @param configurationDescriptor The configuration. - * @param asyncPropsCreator A function that can create the specific asynchronous backend. - */ -class SharedFileSystemJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, - override val configurationDescriptor: BackendConfigurationDescriptor, - serviceRegistryActor: ActorRef, - asyncPropsCreator: Promise[BackendJobExecutionResponse] => Props) - extends BackendJobExecutionActor { - - context.become(startup orElse super.receive) - - private def startup: Receive = { - case AbortJobCommand => - context.parent ! AbortedResponse(jobDescriptor.key) - context.stop(self) - } - - private def running(executor: ActorRef): Receive = { - case AbortJobCommand => - executor ! AbortJobCommand - case abortResponse: AbortedResponse => - context.parent ! abortResponse - context.stop(self) - case KvPair(key, id@Some(jobId)) if key.key == SharedFileSystemJob.JobIdKey => - // Successful operation ID lookup during recover. - executor ! Recover(SharedFileSystemJob(jobId)) - case KvKeyLookupFailed(_) => - // Missed operation ID lookup during recover, fall back to execute. - executor ! Execute - case KvFailure(_, e) => - // Failed operation ID lookup during recover, crash and let the supervisor deal with it. 
- completionPromise.tryFailure(e) - throw new RuntimeException(s"Failure attempting to look up job id for key ${jobDescriptor.key}", e) - } - - /** - * This "synchronous" actor isn't finished until this promise finishes over in the asynchronous version. - * - * Still not sure why the AsyncBackendJobExecutionActor doesn't wait for an Akka message instead of using Scala promises. - */ - private lazy val completionPromise = Promise[BackendJobExecutionResponse]() - - override def execute: Future[BackendJobExecutionResponse] = { - val executorRef = context.actorOf(asyncPropsCreator(completionPromise), "SharedFileSystemAsyncJobExecutionActor") - context.become(running(executorRef) orElse super.receive) - executorRef ! Execute - completionPromise.future - } - - override def recover: Future[BackendJobExecutionResponse] = { - val executorRef = context.actorOf(asyncPropsCreator(completionPromise), "SharedFileSystemAsyncJobExecutionActor") - context.become(running(executorRef) orElse super.receive) - val kvJobKey = - KvJobKey(jobDescriptor.key.call.fullyQualifiedName, jobDescriptor.key.index, jobDescriptor.key.attempt) - val kvGet = KvGet(ScopedKey(jobDescriptor.workflowDescriptor.id, kvJobKey, SharedFileSystemJob.JobIdKey)) - serviceRegistryActor ! 
kvGet - completionPromise.future - } - - override def abort() = { - throw new NotImplementedError("Abort is implemented via a custom receive of the message AbortJobCommand.") - } -} diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemValidatedRuntimeAttributesBuilder.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemValidatedRuntimeAttributesBuilder.scala deleted file mode 100644 index 55be9e663..000000000 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemValidatedRuntimeAttributesBuilder.scala +++ /dev/null @@ -1,120 +0,0 @@ -package cromwell.backend.sfs - -import cromwell.backend.validation._ - -/** - * Validates a collection of runtime attributes for a Shared File System (SFS) Backend. - * - * There are always three collections of runtime attributes, two of which are required for the SFS base class: - * - * Required for the SFS to be able to run properly: - * 1) The set of runtime attributes that are absolutely required, or the validation fails. - * 2) _Extra_ validations that will run for the SFS. - * - * Lastly for the sub classes of the SFS, there are: - * 3) Custom validations that the backend sub class may specify. - * - * 3), the custom validations, are always set by the sub class, via calls to `withValidation()`. - * - * For 1) and 2) the biggest difference is when it comes to docker support in a backend. 
- * - * For a backend that does __not__ support docker, the default, 1) and 2) will contain: - * - * 1) required = ContinueOnReturnCodeValidation.default, FailOnStderrValidation.default - * 2) unsupportedExtra = DockerValidation.optional - * - * Suppose the above validation runs a WDL with runtime attributes shows up with: - * - * {{{ - * runtimeAttributes { - * continueOnReturnCode: 14 - * # failOnStdErr not specified - * docker: "ubuntu" - * } - * }}} - * - * This will cause a validation warning to print out that docker is unsupported, because the docker validation isn't - * present in the required validations. If/when the SFS backend asks what the value of the optional docker is, it will - * receive `None`, as the validation _is not_ listed in the required validations. - * - * There is an additional interesting thing about having the docker validation still running under an "unsupported - * extra validation". Say an invalid docker were to be specified as a WdlArray. The extra validation __would__ catch - * the error, even after the warning had been printed stating that docker is not supported by the backend. - * - * Meanwhile, even though failOnStdErr is not specified, the `FailOnStderrValidation.default` will return its default - * value. And of course, the `ContinueOnReturnCodeValidation.default` returns the specified, and valid, runtime - * attribute `ContinueOnReturnCodeSet(14)`. - * - * Now-- - * - * Suppose the `withDockerSupport(true)` has been invoked on the builder. The required and unsupported runtime - * attributes will then look like: - * - * 1) required = ContinueOnReturnCodeValidation.default, FailOnStderrValidation.default, - * DockerValidation.optional - * 2) unsupportedExtra = __empty__ - * - * With the same WDL above, this builder does NOT print a warning, because docker __is__ supported. 
When the SFS asks - * for the optional docker element, it receives `Some("ubuntu")`, as the `DockerValidation.optional` __is__ listed in - * the required validations. - * - * `ContinueOnReturnCodeValidation.default` and `FailOnStderrValidation.default` still operate as in the previous - * example. - * - * What happens when there is no runtime attribute for docker? Easy, the validation for docker is always optional! - * In either case of running a builder via `withDockerSupport(true)` or `withDockerSupport(false)`, if the docker - * runtime attribute is not specified, the `SharedFileSystemValidatedRuntimeAttributesBuilder` will return a - * `None` value. - */ -object SharedFileSystemValidatedRuntimeAttributesBuilder { - - private case class SharedFileSystemValidatedRuntimeAttributesBuilderImpl - ( - override val requiredValidations: Seq[RuntimeAttributesValidation[_]], - override val customValidations: Seq[RuntimeAttributesValidation[_]] - ) extends SharedFileSystemValidatedRuntimeAttributesBuilder - - /** - * `default` returns the default set of attributes required to run an SFS: - * - `ContinueOnReturnCodeValidation.default` - * - `FailOnStderrValidation.default` - * - * Additional runtime attribute validations may be added by calling `withValidation` on the default. - * - * The SFS will also _always_ validate using the `DockerValidation`, but will end up warning the user that the - * runtime attribute is unsupported by the backend implementation unless `withDockerSupport(true)` is called. 
- */ - lazy val default: SharedFileSystemValidatedRuntimeAttributesBuilder = { - val required = Seq(ContinueOnReturnCodeValidation.default, FailOnStderrValidation.default) - val custom = Seq.empty - SharedFileSystemValidatedRuntimeAttributesBuilderImpl(custom, required) - } - - private def withValidations(builder: SharedFileSystemValidatedRuntimeAttributesBuilder, - customValidations: Seq[RuntimeAttributesValidation[_]]): - SharedFileSystemValidatedRuntimeAttributesBuilder = { - val required = builder.requiredValidations - val custom = builder.customValidations ++ customValidations - SharedFileSystemValidatedRuntimeAttributesBuilderImpl(custom, required) - } -} - -sealed trait SharedFileSystemValidatedRuntimeAttributesBuilder extends ValidatedRuntimeAttributesBuilder { - /** - * Returns a new builder with the additional validation(s). - * - * @param validation Additional validation. - * @return New builder with the validation. - */ - final def withValidation(validation: RuntimeAttributesValidation[_]*): - SharedFileSystemValidatedRuntimeAttributesBuilder = { - SharedFileSystemValidatedRuntimeAttributesBuilder.withValidations(this, validation) - } - - /** Returns on the supported validations, those required for the SFS, plus custom addons for the subclass. 
*/ - override final lazy val validations: Seq[RuntimeAttributesValidation[_]] = requiredValidations ++ customValidations - - private[sfs] def requiredValidations: Seq[RuntimeAttributesValidation[_]] - - private[sfs] def customValidations: Seq[RuntimeAttributesValidation[_]] -} diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala new file mode 100644 index 000000000..e65debcc6 --- /dev/null +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala @@ -0,0 +1,151 @@ +package cromwell.backend.impl.sfs.config + +import java.util.UUID + +import akka.event.LoggingAdapter +import com.typesafe.config.{ConfigFactory, ConfigValueFactory} +import cromwell.backend.io.WorkflowPaths +import cromwell.backend.standard.StandardInitializationData +import cromwell.backend.standard.callcaching.StandardFileHashingActor.SingleFileHashRequest +import cromwell.core.path.{DefaultPathBuilder, Path} +import org.apache.commons.codec.digest.DigestUtils +import org.scalatest.prop.TableDrivenPropertyChecks +import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} +import org.specs2.mock.Mockito +import wdl4s.wdl.values.WdlFile + +import scala.util.Success + +class ConfigHashingStrategySpec extends FlatSpec with Matchers with TableDrivenPropertyChecks with Mockito with BeforeAndAfterAll { + + behavior of "ConfigHashingStrategy" + + val steak = "Steak" + val steakHash = DigestUtils.md5Hex(steak) + val file = DefaultPathBuilder.createTempFile() + val symLinksDir = DefaultPathBuilder.createTempDirectory("sym-dir") + val pathHash = DigestUtils.md5Hex(file.pathAsString) + val md5File = file.sibling(s"${file.name}.md5") + // Not the md5 value of "Steak". 
This is intentional so we can verify which hash is used depending on the strategy + val md5FileHash = "103508832bace55730c8ee8d89c1a45f" + + override def beforeAll() = { + file.write(steak) + () + } + + private def randomName(): String = UUID.randomUUID().toString + + def mockRequest(withSibling: Boolean, symlink: Boolean) = { + if (withSibling && md5File.notExists) md5File.write(md5FileHash) + val requestFile = if (symlink) { + val symLink: Path = symLinksDir./(s"symlink-${randomName()}") + symLink.symbolicLinkTo(file) + symLink + } else file + + val workflowPaths = mock[WorkflowPaths] + workflowPaths.pathBuilders returns List(DefaultPathBuilder) + + val initData = mock[StandardInitializationData] + initData.workflowPaths returns workflowPaths + + SingleFileHashRequest(null, null, WdlFile(requestFile.pathAsString), Option(initData)) + } + + def makeStrategy(strategy: String, checkSibling: Option[Boolean] = None) = { + val conf = ConfigFactory.parseString(s"""hashing-strategy: "$strategy"""") + ConfigHashingStrategy( + checkSibling map { check => conf.withValue("check-sibling-md5", ConfigValueFactory.fromAnyRef(check)) } getOrElse conf + ) + } + + it should "create a path hashing strategy from config" in { + val defaultSibling = makeStrategy("path") + defaultSibling.isInstanceOf[HashPathStrategy] shouldBe true + defaultSibling.checkSiblingMd5 shouldBe false + + val checkSibling = makeStrategy("path", Option(true)) + + checkSibling.isInstanceOf[HashPathStrategy] shouldBe true + checkSibling.checkSiblingMd5 shouldBe true + checkSibling.toString shouldBe "Call caching hashing strategy: Check first for sibling md5 and if not found hash file path." + + val dontCheckSibling = makeStrategy("path", Option(false)) + + dontCheckSibling.isInstanceOf[HashPathStrategy] shouldBe true + dontCheckSibling.checkSiblingMd5 shouldBe false + dontCheckSibling.toString shouldBe "Call caching hashing strategy: hash file path." 
+ } + + it should "have a path hashing strategy and use md5 sibling file when appropriate" in { + val table = Table( + ("check", "withMd5", "expected"), + (true, true, md5FileHash), + (false, true, pathHash), + (true, false, pathHash), + (false, false, pathHash) + ) + + forAll(table) { (check, withMd5, expected) => + md5File.delete(swallowIOExceptions = true) + val checkSibling = makeStrategy("path", Option(check)) + + checkSibling.getHash(mockRequest(withMd5, symlink = false), mock[LoggingAdapter]) shouldBe Success(expected) + + val symLinkRequest: SingleFileHashRequest = mockRequest(withMd5, symlink = true) + val symlink = DefaultPathBuilder.get(symLinkRequest.file.valueString) + + symlink.isSymbolicLink shouldBe true + DigestUtils.md5Hex(symlink.pathAsString) should not be expected + checkSibling.getHash(symLinkRequest, mock[LoggingAdapter]) shouldBe Success(expected) + } + } + + it should "create a file hashing strategy from config" in { + val defaultSibling = makeStrategy("file") + defaultSibling.isInstanceOf[HashFileStrategy] shouldBe true + defaultSibling.checkSiblingMd5 shouldBe false + + val checkSibling = makeStrategy("file", Option(true)) + + checkSibling.isInstanceOf[HashFileStrategy] shouldBe true + checkSibling.checkSiblingMd5 shouldBe true + checkSibling.toString shouldBe "Call caching hashing strategy: Check first for sibling md5 and if not found hash file content." + + val dontCheckSibling = makeStrategy("file", Option(false)) + + dontCheckSibling.isInstanceOf[HashFileStrategy] shouldBe true + dontCheckSibling.checkSiblingMd5 shouldBe false + dontCheckSibling.toString shouldBe "Call caching hashing strategy: hash file content." 
+ } + + it should "have a file hashing strategy and use md5 sibling file when appropriate" in { + val table = Table( + ("check", "withMd5", "expected"), + (true, true, md5FileHash), + (false, true, steakHash), + (true, false, steakHash), + (false, false, steakHash) + ) + + forAll(table) { (check, withMd5, expected) => + md5File.delete(swallowIOExceptions = true) + val checkSibling = makeStrategy("file", Option(check)) + + checkSibling.getHash(mockRequest(withMd5, symlink = false), mock[LoggingAdapter]) shouldBe Success(expected) + + val symLinkRequest: SingleFileHashRequest = mockRequest(withMd5, symlink = true) + val symlink = DefaultPathBuilder.get(symLinkRequest.file.valueString) + + symlink.isSymbolicLink shouldBe true + checkSibling.getHash(symLinkRequest, mock[LoggingAdapter]) shouldBe Success(expected) + } + } + + override def afterAll() = { + file.delete(true) + md5File.delete(true) + () + } +} diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/MemoryDeclarationValidationSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/MemoryDeclarationValidationSpec.scala new file mode 100644 index 000000000..a1b7928eb --- /dev/null +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/MemoryDeclarationValidationSpec.scala @@ -0,0 +1,102 @@ +package cromwell.backend.impl.sfs.config + +import com.typesafe.config.ConfigFactory +import cromwell.backend.MemorySize +import cromwell.backend.validation.{RuntimeAttributesKeys, ValidatedRuntimeAttributes} +import org.scalatest.prop.TableDrivenPropertyChecks +import org.scalatest.{FlatSpec, Matchers} +import wdl4s.parser.MemoryUnit +import wdl4s.wdl.values.{WdlFloat, WdlInteger} +import ConfigConstants._ + +class MemoryDeclarationValidationSpec extends FlatSpec with Matchers with TableDrivenPropertyChecks { + behavior of "MemoryDeclarationValidation" + + val validDeclaredAmounts = Table( + ("declaration", "runtimeAmount", "expectedDefaultAmount", 
"expectedExtracted"), + ("Int memory", Option(2), None, Option(WdlInteger(2 * 1000 * 1000 * 1000))), + ("Int memory_gb", Option(2), None, Option(WdlInteger(2))), + ("Int memory_gb = 3", None, Option(3), None), + ("Int memory_gb = 3", Option(2), Option(3), Option(WdlInteger(2))), + ("Int? memory_gb", None, None, None), + ("Int? memory_gb", Option(2), None, Option(WdlInteger(2))), + ("Int? memory_gb = 3", None, Option(3), None), + ("Int? memory_gb = 3", Option(2), Option(3), Option(WdlInteger(2))), + ("Float memory", Option(2), None, Option(WdlFloat(2 * 1000 * 1000 * 1000))), + ("Float memory_gb", Option(2), None, Option(WdlFloat(2))), + ("Float memory_gb = 3.0", None, Option(3), None), + ("Float memory_gb = 3.0", Option(2), Option(3), Option(WdlFloat(2))), + ("Float? memory_gb", None, None, None), + ("Float? memory_gb", Option(2), None, Option(WdlFloat(2))), + ("Float? memory_gb = 3.0", None, Option(3), None), + ("Float? memory_gb = 3.0", Option(2), Option(3), Option(WdlFloat(2))) + ) + + forAll(validDeclaredAmounts) { (declaration, runtimeAmount, expectedDefaultAmount, expectedExtracted) => + it should s"extract memory from declared $declaration with memory set to ${runtimeAmount.getOrElse("none")}" in { + val config = ConfigFactory.parseString( + s"""|submit = "anything" + |${ConfigConstants.RuntimeAttributesConfig} = "$declaration" + |""".stripMargin) + + val configWdlNamespace = new ConfigWdlNamespace(config) + val runtimeDeclaration = configWdlNamespace.runtimeDeclarations.head + val memoryDeclarationValidation = new MemoryDeclarationValidation(runtimeDeclaration, + MemoryRuntimeAttribute, MemoryRuntimeAttributePrefix) + val attributes = runtimeAmount + .map(amount => RuntimeAttributesKeys.MemoryKey -> MemorySize(amount.toDouble, MemoryUnit.GB)) + .toMap + val validatedRuntimeAttributes = ValidatedRuntimeAttributes(attributes) + + val default = memoryDeclarationValidation.makeValidation().runtimeAttributeDefinition.factoryDefault + val extracted = 
memoryDeclarationValidation.extractWdlValueOption(validatedRuntimeAttributes) + + val expectedDefault = expectedDefaultAmount + .map(amount => WdlInteger(MemorySize(amount.toDouble, MemoryUnit.GB).bytes.toInt)) + + MemoryDeclarationValidation.isMemoryDeclaration(runtimeDeclaration.unqualifiedName, + MemoryRuntimeAttribute, MemoryRuntimeAttributePrefix) should be(true) + default should be(expectedDefault) + extracted should be(expectedExtracted) + } + } + + val badSyntaxDeclarations = Table( + "declaration", + "Int memory_gb = 3.0", + "Float memory_gb = 3" + ) + + forAll(badSyntaxDeclarations) { declaration => + it should s"throw a syntax error for memory declaration $declaration" in { + val config = ConfigFactory.parseString( + s"""|submit = "anything" + |${ConfigConstants.RuntimeAttributesConfig} = "$declaration" + |""".stripMargin) + + val expectedException = intercept[RuntimeException](new ConfigWdlNamespace(config)) + expectedException.getMessage should startWith("Error parsing generated wdl:\n") + } + } + + val invalidDeclarations = Table( + "declaration", + "Int mem", + "Int memory_badunit", + "Float memory_badunit" + ) + + forAll(invalidDeclarations) { declaration => + it should s"not identify $declaration as a memory declaration" in { + val config = ConfigFactory.parseString( + s"""|submit = "anything" + |${ConfigConstants.RuntimeAttributesConfig} = "$declaration" + |""".stripMargin) + + val configWdlNamespace = new ConfigWdlNamespace(config) + val runtimeDeclaration = configWdlNamespace.runtimeDeclarations.head + MemoryDeclarationValidation.isMemoryDeclaration(runtimeDeclaration.unqualifiedName, + MemoryRuntimeAttribute, MemoryRuntimeAttributePrefix) should be(false) + } + } +} diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala index ac04a0bd9..a08b3a3b9 100644 --- 
a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala @@ -2,26 +2,28 @@ package cromwell.backend.sfs import akka.actor.Props import akka.testkit.{EventFilter, ImplicitSender, TestDuration} +import com.typesafe.config.ConfigFactory import cromwell.backend.BackendSpec._ import cromwell.backend.BackendWorkflowInitializationActor.Initialize -import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor} +import cromwell.backend.standard.DefaultInitializationActorParams +import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor, TestConfig} import cromwell.core.TestKitSuite import cromwell.core.logging.LoggingTest._ import org.scalatest.{Matchers, WordSpecLike} -import wdl4s.Call +import wdl4s.wdl.WdlTaskCall import scala.concurrent.duration._ class SharedFileSystemInitializationActorSpec extends TestKitSuite("SharedFileSystemInitializationActorSpec") with WordSpecLike with Matchers with ImplicitSender { - val Timeout = 5.second.dilated + val Timeout: FiniteDuration = 5.second.dilated - val HelloWorld = - """ + val HelloWorld: String = + s""" |task hello { | String addressee = "you" | command { - | echo "Hello ${addressee}!" + | echo "Hello $${addressee}!" 
| } | output { | String salutation = read_string(stdout()) @@ -30,14 +32,14 @@ class SharedFileSystemInitializationActorSpec extends TestKitSuite("SharedFileSy | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin - private def getActorRef(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], + private def getActorRef(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[WdlTaskCall], conf: BackendConfigurationDescriptor) = { - val params = SharedFileSystemInitializationActorParams(emptyActor, workflowDescriptor, conf, calls) + val params = DefaultInitializationActorParams(workflowDescriptor, emptyActor, calls, emptyActor, conf, restarting = false) val props = Props(new SharedFileSystemInitializationActor(params)) system.actorOf(props, "SharedFileSystemInitializationActor") } @@ -46,10 +48,10 @@ class SharedFileSystemInitializationActorSpec extends TestKitSuite("SharedFileSy "log a warning message when there are unsupported runtime attributes" in { within(Timeout) { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { unsupported: 1 }""") - val conf = emptyBackendConfig - val backend = getActorRef(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, conf) + val conf = BackendConfigurationDescriptor(TestConfig.sampleBackendRuntimeConfig, ConfigFactory.empty()) + val backend = getActorRef(workflowDescriptor, workflowDescriptor.workflow.taskCalls, conf) val pattern = "Key/s [unsupported] is/are not supported by backend. " + - "Unsupported attributes will not be part of jobs executions." + "Unsupported attributes will not be part of job executions." EventFilter.warning(pattern = escapePattern(pattern), occurrences = 1) intercept { backend ! 
Initialize } diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala index f697a7f4b..84a6a84a3 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala @@ -1,44 +1,45 @@ package cromwell.backend.sfs -import java.nio.file.{Files, Paths} - -import akka.testkit.TestDuration -import better.files._ +import akka.testkit.{TestDuration, TestProbe} import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend.BackendLifecycleActor.AbortJobCommand +import cromwell.backend._ +import cromwell.backend.async.WrongReturnCode import cromwell.backend.io.TestWorkflows._ -import cromwell.backend.io.{JobPaths, TestWorkflows} +import cromwell.backend.io.{JobPathsWithDocker, TestWorkflows} import cromwell.backend.sfs.TestLocalAsyncJobExecutionActor._ -import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobDescriptorKey, BackendSpec, RuntimeAttributeDefinition} +import cromwell.backend.standard.StandardValidatedRuntimeAttributesBuilder import cromwell.core.Tags._ import cromwell.core._ -import cromwell.services.keyvalue.KeyValueServiceActor.{KvJobKey, KvPair, ScopedKey} +import cromwell.core.callcaching.NoDocker +import cromwell.core.path.{DefaultPathBuilder, Path} +import cromwell.services.keyvalue.KeyValueServiceActor._ +import lenthall.exception.AggregatedException import org.scalatest.concurrent.PatienceConfiguration.Timeout -import org.scalatest.mockito.MockitoSugar import 
org.scalatest.prop.TableDrivenPropertyChecks -import org.scalatest.{FlatSpecLike, OptionValues} -import wdl4s.types._ -import wdl4s.util.AggregatedException -import wdl4s.values._ +import org.scalatest.{Assertion, FlatSpecLike, OptionValues} +import wdl4s.wdl.types._ +import wdl4s.wdl.values._ import scala.concurrent.duration._ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSystemJobExecutionActorSpec") - with FlatSpecLike with BackendSpec with MockitoSugar with TableDrivenPropertyChecks with OptionValues { + with FlatSpecLike with BackendSpec with TableDrivenPropertyChecks with OptionValues { behavior of "SharedFileSystemJobExecutionActor" - lazy val runtimeAttributeDefinitions = SharedFileSystemValidatedRuntimeAttributesBuilder.default.definitions.toSet + lazy val runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition] = + StandardValidatedRuntimeAttributesBuilder.default(Some(TestConfig.optionalRuntimeConfig)).definitions.toSet - def executeSpec(docker: Boolean) = { - val expectedOutputs: JobOutputs = Map( + def executeSpec(docker: Boolean): Any = { + val expectedOutputs: CallOutputs = Map( "salutation" -> JobOutput(WdlString("Hello you !")) ) - val expectedResponse = SucceededResponse(mock[BackendJobDescriptorKey], Some(0), expectedOutputs, None, Seq.empty) + val expectedResponse = JobSucceededResponse(mock[BackendJobDescriptorKey], Some(0), expectedOutputs, None, Seq.empty, None) val runtime = if (docker) """runtime { docker: "ubuntu:latest" }""" else "" val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = runtime) - val workflow = TestWorkflow(workflowDescriptor, emptyBackendConfig, expectedResponse) + val workflow = TestWorkflow(workflowDescriptor, TestConfig.backendRuntimeConfigDescriptor, expectedResponse) val backend = createBackend(jobDescriptorFromSingleCallWorkflow(workflow.workflowDescriptor, Map.empty, WorkflowOptions.empty, runtimeAttributeDefinitions), workflow.config) testWorkflow(workflow, 
backend) } @@ -52,28 +53,31 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst } it should "send back an execution failure if the task fails" in { - val expectedResponse = FailedNonRetryableResponse(mock[BackendJobDescriptorKey], new Exception(""), Option(1)) - val workflow = TestWorkflow(buildWorkflowDescriptor(GoodbyeWorld), emptyBackendConfig, expectedResponse) + val expectedResponse = + JobFailedNonRetryableResponse(mock[BackendJobDescriptorKey], WrongReturnCode("wf_goodbye.goodbye:NA:1", 1, None), Option(1)) + val workflow = TestWorkflow(buildWorkflowDescriptor(GoodbyeWorld), TestConfig.backendRuntimeConfigDescriptor, expectedResponse) val backend = createBackend(jobDescriptorFromSingleCallWorkflow(workflow.workflowDescriptor, Map.empty, WorkflowOptions.empty, runtimeAttributeDefinitions), workflow.config) testWorkflow(workflow, backend) } - def localizationSpec(docker: Boolean) = { - def templateConf(localizers: String) = BackendConfigurationDescriptor( - ConfigFactory.parseString( - s"""{ - | root = "local-cromwell-executions" - | filesystems { - | local { - | localization = [ - | $localizers - | ] - | } - | } - |} - """.stripMargin), - ConfigFactory.parseString("{}") - ) + def localizationSpec(docker: Boolean): Assertion = { + def templateConf(localizers: String) = BackendConfigurationDescriptor(ConfigFactory.parseString( + s"""|{ + | root = "local-cromwell-executions" + | filesystems { + | local { + | localization = [ + | $localizers + | ] + | } + | } + | default-runtime-attributes { + | cpu: 1 + | failOnStderr: false + | continueOnReturnCode: 0 + | } + |} + |""".stripMargin), ConfigFactory.parseString("{}")) val hardConf = templateConf("hard-link") val symConf = templateConf("soft-link") @@ -82,11 +86,11 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst val jsonInputFile = createCannedFile("localize", "content from json inputs").pathAsString val callInputFile = createCannedFile("localize", 
"content from call inputs").pathAsString val inputs = Map( - "inputFileFromCallInputs" -> WdlFile(callInputFile), - "inputFileFromJson" -> WdlFile(jsonInputFile) + "wf_localize.localize.inputFileFromCallInputs" -> WdlFile(callInputFile), + "wf_localize.localize.inputFileFromJson" -> WdlFile(jsonInputFile) ) - val expectedOutputs: JobOutputs = Map( + val expectedOutputs: CallOutputs = Map( "out" -> JobOutput(WdlArray(WdlArrayType(WdlStringType), Array( WdlString("content from json inputs"), @@ -106,24 +110,24 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst val runtime = if (docker) """runtime { docker: "ubuntu:latest" } """ else "" val workflowDescriptor = buildWorkflowDescriptor(InputFiles, inputs, runtime = runtime) val backend = createBackend(jobDescriptorFromSingleCallWorkflow(workflowDescriptor, inputs, WorkflowOptions.empty, runtimeAttributeDefinitions), conf) - val jobDescriptor: BackendJobDescriptor = jobDescriptorFromSingleCallWorkflow(workflowDescriptor, Map.empty, WorkflowOptions.empty, runtimeAttributeDefinitions) - val expectedResponse = SucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty) + val jobDescriptor: BackendJobDescriptor = jobDescriptorFromSingleCallWorkflow(workflowDescriptor, inputs, WorkflowOptions.empty, runtimeAttributeDefinitions) + val expectedResponse = JobSucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty, None) - val jobPaths = new JobPaths(workflowDescriptor, conf.backendConfig, jobDescriptor.key) + val jobPaths = JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, conf.backendConfig) whenReady(backend.execute) { executionResponse => assertResponse(executionResponse, expectedResponse) - val localizedJsonInputFile = Paths.get(jobPaths.callInputsRoot.toString, jsonInputFile) - val localizedCallInputFile = Paths.get(jobPaths.callInputsRoot.toString, callInputFile) + val localizedJsonInputFile = 
DefaultPathBuilder.get(jobPaths.callInputsRoot.pathAsString, jsonInputFile) + val localizedCallInputFile = DefaultPathBuilder.get(jobPaths.callInputsRoot.pathAsString, callInputFile) - Files.isSymbolicLink(localizedJsonInputFile) shouldBe isSymlink + localizedJsonInputFile.isSymbolicLink shouldBe isSymlink val realJsonInputFile = - if (isSymlink) Files.readSymbolicLink(localizedJsonInputFile) else localizedJsonInputFile + if (isSymlink) localizedJsonInputFile.symbolicLink.get else localizedJsonInputFile realJsonInputFile.toFile should exist - Files.isSymbolicLink(localizedCallInputFile) shouldBe isSymlink + localizedCallInputFile.isSymbolicLink shouldBe isSymlink val realCallInputFile = - if (isSymlink) Files.readSymbolicLink(localizedJsonInputFile) else localizedCallInputFile + if (isSymlink) localizedCallInputFile.symbolicLink.get else localizedCallInputFile realCallInputFile.toFile should exist } } @@ -140,7 +144,7 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst it should "abort a job and kill a process" in { val workflowDescriptor = buildWorkflowDescriptor(Sleep10) val jobDescriptor: BackendJobDescriptor = jobDescriptorFromSingleCallWorkflow(workflowDescriptor, Map.empty, WorkflowOptions.empty, runtimeAttributeDefinitions) - val backendRef = createBackendRef(jobDescriptor, emptyBackendConfig) + val backendRef = createBackendRef(jobDescriptor, TestConfig.backendRuntimeConfigDescriptor) val backend = backendRef.underlyingActor val execute = backend.execute @@ -151,21 +155,21 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst } } - def recoverSpec(completed: Boolean, writeReturnCode: Boolean = true) = { + def recoverSpec(completed: Boolean, writeReturnCode: Boolean = true): Assertion = { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld) val jobDescriptor: BackendJobDescriptor = jobDescriptorFromSingleCallWorkflow(workflowDescriptor, Map.empty, WorkflowOptions.empty, 
runtimeAttributeDefinitions) - val backendRef = createBackendRef(jobDescriptor, emptyBackendConfig) + val backendRef = createBackendRef(jobDescriptor, TestConfig.backendRuntimeConfigDescriptor) val backend = backendRef.underlyingActor - val jobPaths = new JobPaths(workflowDescriptor, ConfigFactory.empty, jobDescriptor.key) - File(jobPaths.callExecutionRoot).createDirectories() - File(jobPaths.stdout).write("Hello stubby ! ") - File(jobPaths.stderr).touch() + val jobPaths = JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, ConfigFactory.empty) + jobPaths.callExecutionRoot.createPermissionedDirectories() + jobPaths.stdout.write("Hello stubby ! ") + jobPaths.stderr.touch() val pid = if (completed) { if (writeReturnCode) - File(jobPaths.returnCode).write("0") + jobPaths.returnCode.write("0") "0" } else { import sys.process._ @@ -178,24 +182,27 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst pidField.get(p).toString } - val execute = backend.recover + def execute = backend.recover val kvJobKey = KvJobKey(jobDescriptor.key.call.fullyQualifiedName, jobDescriptor.key.index, jobDescriptor.key.attempt) - val scopedKey = ScopedKey(workflowDescriptor.id, kvJobKey, SharedFileSystemJob.JobIdKey) + val scopedKey = ScopedKey(workflowDescriptor.id, kvJobKey, SharedFileSystemAsyncJobExecutionActor.JobIdKey) val kvPair = KvPair(scopedKey, Option(pid)) - backendRef ! 
kvPair + val previousKvPutter = TestProbe() + val kvPutReq = KvPut(kvPair) + backendRef.underlyingActor.serviceRegistryActor.tell(msg = kvPutReq, sender = previousKvPutter.ref) + previousKvPutter.expectMsg(KvPutSuccess(kvPutReq)) whenReady(execute, Timeout(10.seconds.dilated)) { executionResponse => if (writeReturnCode) { - executionResponse should be(a[SucceededResponse]) - val succeededResponse = executionResponse.asInstanceOf[SucceededResponse] + executionResponse should be(a[JobSucceededResponse]) + val succeededResponse = executionResponse.asInstanceOf[JobSucceededResponse] succeededResponse.returnCode.value should be(0) succeededResponse.jobOutputs should be(Map("salutation" -> JobOutput(WdlString("Hello stubby !")))) } else { - executionResponse should be(a[FailedNonRetryableResponse]) - val failedResponse = executionResponse.asInstanceOf[FailedNonRetryableResponse] + executionResponse should be(a[JobFailedNonRetryableResponse]) + val failedResponse = executionResponse.asInstanceOf[JobFailedNonRetryableResponse] failedResponse.returnCode should be(empty) failedResponse.throwable should be(a[RuntimeException]) failedResponse.throwable.getMessage should startWith("Unable to determine that 0 is alive, and") @@ -219,20 +226,20 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst it should "execute shards from a scatter" in { val workflowDescriptor = buildWorkflowDescriptor(TestWorkflows.Scatter) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head 0 to 2 foreach { shard => // This assumes that engine will give us the evaluated value of the scatter item at the correct index // If this is not the case, more context/logic will need to be moved to the backend so it can figure it out by itself - val symbolMaps: Map[LocallyQualifiedName, WdlInteger] = Map("intNumber" -> WdlInteger(shard)) + val symbolMaps: Map[LocallyQualifiedName, WdlInteger] = 
Map("scattering.intNumber" -> WdlInteger(shard)) val runtimeAttributes = RuntimeAttributeDefinition.addDefaultsToAttributes(runtimeAttributeDefinitions, WorkflowOptions.empty)(call.task.runtimeAttributes.attrs) val jobDescriptor: BackendJobDescriptor = - BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, Option(shard), 1), runtimeAttributes, symbolMaps) - val backend = createBackend(jobDescriptor, emptyBackendConfig) + BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, Option(shard), 1), runtimeAttributes, fqnMapToDeclarationMap(symbolMaps), NoDocker, Map.empty) + val backend = createBackend(jobDescriptor, TestConfig.backendRuntimeConfigDescriptor) val response = - SucceededResponse(mock[BackendJobDescriptorKey], Some(0), Map("out" -> JobOutput(WdlInteger(shard))), None, Seq.empty) + JobSucceededResponse(mock[BackendJobDescriptorKey], Some(0), Map("out" -> JobOutput(WdlInteger(shard))), None, Seq.empty, None) executeJobAndAssertOutputs(backend, response) } } @@ -240,34 +247,33 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst it should "post process outputs" in { val inputFile = createCannedFile("localize", "content from json inputs").pathAsString val inputs = Map { - "inputFile" -> WdlFile(inputFile) + "wf_localize.localize.inputFile" -> WdlFile(inputFile) } val workflowDescriptor = buildWorkflowDescriptor(OutputProcess, inputs) val jobDescriptor: BackendJobDescriptor = jobDescriptorFromSingleCallWorkflow(workflowDescriptor, inputs, WorkflowOptions.empty, runtimeAttributeDefinitions) - val backend = createBackend(jobDescriptor, emptyBackendConfig) - val jobPaths = new JobPaths(workflowDescriptor, emptyBackendConfig.backendConfig, jobDescriptor.key) - val expectedA = WdlFile(jobPaths.callExecutionRoot.resolve("a").toAbsolutePath.toString) - val expectedB = WdlFile(jobPaths.callExecutionRoot.resolve("dir").toAbsolutePath.resolve("b").toString) + val backend = createBackend(jobDescriptor, 
TestConfig.backendRuntimeConfigDescriptor) + val jobPaths = JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, TestConfig.backendRuntimeConfigDescriptor.backendConfig) + val expectedA = WdlFile(jobPaths.callExecutionRoot.resolve("a").toAbsolutePath.pathAsString) + val expectedB = WdlFile(jobPaths.callExecutionRoot.resolve("dir").toAbsolutePath.resolve("b").pathAsString) val expectedOutputs = Map( "o1" -> JobOutput(expectedA), "o2" -> JobOutput(WdlArray(WdlArrayType(WdlFileType), Seq(expectedA, expectedB))), "o3" -> JobOutput(WdlFile(inputFile)) ) - val expectedResponse = SucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty) + val expectedResponse = JobSucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty, None) executeJobAndAssertOutputs(backend, expectedResponse) } - it should "fail post processing if an output fail is not found" in { - val expectedResponse = FailedNonRetryableResponse(mock[BackendJobDescriptorKey], - AggregatedException(Seq.empty, "Could not process output, file not found"), Option(0)) - val workflow = TestWorkflow(buildWorkflowDescriptor(MissingOutputProcess), emptyBackendConfig, expectedResponse) + it should "fail post processing if an output file is not found" in { + val expectedResponse = JobFailedNonRetryableResponse(mock[BackendJobDescriptorKey], + AggregatedException("Could not process output, file not found:", Seq.empty), Option(0)) + val workflow = TestWorkflow(buildWorkflowDescriptor(MissingOutputProcess), TestConfig.backendRuntimeConfigDescriptor, expectedResponse) val backend = createBackend(jobDescriptorFromSingleCallWorkflow(workflow.workflowDescriptor, Map.empty, WorkflowOptions.empty, runtimeAttributeDefinitions), workflow.config) testWorkflow(workflow, backend) } - def createCannedFile(prefix: String, contents: String): File = { - val suffix = ".out" - File.newTemporaryFile(prefix, suffix).write(contents) + def createCannedFile(prefix: String, contents: String): Path = { + 
DefaultPathBuilder.createTempFile(prefix, ".out").write(contents) } } diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala index c3874a14a..404c2bf03 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala @@ -1,22 +1,22 @@ package cromwell.backend.sfs -import java.nio.file.{FileSystems, Files} - -import better.files._ import com.typesafe.config.{Config, ConfigFactory} +import cromwell.backend.BackendSpec +import cromwell.core.CromwellFatalExceptionMarker +import cromwell.core.path.{DefaultPathBuilder, Path} import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{FlatSpec, Matchers} import org.specs2.mock.Mockito -import wdl4s.values.WdlFile +import wdl4s.wdl.values.WdlFile -class SharedFileSystemSpec extends FlatSpec with Matchers with Mockito with TableDrivenPropertyChecks { +class SharedFileSystemSpec extends FlatSpec with Matchers with Mockito with TableDrivenPropertyChecks with BackendSpec { behavior of "SharedFileSystem" val defaultLocalization = ConfigFactory.parseString(""" localization: [copy, hard-link, soft-link] """) val hardLinkLocalization = ConfigFactory.parseString(""" localization: [hard-link] """) val softLinkLocalization = ConfigFactory.parseString(""" localization: [soft-link] """) - val localFS = List(FileSystems.getDefault) + val localPathBuilder = List(DefaultPathBuilder) def localizationTest(config: Config, @@ -25,21 +25,25 @@ class SharedFileSystemSpec extends FlatSpec with Matchers with Mockito with Tabl fileAlreadyExists: Boolean = false, symlink: Boolean = false, linkNb: Int = 1) = { - val callDir = File.newTemporaryDirectory("SharedFileSystem") - val orig = if (fileInCallDir) callDir.createChild("inputFile") else File.newTemporaryFile("inputFile") + val 
callDir = DefaultPathBuilder.createTempDirectory("SharedFileSystem") + val orig = if (fileInCallDir) callDir.createChild("inputFile") else DefaultPathBuilder.createTempFile("inputFile") val dest = if (fileInCallDir) orig else callDir./(orig.pathAsString.drop(1)) orig.touch() if (fileAlreadyExists) { - dest.parent.createDirectories() + dest.parent.createPermissionedDirectories() dest.touch() } - val inputs = Map("input" -> WdlFile(orig.pathAsString)) - val sharedFS = new SharedFileSystem { override val sharedFileSystemConfig = config } - val result = sharedFS.localizeInputs(callDir.path, docker = docker, localFS, inputs) + val inputs = fqnMapToDeclarationMap(Map("input" -> WdlFile(orig.pathAsString))) + val sharedFS = new SharedFileSystem { + override val pathBuilders = localPathBuilder + override val sharedFileSystemConfig = config + } + val localizedinputs = Map(inputs.head._1 -> WdlFile(dest.pathAsString)) + val result = sharedFS.localizeInputs(callDir, docker = docker)(inputs) result.isSuccess shouldBe true - result.get should contain theSameElementsAs Map("input" -> WdlFile(dest.pathAsString)) + result.get should contain theSameElementsAs localizedinputs dest.exists shouldBe true countLinks(dest) should be(linkNb) @@ -72,8 +76,22 @@ class SharedFileSystemSpec extends FlatSpec with Matchers with Mockito with Tabl it should "localize a file via symbolic link" in { localizationTest(softLinkLocalization, docker = false, symlink = true) } + + it should "throw a fatal exception if localization fails" in { + val callDir = DefaultPathBuilder.createTempDirectory("SharedFileSystem") + val orig = DefaultPathBuilder.get("/made/up/origin") + + val inputs = fqnMapToDeclarationMap(Map("input" -> WdlFile(orig.pathAsString))) + val sharedFS = new SharedFileSystem { + override val pathBuilders = localPathBuilder + override val sharedFileSystemConfig = defaultLocalization + } + val result = sharedFS.localizeInputs(callDir, docker = false)(inputs) + result.isFailure shouldBe true 
+ result.failed.get.isInstanceOf[CromwellFatalExceptionMarker] shouldBe true + } - private[this] def countLinks(file: File): Int = Files.getAttribute(file.path, "unix:nlink").asInstanceOf[Int] + private[this] def countLinks(file: Path): Int = file.getAttribute("unix:nlink").asInstanceOf[Int] - private[this] def isSymLink(file: File): Boolean = Files.isSymbolicLink(file.path) + private[this] def isSymLink(file: Path): Boolean = file.isSymbolicLink } diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala index 0ae091367..0935c7dc1 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala @@ -2,49 +2,57 @@ package cromwell.backend.sfs import akka.actor.{ActorSystem, Props} import akka.testkit.TestActorRef -import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionResponse -import cromwell.backend.io.WorkflowPaths +import cromwell.backend.io.WorkflowPathsWithDocker +import cromwell.backend.standard._ import cromwell.backend.validation.{DockerValidation, RuntimeAttributesValidation} import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor} +import cromwell.core.SimpleIoActor +import cromwell.services.keyvalue.InMemoryKvServiceActor -import scala.concurrent.Promise - -class TestLocalAsyncJobExecutionActor(override val params: SharedFileSystemAsyncJobExecutionActorParams) +class TestLocalAsyncJobExecutionActor(override val standardParams: StandardAsyncExecutionActorParams) extends BackgroundAsyncJobExecutionActor { - override lazy val processArgs = { - val script = jobPaths.script.toString + override lazy val processArgs: SharedFileSystemCommand = { + val script = jobPaths.script.pathAsString if (isDockerRun) { val docker = 
RuntimeAttributesValidation.extract(DockerValidation.instance, validatedRuntimeAttributes) - val cwd = jobPaths.callRoot.toString - val dockerCwd = jobPaths.callDockerRoot.toString + val cwd = jobPaths.callRoot.pathAsString + val dockerCwd = jobPathsWithDocker.callDockerRoot.pathAsString SharedFileSystemCommand("/bin/bash", "-c", s"docker run --rm -v $cwd:$dockerCwd -i $docker /bin/bash < $script") } else { SharedFileSystemCommand("/bin/bash", script) } } + + override def dockerImageUsed: Option[String] = None } object TestLocalAsyncJobExecutionActor { def createBackend(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor) - (implicit system: ActorSystem): SharedFileSystemJobExecutionActor = { + (implicit system: ActorSystem): StandardSyncExecutionActor = { createBackendRef(jobDescriptor, configurationDescriptor).underlyingActor } def createBackendRef(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor) - (implicit system: ActorSystem): TestActorRef[SharedFileSystemJobExecutionActor] = { - val emptyActor = system.actorOf(Props.empty) - val workflowPaths = new WorkflowPaths(jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig) - val initializationData = new SharedFileSystemBackendInitializationData(workflowPaths, - SharedFileSystemValidatedRuntimeAttributesBuilder.default.withValidation(DockerValidation.optional)) + (implicit system: ActorSystem): TestActorRef[StandardSyncExecutionActor] = { + val serviceRegistryActor = system.actorOf(Props(new InMemoryKvServiceActor)) // We only really need the KV store for now + val ioActor = system.actorOf(SimpleIoActor.props) + val workflowPaths = new WorkflowPathsWithDocker(jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig) + val initializationData = new StandardInitializationData(workflowPaths, + 
StandardValidatedRuntimeAttributesBuilder.default(configurationDescriptor.backendRuntimeConfig).withValidation(DockerValidation.optional), + classOf[SharedFileSystemExpressionFunctions]) + val asyncClass = classOf[TestLocalAsyncJobExecutionActor] - def propsCreator(completionPromise: Promise[BackendJobExecutionResponse]): Props = { - val params = SharedFileSystemAsyncJobExecutionActorParams(emptyActor, jobDescriptor, - configurationDescriptor, completionPromise, Option(initializationData)) - Props(classOf[TestLocalAsyncJobExecutionActor], params) - } + val params = DefaultStandardSyncExecutionActorParams( + jobIdKey = SharedFileSystemAsyncJobExecutionActor.JobIdKey, + serviceRegistryActor = serviceRegistryActor, + ioActor = ioActor, + jobDescriptor = jobDescriptor, + configurationDescriptor = configurationDescriptor, + backendInitializationDataOption = Option(initializationData), + backendSingletonActorOption = None, + asyncJobExecutionActorClass = asyncClass) - TestActorRef(new SharedFileSystemJobExecutionActor( - jobDescriptor, configurationDescriptor, emptyActor, propsCreator)) + TestActorRef(new StandardSyncExecutionActor(params)) } } diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala index d69446519..c93868f65 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala @@ -2,31 +2,35 @@ package cromwell.backend.impl.spark import akka.actor.{ActorRef, ActorSystem, Props} import cromwell.backend._ +import cromwell.backend.io.JobPathsWithDocker import cromwell.backend.sfs.SharedFileSystemExpressionFunctions -import cromwell.backend.io.JobPaths import cromwell.core.CallContext -import wdl4s.Call -import wdl4s.expression.WdlStandardLibraryFunctions +import 
wdl4s.wdl.WdlTaskCall +import wdl4s.wdl.expression.WdlStandardLibraryFunctions -case class SparkBackendFactory(configurationDescriptor: BackendConfigurationDescriptor, actorSystem: ActorSystem) extends BackendLifecycleActorFactory { - override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], serviceRegistryActor: ActorRef): Option[Props] = { +case class SparkBackendFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor, actorSystem: ActorSystem) extends BackendLifecycleActorFactory { + override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, + calls: Set[WdlTaskCall], serviceRegistryActor: ActorRef, restarting: Boolean): Option[Props] = { Option(SparkInitializationActor.props(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) } - override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef): Props = { + override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, + initializationData: Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + ioActor: ActorRef, + backendSingletonActor: Option[ActorRef]): Props = { SparkJobExecutionActor.props(jobDescriptor, configurationDescriptor) } override def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobKey) - val callContext = new CallContext( + val jobPaths = JobPathsWithDocker(jobKey, workflowDescriptor, configurationDescriptor.backendConfig) + val callContext = CallContext( jobPaths.callExecutionRoot, jobPaths.stdout.toAbsolutePath.toString, jobPaths.stderr.toAbsolutePath.toString ) - new 
SharedFileSystemExpressionFunctions(SparkJobExecutionActor.DefaultFileSystems, callContext) + new SharedFileSystemExpressionFunctions(SparkJobExecutionActor.DefaultPathBuilders, callContext) } } diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkClusterProcess.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkClusterProcess.scala index aa9575fd4..701cb21a2 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkClusterProcess.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkClusterProcess.scala @@ -1,19 +1,20 @@ package cromwell.backend.impl.spark -import java.nio.file.Path - import akka.actor.ActorSystem +import akka.http.scaladsl.Http +import akka.http.scaladsl.model.{HttpRequest, HttpResponse, StatusCodes} +import akka.http.scaladsl.unmarshalling.Unmarshal +import akka.stream.ActorMaterializer +import com.typesafe.scalalogging.Logger import cromwell.backend.impl.spark.SparkClusterProcess.{SparkJobSubmissionResponse, TerminalStatus} -import spray.http.{HttpRequest, HttpResponse, StatusCodes} +import cromwell.core.path.Obsolete._ +import cromwell.core.path.Path +import org.slf4j.LoggerFactory import spray.json.{DefaultJsonProtocol, JsonParser} -import spray.client.pipelining._ +import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ -import scala.concurrent.{ExecutionContext, Future, Promise} -import better.files._ -import com.typesafe.scalalogging.Logger -import org.slf4j.LoggerFactory -import spray.httpx.unmarshalling._ import scala.concurrent.duration._ +import scala.concurrent.{ExecutionContext, Future, Promise} import scala.util.{Failure, Success, Try} object SparkClusterProcess { @@ -36,12 +37,6 @@ object SparkClusterProcess { } -trait SparkClusterRestClient { - def sendAndReceive: SendReceive - - def makeHttpRequest(httpRequest: HttpRequest): Future[HttpResponse] -} - trait SparkClusterProcessMonitor { def 
startMonitoringSparkClusterJob(jobPath: Path, jsonFile: String): Future[TerminalStatus] @@ -57,20 +52,18 @@ trait SparkClusterJobParser { } class SparkClusterProcess(implicit system: ActorSystem) extends SparkProcess - with SparkClusterRestClient with SparkClusterJobParser with SparkClusterProcessMonitor { + with SparkClusterJobParser with SparkClusterProcessMonitor { import SparkClusterProcess._ - import spray.httpx.SprayJsonSupport._ import SparkClusterJsonProtocol._ implicit lazy val ec: ExecutionContext = system.dispatcher + implicit val materializer = ActorMaterializer() lazy val completionPromise = Promise[TerminalStatus]() lazy val monitorPromise = Promise[Unit]() val tag = this.getClass.getSimpleName lazy val logger = Logger(LoggerFactory.getLogger(getClass.getName)) - override def sendAndReceive: SendReceive = sendReceive - override def startMonitoringSparkClusterJob(jobPath: Path, jsonFile: String): Future[TerminalStatus] = { Future(parseJsonForSubmissionIdAndStatus(jobPath.resolve(jsonFile))) onComplete { case Success(resp: SparkJobSubmissionResponse) => @@ -119,7 +112,9 @@ class SparkClusterProcess(implicit system: ActorSystem) extends SparkProcess override def completeMonitoringProcess(rcPath: Path, status: String, promise: Promise[Unit]) = { File(rcPath) write status - promise success Unit + val unitValue = () + promise success unitValue + () } def pollForJobStatus(subId: String): Future[SparkDriverStateQueryResponse] = { @@ -130,15 +125,13 @@ class SparkClusterProcess(implicit system: ActorSystem) extends SparkProcess } val request = sparkClusterMasterHostName match { - case Some(master) => - Get(s"http://$master:6066/v1/submissions/status/$subId") - case None => - Get(s"http://spark-master:6066/v1/submissions/status/$subId") + case Some(master) => HttpRequest(uri = s"http://$master:6066/v1/submissions/status/$subId") + case None => HttpRequest(uri = s"http://spark-master:6066/v1/submissions/status/$subId") } makeHttpRequest(request) flatMap { v => 
v.status match { - case StatusCodes.OK => Future(v ~> unmarshal[SparkDriverStateQueryResponse]) + case StatusCodes.OK => Unmarshal(v).to[SparkDriverStateQueryResponse] case _ => val msg = s"Unexpected response received in response from Spark rest api. Response: $v" logger.error("{} reason: {}", tag, msg) @@ -163,8 +156,8 @@ class SparkClusterProcess(implicit system: ActorSystem) extends SparkProcess JsonParser(line).convertTo[SparkJobSubmissionResponse] } - override def makeHttpRequest(httpRequest: HttpRequest): Future[HttpResponse] = { + def makeHttpRequest(httpRequest: HttpRequest): Future[HttpResponse] = { val headers = httpRequest.headers - sendAndReceive(httpRequest.withHeaders(headers)) + Http().singleRequest(httpRequest.withHeaders(headers)) } } diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala index c03a2975d..d2f47c3e6 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala @@ -1,14 +1,15 @@ package cromwell.backend.impl.spark import akka.actor.{ActorRef, Props} +import cromwell.backend.impl.spark.SparkInitializationActor._ import cromwell.backend.validation.RuntimeAttributesDefault import cromwell.backend.validation.RuntimeAttributesKeys._ -import cromwell.backend.impl.spark.SparkInitializationActor._ import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} import cromwell.core.WorkflowOptions -import wdl4s.types.{WdlBooleanType, WdlIntegerType, WdlStringType} -import wdl4s.values.WdlValue -import wdl4s.{Call, WdlExpression} +import cromwell.core.Dispatcher.BackendDispatcher +import wdl4s.wdl.WdlTaskCall +import wdl4s.wdl.types.{WdlBooleanType, 
WdlIntegerType, WdlStringType} +import wdl4s.wdl.values.WdlValue import scala.concurrent.Future import scala.util.Try @@ -18,14 +19,14 @@ object SparkInitializationActor { SparkRuntimeAttributes.NumberOfExecutorsKey, SparkRuntimeAttributes.AppMainClassKey) def props(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[WdlTaskCall], configurationDescriptor: BackendConfigurationDescriptor, serviceRegistryActor: ActorRef): Props = - Props(new SparkInitializationActor(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) + Props(new SparkInitializationActor(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)).withDispatcher(BackendDispatcher) } class SparkInitializationActor(override val workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], + override val calls: Set[WdlTaskCall], override val configurationDescriptor: BackendConfigurationDescriptor, override val serviceRegistryActor: ActorRef) extends BackendWorkflowInitializationActor { diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala index 3d34dda47..4b0f9700b 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala @@ -1,25 +1,26 @@ package cromwell.backend.impl.spark -import java.nio.file.FileSystems import java.nio.file.attribute.PosixFilePermission import akka.actor.Props -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend.impl.spark.SparkClusterProcess._ -import 
cromwell.backend.io.JobPaths +import cromwell.backend.io.JobPathsWithDocker import cromwell.backend.sfs.{SharedFileSystem, SharedFileSystemExpressionFunctions} +import cromwell.backend.wdl.Command import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobExecutionActor} -import cromwell.core.{TailedWriter, UntailedWriter} +import cromwell.core.path.JavaWriterImplicits._ +import cromwell.core.path.Obsolete._ +import cromwell.core.path.{DefaultPathBuilder, TailedWriter, UntailedWriter} +import lenthall.util.TryUtil import wdl4s.parser.MemoryUnit -import wdl4s.util.TryUtil import scala.concurrent.{Future, Promise} import scala.sys.process.ProcessLogger import scala.util.{Failure, Success, Try} -import scala.language.postfixOps object SparkJobExecutionActor { - val DefaultFileSystems = List(FileSystems.getDefault) + val DefaultPathBuilders = List(DefaultPathBuilder) def props(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor): Props = Props(new SparkJobExecutionActor(jobDescriptor, configurationDescriptor)) @@ -29,9 +30,8 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor with SharedFileSystem { import SparkJobExecutionActor._ - import better.files._ - import cromwell.core.PathFactory._ + override val pathBuilders = DefaultPathBuilders private val tag = s"SparkJobExecutionActor-${jobDescriptor.key.tag}:" lazy val cmds = new SparkCommands @@ -44,7 +44,7 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private val sparkDeployMode = configurationDescriptor.backendConfig.getString("deployMode").toLowerCase override val sharedFileSystemConfig = fileSystemsConfig.getConfig("local") private val workflowDescriptor = jobDescriptor.workflowDescriptor - private val jobPaths = new JobPaths(workflowDescriptor, 
configurationDescriptor.backendConfig, jobDescriptor.key) + private val jobPaths = JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, configurationDescriptor.backendConfig) // Files private val executionDir = jobPaths.callExecutionRoot @@ -59,9 +59,9 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private lazy val isClusterMode = isSparkClusterMode(sparkDeployMode, sparkMaster) private val call = jobDescriptor.key.call - private val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, DefaultFileSystems) + private val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, DefaultPathBuilders) - private val lookup = jobDescriptor.inputs.apply _ + private val lookup = jobDescriptor.fullyQualifiedInputs.apply _ private val executionResponse = Promise[BackendJobExecutionResponse]() @@ -108,12 +108,12 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private def resolveExecutionResult(jobReturnCode: Try[Int], failedOnStderr: Boolean): Future[BackendJobExecutionResponse] = { (jobReturnCode, failedOnStderr) match { case (Success(0), true) if File(jobPaths.stderr).lines.toList.nonEmpty => - Future.successful(FailedNonRetryableResponse(jobDescriptor.key, + Future.successful(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException(s"Execution process failed although return code is zero but stderr is not empty"), Option(0))) case (Success(0), _) => resolveExecutionProcess - case (Success(rc), _) => Future.successful(FailedNonRetryableResponse(jobDescriptor.key, + case (Success(rc), _) => Future.successful(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException(s"Execution process failed. 
Spark returned non zero status code: $rc"), Option(rc))) - case (Failure(error), _) => Future.successful(FailedNonRetryableResponse(jobDescriptor.key, error, None)) + case (Failure(error), _) => Future.successful(JobFailedNonRetryableResponse(jobDescriptor.key, error, None)) } } @@ -123,9 +123,9 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, case true => clusterExtProcess.startMonitoringSparkClusterJob(jobPaths.callExecutionRoot, SubmitJobJson.format(sparkDeployMode)) collect { case Finished => processSuccess(0) - case Failed(error: Throwable) => FailedNonRetryableResponse(jobDescriptor.key, error, None) + case Failed(error: Throwable) => JobFailedNonRetryableResponse(jobDescriptor.key, error, None) } recover { - case error: Throwable => FailedNonRetryableResponse(jobDescriptor.key, error, None) + case error: Throwable => JobFailedNonRetryableResponse(jobDescriptor.key, error, None) } case false => Future.successful(processSuccess(0)) } @@ -133,19 +133,21 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private def processSuccess(rc: Int) = { evaluateOutputs(callEngineFunction, outputMapper(jobPaths)) match { - case Success(outputs) => SucceededResponse(jobDescriptor.key, Some(rc), outputs, None, Seq.empty) + case Success(outputs) => JobSucceededResponse(jobDescriptor.key, Some(rc), outputs, None, Seq.empty, dockerImageUsed = None) case Failure(e) => val message = Option(e.getMessage) map { ": " + _ } getOrElse "" - FailedNonRetryableResponse(jobDescriptor.key, new Throwable("Failed post processing of outputs" + message, e), Option(rc)) + JobFailedNonRetryableResponse(jobDescriptor.key, new Throwable("Failed post processing of outputs" + message, e), Option(rc)) } } /** * Abort a running job. 
*/ - override def abort(): Unit = Future.failed(new UnsupportedOperationException("SparkBackend currently doesn't support aborting jobs.")) + // -Ywarn-value-discard + // override def abort(): Unit = Future.failed(new UnsupportedOperationException("SparkBackend currently doesn't support aborting jobs.")) + override def abort(): Unit = throw new UnsupportedOperationException("SparkBackend currently doesn't support aborting jobs.") private def createExecutionFolderAndScript(): Unit = { @@ -154,9 +156,12 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, executionDir.toString.toFile.createIfNotExists(asDirectory = true, createParents = true) log.debug("{} Resolving job command", tag) - val command = localizeInputs(jobPaths.callInputsRoot, docker = false, DefaultFileSystems, jobDescriptor.inputs) flatMap { - localizedInputs => call.task.instantiateCommand(localizedInputs, callEngineFunction, identity) - } + + val command = Command.instantiate( + jobDescriptor, + callEngineFunction, + localizeInputs(jobPaths.callInputsRoot, docker = false) + ) log.debug("{} Creating bash script for executing command: {}", tag, command) // TODO: we should use shapeless Heterogeneous list here not good to have generic map @@ -178,11 +183,14 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, cmds.writeScript(sparkCommand, scriptPath, executionDir) File(scriptPath).addPermission(PosixFilePermission.OWNER_EXECUTE) + () } catch { case ex: Exception => log.error(ex, "Failed to prepare task: " + ex.getMessage) - executionResponse success FailedNonRetryableResponse(jobDescriptor.key, ex, None) + // -Ywarn-value-discard + // executionResponse success FailedNonRetryableResponse(jobDescriptor.key, ex, None) + () } } @@ -198,7 +206,7 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, case false => executionResponse completeWith executeTask(extProcess, stdoutWriter, stderrWriter) } } recover { - case 
exception => executionResponse success FailedNonRetryableResponse(jobDescriptor.key, exception, None) + case exception => executionResponse success JobFailedNonRetryableResponse(jobDescriptor.key, exception, None) } } diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala index d04041f20..a3576ea83 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala @@ -1,14 +1,10 @@ package cromwell.backend.impl.spark -import java.nio.file.Path - import com.typesafe.scalalogging.StrictLogging -import cromwell.core.{TailedWriter, UntailedWriter} -import cromwell.core.PathFactory.EnhancedPath +import cromwell.core.path.Obsolete._ +import cromwell.core.path.{Path, TailedWriter, UntailedWriter} import scala.sys.process._ -import better.files._ -import scala.language.postfixOps import scala.util.{Failure, Success, Try} object SparkCommands { @@ -30,18 +26,23 @@ class SparkCommands extends StrictLogging { * as some extra shell code for monitoring jobs */ def writeScript(instantiatedCommand: String, filePath: Path, containerRoot: Path) = { - File(filePath).write( - s"""#!/bin/sh - |cd $containerRoot - |$instantiatedCommand - |echo $$? > rc - |""".stripMargin) + + val scriptBody = + s""" + +#!/bin/sh +cd $containerRoot +$instantiatedCommand +echo $$? 
> rc + + """.trim + "\n" + File(filePath).write(scriptBody) } def sparkSubmitCommand(attributes: Map[String, Any]): String = { val sparkHome = Try(sys.env("SPARK_HOME")) match { case Success(s) => Option(s) - case Failure(ex) => + case Failure(_) => logger.warn(s"Spark home does not exist picking up default command") None } diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkRuntimeAttributes.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkRuntimeAttributes.scala index 1a951f4b7..073069c52 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkRuntimeAttributes.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkRuntimeAttributes.scala @@ -1,16 +1,18 @@ package cromwell.backend.impl.spark +import cats.data.Validated.{Invalid, Valid} +import cats.syntax.cartesian._ +import cats.syntax.validated._ import cromwell.backend.MemorySize import cromwell.backend.validation.RuntimeAttributesDefault._ import cromwell.backend.validation.RuntimeAttributesKeys._ import cromwell.backend.validation.RuntimeAttributesValidation._ import cromwell.core._ +import lenthall.validation.ErrorOr._ import lenthall.exception.MessageAggregation -import wdl4s.types.{WdlBooleanType, WdlIntegerType, WdlStringType, WdlType} -import wdl4s.values.{WdlBoolean, WdlInteger, WdlString, WdlValue} +import wdl4s.wdl.types.{WdlBooleanType, WdlIntegerType, WdlStringType, WdlType} +import wdl4s.wdl.values.{WdlBoolean, WdlInteger, WdlString, WdlValue} -import scalaz.Scalaz._ -import scalaz._ object SparkRuntimeAttributes { private val FailOnStderrDefaultValue = false @@ -46,33 +48,33 @@ object SparkRuntimeAttributes { val executorCores = validateCpu(withDefaultValues.get(ExecutorCoresKey), noValueFoundFor(ExecutorCoresKey)) val executorMemory = validateMemory(withDefaultValues.get(ExecutorMemoryKey), noValueFoundFor(ExecutorMemoryKey)) - val numberOfExecutors = 
validateNumberOfExecutors(withDefaultValues.get(NumberOfExecutorsKey), None.successNel) + val numberOfExecutors = validateNumberOfExecutors(withDefaultValues.get(NumberOfExecutorsKey), None.validNel) val appMainCLass = validateAppEntryPoint(withDefaultValues(AppMainClassKey)) - (executorCores |@| executorMemory |@| numberOfExecutors |@| appMainCLass |@| failOnStderr) { + (executorCores |@| executorMemory |@| numberOfExecutors |@| appMainCLass |@| failOnStderr) map { new SparkRuntimeAttributes(_, _, _, _, _) } match { - case Success(x) => x - case Failure(nel) => throw new RuntimeException with MessageAggregation { + case Valid(x) => x + case Invalid(nel) => throw new RuntimeException with MessageAggregation { override def exceptionContext: String = "Runtime attribute validation failed" - override def errorMessages: Traversable[String] = nel.list.toList + override def errorMessages: Traversable[String] = nel.toList } } } private def validateNumberOfExecutors(numOfExecutors: Option[WdlValue], onMissingKey: => ErrorOr[Option[Int]]): ErrorOr[Option[Int]] = { numOfExecutors match { - case Some(i: WdlInteger) => Option(i.value.intValue()).successNel + case Some(i: WdlInteger) => Option(i.value.intValue()).validNel case None => onMissingKey - case _ => s"Expecting $NumberOfExecutorsKey runtime attribute to be an Integer".failureNel + case _ => s"Expecting $NumberOfExecutorsKey runtime attribute to be an Integer".invalidNel } } private def validateAppEntryPoint(mainClass: WdlValue): ErrorOr[String] = { WdlStringType.coerceRawValue(mainClass) match { - case scala.util.Success(WdlString(s)) => s.successNel - case _ => s"Could not coerce $AppMainClassKey into a String".failureNel + case scala.util.Success(WdlString(s)) => s.validNel + case _ => s"Could not coerce $AppMainClassKey into a String".invalidNel } } } diff --git a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkClusterProcessSpec.scala 
b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkClusterProcessSpec.scala index c2b003a53..07301fad5 100644 --- a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkClusterProcessSpec.scala +++ b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkClusterProcessSpec.scala @@ -1,28 +1,23 @@ package cromwell.backend.impl.spark -import java.nio.file.Path - +import akka.http.scaladsl.model._ import akka.testkit.ImplicitSender -import better.files._ -import Cmds._ - -import scala.concurrent.{Future, Promise} +import cromwell.backend.impl.spark.SparkClusterProcess.SparkClusterJsonProtocol._ +import cromwell.backend.impl.spark.SparkClusterProcess._ import cromwell.core.TestKitSuite +import cromwell.core.path.Obsolete._ +import cromwell.core.path.Path import org.mockito.Matchers._ import org.mockito.Mockito import org.mockito.Mockito._ import org.scalatest.concurrent.PatienceConfiguration.Timeout +import org.scalatest.concurrent.ScalaFutures import org.scalatest.mockito.MockitoSugar import org.scalatest.{BeforeAndAfter, Matchers, WordSpecLike} import spray.json._ import scala.concurrent.duration._ -import cromwell.backend.impl.spark.SparkClusterProcess.{Failed, _} -import org.scalatest.concurrent.ScalaFutures -import spray.http._ -import SparkClusterJsonProtocol._ -import spray.httpx.unmarshalling._ -import spray.httpx.SprayJsonSupport._ +import scala.concurrent.{Future, Promise} class SparkClusterProcessSpec extends TestKitSuite("SparkClusterProcess") with WordSpecLike @@ -78,9 +73,9 @@ class SparkClusterProcessSpec extends TestKitSuite("SparkClusterProcess") private val mockRunningClusterResponse = SparkDriverStateQueryResponse(action = "SubmissionStatusResponse", driverState = "RUNNING", serverSparkVersion = "1.6.1", submissionId = "driver-20160803181054-0000", success = true, workerHostPort = "10.0.1.55:43834", workerId = "worker-20160801162431-10.0.1.55-43834") - private val mockSuccessHttpResponse = 
HttpResponse(StatusCodes.OK, HttpEntity(ContentTypes.`application/json`, mockSuccessClusterResponse.toJson.toString)) - private val mockRunningHttpResponse = HttpResponse(StatusCodes.OK, HttpEntity(ContentTypes.`application/json`, mockRunningClusterResponse.toJson.toString)) - private val mockFailedHttpResponse = HttpResponse(StatusCodes.OK, HttpEntity(ContentTypes.`application/json`, mockFailedClusterResponse.toJson.toString)) + private val mockSuccessHttpResponse = HttpResponse(StatusCodes.OK, entity = HttpEntity(ContentTypes.`application/json`, mockSuccessClusterResponse.toJson.toString)) + private val mockRunningHttpResponse = HttpResponse(StatusCodes.OK, entity = HttpEntity(ContentTypes.`application/json`, mockRunningClusterResponse.toJson.toString)) + private val mockFailedHttpResponse = HttpResponse(StatusCodes.OK, entity = HttpEntity(ContentTypes.`application/json`, mockFailedClusterResponse.toJson.toString)) private val mockBadHttpResponse = HttpResponse(StatusCodes.BadRequest) "SparkCluster process" should { diff --git a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala index f8aba1a4b..1eac555bc 100644 --- a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala +++ b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala @@ -3,10 +3,11 @@ package cromwell.backend.impl.spark import akka.testkit.{EventFilter, ImplicitSender, TestDuration} import cromwell.backend.BackendSpec._ import cromwell.backend.BackendWorkflowInitializationActor.Initialize -import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor} +import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor, TestConfig} import cromwell.core.TestKitSuite import org.scalatest.{BeforeAndAfterAll, Matchers, 
WordSpecLike} -import wdl4s._ +import wdl4s.wdl._ + import scala.concurrent.duration._ class SparkInitializationActorSpec extends TestKitSuite("SparkInitializationActorSpec") @@ -27,12 +28,12 @@ class SparkInitializationActorSpec extends TestKitSuite("SparkInitializationAc | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin - private def getSparkBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], conf: BackendConfigurationDescriptor) = { + private def getSparkBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[WdlTaskCall], conf: BackendConfigurationDescriptor) = { system.actorOf(SparkInitializationActor.props(workflowDescriptor, calls, conf, emptyActor)) } @@ -41,7 +42,7 @@ class SparkInitializationActorSpec extends TestKitSuite("SparkInitializationAc within(Timeout) { EventFilter.warning(message = s"Key/s [memory] is/are not supported by SparkBackend. Unsupported attributes will not be part of jobs executions.", occurrences = 1) intercept { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { memory: 1 %s: "%s"}""".format("appMainClass", "test")) - val backend = getSparkBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, emptyBackendConfig) + val backend = getSparkBackend(workflowDescriptor, workflowDescriptor.workflow.taskCalls, TestConfig.emptyBackendConfigDescriptor) backend ! 
Initialize } } diff --git a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala index fac5f389b..ac98705df 100644 --- a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala +++ b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala @@ -1,24 +1,23 @@ package cromwell.backend.impl.spark import java.io.Writer -import java.nio.file.Path import akka.testkit.{ImplicitSender, TestActorRef} -import better.files._ import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend.impl.spark.SparkClusterProcess._ import cromwell.backend.io._ import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendSpec} -import cromwell.core.{PathWriter, TailedWriter, TestKitSuite, UntailedWriter, _} -import org.mockito.Matchers._ +import cromwell.core.path.Obsolete._ +import cromwell.core.path.{Path, PathWriter, TailedWriter, UntailedWriter} +import cromwell.core.{TestKitSuite, WorkflowOptions} import org.mockito.Mockito import org.mockito.Mockito._ import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.mockito.MockitoSugar import org.scalatest.{BeforeAndAfter, Matchers, WordSpecLike} -import wdl4s._ -import wdl4s.values.WdlValue +import wdl4s.wdl._ +import wdl4s.wdl.values.WdlValue import scala.concurrent.Future import scala.concurrent.duration._ @@ -50,7 +49,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -68,7 +67,7 @@ class SparkJobExecutionActorSpec extends 
TestKitSuite("SparkJobExecutionActor") | RUNTIME |} | - |workflow helloClusterMode { + |workflow wf_helloClusterMode { | call helloClusterMode |} """.stripMargin @@ -144,7 +143,10 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") Mockito.reset(sparkClusterProcess) } - override def afterAll(): Unit = system.terminate() + override def afterAll(): Unit = { + system.terminate() + () + } "executeTask method in cluster deploy mode " should { "return succeed response when the spark cluster process monitor method returns finished status" in { @@ -168,7 +170,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.startMonitoringSparkClusterJob(any[Path], any[String])).thenReturn(Future.successful(Finished)) whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, times(1)).untailedWriter(any[Path]) @@ -198,8 +200,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.startMonitoringSparkClusterJob(any[Path], any[String])).thenReturn(Future.successful(Failed(new Throwable("failed to monitor")))) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("failed to monitor")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains("failed to monitor")) verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, 
times(1)).untailedWriter(any[Path]) @@ -229,8 +231,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.startMonitoringSparkClusterJob(any[Path], any[String])).thenReturn(Future.failed(new IllegalStateException("failed to start monitoring process"))) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("failed to start monitoring process")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains("failed to start monitoring process")) verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, times(1)).untailedWriter(any[Path]) @@ -260,8 +262,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.processStderr).thenReturn(sampleSubmissionResponse) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, times(1)).untailedWriter(any[Path]) @@ -289,8 +291,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") 
when(sparkClusterProcess.processStderr).thenReturn(stderrResult) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed. Spark returned non zero status code:")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed. Spark returned non zero status code:")) } cleanUpJob(jobPaths) } @@ -315,8 +317,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.processStderr).thenReturn(stderrResult) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"submit job process exitValue method failed")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"submit job process exitValue method failed")) } cleanUpJob(jobPaths) } @@ -344,7 +346,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") }).underlyingActor whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(sparkProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkProcess, times(1)).untailedWriter(any[Path]) @@ -373,8 +375,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkProcess.processStderr).thenReturn(stderrResult) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution 
process failed. Spark returned non zero status code:")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed. Spark returned non zero status code:")) } cleanUpJob(jobPaths) @@ -399,8 +401,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) } cleanUpJob(jobPaths) @@ -424,7 +426,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(sparkProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkProcess, times(1)).untailedWriter(any[Path]) @@ -435,13 +437,16 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") } - private def cleanUpJob(jobPaths: JobPaths): Unit = File(jobPaths.workflowRoot).delete(true) + private def cleanUpJob(jobPaths: JobPathsWithDocker): Unit = { + File(jobPaths.workflowPaths.workflowRoot).delete(true) + () + } - private def prepareJob(wdlSource: WdlSource = helloWorldWdl, runtimeString: String = passOnStderr, inputFiles: Option[Map[String, WdlValue]] = None, isCluster: Boolean = 
false): TestJobDescriptor = { - val backendWorkflowDescriptor = buildWorkflowDescriptor(wdl = wdlSource, inputs = inputFiles.getOrElse(Map.empty), runtime = runtimeString) + private def prepareJob(workflowSource: WorkflowSource = helloWorldWdl, runtimeString: String = passOnStderr, inputFiles: Option[Map[String, WdlValue]] = None, isCluster: Boolean = false): TestJobDescriptor = { + val backendWorkflowDescriptor = buildWorkflowDescriptor(workflowSource = workflowSource, inputs = inputFiles.getOrElse(Map.empty), runtime = runtimeString) val backendConfigurationDescriptor = if (isCluster) BackendConfigurationDescriptor(backendClusterConfig, ConfigFactory.load) else BackendConfigurationDescriptor(backendClientConfig, ConfigFactory.load) val jobDesc = jobDescriptorFromSingleCallWorkflow(backendWorkflowDescriptor, inputFiles.getOrElse(Map.empty), WorkflowOptions.empty, Set.empty) - val jobPaths = if (isCluster) new JobPaths(backendWorkflowDescriptor, backendClusterConfig, jobDesc.key) else new JobPaths(backendWorkflowDescriptor, backendClientConfig, jobDesc.key) + val jobPaths = if (isCluster) JobPathsWithDocker(jobDesc.key, backendWorkflowDescriptor, backendClusterConfig) else JobPathsWithDocker(jobDesc.key, backendWorkflowDescriptor, backendClientConfig) val executionDir = jobPaths.callExecutionRoot val stdout = File(executionDir.toString, "stdout") stdout.createIfNotExists(asDirectory = false, createParents = true) @@ -450,7 +455,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") TestJobDescriptor(jobDesc, jobPaths, backendConfigurationDescriptor) } - private case class TestJobDescriptor(jobDescriptor: BackendJobDescriptor, jobPaths: JobPaths, backendConfigurationDescriptor: BackendConfigurationDescriptor) + private case class TestJobDescriptor(jobDescriptor: BackendJobDescriptor, jobPaths: JobPathsWithDocker, backendConfigurationDescriptor: BackendConfigurationDescriptor) trait MockWriter extends Writer { var closed = false diff 
--git a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala index d166dca11..d570015c3 100644 --- a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala +++ b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala @@ -1,15 +1,16 @@ package cromwell.backend.impl.spark -import cromwell.backend.{MemorySize, BackendWorkflowDescriptor} import cromwell.backend.validation.RuntimeAttributesKeys._ +import cromwell.backend.{BackendWorkflowDescriptor, MemorySize} +import cromwell.core.labels.Labels import cromwell.core.{WorkflowId, WorkflowOptions} +import lenthall.util.TryUtil import org.scalatest.{Matchers, WordSpecLike} -import spray.json.{JsBoolean, JsNumber, JsObject, JsString, JsValue} -import wdl4s.WdlExpression._ -import wdl4s.expression.NoFunctions -import wdl4s.util.TryUtil -import wdl4s.{Call, WdlExpression, _} -import wdl4s.values.WdlValue +import spray.json.{JsBoolean, JsNumber, JsObject, JsValue} +import wdl4s.wdl.WdlExpression._ +import wdl4s.wdl.expression.NoFunctions +import wdl4s.wdl.values.WdlValue +import wdl4s.wdl._ class SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { @@ -26,14 +27,14 @@ class SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin val emptyWorkflowOptions = WorkflowOptions(JsObject(Map.empty[String, JsValue])) - val staticDefaults = SparkRuntimeAttributes(1, MemorySize.parse("1 GB").get, None, "com.test.spark" , false) + val staticDefaults = SparkRuntimeAttributes(1, MemorySize.parse("1 GB").get, None, "com.test.spark" , failOnStderr = false) def workflowOptionsWithDefaultRA(defaults: Map[String, JsValue]) = { WorkflowOptions(JsObject(Map( @@ -84,28 +85,28 @@ class 
SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { } - private def buildWorkflowDescriptor(wdl: WdlSource, + private def buildWorkflowDescriptor(wdl: WorkflowSource, inputs: Map[String, WdlValue] = Map.empty, options: WorkflowOptions = WorkflowOptions(JsObject(Map.empty[String, JsValue])), runtime: String) = { - new BackendWorkflowDescriptor( + BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(wdl.replaceAll("RUNTIME", runtime.format("appMainClass", "com.test.spark"))), + WdlNamespaceWithWorkflow.load(wdl.replaceAll("RUNTIME", runtime.format("appMainClass", "com.test.spark")), Seq.empty[ImportResolver]).get.workflow, inputs, - options + options, + Labels.empty ) } - private def createRuntimeAttributes(wdlSource: WdlSource, runtimeAttributes: String) = { - val workflowDescriptor = buildWorkflowDescriptor(wdlSource, runtime = runtimeAttributes) + private def createRuntimeAttributes(workflowSource: WorkflowSource, runtimeAttributes: String) = { + val workflowDescriptor = buildWorkflowDescriptor(workflowSource, runtime = runtimeAttributes) - def createLookup(call: Call): ScopedLookupFunction = { - val declarations = workflowDescriptor.workflowNamespace.workflow.declarations ++ call.task.declarations - val knownInputs = workflowDescriptor.inputs - WdlExpression.standardLookupFunction(knownInputs, declarations, NoFunctions) + def createLookup(call: WdlCall): ScopedLookupFunction = { + val knownInputs = workflowDescriptor.knownValues + call.lookupFunction(knownInputs, NoFunctions) } - workflowDescriptor.workflowNamespace.workflow.calls map { + workflowDescriptor.workflow.taskCalls map { call => val ra = call.task.runtimeAttributes.attrs mapValues { _.evaluate(createLookup(call), NoFunctions) } TryUtil.sequenceMap(ra, "Runtime attributes evaluation").get @@ -118,6 +119,7 @@ class SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { } catch { case ex: RuntimeException => fail(s"Exception was not expected but received: 
${ex.getMessage}") } + () } private def assertSparkRuntimeAttributesFailedCreation(runtimeAttributes: Map[String, WdlValue], exMsg: String): Unit = { @@ -127,5 +129,6 @@ class SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { } catch { case ex: RuntimeException => assert(ex.getMessage.contains(exMsg)) } + () } } diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala new file mode 100644 index 000000000..1e8b69c7e --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala @@ -0,0 +1,210 @@ +package cromwell.backend.impl.tes + +import java.nio.file.FileAlreadyExistsException + +import akka.http.scaladsl.Http +import akka.http.scaladsl.model._ +import cromwell.backend.BackendJobLifecycleActor +import cromwell.backend.async.{ExecutionHandle, FailedNonRetryableExecutionHandle, PendingExecutionHandle} +import cromwell.backend.impl.tes.TesResponseJsonFormatter._ +import cromwell.backend.standard.{StandardAsyncExecutionActor, StandardAsyncExecutionActorParams, StandardAsyncJob} +import cromwell.core.path.{DefaultPathBuilder, Path} +import cromwell.core.retry.SimpleExponentialBackoff +import wdl4s.wdl.expression.NoFunctions +import wdl4s.wdl.values.WdlFile +import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ +import akka.http.scaladsl.marshalling.Marshal +import akka.http.scaladsl.unmarshalling.{Unmarshal, Unmarshaller} +import akka.stream.ActorMaterializer + +import scala.concurrent.duration._ +import scala.concurrent.Future +import scala.language.postfixOps +import scala.util.{Failure, Success} + +sealed trait TesRunStatus { + def isTerminal: Boolean +} + +case object Running extends TesRunStatus { + def isTerminal = false +} + +case object Complete extends TesRunStatus { + def isTerminal = true +} + +case object 
FailedOrError extends TesRunStatus { + def isTerminal = true +} + +object TesAsyncBackendJobExecutionActor { + val JobIdKey = "tes_job_id" +} + +class TesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyncExecutionActorParams) + extends BackendJobLifecycleActor with StandardAsyncExecutionActor with TesJobCachingActorHelper { + implicit val actorSystem = context.system + implicit val materializer = ActorMaterializer() + + override type StandardAsyncRunInfo = Any + + override type StandardAsyncRunStatus = TesRunStatus + + override lazy val pollBackOff = SimpleExponentialBackoff( + initialInterval = 1 seconds, + maxInterval = 5 minutes, + multiplier = 1.1 + ) + + override lazy val executeOrRecoverBackOff = SimpleExponentialBackoff( + initialInterval = 3 seconds, + maxInterval = 30 seconds, + multiplier = 1.1 + ) + + private lazy val realDockerImageUsed: String = jobDescriptor.maybeCallCachingEligible.dockerHash.getOrElse(runtimeAttributes.dockerImage) + override lazy val dockerImageUsed: Option[String] = Option(realDockerImageUsed) + + private val tesEndpoint = workflowDescriptor.workflowOptions.getOrElse("endpoint", tesConfiguration.endpointURL) + + override lazy val jobTag: String = jobDescriptor.key.tag + + // Utility for converting a WdlValue so that the path is localized to the + // container's filesystem. 
+ override def mapCommandLineWdlFile(wdlFile: WdlFile): WdlFile = { + val localPath = DefaultPathBuilder.get(wdlFile.valueString).toAbsolutePath + localPath match { + case p if p.startsWith(tesJobPaths.workflowPaths.DockerRoot) => + val containerPath = p.pathAsString + WdlFile(containerPath) + case p if p.startsWith(tesJobPaths.callExecutionRoot) => + val containerPath = tesJobPaths.containerExec(commandDirectory, localPath.getFileName.pathAsString) + WdlFile(containerPath) + case p => + val containerPath = tesJobPaths.containerInput(p.pathAsString) + WdlFile(containerPath) + } + } + + override lazy val commandDirectory: Path = { + runtimeAttributes.dockerWorkingDir match { + case Some(path) => DefaultPathBuilder.get(path) + case None => tesJobPaths.callExecutionDockerRoot + } + } + + def createTaskMessage(): Task = { + val task = TesTask(jobDescriptor, configurationDescriptor, jobLogger, tesJobPaths, + runtimeAttributes, commandDirectory, commandScriptContents, backendEngineFunctions, + realDockerImageUsed) + + Task( + None, + None, + Option(task.name), + Option(task.description), + Option(task.project), + Option(task.inputs(commandLineValueMapper)), + Option(task.outputs), + Option(task.resources), + task.executors, + None, + None, + None + ) + } + + override def executeAsync(): Future[ExecutionHandle] = { + // create call exec dir + tesJobPaths.callExecutionRoot.createPermissionedDirectories() + val taskMessage = createTaskMessage() + + for { + entity <- Marshal(taskMessage).to[RequestEntity] + ctr <- makeRequest[CreateTaskResponse](HttpRequest(method = HttpMethods.POST, uri = tesEndpoint, entity = entity)) + } yield PendingExecutionHandle(jobDescriptor, StandardAsyncJob(ctr.id), None, previousStatus = None) + } + + override def recoverAsync(jobId: StandardAsyncJob) = executeAsync() + + override def tryAbort(job: StandardAsyncJob): Unit = { + + val returnCodeTmp = jobPaths.returnCode.plusExt("kill") + returnCodeTmp.write(s"$SIGTERM\n") + try { + 
returnCodeTmp.moveTo(jobPaths.returnCode) + } catch { + case _: FileAlreadyExistsException => + // If the process has already completed, there will be an existing rc file. + returnCodeTmp.delete(true) + } + + makeRequest[CancelTaskResponse](HttpRequest(method = HttpMethods.POST, uri = s"$tesEndpoint/${job.jobId}:cancel")) onComplete { + case Success(_) => jobLogger.info("{} Aborted {}", tag: Any, job.jobId) + case Failure(ex) => jobLogger.warn("{} Failed to abort {}: {}", tag, job.jobId, ex.getMessage) + } + + () + } + + override def pollStatusAsync(handle: StandardAsyncPendingExecutionHandle): Future[TesRunStatus] = { + makeRequest[MinimalTaskView](HttpRequest(uri = s"$tesEndpoint/${handle.pendingJob.jobId}?view=MINIMAL")) map { + response => + val state = response.state + state match { + case s if s.contains("COMPLETE") => + jobLogger.info(s"Job ${handle.pendingJob.jobId} is complete") + Complete + + case s if s.contains("CANCELED") => + jobLogger.info(s"Job ${handle.pendingJob.jobId} was canceled") + FailedOrError + + case s if s.contains("ERROR") => + jobLogger.info(s"TES reported an error for Job ${handle.pendingJob.jobId}") + FailedOrError + + case _ => Running + } + } + } + + override def customPollStatusFailure: PartialFunction[(ExecutionHandle, Exception), ExecutionHandle] = { + case (oldHandle: StandardAsyncPendingExecutionHandle@unchecked, e: Exception) => + jobLogger.error(s"$tag TES Job ${oldHandle.pendingJob.jobId} has not been found, failing call") + FailedNonRetryableExecutionHandle(e) + } + + override def isTerminal(runStatus: TesRunStatus): Boolean = { + runStatus.isTerminal + } + + override def isSuccess(runStatus: TesRunStatus): Boolean = { + runStatus match { + case Complete => true + case _ => false + } + } + + private val outputWdlFiles: Seq[WdlFile] = jobDescriptor.call.task + .findOutputFiles(jobDescriptor.fullyQualifiedInputs, NoFunctions) + .filter(o => !DefaultPathBuilder.get(o.valueString).isAbsolute) + + override def 
mapOutputWdlFile(wdlFile: WdlFile): WdlFile = { + val absPath: Path = tesJobPaths.callExecutionRoot.resolve(wdlFile.valueString) + wdlFile match { + case fileNotFound if !absPath.exists && outputWdlFiles.contains(fileNotFound) => + throw new RuntimeException("Could not process output, file not found: " + + s"${absPath.pathAsString}") + case _ => WdlFile(absPath.pathAsString) + } + } + + private def makeRequest[A](request: HttpRequest)(implicit um: Unmarshaller[ResponseEntity, A]): Future[A] = { + for { + response <- Http().singleRequest(request) + data <- Unmarshal(response.entity).to[A] + } yield data + } +} diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendInitializationData.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendInitializationData.scala new file mode 100644 index 000000000..132126f4a --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendInitializationData.scala @@ -0,0 +1,11 @@ +package cromwell.backend.impl.tes + +import cromwell.backend.sfs.SharedFileSystemExpressionFunctions +import cromwell.backend.standard.{StandardInitializationData, StandardValidatedRuntimeAttributesBuilder} + +case class TesBackendInitializationData +( + override val workflowPaths: TesWorkflowPaths, + override val runtimeAttributesBuilder: StandardValidatedRuntimeAttributesBuilder, + tesConfiguration: TesConfiguration +) extends StandardInitializationData(workflowPaths, runtimeAttributesBuilder, classOf[SharedFileSystemExpressionFunctions]) diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendLifecycleActorFactory.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendLifecycleActorFactory.scala new file mode 100644 index 000000000..d4b18a35b --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendLifecycleActorFactory.scala @@ -0,0 +1,31 @@ +package cromwell.backend.impl.tes + 
+import akka.actor.ActorRef +import cromwell.backend._ +import cromwell.backend.standard._ +import cromwell.core.JobExecutionToken.JobExecutionTokenType +import net.ceedubs.ficus.Ficus._ +import wdl4s.wdl.WdlTaskCall + +case class TesBackendLifecycleActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) + extends StandardLifecycleActorFactory { + + override lazy val initializationActorClass: Class[_ <: StandardInitializationActor] = classOf[TesInitializationActor] + + override lazy val asyncExecutionActorClass: Class[_ <: StandardAsyncExecutionActor] = + classOf[TesAsyncBackendJobExecutionActor] + + override def jobIdKey: String = TesAsyncBackendJobExecutionActor.JobIdKey + + val tesConfiguration = new TesConfiguration(configurationDescriptor) + + override val jobExecutionTokenType: JobExecutionTokenType = { + val concurrentJobLimit = configurationDescriptor.backendConfig.as[Option[Int]]("concurrent-job-limit") + JobExecutionTokenType(name, concurrentJobLimit) + } + + override def workflowInitializationActorParams(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, calls: Set[WdlTaskCall], + serviceRegistryActor: ActorRef, restarting: Boolean): StandardInitializationActorParams = { + TesInitializationActorParams(workflowDescriptor, calls, tesConfiguration, serviceRegistryActor) + } +} diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala new file mode 100644 index 000000000..9a499dde5 --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala @@ -0,0 +1,8 @@ +package cromwell.backend.impl.tes + +import cromwell.backend.BackendConfigurationDescriptor + +class TesConfiguration(val configurationDescriptor: BackendConfigurationDescriptor) { + val endpointURL = configurationDescriptor.backendConfig.getString("endpoint") + val runtimeConfig = 
configurationDescriptor.backendRuntimeConfig +} diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesInitializationActor.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesInitializationActor.scala new file mode 100644 index 000000000..dc055c898 --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesInitializationActor.scala @@ -0,0 +1,69 @@ +package cromwell.backend.impl.tes + +import akka.actor.ActorRef +import cats.data.Validated.{Invalid, Valid} +import cats.instances.future._ +import cats.instances.list._ +import cats.syntax.traverse._ +import cromwell.backend.standard._ +import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor} +import cromwell.core.path.{DefaultPathBuilder, PathBuilder} +import cromwell.filesystems.gcs.{GcsPathBuilderFactory, GoogleConfiguration} +import lenthall.exception.MessageAggregation +import net.ceedubs.ficus.Ficus._ +import wdl4s.wdl.WdlTaskCall + +import scala.concurrent.Future + +case class TesInitializationActorParams +( + workflowDescriptor: BackendWorkflowDescriptor, + calls: Set[WdlTaskCall], + tesConfiguration: TesConfiguration, + serviceRegistryActor: ActorRef +) extends StandardInitializationActorParams { + override val configurationDescriptor: BackendConfigurationDescriptor = tesConfiguration.configurationDescriptor +} + +class TesInitializationActor(params: TesInitializationActorParams) + extends StandardInitializationActor(params) { + + private val tesConfiguration = params.tesConfiguration + + private implicit val system = context.system + + /** + * If the backend sets a gcs authentication mode, try to create a PathBuilderFactory with it. 
+ */ + lazy val gcsPathBuilderFactory: Option[GcsPathBuilderFactory] = { + configurationDescriptor.backendConfig.as[Option[String]]("filesystems.gcs.auth") map { configAuth => + val googleConfiguration = GoogleConfiguration(configurationDescriptor.globalConfig) + googleConfiguration.auth(configAuth) match { + case Valid(auth) => GcsPathBuilderFactory(auth, googleConfiguration.applicationName) + case Invalid(error) => throw new MessageAggregation { + override def exceptionContext: String = "Failed to parse gcs auth configuration" + + override def errorMessages: Traversable[String] = error.toList + } + } + } + } + + override lazy val pathBuilders: Future[List[PathBuilder]] = + gcsPathBuilderFactory.toList.traverse(_.withOptions(workflowDescriptor.workflowOptions)).map(_ ++ Option(DefaultPathBuilder)) + + override lazy val workflowPaths: Future[TesWorkflowPaths] = pathBuilders map { + new TesWorkflowPaths(workflowDescriptor, tesConfiguration.configurationDescriptor.backendConfig, _) + } + + override lazy val runtimeAttributesBuilder: StandardValidatedRuntimeAttributesBuilder = + TesRuntimeAttributes.runtimeAttributesBuilder(tesConfiguration.runtimeConfig) + + override def beforeAll(): Future[Option[BackendInitializationData]] = { + workflowPaths map { paths => + publishWorkflowRoot(paths.workflowRoot.toString) + paths.workflowRoot.createPermissionedDirectories() + Option(TesBackendInitializationData(paths, runtimeAttributesBuilder, tesConfiguration)) + } + } +} diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesJobCachingActorHelper.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesJobCachingActorHelper.scala new file mode 100644 index 000000000..58828915f --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesJobCachingActorHelper.scala @@ -0,0 +1,22 @@ +package cromwell.backend.impl.tes + + +import akka.actor.Actor +import cromwell.backend.standard.StandardCachingActorHelper +import 
cromwell.core.logging.JobLogging + +trait TesJobCachingActorHelper extends StandardCachingActorHelper { + this: Actor with JobLogging => + + lazy val initializationData: TesBackendInitializationData = { + backendInitializationDataAs[TesBackendInitializationData] + } + + lazy val tesWorkflowPaths: TesWorkflowPaths = workflowPaths.asInstanceOf[TesWorkflowPaths] + + lazy val tesJobPaths: TesJobPaths = jobPaths.asInstanceOf[TesJobPaths] + + lazy val tesConfiguration: TesConfiguration = initializationData.tesConfiguration + + lazy val runtimeAttributes = TesRuntimeAttributes(validatedRuntimeAttributes, tesConfiguration.runtimeConfig) +} diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesJobPaths.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesJobPaths.scala new file mode 100644 index 000000000..b270d0864 --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesJobPaths.scala @@ -0,0 +1,62 @@ +package cromwell.backend.impl.tes + +import com.typesafe.config.Config +import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} +import cromwell.backend.io.{JobPaths, WorkflowPaths} +import cromwell.core.path._ + +object TesJobPaths { + def apply(jobKey: BackendJobDescriptorKey, + workflowDescriptor: BackendWorkflowDescriptor, + config: Config, + pathBuilders: List[PathBuilder] = WorkflowPaths.DefaultPathBuilders) = { + val workflowPaths = TesWorkflowPaths(workflowDescriptor, config, pathBuilders) + new TesJobPaths(workflowPaths, jobKey) + } +} + +case class TesJobPaths private[tes] (override val workflowPaths: TesWorkflowPaths, + jobKey: BackendJobDescriptorKey) extends JobPaths { + + import JobPaths._ + + override lazy val callExecutionRoot = { + callRoot.resolve("execution") + } + val callDockerRoot = callPathBuilder(workflowPaths.dockerWorkflowRoot, jobKey) + val callExecutionDockerRoot = callDockerRoot.resolve("execution") + val callInputsDockerRoot = 
callDockerRoot.resolve("inputs") + val callInputsRoot = callRoot.resolve("inputs") + + // Given an output path, return a path localized to the storage file system + def storageOutput(path: String): String = { + callExecutionRoot.resolve(path).toString + } + + def containerInput(path: String): String = { + cleanContainerInputPath(callInputsDockerRoot, path) + } + + // Given an output path, return a path localized to the container file system + def containerOutput(cwd: Path, path: String): String = containerExec(cwd, path) + + // TODO this could be used to create a separate directory for outputs e.g. + // callDockerRoot.resolve("outputs").resolve(name).toString + + // Given an file name, return a path localized to the container's execution directory + def containerExec(cwd: Path, path: String): String = { + cwd.resolve(path).toString + } + + private def cleanContainerInputPath(inputDir: Path, path: String): String = { + path match { + case p if p.startsWith("gs:") => + inputDir.resolve(p.replaceFirst("gs:/?/?", "")).pathAsString + case p if p.startsWith(callExecutionRoot.pathAsString) => + val f = DefaultPathBuilder.get(p) + callExecutionDockerRoot.resolve(f.name).pathAsString + case p => + inputDir.resolve(p).pathAsString + } + } +} diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesResponseJsonFormatter.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesResponseJsonFormatter.scala new file mode 100644 index 000000000..79a2be702 --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesResponseJsonFormatter.scala @@ -0,0 +1,21 @@ +package cromwell.backend.impl.tes + +import spray.json._ + +final case class CreateTaskResponse(id: String) +final case class MinimalTaskView(id: String, state: String) +final case class CancelTaskResponse() + +object TesResponseJsonFormatter extends DefaultJsonProtocol { + implicit val resourcesFormat = jsonFormat5(Resources) + implicit val taskParameterFormat 
= jsonFormat6(TaskParameter) + implicit val portsFormat = jsonFormat2(Ports) + implicit val executorFormat = jsonFormat8(Executor) + implicit val executorLogFormat = jsonFormat7(ExecutorLog) + implicit val outputFileLogFormat = jsonFormat3(OutputFileLog) + implicit val taskLogFormat = jsonFormat5(TaskLog) + implicit val taskFormat = jsonFormat12(Task) + implicit val minimalTaskView = jsonFormat2(MinimalTaskView) + implicit val createTaskResponseFormat = jsonFormat1(CreateTaskResponse) + implicit val cancelTaskResponseFormat = jsonFormat0(CancelTaskResponse) +} diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesRuntimeAttributes.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesRuntimeAttributes.scala new file mode 100644 index 000000000..b6b09446a --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesRuntimeAttributes.scala @@ -0,0 +1,81 @@ +package cromwell.backend.impl.tes + +import cats.syntax.validated._ +import com.typesafe.config.Config +import cromwell.backend.MemorySize +import cromwell.backend.standard.StandardValidatedRuntimeAttributesBuilder +import cromwell.backend.validation._ +import lenthall.validation.ErrorOr.ErrorOr +import wdl4s.wdl.values.{WdlString, WdlValue} + +case class TesRuntimeAttributes(continueOnReturnCode: ContinueOnReturnCode, + dockerImage: String, + dockerWorkingDir: Option[String], + failOnStderr: Boolean, + cpu: Option[Int], + memory: Option[MemorySize], + disk: Option[MemorySize]) + +object TesRuntimeAttributes { + + val DockerWorkingDirKey = "dockerWorkingDir" + val DiskSizeKey = "disk" + + private def cpuValidation(runtimeConfig: Option[Config]): OptionalRuntimeAttributesValidation[Int] = CpuValidation.optional + + private def failOnStderrValidation(runtimeConfig: Option[Config]) = FailOnStderrValidation.default(runtimeConfig) + + private def continueOnReturnCodeValidation(runtimeConfig: Option[Config]) = 
ContinueOnReturnCodeValidation.default(runtimeConfig) + + private def diskSizeValidation(runtimeConfig: Option[Config]): OptionalRuntimeAttributesValidation[MemorySize] = MemoryValidation.optional(DiskSizeKey) + + private def memoryValidation(runtimeConfig: Option[Config]): OptionalRuntimeAttributesValidation[MemorySize] = MemoryValidation.optional(RuntimeAttributesKeys.MemoryKey) + + private val dockerValidation: RuntimeAttributesValidation[String] = DockerValidation.instance + + private val dockerWorkingDirValidation: OptionalRuntimeAttributesValidation[String] = DockerWorkingDirValidation.optional + + def runtimeAttributesBuilder(backendRuntimeConfig: Option[Config]): StandardValidatedRuntimeAttributesBuilder = + StandardValidatedRuntimeAttributesBuilder.default(backendRuntimeConfig).withValidation( + cpuValidation(backendRuntimeConfig), + memoryValidation(backendRuntimeConfig), + diskSizeValidation(backendRuntimeConfig), + dockerValidation, + dockerWorkingDirValidation + ) + + def apply(validatedRuntimeAttributes: ValidatedRuntimeAttributes, backendRuntimeConfig: Option[Config]): TesRuntimeAttributes = { + val docker: String = RuntimeAttributesValidation.extract(dockerValidation, validatedRuntimeAttributes) + val dockerWorkingDir: Option[String] = RuntimeAttributesValidation.extractOption(dockerWorkingDirValidation.key, validatedRuntimeAttributes) + val cpu: Option[Int] = RuntimeAttributesValidation.extractOption(cpuValidation(backendRuntimeConfig).key, validatedRuntimeAttributes) + val memory: Option[MemorySize] = RuntimeAttributesValidation.extractOption(memoryValidation(backendRuntimeConfig).key, validatedRuntimeAttributes) + val disk: Option[MemorySize] = RuntimeAttributesValidation.extractOption(diskSizeValidation(backendRuntimeConfig).key, validatedRuntimeAttributes) + val failOnStderr: Boolean = + RuntimeAttributesValidation.extract(failOnStderrValidation(backendRuntimeConfig), validatedRuntimeAttributes) + val continueOnReturnCode: ContinueOnReturnCode 
= + RuntimeAttributesValidation.extract(continueOnReturnCodeValidation(backendRuntimeConfig), validatedRuntimeAttributes) + + new TesRuntimeAttributes( + continueOnReturnCode, + docker, + dockerWorkingDir, + failOnStderr, + cpu, + memory, + disk + ) + } +} + +object DockerWorkingDirValidation { + lazy val instance: RuntimeAttributesValidation[String] = new DockerWorkingDirValidation + lazy val optional: OptionalRuntimeAttributesValidation[String] = instance.optional +} + +class DockerWorkingDirValidation extends StringRuntimeAttributesValidation(TesRuntimeAttributes.DockerWorkingDirKey) { + // NOTE: Docker's current test specs don't like WdlInteger, etc. auto converted to WdlString. + override protected def validateValue: PartialFunction[WdlValue, ErrorOr[String]] = { + case WdlString(value) => value.validNel + } +} + diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala new file mode 100644 index 000000000..f106d0b3e --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala @@ -0,0 +1,234 @@ +package cromwell.backend.impl.tes + +import cromwell.backend.standard.StandardExpressionFunctions +import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor} +import cromwell.core.logging.JobLogger +import cromwell.core.path.{DefaultPathBuilder, Path} +import wdl4s.wdl.FullyQualifiedName +import wdl4s.wdl.expression.NoFunctions +import wdl4s.parser.MemoryUnit +import wdl4s.wdl.values.{WdlFile, WdlGlobFile, WdlSingleFile, WdlValue} + +final case class TesTask(jobDescriptor: BackendJobDescriptor, + configurationDescriptor: BackendConfigurationDescriptor, + jobLogger: JobLogger, + tesPaths: TesJobPaths, + runtimeAttributes: TesRuntimeAttributes, + containerWorkDir: Path, + commandScriptContents: String, + backendEngineFunctions: StandardExpressionFunctions, + dockerImageUsed: String) { + + private val 
workflowDescriptor = jobDescriptor.workflowDescriptor + private val workflowName = workflowDescriptor.workflow.unqualifiedName + private val fullyQualifiedTaskName = jobDescriptor.call.fullyQualifiedName + val name: String = fullyQualifiedTaskName + val description: String = jobDescriptor.toString + + // TODO validate "project" field of workflowOptions + val project = { + workflowDescriptor.workflowOptions.getOrElse("project", "") + } + + // contains the script to be executed + private val commandScript = TaskParameter( + Option("commandScript"), + Option(fullyQualifiedTaskName + ".commandScript"), + None, + tesPaths.callExecutionDockerRoot.resolve("script").toString, + Option("FILE"), + Option(commandScriptContents) + ) + + private val commandScriptOut = commandScript.copy( + url = Option(tesPaths.script.toString), + contents = None + ) + + private def writeFunctionFiles(commandLineValueMapper: WdlValue => WdlValue): Map[FullyQualifiedName, Seq[WdlFile]] = { + val commandLineMappedInputs = jobDescriptor.inputDeclarations map { + case (declaration, value) => declaration.fullyQualifiedName -> commandLineValueMapper(value) + } + + jobDescriptor + .call + .task + .evaluateFilesFromCommand(commandLineMappedInputs, backendEngineFunctions) + .map { + case (expression, file) => expression.toWdlString -> Seq(file) + } + } + + private val callInputFiles: Map[FullyQualifiedName, Seq[WdlFile]] = jobDescriptor + .fullyQualifiedInputs + .mapValues { + _.collectAsSeq { case w: WdlFile => w } + } + + def inputs(commandLineValueMapper: WdlValue => WdlValue): Seq[TaskParameter] = (callInputFiles ++ writeFunctionFiles(commandLineValueMapper)) + .flatMap { + case (fullyQualifiedName, files) => files.zipWithIndex.map { + case (f, index) => TaskParameter( + Option(fullyQualifiedName + "." + index), + Option(workflowName + "." + fullyQualifiedName + "." 
+ index), + Option(f.value), + tesPaths.containerInput(f.value), + Option("FILE"), + None + ) + } + }.toList ++ Seq(commandScript) + + // TODO add TES logs to standard outputs + private val standardOutputs = Seq("rc", "stdout", "stderr").map { + f => + TaskParameter( + Option(f), + Option(fullyQualifiedTaskName + "." + f), + Option(tesPaths.storageOutput(f)), + tesPaths.containerOutput(containerWorkDir, f), + Option("FILE"), + None + ) + } + + // TODO extract output file variable names and match with Files below + // The problem is that we only care about the files CREATED, so stdout and input redirects are ignored and + // thus we can't directly match the names returned here to the files returned below. Also we have to consider Arrays + // + // private val outputFileNames = jobDescriptor.call.task.outputs + // .filter(o => o.wdlType.toWdlString == "Array[File]" || o.wdlType.toWdlString == "File") + // .map(_.unqualifiedName) + + // extract output files + // if output paths are absolute we will ignore them here and assume they are redirects + private val outputWdlFiles: Seq[WdlFile] = jobDescriptor.call.task + .findOutputFiles(jobDescriptor.fullyQualifiedInputs, NoFunctions) + .filter(o => !DefaultPathBuilder.get(o.valueString).isAbsolute) + + private val wdlOutputs = outputWdlFiles + .zipWithIndex + .flatMap { + case (f: WdlSingleFile, index) => + val outputFile = f.value + Seq( + TaskParameter( + Option(fullyQualifiedTaskName + ".output." + index), + Option(fullyQualifiedTaskName + ".output." + index), + Option(tesPaths.storageOutput(outputFile)), + tesPaths.containerOutput(containerWorkDir, outputFile), + Option("FILE"), + None + ) + ) + case (g: WdlGlobFile, index) => + val globName = backendEngineFunctions.globName(g.value) + val globDirName = "globDir." + index + val globDirectory = globName + "/" + val globListName = "globList." 
+ index + val globListFile = globName + ".list" + Seq( + TaskParameter( + Option(globDirName), + Option(fullyQualifiedTaskName + "." + globDirName), + Option(tesPaths.storageOutput(globDirectory)), + tesPaths.containerOutput(containerWorkDir, globDirectory), + Option("DIRECTORY"), + None + ), + TaskParameter( + Option(globListName), + Option(fullyQualifiedTaskName + "." + globListName), + Option(tesPaths.storageOutput(globListFile)), + tesPaths.containerOutput(containerWorkDir, globListFile), + Option("FILE"), + None + ) + ) + } + + val outputs: Seq[TaskParameter] = wdlOutputs ++ standardOutputs ++ Seq(commandScriptOut) + + private val disk :: ram :: _ = Seq(runtimeAttributes.disk, runtimeAttributes.memory) map { + case Some(x) => + Option(x.to(MemoryUnit.GB).amount) + case None => + None + } + + val resources = Resources( + runtimeAttributes.cpu, + ram, + disk, + Option(false), + None + ) + + val executors = Seq(Executor( + dockerImageUsed, + Seq("/bin/bash", commandScript.path), + runtimeAttributes.dockerWorkingDir, + Option(tesPaths.containerOutput(containerWorkDir, "stdout")), + Option(tesPaths.containerOutput(containerWorkDir, "stderr")), + None, + None, + None + )) +} + +// Field requirements in classes below based off GA4GH schema +final case class Task(id: Option[String], + state: Option[String], + name: Option[String], + description: Option[String], + project: Option[String], + inputs: Option[Seq[TaskParameter]], + outputs: Option[Seq[TaskParameter]], + resources: Option[Resources], + executors: Seq[Executor], + volumes: Option[Seq[String]], + tags: Option[Map[String, String]], + logs: Option[Seq[TaskLog]]) + +final case class Executor(image_name: String, + cmd: Seq[String], + workdir: Option[String], + stdout: Option[String], + stderr: Option[String], + stdin: Option[String], + environ: Option[Map[String, String]], + ports: Option[Seq[Ports]]) + +final case class TaskParameter(name: Option[String], + description: Option[String], + url: Option[String], + 
path: String, + `type`: Option[String], + contents: Option[String]) + +final case class Resources(cpu_cores: Option[Int], + ram_gb: Option[Double], + size_gb: Option[Double], + preemptible: Option[Boolean], + zones: Option[Seq[String]]) + +final case class OutputFileLog(url: String, + path: String, + size_bytes: Int) + +final case class TaskLog(start_time: Option[String], + end_time: Option[String], + metadata: Option[Map[String, String]], + logs: Option[Seq[ExecutorLog]], + outputs: Option[Seq[OutputFileLog]]) + +final case class ExecutorLog(start_time: Option[String], + end_time: Option[String], + stdout: Option[String], + stderr: Option[String], + exit_code: Option[Int], + host_ip: Option[String], + ports: Option[Seq[Ports]]) + +final case class Ports(host: Option[String], + container: String) diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesWorkflowPaths.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesWorkflowPaths.scala new file mode 100644 index 000000000..c85cee1a0 --- /dev/null +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesWorkflowPaths.scala @@ -0,0 +1,26 @@ +package cromwell.backend.impl.tes + +import com.typesafe.config.Config +import cromwell.backend.io.WorkflowPaths +import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} +import cromwell.core.path.{PathBuilder, PathFactory} +import net.ceedubs.ficus.Ficus._ + +case class TesWorkflowPaths(override val workflowDescriptor: BackendWorkflowDescriptor, + override val config: Config, + override val pathBuilders: List[PathBuilder] = WorkflowPaths.DefaultPathBuilders) extends WorkflowPaths { + + val DockerRootString = config.as[Option[String]]("dockerRoot").getOrElse("/cromwell-executions") + var DockerRoot = PathFactory.buildPath(DockerRootString, pathBuilders) + if (!DockerRoot.isAbsolute) { + DockerRoot = PathFactory.buildPath("/".concat(DockerRootString), pathBuilders) + } + val dockerWorkflowRoot = 
workflowPathBuilder(DockerRoot) + + override def toJobPaths(workflowPaths: WorkflowPaths, + jobKey: BackendJobDescriptorKey): TesJobPaths = { + new TesJobPaths(workflowPaths.asInstanceOf[TesWorkflowPaths], jobKey) + } + + override protected def withDescriptor(workflowDescriptor: BackendWorkflowDescriptor): WorkflowPaths = this.copy(workflowDescriptor = workflowDescriptor) +} diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala new file mode 100644 index 000000000..f54d2040c --- /dev/null +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesInitializationActorSpec.scala @@ -0,0 +1,112 @@ +package cromwell.backend.impl.tes + +import java.util.UUID + +import akka.actor.Props +import akka.testkit.{EventFilter, ImplicitSender, TestDuration} +import com.typesafe.config.{Config, ConfigFactory} +import cromwell.backend.BackendSpec._ +import cromwell.backend.BackendWorkflowInitializationActor.{InitializationFailed, InitializationSuccess, Initialize} +import cromwell.backend.async.RuntimeAttributeValidationFailures +import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor} +import cromwell.core.TestKitSuite +import cromwell.core.logging.LoggingTest._ +import org.scalatest.{Matchers, WordSpecLike} +import wdl4s.wdl.WdlTaskCall + +import scala.concurrent.duration._ + +class TesInitializationActorSpec extends TestKitSuite("TesInitializationActorSpec") + with WordSpecLike with Matchers with ImplicitSender { + val Timeout = 10.second.dilated + + val HelloWorld = + s""" + |task hello { + | String addressee = "you" + | command { + | echo "Hello $${addressee}!" 
+ | } + | output { + | String salutation = read_string(stdout()) + | } + | + | RUNTIME + |} + | + |workflow wf_hello { + | call hello + |} + """.stripMargin + + val globalConfig: Config = ConfigFactory.parseString("") + + val backendConfigTemplate: String = + """ + |// Base bucket for workflow executions + |root = "cromwell-executions" + |endpoint = "0.0.0.0" + | + |// Polling for completion backs-off gradually for slower-running jobs. + |// This is the maximum polling interval (in seconds): + |maximum-polling-interval = 600 + | + |default-runtime-attributes { + | cpu: 1 + | failOnStderr: false + | continueOnReturnCode: 0 + | memory: "2 GB" + | disk: "2 GB" + | # The keys below have been commented out as they are optional runtime attributes. + | # dockerWorkingDir + | # docker + |} + |""".stripMargin + + + private def getActorRef(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[WdlTaskCall], + conf: BackendConfigurationDescriptor) = { + val params = TesInitializationActorParams(workflowDescriptor, calls, new TesConfiguration(conf), emptyActor) + val props = Props(new TesInitializationActor(params)) + system.actorOf(props, "TesInitializationActor" + UUID.randomUUID) + } + + val backendConfig: Config = ConfigFactory.parseString(backendConfigTemplate) + val conf = BackendConfigurationDescriptor(backendConfig, globalConfig) + + "TesInitializationActor" should { + "log a warning message when there are unsupported runtime attributes" in { + within(Timeout) { + val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, + runtime = """runtime { docker: "ubuntu/latest" test: true }""") + val backend = getActorRef(workflowDescriptor, workflowDescriptor.workflow.taskCalls, conf) + val eventPattern = + "Key/s [test] is/are not supported by backend. Unsupported attributes will not be part of job executions." + EventFilter.warning(pattern = escapePattern(eventPattern), occurrences = 1) intercept { + backend ! 
Initialize + } + expectMsgPF() { + case InitializationSuccess(_) => // Docker entry is present. + case InitializationFailed(failure) => fail(s"InitializationSuccess was expected but got $failure") + } + } + } + + "return InitializationFailed when docker runtime attribute key is not present" in { + within(Timeout) { + val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { }""") + val backend = getActorRef(workflowDescriptor, workflowDescriptor.workflow.taskCalls, conf) + backend ! Initialize + expectMsgPF() { + case InitializationFailed(failure) => + failure match { + case exception: RuntimeAttributeValidationFailures => + if (!exception.getMessage.equals("Runtime validation failed:\nTask hello has an invalid runtime attribute docker = !! NOT FOUND !!")) + fail("Exception message is not equal to 'Runtime validation failed:\nTask hello has an invalid runtime attribute docker = !! NOT FOUND !!'.") + } + } + } + } + } +} + diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesJobPathsSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesJobPathsSpec.scala new file mode 100644 index 000000000..85fc95b89 --- /dev/null +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesJobPathsSpec.scala @@ -0,0 +1,51 @@ +package cromwell.backend.impl.tes + +import better.files._ +import cromwell.backend.{BackendJobDescriptorKey, BackendSpec} +import org.scalatest.{FlatSpec, Matchers} +import wdl4s.wdl.WdlTaskCall + +class TesJobPathsSpec extends FlatSpec with Matchers with BackendSpec { + + "JobPaths" should "provide correct paths for a job" in { + + val wd = buildWorkflowDescriptor(TestWorkflows.HelloWorld) + val call: WdlTaskCall = wd.workflow.taskCalls.head + val jobKey = BackendJobDescriptorKey(call, None, 1) + val jobPaths = TesJobPaths(jobKey, wd, TesTestConfig.backendConfig) + val id = wd.id + jobPaths.callRoot.toString shouldBe + 
File(s"local-cromwell-executions/wf_hello/$id/call-hello").pathAsString + jobPaths.callExecutionRoot.toString shouldBe + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution").pathAsString + jobPaths.returnCode.toString shouldBe + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/rc").pathAsString + jobPaths.script.toString shouldBe + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/script").pathAsString + jobPaths.stderr.toString shouldBe + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/stderr").pathAsString + jobPaths.stdout.toString shouldBe + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/stdout").pathAsString + jobPaths.callExecutionRoot.toString shouldBe + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution").pathAsString + jobPaths.callDockerRoot.toString shouldBe + File(s"/cromwell-executions/wf_hello/$id/call-hello").pathAsString + jobPaths.callExecutionDockerRoot.toString shouldBe + File(s"/cromwell-executions/wf_hello/$id/call-hello/execution").pathAsString + + val jobKeySharded = BackendJobDescriptorKey(call, Option(0), 1) + val jobPathsSharded = TesJobPaths(jobKeySharded, wd, TesTestConfig.backendConfig) + jobPathsSharded.callExecutionRoot.toString shouldBe + File(s"local-cromwell-executions/wf_hello/$id/call-hello/shard-0/execution").pathAsString + + val jobKeyAttempt = BackendJobDescriptorKey(call, None, 2) + val jobPathsAttempt = TesJobPaths(jobKeyAttempt, wd, TesTestConfig.backendConfig) + jobPathsAttempt.callExecutionRoot.toString shouldBe + File(s"local-cromwell-executions/wf_hello/$id/call-hello/attempt-2/execution").pathAsString + + val jobKeyShardedAttempt = BackendJobDescriptorKey(call, Option(0), 2) + val jobPathsShardedAttempt = TesJobPaths(jobKeyShardedAttempt, wd, TesTestConfig.backendConfig) + jobPathsShardedAttempt.callExecutionRoot.toString shouldBe + 
File(s"local-cromwell-executions/wf_hello/$id/call-hello/shard-0/attempt-2/execution").pathAsString + } +} diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesRuntimeAttributesSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesRuntimeAttributesSpec.scala new file mode 100644 index 000000000..346739e5c --- /dev/null +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesRuntimeAttributesSpec.scala @@ -0,0 +1,182 @@ +package cromwell.backend.impl.tes + +import cromwell.backend.validation.ContinueOnReturnCodeSet +import cromwell.backend.{BackendConfigurationDescriptor, MemorySize, RuntimeAttributeDefinition, TestConfig} +import cromwell.core.WorkflowOptions +import org.scalatest.{Matchers, WordSpecLike} +import org.slf4j.helpers.NOPLogger +import spray.json._ +import wdl4s.wdl.types.{WdlArrayType, WdlIntegerType, WdlStringType} +import wdl4s.wdl.values.{WdlArray, WdlBoolean, WdlInteger, WdlString, WdlValue} + +class TesRuntimeAttributesSpec extends WordSpecLike with Matchers { + + val expectedDefaults = new TesRuntimeAttributes( + ContinueOnReturnCodeSet(Set(0)), + "ubuntu:latest", + None, + false, + None, + None, + None + ) + + val expectedDefaultsPlusUbuntuDocker = expectedDefaults.copy(dockerImage = "ubuntu:latest") + + def workflowOptionsWithDefaultRA(defaults: Map[String, JsValue]) = { + WorkflowOptions(JsObject(Map( + "default_runtime_attributes" -> JsObject(defaults) + ))) + } + + "TesRuntimeAttributes" should { + + "throw an exception when there are no runtime attributes defined." 
in { + val runtimeAttributes = Map.empty[String, WdlValue] + assertFailure(runtimeAttributes, "Can't find an attribute value for key docker") + } + + "validate a valid Docker entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest")) + val expectedRuntimeAttributes = expectedDefaults.copy(dockerImage = "ubuntu:latest") + assertSuccess(runtimeAttributes, expectedRuntimeAttributes) + } + + "fail to validate an invalid Docker entry" in { + val runtimeAttributes = Map("docker" -> WdlInteger(1)) + assertFailure(runtimeAttributes, "Expecting docker runtime attribute to be a String") + } + + "validate a valid failOnStderr entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "failOnStderr" -> WdlBoolean(true)) + val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(failOnStderr = true) + assertSuccess(runtimeAttributes, expectedRuntimeAttributes) + } + + "fail to validate an invalid failOnStderr entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "failOnStderr" -> WdlString("yes")) + assertFailure(runtimeAttributes, "Expecting failOnStderr runtime attribute to be a Boolean or a String with values of 'true' or 'false'") + } + + "validate a valid continueOnReturnCode entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "continueOnReturnCode" -> WdlInteger(1)) + val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(continueOnReturnCode = ContinueOnReturnCodeSet(Set(1))) + assertSuccess(runtimeAttributes, expectedRuntimeAttributes) + } + + "validate a valid continueOnReturnCode array entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "continueOnReturnCode" -> WdlArray(WdlArrayType(WdlIntegerType), Array(WdlInteger(1), WdlInteger(2)))) + val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(continueOnReturnCode = ContinueOnReturnCodeSet(Set(1, 2))) + assertSuccess(runtimeAttributes, 
expectedRuntimeAttributes) + } + + "coerce then validate a valid continueOnReturnCode array entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "continueOnReturnCode" -> WdlArray(WdlArrayType(WdlStringType), Array(WdlString("1"), WdlString("2")))) + val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(continueOnReturnCode = ContinueOnReturnCodeSet(Set(1, 2))) + assertSuccess(runtimeAttributes, expectedRuntimeAttributes) + } + + "fail to validate an invalid continueOnReturnCode entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "continueOnReturnCode" -> WdlString("value")) + assertFailure(runtimeAttributes, "Expecting continueOnReturnCode runtime attribute to be either a Boolean, a String 'true' or 'false', or an Array[Int]") + } + + "validate a valid cpu entry" in assertSuccess( + Map("docker" -> WdlString("ubuntu:latest"), "cpu" -> WdlInteger(2)), + expectedDefaultsPlusUbuntuDocker.copy(cpu = Option(2)) + ) + + "validate a valid cpu string entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "cpu" -> WdlString("2")) + val expectedRuntimeAttributes = expectedDefaultsPlusUbuntuDocker.copy(cpu = Option(2)) + assertSuccess(runtimeAttributes, expectedRuntimeAttributes) + } + + "fail to validate an invalid cpu entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "cpu" -> WdlString("value")) + assertFailure(runtimeAttributes, "Expecting cpu runtime attribute to be an Integer") + } + + "validate a valid memory entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "memory" -> WdlString("1 GB")) + val expectedRuntimeAttributes = expectedDefaults.copy(memory = Option(MemorySize.parse("1 GB").get)) + assertSuccess(runtimeAttributes, expectedRuntimeAttributes) + } + + "fail to validate an invalid memory entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "memory" -> WdlString("blah")) + 
assertFailure(runtimeAttributes, "Expecting memory runtime attribute to be an Integer or String with format '8 GB'") + } + + "validate a valid disk entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "disk" -> WdlString("1 GB")) + val expectedRuntimeAttributes = expectedDefaults.copy(disk = Option(MemorySize.parse("1 GB").get)) + assertSuccess(runtimeAttributes, expectedRuntimeAttributes) + } + + "fail to validate an invalid disk entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "disk" -> WdlString("blah")) + assertFailure(runtimeAttributes, "Expecting disk runtime attribute to be an Integer or String with format '8 GB'") + } + + "validate a valid dockerWorkingDir entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "dockerWorkingDir" -> WdlString("/tmp")) + val expectedRuntimeAttributes = expectedDefaults.copy(dockerWorkingDir = Option("/tmp")) + assertSuccess(runtimeAttributes, expectedRuntimeAttributes) + } + + "fail to validate an invalid dockerWorkingDir entry" in { + val runtimeAttributes = Map("docker" -> WdlString("ubuntu:latest"), "dockerWorkingDir" -> WdlInteger(1)) + assertFailure(runtimeAttributes, "Expecting dockerWorkingDir runtime attribute to be a String") + } + + "use reasonable default values" in assertSuccess( + Map("docker" -> WdlString("ubuntu:latest")), + expectedDefaultsPlusUbuntuDocker + ) + } + + private val mockConfigurationDescriptor = BackendConfigurationDescriptor(TesTestConfig.backendConfig, TestConfig.globalConfig) + private val mockTesConfiguration = new TesConfiguration(mockConfigurationDescriptor) + + private def assertSuccess(runtimeAttributes: Map[String, WdlValue], + expectedRuntimeAttributes: TesRuntimeAttributes, + workflowOptions: WorkflowOptions = emptyWorkflowOptions): Unit = { + + try { + val actualRuntimeAttributes = toTesRuntimeAttributes(runtimeAttributes, workflowOptions, mockTesConfiguration) + assert(actualRuntimeAttributes == 
expectedRuntimeAttributes) + } catch { + case ex: RuntimeException => fail(s"Exception was not expected but received: ${ex.getMessage}") + } + () + } + + private def assertFailure(runtimeAttributes: Map[String, WdlValue], + exMsg: String, + workflowOptions: WorkflowOptions = emptyWorkflowOptions): Unit = { + try { + toTesRuntimeAttributes(runtimeAttributes, workflowOptions, mockTesConfiguration) + fail("A RuntimeException was expected.") + } catch { + case ex: RuntimeException => assert(ex.getMessage.contains(exMsg)) + } + () + } + + private val emptyWorkflowOptions = WorkflowOptions.fromMap(Map.empty).get + private val staticRuntimeAttributeDefinitions: Set[RuntimeAttributeDefinition] = + TesRuntimeAttributes.runtimeAttributesBuilder(mockTesConfiguration.runtimeConfig).definitions.toSet + + + private def toTesRuntimeAttributes(runtimeAttributes: Map[String, WdlValue], + workflowOptions: WorkflowOptions, + tesConfiguration: TesConfiguration): TesRuntimeAttributes = { + val runtimeAttributesBuilder = TesRuntimeAttributes.runtimeAttributesBuilder(tesConfiguration.runtimeConfig) + val defaultedAttributes = RuntimeAttributeDefinition.addDefaultsToAttributes( + staticRuntimeAttributeDefinitions, workflowOptions)(runtimeAttributes) + val validatedRuntimeAttributes = runtimeAttributesBuilder.build(defaultedAttributes, NOPLogger.NOP_LOGGER) + TesRuntimeAttributes(validatedRuntimeAttributes, tesConfiguration.runtimeConfig + ) + } +} diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTestConfig.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTestConfig.scala new file mode 100644 index 000000000..eb3085706 --- /dev/null +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTestConfig.scala @@ -0,0 +1,27 @@ +package cromwell.backend.impl.tes + +import com.typesafe.config.ConfigFactory + +object TesTestConfig { + + private val backendConfigString = + """ + |root = "local-cromwell-executions" + |dockerRoot = 
"/cromwell-executions" + |endpoint = "http://127.0.0.1:9000/v1/jobs" + | + |default-runtime-attributes { + | cpu: 1 + | failOnStderr: false + | continueOnReturnCode: 0 + | memory: "2 GB" + | disk: "2 GB" + | # The keys below have been commented out as they are optional runtime attributes. + | # dockerWorkingDir + | # docker + |} + |""".stripMargin + + val backendConfig = ConfigFactory.parseString(backendConfigString) +} + diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesWorkflowPathsSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesWorkflowPathsSpec.scala new file mode 100644 index 000000000..1fbefeec4 --- /dev/null +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesWorkflowPathsSpec.scala @@ -0,0 +1,55 @@ +package cromwell.backend.impl.tes + +import better.files._ +import cromwell.backend.{BackendJobBreadCrumb, BackendSpec, BackendWorkflowDescriptor} +import cromwell.core.{JobKey, WorkflowId} +import org.scalatest.{FlatSpec, Matchers} +import wdl4s.wdl.{WdlCall, WdlWorkflow} + +class TesWorkflowPathsSpec extends FlatSpec with Matchers with BackendSpec { + + "WorkflowPaths" should "provide correct paths for a workflow" in { + val wd = buildWorkflowDescriptor(TestWorkflows.HelloWorld) + val workflowPaths = TesWorkflowPaths(wd, TesTestConfig.backendConfig) + val id = wd.id + workflowPaths.workflowRoot.toString shouldBe + File(s"local-cromwell-executions/wf_hello/$id").pathAsString + workflowPaths.dockerWorkflowRoot.toString shouldBe + s"/cromwell-executions/wf_hello/$id" + } + + "WorkflowPaths" should "provide correct paths for a sub workflow" in { + val rootWd = mock[BackendWorkflowDescriptor] + val rootWorkflow = mock[WdlWorkflow] + val rootWorkflowId = WorkflowId.randomId() + rootWorkflow.unqualifiedName returns "rootWorkflow" + rootWd.workflow returns rootWorkflow + rootWd.id returns rootWorkflowId + + val subWd = mock[BackendWorkflowDescriptor] + val subWorkflow = mock[WdlWorkflow] + 
val subWorkflowId = WorkflowId.randomId() + subWorkflow.unqualifiedName returns "subWorkflow" + subWd.workflow returns subWorkflow + subWd.id returns subWorkflowId + + val call1 = mock[WdlCall] + call1.unqualifiedName returns "call1" + val call2 = mock[WdlCall] + call2.unqualifiedName returns "call2" + + val jobKey = new JobKey { + override def scope = call1 + override def tag: String = "tag1" + override def index: Option[Int] = Option(1) + override def attempt: Int = 2 + } + + subWd.breadCrumbs returns List(BackendJobBreadCrumb(rootWorkflow, rootWorkflowId, jobKey)) + subWd.id returns subWorkflowId + + val workflowPaths = new TesWorkflowPaths(subWd, TesTestConfig.backendConfig) + workflowPaths.workflowRoot.toString shouldBe File(s"local-cromwell-executions/rootWorkflow/$rootWorkflowId/call-call1/shard-1/attempt-2/subWorkflow/$subWorkflowId").pathAsString + workflowPaths.dockerWorkflowRoot.toString shouldBe s"/cromwell-executions/rootWorkflow/$rootWorkflowId/call-call1/shard-1/attempt-2/subWorkflow/$subWorkflowId" + } +} diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TestWorkflows.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TestWorkflows.scala new file mode 100644 index 000000000..e98ca5210 --- /dev/null +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TestWorkflows.scala @@ -0,0 +1,141 @@ +package cromwell.backend.impl.tes + +import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionResponse +import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor} + +object TestWorkflows { + + case class TestWorkflow(workflowDescriptor: BackendWorkflowDescriptor, + config: BackendConfigurationDescriptor, + expectedResponse: BackendJobExecutionResponse) + + val HelloWorld = + s""" + |task hello { + | String addressee = "you " + | command { + | echo "Hello $${addressee}!" 
+ | } + | output { + | String salutation = read_string(stdout()) + | } + | + | RUNTIME + |} + | + |workflow wf_hello { + | call hello + |} + |""".stripMargin + + val GoodbyeWorld = + """ + |task goodbye { + | command { + | sh -c "exit 1" + | } + | output { + | String out = read_string(stdout()) + | } + |} + | + |workflow wf_goodbye { + | call goodbye + |} + |""".stripMargin + + val InputFiles = + s""" + |task localize { + | File inputFileFromJson + | File inputFileFromCallInputs + | command { + | cat $${inputFileFromJson} + | echo "" + | cat $${inputFileFromCallInputs} + | } + | output { + | Array[String] out = read_lines(stdout()) + | } + | + | RUNTIME + |} + | + |workflow wf_localize { + | File workflowFile + | call localize { input: inputFileFromCallInputs = workflowFile } + |} + |""".stripMargin + + val Sleep10 = + """ + |task abort { + | command { + | sleep 10 + | echo "something after sleep" + | } + |} + | + |workflow wf_abort { + | call abort + |} + |""".stripMargin + + val Scatter = + s""" + |task scattering { + | Int intNumber + | command { + | echo $${intNumber} + | } + | output { + | Int out = read_string(stdout()) + | } + |} + | + |workflow wf_scattering { + | Array[Int] numbers = [1, 2, 3] + | scatter (i in numbers) { + | call scattering { input: intNumber = i } + | } + |} + |""".stripMargin + + val OutputProcess = { + """ + |task localize { + | File inputFile + | command { + | echo "Hello" > a + | mkdir dir + | echo "world" > dir/b + | } + | output { + | File o1 = "a" + | Array[File] o2 = ["a", "dir/b"] + | File o3 = inputFile + | } + |} + | + |workflow wf_localize { + | call localize + |} + |""".stripMargin + } + + val MissingOutputProcess = { + """ + |task localize { + | command { + | } + | output { + | File o1 = "c" + | } + |} + | + |workflow wf_localize { + | call localize + |} + |""".stripMargin + } +}