diff --git a/.gitattributes b/.gitattributes index 49c97c85f..9fcf38aa0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,6 +1,3 @@ -# This file can always be added to, line additions should never collide. -CHANGELOG.MD merge=union - # These files are text and should be normalized (Convert crlf => lf) *.scala text *.MD text diff --git a/.gitignore b/.gitignore index f9bb0a4f5..21f16ea18 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,19 @@ -.idea -log -target +# common scala config +*~ +.DS_Store .artifactory +.idea/* +!/.idea/inspectionProfiles/ +.idea/inspectionProfiles/* +!/.idea/inspectionProfiles/Project_Default.xml +target + +# custom config cromwell-executions cromwell-test-executions -cromwell-workflow-logs cromwell-test-workflow-logs +cromwell-workflow-logs local-cromwell-executions +log native +scripts/docker-compose-mysql/compose/mysql/data diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 000000000..b15d7f2ad --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.pullapprove.yml b/.pullapprove.yml index 8e5e37ffa..d9d70c225 100644 --- a/.pullapprove.yml +++ b/.pullapprove.yml @@ -5,13 +5,14 @@ author_approval: ignored reviewers: required: 2 members: - - cjllanwarne - Horneth - - mcovarr - - geoffjentry - - kshakir + - cjllanwarne - francares - gauravs90 + - geoffjentry - jainh + - katevoss - kcibul + - kshakir + - mcovarr - ruchim diff --git a/.travis.yml b/.travis.yml index cfd59b202..1be54097a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,8 @@ before_cache: # Tricks to avoid unnecessary cache updates - find $HOME/.ivy2 -name "ivydata-*.properties" -delete - find $HOME/.sbt -name "*.lock" -delete +before_install: + - openssl aes-256-cbc -K "$encrypted_5ebd3ff04788_key" -iv "$encrypted_5ebd3ff04788_iv" -in src/bin/travis/resources/jesConf.tar.enc -out jesConf.tar -d || true env: 
global: - CENTAUR_BRANCH=develop @@ -24,6 +26,7 @@ env: - BUILD_TYPE=checkPublish - BUILD_TYPE=centaurJes - BUILD_TYPE=centaurLocal + - BUILD_TYPE=centaurTes script: - src/bin/travis/test.sh after_success: diff --git a/CHANGELOG.md b/CHANGELOG.md index 06b720f39..f388b4139 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,59 @@ # Cromwell Change Log +## 25 + +### External Contributors +* A special thank you to @adamstruck, @antonkulaga and @delocalizer for their contributions to Cromwell. +### Breaking Changes + +* Metadata keys for call caching are changed. All call caching keys are now in a `callCaching` stanza. `Call cache read result` has moved here and is now `result`. The `allowResultReuse` and `effectiveCallCachingMode` have moved here. The `hit` boolean is a simple indication of whether or not it was a hit, with no additional information. An example using the new format is: +``` +"callCaching": { + "hit": false, + "effectiveCallCachingMode": "ReadAndWriteCache", + "result": "Cache Miss", + "allowResultReuse": true +} +``` + +### Config Changes + +* Added a field `insert-batch-size` to the `database` stanza which defines how many values from a batch insert will be processed at a time. This value defaults to 2000. +* Moved the config value `services.MetadataService.metadata-summary-refresh-interval` to `services.MetadataService.config.metadata-summary-refresh-interval` +* Added ability to override the default zone(s) used by JES via the config structure by setting `genomics.default-zones` in the JES configuration +* The cromwell server TCP binding timeout is now configurable via the config key `webservice.binding-timeout`, defaulted + to the previous value `5s` (five seconds) via the reference.conf. +* For MySQL users, a massive scalability improvement via batched DB writing of internal metadata events. 
Note that one must add `rewriteBatchedStatements=true` to their JDBC URL in their config in order to take advantage of this + +### General Changes + +* Cromwell's WDL parser now recognizes empty array literals correctly, e.g. `Array[String] emptyArray = []`. +* Cromwell now applies default labels automatically to JES pipeline runs. +* Added support for new WDL functions: + * `length: (Array[X]) => Integer` - report the length of the specified array + * `prefix: (String, Array[X]) => Array[String]` - generate an array consisting of each element of the input array prefixed + by a specified `String`. The input array can have elements of any primitive type, the return array will always have + type `Array[String]`. + * `defined: (Any) => Boolean` - Will return false if the provided value is an optional that is not defined. Returns true in all other cases. +* Cromwell's Config (Shared Filesystem) backend now supports invocation of commands which run in a Docker image as a non-root user. + The non-root user could either be the default user for a given Docker image (e.g. specified in a Dockerfile via a `USER` directive), + or the Config backend could pass an optional `"-u username"` as part of the `submit-docker` command. +* In some cases the SFS backend, used for Local, SGE, etc., coerced `WdlFile` to `WdlString` by using `.toUri`. This +resulted in strings prepended with `file:///path/to/file`. Now absolute file paths will not contain the uri scheme. +* Launch jobs on servers that support the GA4GH Task Execution Schema using the TES backend. +* **Call caching: Cromwell will no longer try to use the cache for WDL tasks that contain a floating docker tag.** + Call caching will still behave the same for tasks having a docker image with a specific hash. + See https://github.com/broadinstitute/cromwell#call-caching-docker-tags for more details. +* Added docker hash lookup. 
Cromwell will try to lookup the hash for a docker image with a floating tag, and use that hash when executing the job. + This will be reflected in the metadata where the docker runtime attribute will contain the hash that was used. + If Cromwell is unable to lookup the docker hash, the job will be run with the original user defined floating tag. + Cromwell is currently able to lookup public and private docker hashes for images on Docker Hub and Google Container Engine for jobs running on the JES backend. + For other backends, cromwell is able to lookup public docker hashes for Docker Hub and Google Container Engine. + See https://github.com/broadinstitute/cromwell#call-caching-docker-tags for more details. + +### Database schema changes +* Added CUSTOM_LABELS as a field of WORKFLOW_STORE_ENTRY, to store workflow store entries. + ## 24 * When emitting workflow outputs to the Cromwell log only the first 1000 characters per output will be printed @@ -19,6 +73,7 @@ * `transpose: (Array[Array[X]]) => Array[Array[X]]` compute the matrix transpose for a 2D array. Assumes each inner array has the same length. * By default, `system.abort-jobs-on-terminate` is false when running `java -jar cromwell.jar server`, and true when running `java -jar cromwell.jar run `. * Enable WDL imports when running in Single Workflow Runner Mode. +* Both batch and non-batch REST workflow submissions now require a multipart/form-data encoded body. * Support for sub workflows (see [Annex A](#annex-a---workflow-outputs)) * Enable WDL imports when running in Single Workflow Runner Mode as well as Server Mode * Support for WDL imports through an additional imports.zip parameter diff --git a/MakingABackend.MD b/MakingABackend.MD deleted file mode 100644 index 5106b3726..000000000 --- a/MakingABackend.MD +++ /dev/null @@ -1,197 +0,0 @@ -# Making a backend - -## Part 0: Introduction - -- These notes were added while making a new AWS backend for Amazon AWS. 
- -## Part 1 (October 13 2016): The skeleton: - -To start with, I just need to create a bunch of boilerplate which will eventually be filled in with all of the lovely AWS details! - -### Defining the awsBackend project: - -- Added entries to `project/Settings.scala`, `project/Dependencies.scala` and `build.sbt` -- This was mainly just a copy/paste from existing backend projects. I made a few typos renaming everything and linking the dependencies properly though! -- E.g. In my first commit I forgot to update the libraryDependencies name for my AWS backend project: -``` - val awsBackendSettings = List( - name := "cromwell-aws-backend", - libraryDependencies ++= awsBackendDependencies - ) ++ commonSettings -``` -- I guessed that I'd need the AWS SDK so I included that immediately in Dependencies.scala: -``` - val awsBackendDependencies = List( - "com.amazonaws" % "aws-java-sdk" % "1.11.41" - ) -``` -- In build.scala I had to also edit the `lazy val root` to include a new `.aggregate(awsBackend)` and a new `.dependsOn(awsBackend)` - -### Directory structure: - -- This is probably going to be autogenerated for you in the directories specified in the above files. I'd already added my own directory structure and sbt managed to pick it up correctly in `supportedBackends/aws`. - -### AWS Job Execution Actor: -- To run a job, Cromwell needs to instantiate a Job Execution actor. I'll fill in the details later but for now, I'll just add the constructor, props, and an unimplemented method definition for `execute`: -``` -class AwsJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, - override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor { - - override def execute: Future[BackendJobExecutionResponse] = ??? 
-} - -object AwsJobExecutionActor { - def props(jobDescriptor: BackendJobDescriptor, - configurationDescriptor: BackendConfigurationDescriptor): Props = Props(new AwsJobExecutionActor(jobDescriptor, configurationDescriptor)) -} -``` - -### Actor factory: -- This is the class which tells Cromwell which classes represent job execution actors, initialization actors and so on. I'm just adding a skeleton for now, with a constructor of the form the Cromwell expects: -``` -case class AwsBackendActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { - - override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, - initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef, - backendSingletonActor: Option[ActorRef]): Props = AwsJobExecutionActor.props(jobDescriptor, configurationDescriptor) -} -``` -- There are a few other actor definitions that can be added to this file over time. But the only one that Cromwell *requires* to work is the job execution actor. - -### Reference conf: - -- Reference.conf is a set of reference options which shows people how to enable the backends that they want. So I'll add the initial config which people would add if they wanted the AWS backend (commented out in the reference so it's not enabled by default). This goes below all the other backend references: -``` - #AWS { - # actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" - # config { - # - # } - #} -``` - -### Application.conf - -- OK so I've now told people how to add this backend... Now I actually add it to my own personal configuration file so I can try it out! -``` -backend { - default = "AWS" - providers { - AWS { - actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" - config { - - } - } - } -} -``` - -### Trying it out -So we now have a backend skeleton! What happens when we run it? 
Well hopefully Cromwell will instantiate the backend far enough to reach the unimplemented execute method and then fall over. Let's give it a go! -- I fire up cromwell in server mode with my modified application.conf. -- I create a sample WDL that would sleep for 20 seconds if it actually worked: -The input WDL: -``` -task sleep { - command { sleep 20 } -} -workflow main { - call sleep -} -``` -- I submit the WDL to the swagger endpoint (http://localhost:8000/swagger/index.html?url=/swagger/cromwell.yaml) and watch the server logs... -- And as expected: -``` -2016-10-13 13:14:29,017 cromwell-system-akka.dispatchers.engine-dispatcher-39 INFO - MaterializeWorkflowDescriptorActor [UUID(ddd827ba)]: Call-to-Backend assignments: main.sleep -> AWS -2016-10-13 13:14:30,167 cromwell-system-akka.dispatchers.engine-dispatcher-39 INFO - WorkflowExecutionActor-ddd827ba-091f-4c6f-b98f-cc9825717007 [UUID(ddd827ba)]: Starting calls: main.sleep:NA:1 -2016-10-13 13:14:30,983 cromwell-system-akka.actor.default-dispatcher-5 ERROR - guardian failed, shutting down system -scala.NotImplementedError: an implementation is missing - at scala.Predef$.$qmark$qmark$qmark(Predef.scala:230) - at cromwell.backend.impl.aws.AwsJobExecutionActor.execute(AwsJobExecutionActor.scala:12) -``` -- OK, so now I just need to implement `execute(): Future[JobExecutionResult]` and Cromwell can interface with AWS. How hard can it be! - -## Part 2 (October 13 2016): Using Amazon to sleep 20 seconds - -### Starting point -- This was a learning experience after using the Google pipelines service to submit jobs! -- To get myself started, I've manually created an ECS cluster which I've called `ecs-t2micro-cluster` via the ECS web console. - -### Trial and Error - -- I see in the aws sdk docs that there's an AmazonECSAsyncClient class. That sounds promising! 
Luckily I already added the dependency on AWS SDK in Part 1 so I guess I can just write something basic in my AwsJobExecutionActor class and see what happens: - -- I ended up having to add some credentials options to the configuration file. The new `reference.conf` now looks like: -``` - #AWS { - # actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" - # config { - # ## These two settings are required to authenticate with the ECS service: - # accessKeyId = "..." - # secretKey = "..." - # } - #} -``` - -- After a little bit of experimentation with the ECS API, I was able to come up with a backend that works but is very limited... It is entirely synchronous in the `execute` method. That's certainly not a final answer but it works OK for running a single task. And we can now run that single `sleep` command successfully on the Amazon EC2 Container Service! - - The synchronous `execute` method: -``` -class AwsJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, - override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor { - - val awsAccessKeyId = configurationDescriptor.backendConfig.as[String]("accessKeyId") - val awsSecretKey = configurationDescriptor.backendConfig.as[String]("secretKey") - - val clusterName = "ecs-t2micro-cluster" - - val credentials = new AWSCredentials { - override def getAWSAccessKeyId: String = awsAccessKeyId - override def getAWSSecretKey: String = awsSecretKey - } - val ecsAsyncClient = new AmazonECSAsyncClient(credentials) - - override def execute: Future[BackendJobExecutionResponse] = { - - val commandOverride = new ContainerOverride().withName("simple-app").withCommand(jobDescriptor.call.instantiateCommandLine(Map.empty, OnlyPureFunctions, identity).get) - - val runRequest: RunTaskRequest = new RunTaskRequest() - .withCluster(clusterName) - .withCount(1) - .withTaskDefinition("ubuntuTask:1") - .withOverrides(new TaskOverride().withContainerOverrides(commandOverride)) - 
- val submitResultHandler = new AwsSdkAsyncHandler[RunTaskRequest, RunTaskResult]() - val _ = ecsAsyncClient.runTaskAsync(runRequest, submitResultHandler) - - submitResultHandler.future map { - case AwsSdkAsyncResult(_, result) => - log.info("AWS submission completed:\n{}", result.toString) - val taskArn= result.getTasks.asScala.head.getTaskArn - val taskDescription = waitUntilDone(taskArn) - - log.info("AWS task completed!\n{}", taskDescription.toString) - SucceededResponse(jobDescriptor.key, Option(0), Map.empty, None, Seq.empty) - } - } - - private def waitUntilDone(taskArn: String): Task = { - val describeTasksRequest = new DescribeTasksRequest().withCluster(clusterName).withTasks(List(taskArn).asJava) - - val resultHandler = new AwsSdkAsyncHandler[DescribeTasksRequest, DescribeTasksResult]() - val _ = ecsAsyncClient.describeTasksAsync(describeTasksRequest, resultHandler) - - val desribedTasks = Await.result(resultHandler.future, Duration.Inf) - val taskDescription = desribedTasks.result.getTasks.asScala.head - if (taskDescription.getLastStatus == DesiredStatus.STOPPED.toString) { - taskDescription - } else { - Thread.sleep(200) - waitUntilDone(taskArn) - } - } -} -``` - - diff --git a/README.md b/README.md index 49c09df42..1ac9cfd76 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,19 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [Shared Local Filesystem](#shared-local-filesystem) * [Google Cloud Storage Filesystem](#google-cloud-storage-filesystem) * [Local Backend](#local-backend) + * [Google JES Backend](#google-jes-backend) + * [Configuring Google Project](#configuring-google-project) + * [Configuring Authentication](#configuring-authentication) + * [Application Default Credentials](#application-default-credentials) + * [Service Account](#service-account) + * [Refresh Token](#refresh-token) + * [Docker](#docker) + * [Monitoring](#monitoring) + * [GA4GH TES Backend](#ga4gh-tes-backend) + * 
[Configuring](#configuring) + * [Supported File Systems](#supported-file-systems) + * [Docker](#docker) + * [CPU, Memory and Disk](#cpu-memory-and-disk) * [Sun GridEngine Backend](#sun-gridengine-backend) * [HtCondor Backend](#htcondor-backend) * [Caching configuration](#caching-configuration) @@ -45,14 +58,6 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [Spark runtime attributes](#spark-runtime-attributes) * [Spark Environment](#spark-environment) * [Sample Wdl](#sample-wdl) - * [Google JES Backend](#google-jes-backend) - * [Configuring Google Project](#configuring-google-project) - * [Configuring Authentication](#configuring-authentication) - * [Application Default Credentials](#application-default-credentials) - * [Service Account](#service-account) - * [Refresh Token](#refresh-token) - * [Docker](#docker) - * [Monitoring](#monitoring) * [Runtime Attributes](#runtime-attributes) * [Specifying Default Values](#specifying-default-values) * [continueOnReturnCode](#continueonreturncode) @@ -66,13 +71,17 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [preemptible](#preemptible) * [Logging](#logging) * [Workflow Options](#workflow-options) +* [Labels](#labels) + * [Custom Labels File](#custom-labels-file) + * [Label Format](#label-format) * [Call Caching](#call-caching) * [Configuring Call Caching](#configuring-call-caching) * [Call Caching Workflow Options](#call-caching-workflow-options) * [Local Filesystem Options](#local-filesystem-options) * [Imports](#imports) * [Sub Workflows](#sub-workflows) -* [Meta blocks](#meta-blocks) + * [Execution](#execution) + * [Metadata](#metadata) * [REST API](#rest-api) * [REST API Versions](#rest-api-versions) * [POST /api/workflows/:version](#post-apiworkflowsversion) @@ -142,7 +151,7 @@ java -jar cromwell.jar Actions: run [] [] - [] [] + [] [] [] Given a WDL file and JSON file containing the value of the workflow inputs, this will run the workflow 
locally and @@ -287,6 +296,8 @@ The command to run this WDL, without needing any inputs, workflow options or met $ java -jar cromwell.jar run threestep.wdl - - - /path/to/my_WDLs.zip ``` +The sixth optional parameter is a path to a labels file. See [Labels](#labels) for information and the expected format. + ## server Start a server on port 8000, the API for the server is described in the [REST API](#rest-api) section. @@ -376,6 +387,8 @@ For many examples on how to use WDL see [the WDL site](https://github.com/broadi * [Array\[Array\[X\]\] transpose(Array\[Array\[X\]\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arrayarrayx-transposearrayarrayx) * [Pair(X,Y) zip(X,Y)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#pairxy-zipxy) * [Pair(X,Y) cross(X,Y)](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#pairxy-crossxy) + * [Integer length(Array\[X\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#integer-lengtharrayx) + * [Array\[String\] prefix(String, Array\[X\])](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#arraystring-prefixstring-arrayx) * [Data Types & Serialization](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#data-types--serialization) * [Serialization of Task Inputs](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#serialization-of-task-inputs) * [Primitive Types](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#primitive-types) @@ -412,6 +425,7 @@ The configuration file is in [Hocon](https://github.com/typesafehub/config/blob/ webservice { port = 8000 interface = 0.0.0.0 + binding-timeout = 5s instance.name = "reference" } ``` @@ -421,6 +435,7 @@ Or, alternatively, as dot-separated values: ```hocon webservice.port = 8000 webservice.interface = 0.0.0.0 +webservice.binding-timeout = 5s webservice.instance.name = "reference" ``` @@ -458,7 +473,7 @@ database { driver = "slick.driver.MySQLDriver$" db { driver = "com.mysql.jdbc.Driver" - url = 
"jdbc:mysql://host/cromwell" + url = "jdbc:mysql://host/cromwell?rewriteBatchedStatements=true" user = "user" password = "pass" connectionTimeout = 5000 @@ -470,6 +485,8 @@ database { } ``` +By default batch inserts will be processed in blocks of 2000. To modify this value add the field `insert-batch-size` to the `database` stanza. + ## SIGINT abort handler For backends that support aborting task invocations, Cromwell can be configured to automatically try to abort all currently running calls (and set their status to `Aborted`) when a SIGINT is sent to the Cromwell process. To turn this feature on, set the configuration option @@ -498,6 +515,7 @@ Cromwell distribution: * Local / GridEngine / LSF / etc. - Run jobs as subprocesses or via a dispatcher. Supports launching in Docker containers. Use `bash`, `qsub`, `bsub`, etc. to run scripts. * Google JES - Launch jobs on Google Compute Engine through the Job Execution Service (JES). +* GA4GH TES - Launch jobs on servers that support the GA4GH Task Execution Schema (TES). * HtCondor - Allows to execute jobs using HTCondor. * Spark - Adds support for execution of spark jobs. @@ -728,7 +746,7 @@ cd echo $? > rc ``` -`` would be equal to `` for non-Docker jobs, or it would be under `/root//call-` if this is running in a Docker container. +`` would be equal to `` for non-Docker jobs, or it would be under `/cromwell-executions//call-` if this is running in a Docker container. When running without docker, the subprocess command that the local backend will launch is: @@ -752,6 +770,244 @@ docker run --rm -v : -i /bin/bash <