Permalink
| ################################## | |
| # Cromwell Reference Config File # | |
| ################################## | |
| # This is the reference config file that contains all the default settings. | |
| # Make your edits/overrides in your application.conf. | |
| webservice { | |
| port = 8000 | |
| interface = 0.0.0.0 | |
| binding-timeout = 5s | |
| instance.name = "reference" | |
| } | |
| akka { | |
| actor.default-dispatcher.fork-join-executor { | |
| # Number of threads = min(parallelism-factor * cpus, parallelism-max) | |
| # Below are the default values set by Akka, uncomment to tune these | |
| #parallelism-factor = 3.0 | |
| #parallelism-max = 64 | |
| } | |
| dispatchers { | |
| # A dispatcher for actors performing blocking io operations | |
| # Prevents the whole system from being slowed down when waiting for responses from external resources for instance | |
| io-dispatcher { | |
| type = Dispatcher | |
| executor = "fork-join-executor" | |
| # Using the forkjoin defaults, this can be tuned if we wish | |
| } | |
| # A dispatcher for actors handling API operations | |
| # Keeps the API responsive regardless of the load of workflows being run | |
| api-dispatcher { | |
| type = Dispatcher | |
| executor = "fork-join-executor" | |
| } | |
| # A dispatcher for engine actors | |
| # Because backends' behavior is unpredictable (potentially blocking, slow) the engine runs | |
| # on its own dispatcher to prevent backends from affecting its performance. | |
| engine-dispatcher { | |
| type = Dispatcher | |
| executor = "fork-join-executor" | |
| } | |
| # A dispatcher used by supported backend actors | |
| backend-dispatcher { | |
| type = Dispatcher | |
| executor = "fork-join-executor" | |
| } | |
| # A dispatcher used for the service registry | |
| service-dispatcher { | |
| type = Dispatcher | |
| executor = "fork-join-executor" | |
| } | |
| # Note that without further configuration, all other actors run on the default dispatcher | |
| } | |
| coordinated-shutdown.phases { | |
| abort-all-workflows { | |
| # This phase is used to give time to Cromwell to abort all workflows upon shutdown. | |
| # It's only used if system.abort-jobs-on-terminate = true | |
| # This timeout can be adjusted to give more or less time to Cromwell to abort workflows | |
| timeout = 1 hour | |
| depends-on = [service-unbind] | |
| } | |
| stop-io-activity{ | |
| # Adjust this timeout according to the maximum amount of time Cromwell | |
| # should be allowed to spend flushing its database queues | |
| timeout = 30 minutes | |
| depends-on = [service-stop] | |
| } | |
| } | |
| } | |
| system { | |
| # If 'true', a SIGINT will trigger Cromwell to attempt to abort all currently running jobs before exiting | |
| abort-jobs-on-terminate = false | |
| # If 'true', a SIGTERM or SIGINT will trigger Cromwell to attempt to gracefully shutdown in server mode, | |
| # in particular clearing up all queued database writes before letting the JVM shut down. | |
| # The shutdown is a multi-phase process, each phase having its own configurable timeout. See the Dev Wiki for more details. | |
| graceful-server-shutdown = true | |
| # If 'true' then when Cromwell starts up, it tries to restart incomplete workflows | |
| workflow-restart = true | |
| # Cromwell will cap the number of running workflows at N | |
| max-concurrent-workflows = 5000 | |
| # Cromwell will launch up to N submitted workflows at a time, regardless of how many open workflow slots exist | |
| max-workflow-launch-count = 50 | |
| # Number of seconds between workflow launches | |
| new-workflow-poll-rate = 20 | |
| # Since the WorkflowLogCopyRouter is initialized in code, this is the number of workers | |
| number-of-workflow-log-copy-workers = 10 | |
| # Default number of cache read workers | |
| number-of-cache-read-workers = 25 | |
| io { | |
| # Global Throttling - This is mostly useful for GCS and can be adjusted to match | |
| # the quota available on the GCS API | |
| number-of-requests = 100000 | |
| per = 100 seconds | |
| # Number of times an I/O operation should be attempted before giving up and failing it. | |
| number-of-attempts = 5 | |
| } | |
| # Maximum number of input file bytes allowed in order to read each type. | |
| # If exceeded a FileSizeTooBig exception will be thrown. | |
| input-read-limits { | |
| lines = 128000 | |
| bool = 7 | |
| int = 19 | |
| float = 50 | |
| string = 128000 | |
| json = 128000 | |
| tsv = 128000 | |
| map = 128000 | |
| object = 128000 | |
| } | |
| } | |
| workflow-options { | |
| # These workflow options will be encrypted when stored in the database | |
| encrypted-fields: [] | |
| # AES-256 key to use to encrypt the values in `encrypted-fields` | |
| base64-encryption-key: "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" | |
| # Directory where to write per workflow logs | |
| workflow-log-dir: "cromwell-workflow-logs" | |
| # When true, per workflow logs will be deleted after copying | |
| workflow-log-temporary: true | |
| # Workflow-failure-mode determines what happens to other calls when a call fails. Can be either ContinueWhilePossible or NoNewCalls. | |
| # Can also be overridden in workflow options. Defaults to NoNewCalls. Uncomment to change: | |
| #workflow-failure-mode: "ContinueWhilePossible" | |
| } | |
| # Optional call-caching configuration. | |
| call-caching { | |
| # Allows re-use of existing results for jobs you've already run | |
| # (default: false) | |
| enabled = false | |
| # Whether to invalidate a cache result forever if we cannot reuse them. Disable this if you expect some cache copies | |
| # to fail for external reasons which should not invalidate the cache (e.g. auth differences between users): | |
| # (default: true) | |
| invalidate-bad-cache-results = true | |
| } | |
| google { | |
| application-name = "cromwell" | |
| auths = [ | |
| { | |
| name = "application-default" | |
| scheme = "application_default" | |
| }, | |
| #{ | |
| # name = "user-via-refresh" | |
| # scheme = "refresh_token" | |
| # client-id = "secret_id" | |
| # client-secret = "secret_secret" | |
| #}, | |
| #{ | |
| # name = "service-account" | |
| # scheme = "service_account" | |
| # Choose between PEM file and JSON file as a credential format. They're mutually exclusive. | |
| # PEM format: | |
| # service-account-id = "my-service-account" | |
| # pem-file = "/path/to/file.pem" | |
| # JSON format: | |
| # json-file = "/path/to/file.json" | |
| #} | |
| ] | |
| } | |
| docker { | |
| hash-lookup { | |
| // Set this to match your available quota against the Google Container Engine API | |
| gcr-api-queries-per-100-seconds = 1000 | |
| // Time in minutes before an entry expires from the docker hashes cache and needs to be fetched again | |
| cache-entry-ttl = "20 minutes" | |
| // Maximum number of elements to be kept in the cache. If the limit is reached, old elements will be removed from the cache | |
| cache-size = 200 | |
| // How should docker hashes be looked up. Possible values are "local" and "remote" | |
| // "local": Lookup hashes on the local docker daemon using the cli | |
| // "remote": Lookup hashes on docker hub and gcr | |
| method = "remote" | |
| } | |
| } | |
| engine { | |
| # This instructs the engine which filesystems are at its disposal to perform any IO operation that it might need. | |
| # For instance, WDL variables declared at the Workflow level will be evaluated using the filesystems declared here. | |
| # If you intend to be able to run workflows with this kind of declarations: | |
| # workflow { | |
| # String str = read_string("gs://bucket/my-file.txt") | |
| # } | |
| # You will need to provide the engine with a gcs filesystem | |
| # Note that the default filesystem (local) is always available. | |
| filesystems { | |
| # gcs { | |
| # auth = "application-default" | |
| # } | |
| local { | |
| enabled: true | |
| } | |
| } | |
| } | |
| backend { | |
| default = "Local" | |
| providers { | |
| Local { | |
| actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" | |
| config { | |
| # Limits the number of concurrent jobs | |
| #concurrent-job-limit = 5 | |
| run-in-background = true | |
| # `script-epilogue` configures a shell command to run after the execution of every command block. | |
| # | |
| # If this value is not set explicitly, the default value is `sync`, equivalent to: | |
| # script-epilogue = "sync" | |
| # | |
| # To turn off the default `sync` behavior set this value to an empty string: | |
| # script-epilogue = "" | |
| runtime-attributes = """ | |
| String? docker | |
| String? docker_user | |
| """ | |
| submit = "/bin/bash ${script}" | |
| submit-docker = """ | |
| docker run \ | |
| --rm -i \ | |
| ${"--user " + docker_user} \ | |
| --entrypoint /bin/bash \ | |
| -v ${cwd}:${docker_cwd} \ | |
| ${docker} ${script} | |
| """ | |
| # Root directory where Cromwell writes job results. This directory must be | |
| # visible and writeable by the Cromwell process as well as the jobs that Cromwell | |
| # launches. | |
| root = "cromwell-executions" | |
| filesystems { | |
| local { | |
| localization: [ | |
| "hard-link", "soft-link", "copy" | |
| ] | |
| caching { | |
| # When copying a cached result, what type of file duplication should occur. Attempted in the order listed below: | |
| duplication-strategy: [ | |
| "hard-link", "soft-link", "copy" | |
| ] | |
| # Possible values: file, path | |
| # "file" will compute an md5 hash of the file content. | |
| # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link", | |
| # in order to allow for the original file path to be hashed. | |
| hashing-strategy: "file" | |
| # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash. | |
| # If false or the md5 does not exist, will proceed with the above-defined hashing strategy. | |
| check-sibling-md5: false | |
| } | |
| } | |
| } | |
| default-runtime-attributes { | |
| failOnStderr: false | |
| continueOnReturnCode: 0 | |
| } | |
| } | |
| } | |
| #TES { | |
| # actor-factory = "cromwell.backend.impl.tes.TesBackendLifecycleActorFactory" | |
| # config { | |
| # root = "cromwell-executions" | |
| # dockerRoot = "/cromwell-executions" | |
| # endpoint = "http://127.0.0.1:9000/v1/jobs" | |
| # default-runtime-attributes { | |
| # cpu: 1 | |
| # failOnStderr: false | |
| # continueOnReturnCode: 0 | |
| # memory: "2 GB" | |
| # disk: "2 GB" | |
| # } | |
| # } | |
| #} | |
| #SGE { | |
| # actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" | |
| # config { | |
| # | |
| # # Limits the number of concurrent jobs | |
| # concurrent-job-limit = 5 | |
| # | |
| # runtime-attributes = """ | |
| # Int cpu = 1 | |
| # Float? memory_gb | |
| # String? sge_queue | |
| # String? sge_project | |
| # """ | |
| # | |
| # submit = """ | |
| # qsub \ | |
| # -terse \ | |
| # -V \ | |
| # -b n \ | |
| # -N ${job_name} \ | |
| # -wd ${cwd} \ | |
| # -o ${out} \ | |
| # -e ${err} \ | |
| # -pe smp ${cpu} \ | |
| # ${"-l mem_free=" + memory_gb + "g"} \ | |
| # ${"-q " + sge_queue} \ | |
| # ${"-P " + sge_project} \ | |
| # ${script} | |
| # """ | |
| # | |
| # kill = "qdel ${job_id}" | |
| # check-alive = "qstat -j ${job_id}" | |
| # job-id-regex = "(\\d+)" | |
| # } | |
| #} | |
| #LSF { | |
| # actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" | |
| # config { | |
| # submit = "bsub -J ${job_name} -cwd ${cwd} -o ${out} -e ${err} /bin/bash ${script}" | |
| # kill = "bkill ${job_id}" | |
| # check-alive = "bjobs ${job_id}" | |
| # job-id-regex = "Job <(\\d+)>.*" | |
| # } | |
| #} | |
| #SLURM { | |
| # actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" | |
| # config { | |
| # runtime-attributes = """ | |
| # Int runtime_minutes = 600 | |
| # Int cpus = 2 | |
| # Int requested_memory_mb_per_core = 8000 | |
| # String queue = "short" | |
| # """ | |
| # | |
| # submit = """ | |
| # sbatch -J ${job_name} -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} -p ${queue} \ | |
| # ${"-n " + cpus} \ | |
| # --mem-per-cpu=${requested_memory_mb_per_core} \ | |
| # --wrap "/bin/bash ${script}" | |
| # """ | |
| # kill = "scancel ${job_id}" | |
| # check-alive = "squeue -j ${job_id}" | |
| # job-id-regex = "Submitted batch job (\\d+).*" | |
| # } | |
| #} | |
| # Example backend that _only_ runs workflows that specify docker for every command. | |
| #Docker { | |
| # actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" | |
| # config { | |
| # run-in-background = true | |
| # runtime-attributes = "String docker" | |
| # submit-docker = "docker run --rm -v ${cwd}:${docker_cwd} -i ${docker} /bin/bash < ${script}" | |
| # } | |
| #} | |
| #HtCondor { | |
| # actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" | |
| # config { | |
| # runtime-attributes = """ | |
| # Int cpu = 1 | |
| # Float memory_mb = 512.0 | |
| # Float disk_kb = 256000.0 | |
| # String? nativeSpecs | |
| # String? docker | |
| # """ | |
| # | |
| # submit = """ | |
| # chmod 755 ${script} | |
| # cat > ${cwd}/execution/submitFile <<EOF | |
| # Iwd=${cwd}/execution | |
| # requirements=${nativeSpecs} | |
| # leave_in_queue=true | |
| # request_memory=${memory_mb} | |
| # request_disk=${disk_kb} | |
| # error=${err} | |
| # output=${out} | |
| # log_xml=true | |
| # request_cpus=${cpu} | |
| # executable=${script} | |
| # log=${cwd}/execution/execution.log | |
| # queue | |
| # EOF | |
| # condor_submit ${cwd}/execution/submitFile | |
| # """ | |
| # | |
| # submit-docker = """ | |
| # chmod 755 ${script} | |
| # cat > ${cwd}/execution/dockerScript <<EOF | |
| # #!/bin/bash | |
| # docker run --rm -i -v ${cwd}:${docker_cwd} ${docker} /bin/bash ${script} | |
| # EOF | |
| # chmod 755 ${cwd}/execution/dockerScript | |
| # cat > ${cwd}/execution/submitFile <<EOF | |
| # Iwd=${cwd}/execution | |
| # requirements=${nativeSpecs} | |
| # leave_in_queue=true | |
| # request_memory=${memory_mb} | |
| # request_disk=${disk_kb} | |
| # error=${cwd}/execution/stderr | |
| # output=${cwd}/execution/stdout | |
| # log_xml=true | |
| # request_cpus=${cpu} | |
| # executable=${cwd}/execution/dockerScript | |
| # log=${cwd}/execution/execution.log | |
| # queue | |
| # EOF | |
| # condor_submit ${cwd}/execution/submitFile | |
| # """ | |
| # | |
| # kill = "condor_rm ${job_id}" | |
| # check-alive = "condor_q ${job_id}" | |
| # job-id-regex = "(?sm).*cluster (\\d+)..*" | |
| # } | |
| #} | |
| #Spark { | |
| # actor-factory = "cromwell.backend.impl.spark.SparkBackendFactory" | |
| # config { | |
| # # Root directory where Cromwell writes job results. This directory must be | |
| # # visible and writeable by the Cromwell process as well as the jobs that Cromwell | |
| # # launches. | |
| # root: "cromwell-executions" | |
| # | |
| # filesystems { | |
| # local { | |
| # localization: [ | |
| # "hard-link", "soft-link", "copy" | |
| # ] | |
| # } | |
| # } | |
| # # change (master, deployMode) to (yarn, client), (yarn, cluster) | |
| # # or (spark://hostname:port, cluster) for spark standalone cluster mode | |
| # master: "local" | |
| # deployMode: "client" | |
| # } | |
| # } | |
| #JES { | |
| # actor-factory = "cromwell.backend.impl.jes.JesBackendLifecycleActorFactory" | |
| # config { | |
| # # Google project | |
| # project = "my-cromwell-workflows" | |
| # | |
| # # Base bucket for workflow executions | |
| # root = "gs://my-cromwell-workflows-bucket" | |
| # | |
| # # Set this to the lower of the two values "Queries per 100 seconds" and "Queries per 100 seconds per user" for | |
| # # your project. | |
| # # | |
| # # Used to help determine maximum throughput to the Google Genomics API. Setting this value too low will | |
| # # cause a drop in performance. Setting this value too high will cause QPS based locks from Google. | |
| # # 1000 is the default "Queries per 100 seconds per user", 50000 is the default "Queries per 100 seconds" | |
| # # See https://cloud.google.com/genomics/quotas for more information | |
| # genomics-api-queries-per-100-seconds = 1000 | |
| # | |
| # # Polling for completion backs-off gradually for slower-running jobs. | |
| # # This is the maximum polling interval (in seconds): | |
| # maximum-polling-interval = 600 | |
| # | |
| # # Optional Dockerhub Credentials. Can be used to access private docker images. | |
| # dockerhub { | |
| # # account = "" | |
| # # token = "" | |
| # } | |
| # | |
| # genomics { | |
| # # A reference to an auth defined in the `google` stanza at the top. This auth is used to create | |
| # # Pipelines and manipulate auth JSONs. | |
| # auth = "application-default" | |
| # | |
| # | |
| # // alternative service account to use on the launched compute instance | |
| #     // NOTE: If combined with service account authorization, both that service account and this service account | |
| # // must be able to read and write to the 'root' GCS path | |
| # compute-service-account = "default" | |
| # | |
| # # Endpoint for APIs, no reason to change this unless directed by Google. | |
| # endpoint-url = "https://genomics.googleapis.com/" | |
| # | |
| # # Restrict access to VM metadata. Useful in cases when untrusted containers are running under a service | |
| # # account not owned by the submitting user | |
| # restrict-metadata-access = false | |
| # } | |
| # | |
| # filesystems { | |
| # gcs { | |
| # # A reference to a potentially different auth for manipulating files via engine functions. | |
| # auth = "application-default" | |
| # | |
| # caching { | |
| # # When a cache hit is found, the following duplication strategy will be followed to use the cached outputs | |
| # # Possible values: "copy", "reference". Defaults to "copy" | |
| # # "copy": Copy the output files | |
| # # "reference": DO NOT copy the output files but point to the original output files instead. | |
| #        # Will still make sure that all the original output files exist and are accessible before | |
| # # going forward with the cache hit. | |
| # duplication-strategy = "copy" | |
| # } | |
| # } | |
| # } | |
| # | |
| # default-runtime-attributes { | |
| # cpu: 1 | |
| # failOnStderr: false | |
| # continueOnReturnCode: 0 | |
| # memory: "2 GB" | |
| # bootDiskSizeGb: 10 | |
| # # Allowed to be a String, or a list of Strings | |
| # disks: "local-disk 10 SSD" | |
| # noAddress: false | |
| # preemptible: 0 | |
| # zones: ["us-central1-a", "us-central1-b"] | |
| # } | |
| # } | |
| #} | |
| #AWS { | |
| # actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" | |
| # config { | |
| # ## These two settings are required to authenticate with the ECS service: | |
| # accessKeyId = "..." | |
| # secretKey = "..." | |
| # } | |
| #} | |
| } | |
| } | |
| services { | |
| KeyValue { | |
| class = "cromwell.services.keyvalue.impl.SqlKeyValueServiceActor" | |
| } | |
| MetadataService { | |
| class = "cromwell.services.metadata.impl.MetadataServiceActor" | |
| config { | |
| # Set this value to "Inf" to turn off metadata summary refresh. The default value is currently "2 seconds". | |
| # metadata-summary-refresh-interval = "Inf" | |
| # For higher scale environments, e.g. many workflows and/or jobs, DB write performance for metadata events | |
| # can be improved by writing to the database in batches. Increasing this value can dramatically improve overall | |
| # performance but will both lead to a higher memory usage as well as increase the risk that metadata events | |
| # might not have been persisted in the event of a Cromwell crash. | |
| # | |
| # For normal usage the default value of 200 should be fine but for larger/production environments we recommend a | |
| # value of at least 500. There'll be no one-size-fits-all number here so we recommend benchmarking performance and | |
| # tuning the value to match your environment. | |
| # db-batch-size = 200 | |
| # | |
| # Periodically the stored metadata events will be forcibly written to the DB regardless of if the batch size | |
| # has been reached. This is to prevent situations where events wind up never being written to an incomplete batch | |
| # with no new events being generated. The default value is currently 5 seconds | |
| # db-flush-rate = 5 seconds | |
| } | |
| } | |
| } | |
| database { | |
| # hsql default | |
| profile = "slick.jdbc.HsqldbProfile$" | |
| db { | |
| driver = "org.hsqldb.jdbcDriver" | |
| url = "jdbc:hsqldb:mem:${uniqueSchema};shutdown=false;hsqldb.tx=mvcc" | |
| connectionTimeout = 3000 | |
| } | |
| # mysql example | |
| #driver = "slick.driver.MySQLDriver$" | |
| #db { | |
| # driver = "com.mysql.jdbc.Driver" | |
| # url = "jdbc:mysql://host/cromwell?rewriteBatchedStatements=true" | |
| # user = "user" | |
| # password = "pass" | |
| # connectionTimeout = 5000 | |
| #} | |
| # For batch inserts the number of inserts to send to the DB at a time | |
| # insert-batch-size = 2000 | |
| migration { | |
| # For databases with a very large number of symbols, selecting all the rows at once can generate a variety of | |
| # problems. In order to avoid any issue, the selection is paginated. This value sets how many rows should be | |
| # retrieved and processed at a time, before asking for the next chunk. | |
| read-batch-size = 100000 | |
| # Because a symbol row can contain any arbitrary wdl value, the amount of metadata rows to insert from a single | |
| # symbol row can vary from 1 to several thousands (or more). To keep the size of the insert batch from growing out | |
| # of control we monitor its size and execute/commit when it reaches or exceeds writeBatchSize. | |
| write-batch-size = 100000 | |
| } | |
| } |