Skip to content

Commit

Permalink
WX-990 Make TES request backoff behavior configurable (#7122)
Browse files Browse the repository at this point in the history
  • Loading branch information
jgainerdewar committed Apr 24, 2023
1 parent b568fac commit 1a3a68e
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 14 deletions.
16 changes: 16 additions & 0 deletions cromwell.example.backends/TES.conf
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,22 @@ backend {
disk: "2 GB"
preemptible: false
}

# Backoff behavior for task status polling and execution retries are configurable, with defaults
# shown below. All four fields must be set for each backoff if overriding.
#
# poll-backoff {
# min: "10 seconds"
# max: "5 minutes"
# multiplier: 1.1
# randomization-factor: 0.5
# }
# execute-or-recover-backoff {
# min: "3 seconds"
# max: "30 seconds"
# multiplier: 1.1
# randomization-factor: 0.5
# }
}
}
}
Expand Down
14 changes: 14 additions & 0 deletions docs/backends/TES.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,20 @@ backend {
}
```

Cromwell uses an exponential backoff strategy to control the rate of requests sent to the TES server.
Users can override the default settings for this behavior in config, see `cromwell.example.backends/TES.conf`
for full expected format.

For example, to force a constant polling rate rather than exponential backoff:
```hocon
backend.providers.TES.config.poll-backoff {
min: "1 minute"
max: "1 minute"
multiplier: 1
randomization-factor: 0
}
```

### Supported File Systems

Currently this backend only works with files on a Local or Shared File System.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ import wom.values.WomFile
import net.ceedubs.ficus.Ficus._

import scala.concurrent.Future
import scala.concurrent.duration._
import scala.language.postfixOps
import scala.util.{Failure, Success}

sealed trait TesRunStatus {
Expand Down Expand Up @@ -71,17 +69,8 @@ class TesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyn

def statusEquivalentTo(thiz: StandardAsyncRunState)(that: StandardAsyncRunState): Boolean = thiz == that

override lazy val pollBackOff = SimpleExponentialBackoff(
initialInterval = 1 seconds,
maxInterval = 5 minutes,
multiplier = 1.1
)

override lazy val executeOrRecoverBackOff = SimpleExponentialBackoff(
initialInterval = 3 seconds,
maxInterval = 30 seconds,
multiplier = 1.1
)
override lazy val pollBackOff: SimpleExponentialBackoff = tesConfiguration.pollBackoff
override lazy val executeOrRecoverBackOff: SimpleExponentialBackoff = tesConfiguration.executeOrRecoverBackoff

private lazy val realDockerImageUsed: String = jobDescriptor.maybeCallCachingEligible.dockerHash.getOrElse(runtimeAttributes.dockerImage)
override lazy val dockerImageUsed: Option[String] = Option(realDockerImageUsed)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
package cromwell.backend.impl.tes

import com.typesafe.config.Config
import cromwell.backend.BackendConfigurationDescriptor

import cromwell.core.retry.SimpleExponentialBackoff
import net.ceedubs.ficus.Ficus._

import scala.concurrent.duration._
import scala.language.postfixOps

class TesConfiguration(val configurationDescriptor: BackendConfigurationDescriptor) {

val endpointURL = configurationDescriptor.backendConfig.getString("endpoint")
Expand All @@ -13,8 +17,35 @@ class TesConfiguration(val configurationDescriptor: BackendConfigurationDescript
.backendConfig
.as[Option[Boolean]](TesConfiguration.useBackendParametersKey)
.getOrElse(false)

val pollBackoff =
configurationDescriptor
.backendConfig
.as[Option[Config]]("poll-backoff")
.map(SimpleExponentialBackoff(_))
.getOrElse(TesConfiguration.defaultPollBackoff)

val executeOrRecoverBackoff =
configurationDescriptor
.backendConfig
.as[Option[Config]]("execute-or-recover-backoff")
.map(SimpleExponentialBackoff(_))
.getOrElse(TesConfiguration.defaultExecOrRecoverBackoff)

}

object TesConfiguration {
final val useBackendParametersKey = "use_tes_11_preview_backend_parameters"

final val defaultPollBackoff = SimpleExponentialBackoff(
initialInterval = 10 seconds,
maxInterval = 5 minutes,
multiplier = 1.1
)

final val defaultExecOrRecoverBackoff = SimpleExponentialBackoff(
initialInterval = 3 seconds,
maxInterval = 30 seconds,
multiplier = 1.1
)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package cromwell.backend.impl.tes

import com.typesafe.config.{Config, ConfigException, ConfigFactory}
import cromwell.backend.BackendConfigurationDescriptor
import cromwell.core.retry.SimpleExponentialBackoff
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

import scala.concurrent.duration._
import scala.language.postfixOps

class TesConfigurationSpec extends AnyFlatSpec with Matchers {

behavior of "TesConfiguration"

def makeTesConfig(backendConfig: Config) = new TesConfiguration(
BackendConfigurationDescriptor(
backendConfig,
ConfigFactory.empty()
)
)

def backoffsAreEquivalent(expectedBackoff: SimpleExponentialBackoff, actualBackoff: SimpleExponentialBackoff): Boolean = {
val b1 = expectedBackoff.googleBackoff
val b2 = actualBackoff.googleBackoff
b1.getInitialIntervalMillis == b2.getInitialIntervalMillis &&
b1.getMaxIntervalMillis == b2.getMaxIntervalMillis &&
b1.getMultiplier == b2.getMultiplier &&
b1.getRandomizationFactor == b2.getRandomizationFactor
}

it should "use default backoffs when no custom config provided" in {
val tesConfig = makeTesConfig(TesTestConfig.backendConfig)
backoffsAreEquivalent(TesConfiguration.defaultPollBackoff, tesConfig.pollBackoff) shouldBe true
backoffsAreEquivalent(TesConfiguration.defaultExecOrRecoverBackoff, tesConfig.executeOrRecoverBackoff) shouldBe true
}

it should "use configured backoffs if they exist" in {
val tesConfig = makeTesConfig(TesTestConfig.backendConfigWithBackoffs)
backoffsAreEquivalent(SimpleExponentialBackoff(5 seconds, 1 minute, 2.5, .7), tesConfig.pollBackoff) shouldBe true
backoffsAreEquivalent(SimpleExponentialBackoff(3 minutes, 1 hours, 5, .1), tesConfig.executeOrRecoverBackoff) shouldBe true
}

it should "fail if user defines an invalid backoff" in {
assertThrows[ConfigException.Missing](makeTesConfig(TesTestConfig.backendConfigWithInvalidBackoffs))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,70 @@ object TesTestConfig {
|""".stripMargin

val backendConfigWithBackendParams = ConfigFactory.parseString(backendConfigStringWithBackendParams)

private val backendConfigStringWithBackoffs =
"""
|root = "local-cromwell-executions"
|dockerRoot = "/cromwell-executions"
|endpoint = "http://127.0.0.1:9000/v1/jobs"
|
|default-runtime-attributes {
| cpu: 1
| failOnStderr: false
| continueOnReturnCode: 0
| memory: "2 GB"
| disk: "2 GB"
| preemptible: false
| # The keys below have been commented out as they are optional runtime attributes.
| # dockerWorkingDir
| # docker
|}
|
|poll-backoff {
| min: "5 seconds"
| max: "1 minute"
| multiplier: 2.5
| randomization-factor: .7
|}
|
|execute-or-recover-backoff {
| min: "3 minutes"
| max: "1 hours"
| multiplier: 5
| randomization-factor: .1
|}
|
|""".stripMargin

val backendConfigWithBackoffs = ConfigFactory.parseString(backendConfigStringWithBackoffs)

private val backendConfigStringWithInvalidBackoffs =
"""
|root = "local-cromwell-executions"
|dockerRoot = "/cromwell-executions"
|endpoint = "http://127.0.0.1:9000/v1/jobs"
|
|default-runtime-attributes {
| cpu: 1
| failOnStderr: false
| continueOnReturnCode: 0
| memory: "2 GB"
| disk: "2 GB"
| preemptible: false
| # The keys below have been commented out as they are optional runtime attributes.
| # dockerWorkingDir
| # docker
|}
|
|poll-backoff {
| min: "5 seconds"
| max: "1 minute"
| multiplier: 2.5
| # missing randomization-factor
|}
|
|""".stripMargin

val backendConfigWithInvalidBackoffs = ConfigFactory.parseString(backendConfigStringWithInvalidBackoffs)
}

0 comments on commit 1a3a68e

Please sign in to comment.