From e7791bee565a39cb00e82dba513abd6652532bb1 Mon Sep 17 00:00:00 2001 From: Ruchi Munshi Date: Fri, 3 Mar 2017 17:11:40 -0500 Subject: [PATCH 001/170] Update Cromwell develop to the next version --- project/Version.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Version.scala b/project/Version.scala index a4dc20d69..5d7c067e5 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -4,7 +4,7 @@ import sbt._ object Version { // Upcoming release, or current if we're on a master / hotfix branch - val cromwellVersion = "25" + val cromwellVersion = "26" // Adapted from SbtGit.versionWithGit def cromwellVersionWithGit: Seq[Setting[_]] = From 7912d9cb2a93fe64021eb2bdec6011d394784c43 Mon Sep 17 00:00:00 2001 From: Chris Llanwarne Date: Mon, 27 Feb 2017 14:30:15 -0500 Subject: [PATCH 002/170] Failure metadata redux --- engine/src/main/scala/cromwell/engine/engine.scala | 3 - .../lifecycle/execution/CallMetadataHelper.scala | 4 +- .../execution/WorkflowMetadataHelper.scala | 4 +- .../cromwell/webservice/WorkflowJsonSupport.scala | 1 - .../test/scala/cromwell/MetadataWatchActor.scala | 5 +- .../workflow/SingleWorkflowRunnerActorSpec.scala | 2 +- .../services/metadata/MetadataService.scala | 24 +++- .../services/metadata/MetadataServiceSpec.scala | 134 +++++++++++++++++++++ 8 files changed, 161 insertions(+), 16 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/engine.scala b/engine/src/main/scala/cromwell/engine/engine.scala index 7a65770b1..23d677fb3 100644 --- a/engine/src/main/scala/cromwell/engine/engine.scala +++ b/engine/src/main/scala/cromwell/engine/engine.scala @@ -1,7 +1,5 @@ package cromwell.engine -import java.time.OffsetDateTime - import wdl4s._ import scala.util.{Failure, Success, Try} @@ -9,7 +7,6 @@ import scala.util.{Failure, Success, Try} final case class AbortFunction(function: () => Unit) final case class AbortRegistrationFunction(register: AbortFunction => Unit) -final case class 
FailureEventEntry(failure: String, timestamp: OffsetDateTime) final case class CallAttempt(fqn: FullyQualifiedName, attempt: Int) object WorkflowFailureMode { diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala index bdc6eaf7c..2bdb83d8f 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala @@ -85,7 +85,7 @@ trait CallMetadataHelper { val failedState = if (retryableFailure) ExecutionStatus.Preempted else ExecutionStatus.Failed val completionEvents = completedCallMetadataEvents(jobKey, failedState, returnCode) val retryableFailureEvent = MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.RetryableFailure), MetadataValue(retryableFailure)) - val failureEvents = throwableToMetadataEvents(metadataKeyForCall(jobKey, s"${CallMetadataKeys.Failures}[$randomNumberString]"), failure).+:(retryableFailureEvent) + val failureEvents = throwableToMetadataEvents(metadataKeyForCall(jobKey, s"${CallMetadataKeys.Failures}"), failure).+:(retryableFailureEvent) serviceRegistryActor ! 
PutMetadataAction(completionEvents ++ failureEvents) } @@ -137,5 +137,5 @@ trait CallMetadataHelper { private def metadataKeyForCall(jobKey: JobKey, myKey: String) = MetadataKey(workflowIdForCallMetadata, Option(MetadataJobKey(jobKey.scope.fullyQualifiedName, jobKey.index, jobKey.attempt)), myKey) private def randomNumberString: String = Random.nextInt.toString.stripPrefix("-") - + } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala index d569f1fae..563c53006 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala @@ -7,8 +7,6 @@ import cromwell.core.{WorkflowId, WorkflowMetadataKeys, WorkflowState} import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} -import scala.util.Random - trait WorkflowMetadataHelper { def serviceRegistryActor: ActorRef @@ -24,7 +22,7 @@ trait WorkflowMetadataHelper { } def pushWorkflowFailures(workflowId: WorkflowId, failures: List[Throwable]) = { - val failureEvents = failures flatMap { r => throwableToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Failures}[${Random.nextInt(Int.MaxValue)}]"), r) } + val failureEvents = failures flatMap { r => throwableToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Failures}"), r) } serviceRegistryActor ! 
PutMetadataAction(failureEvents) } diff --git a/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala b/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala index 817cb8dcf..65cd7af7b 100644 --- a/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala +++ b/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala @@ -43,7 +43,6 @@ object WorkflowJsonSupport extends DefaultJsonProtocol { } } - implicit val unqualifiedFailureEventEntry = jsonFormat2(FailureEventEntry) implicit val workflowQueryResult = jsonFormat5(WorkflowQueryResult) implicit val workflowQueryResponse = jsonFormat1(WorkflowQueryResponse) } diff --git a/engine/src/test/scala/cromwell/MetadataWatchActor.scala b/engine/src/test/scala/cromwell/MetadataWatchActor.scala index cb8802e22..0fb30213c 100644 --- a/engine/src/test/scala/cromwell/MetadataWatchActor.scala +++ b/engine/src/test/scala/cromwell/MetadataWatchActor.scala @@ -23,7 +23,7 @@ final case class MetadataWatchActor(promise: Promise[Unit], matchers: Matcher*) () } case PutMetadataAction(_) => // Superfluous message. 
Ignore - case _ => throw new Exception("Invalid message to MetadataWatchActor") + case other => throw new Exception(s"Invalid message to MetadataWatchActor: $other") } } @@ -69,6 +69,7 @@ object MetadataWatchActor { } } - val failurePattern = """failures\[\d*\].message""" + val failurePattern = """failures\[\d*\].*\:message""" + // val failurePattern = """failures\[\d*\]\:message""" final case class FailureMatcher(value: String) extends KeyMatchesRegexAndValueContainsStringMatcher(failurePattern, value) { } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala index 820091fef..ca4723ce0 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala @@ -41,7 +41,7 @@ object SingleWorkflowRunnerActorSpec { implicit class OptionJsValueEnhancer(val jsValue: Option[JsValue]) extends AnyVal { def toOffsetDateTime = OffsetDateTime.parse(jsValue.toStringValue) - def toStringValue = jsValue.get.asInstanceOf[JsString].value + def toStringValue = jsValue.getOrElse(JsString("{}")).asInstanceOf[JsString].value def toFields = jsValue.get.asJsObject.fields } diff --git a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala index 3eb13fbe3..2aec11781 100644 --- a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala +++ b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala @@ -9,6 +9,8 @@ import cromwell.services.ServiceRegistryActor.ServiceRegistryMessage import lenthall.exception.ThrowableAggregation import wdl4s.values._ +import scala.util.Random + object MetadataService { @@ -126,13 +128,27 @@ object MetadataService { List(MetadataEvent(metadataKey, MetadataValue(value))) } - def 
throwableToMetadataEvents(metadataKey: MetadataKey, t: Throwable): List[MetadataEvent] = { + def throwableToMetadataEvents(metadataKey: MetadataKey, t: Throwable, failureIndex: Int = Random.nextInt(Int.MaxValue)): List[MetadataEvent] = { + val emptyCauseList = List(MetadataEvent.empty(metadataKey.copy(key = metadataKey.key + s"[$failureIndex]:causedBy[]"))) + t match { case aggregation: ThrowableAggregation => - aggregation.errorMessages.toList map { message => MetadataEvent(metadataKey.copy(key = s"${metadataKey.key}:message"), MetadataValue(s"${aggregation.exceptionContext}: $message")) } + val message = List(MetadataEvent(metadataKey.copy(key = s"${metadataKey.key}[$failureIndex]:message"), MetadataValue(aggregation.exceptionContext))) + val indexedCauses = aggregation.throwables.toList.zipWithIndex + val indexedCauseEvents = if (indexedCauses.nonEmpty) { + indexedCauses flatMap { case (cause, index) => + val causeKey = metadataKey.copy(key = metadataKey.key + s"[$failureIndex]:causedBy") + throwableToMetadataEvents(causeKey, cause, index) + } + } else { + emptyCauseList + } + + message ++ indexedCauseEvents case other => - val message = List(MetadataEvent(metadataKey.copy(key = s"${metadataKey.key}:message"), MetadataValue(t.getMessage))) - val cause = Option(t.getCause) map { cause => throwableToMetadataEvents(metadataKey.copy(key = s"${metadataKey.key}:causedBy"), cause) } getOrElse List.empty + val message = List(MetadataEvent(metadataKey.copy(key = s"${metadataKey.key}[$failureIndex]:message"), MetadataValue(t.getMessage))) + val causeKey = metadataKey.copy(key = metadataKey.key + s"[$failureIndex]:causedBy") + val cause = Option(t.getCause) map { cause => throwableToMetadataEvents(causeKey, cause, 0) } getOrElse emptyCauseList message ++ cause } } diff --git a/services/src/test/scala/cromwell/services/metadata/MetadataServiceSpec.scala b/services/src/test/scala/cromwell/services/metadata/MetadataServiceSpec.scala index 0ef7d1976..0717acfb3 100644 --- 
a/services/src/test/scala/cromwell/services/metadata/MetadataServiceSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/MetadataServiceSpec.scala @@ -1,6 +1,9 @@ package cromwell.services.metadata +import java.util.UUID + import cromwell.core.WorkflowId +import lenthall.exception.AggregatedException import org.scalactic.Equality import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{FlatSpec, Matchers} @@ -74,4 +77,135 @@ class MetadataServiceSpec extends FlatSpec with Matchers with TableDrivenPropert } } + // For the metadata event tests! + val failureMessageRegex = "([^\\[]*)\\[([0-9]+)\\](.*)\\:message".r + val pathToFailures = "path:to:failures" + + it should "convert an exception into a failure event with an empty causedBy block" in { + import MetadataService.throwableToMetadataEvents + + val workflowId = WorkflowId(UUID.randomUUID()) + val mdkey = MetadataKey(workflowId, None, pathToFailures) + + val tMsg = "The Oscars suck!" + val t = new RuntimeException(tMsg) + + val events = throwableToMetadataEvents(mdkey, t) + events.size should be(2) + val (keyPrefix, causedBys, failureIndex) = validateExceptionMessage(events.head, workflowId, tMsg) + keyPrefix should be(pathToFailures) + causedBys should be("") + events(1).key.key should be(s"$keyPrefix[$failureIndex]:causedBy[]") + events(1).key.workflowId should be(workflowId) + events(1).value should be(None) + + } + + it should "convert nested exceptions into a sequence of failure events" in { + import MetadataService.throwableToMetadataEvents + + val workflowId = WorkflowId(UUID.randomUUID()) + val mdkey = MetadataKey(workflowId, None, pathToFailures) + + val innerCauseMsg = "Envelope malfunctions" + val innerCause = new RuntimeException(innerCauseMsg) + + val causeMsg = "Wrong recipients" + val cause = new RuntimeException(causeMsg, innerCause) + + val tMsg = "The Oscars suck!" 
+ val t = new RuntimeException(tMsg, cause) + + val events = throwableToMetadataEvents(mdkey, t) + events.size should be(4) + + val (outerPrefix, outerCausedBys, outerFailureId) = validateExceptionMessage(events.head, workflowId, tMsg) + val (cause1Prefix, cause1CausedBys, cause1FailureId) = validateExceptionMessage(events(1), workflowId, causeMsg) + val (cause2Prefix, cause2CausedBys, cause2FailureId) = validateExceptionMessage(events(2), workflowId, innerCauseMsg) + events(3).key.key should be(s"$cause2Prefix[$cause2FailureId]$cause2CausedBys:causedBy[]") + + outerPrefix should be(pathToFailures) + cause1Prefix should be(pathToFailures) + cause2Prefix should be(pathToFailures) + outerCausedBys should be("") + cause1CausedBys should be(":causedBy[0]") + cause2CausedBys should be(":causedBy[0]:causedBy[0]") + cause1FailureId should be(outerFailureId) + cause2FailureId should be(cause1FailureId) + } + + it should "convert aggregated exceptions into a sequence of failure events" in { + import MetadataService.throwableToMetadataEvents + + val workflowId = WorkflowId(UUID.randomUUID()) + val mdkey = MetadataKey(workflowId, None, "path:to:failures") + + val innerCauseMsg = "Envelope malfunctions" + val innerCause = new RuntimeException(innerCauseMsg) + + val cause1Msg = "Wrong recipients" + val cause1 = new RuntimeException(cause1Msg) + val cause2Msg = "Self congratulation" + val cause2 = new RuntimeException(cause2Msg, innerCause) + val cause3Msg = "The Globes are better anyway" + val cause3 = new RuntimeException(cause3Msg) + + val causeContext = "Compound Entertainment Failure" + val cause = new AggregatedException(causeContext, List(cause1, cause2, cause3)) + + val tMsg = "The Oscars suck!" 
+ val t = new RuntimeException(tMsg, cause) + + val events = throwableToMetadataEvents(mdkey, t) + events.size should be(9) + + // Outer runtime exception: + val (runtimePrefix, runtimeCausedBys, runtimeFailureId) = validateExceptionMessage(events.head, workflowId, tMsg) + runtimePrefix should be(pathToFailures) + runtimeCausedBys should be("") + + // Aggregate exception: + val (aggregatePrefix, aggregateCausedBys, aggregateFailureId) = validateExceptionMessage(events(1), workflowId, causeContext) + aggregatePrefix should be(pathToFailures) + aggregateCausedBys should be(":causedBy[0]") + aggregateFailureId should be(runtimeFailureId) + + // cause1, caused by [] + val (cause1Prefix, cause1CausedBys, cause1FailureId) = validateExceptionMessage(events(2), workflowId, cause1Msg) + cause1Prefix should be(pathToFailures) + cause1CausedBys should be(":causedBy[0]:causedBy[0]") + cause1FailureId should be(runtimeFailureId) + events(3).key.key should be(s"$cause1Prefix[$runtimeFailureId]$cause1CausedBys:causedBy[]") + + // cause2, caused by innerCause caused by [] + val (cause2Prefix, cause2CausedBys, cause2FailureId) = validateExceptionMessage(events(4), workflowId, cause2Msg) + val (innerCausePrefix, innerCauseCausedBys, innerCauseFailureIds) = validateExceptionMessage(events(5), workflowId, innerCauseMsg) + cause2Prefix should be(pathToFailures) + cause2CausedBys should be(":causedBy[0]:causedBy[1]") + cause2FailureId should be(runtimeFailureId) + innerCausePrefix should be(pathToFailures) + innerCauseCausedBys should be(":causedBy[0]:causedBy[1]:causedBy[0]") + innerCauseFailureIds should be(runtimeFailureId) + events(6).key.key should be(s"$innerCausePrefix[$runtimeFailureId]$innerCauseCausedBys:causedBy[]") + + // cause3, caused by [] + val (cause3Prefix, cause3CausedBys, cause3FailureId) = validateExceptionMessage(events(7), workflowId, cause3Msg) + cause3Prefix should be(pathToFailures) + cause3CausedBys should be(":causedBy[0]:causedBy[2]") + cause3FailureId 
should be(runtimeFailureId) + events(8).key.key should be(s"$cause3Prefix[$cause3FailureId]$cause3CausedBys:causedBy[]") + } + + def validateExceptionMessage(event: MetadataEvent, workflowId: WorkflowId, message: String) = event match { + case MetadataEvent(k, Some(MetadataValue(v, _)), _) => + k.workflowId should be(workflowId) + v should be(message) + + // Return the ID so that we can check for uniqueness later: + k.key match { + case failureMessageRegex(prefix, failureIndex, causedBys) => (prefix, causedBys, failureIndex) + case _ => fail("Unexpected failure key format: " + k.key) + } + case _ => fail("throwableToMetadataEvents generated a metadata event without a metadata value! Bad throwableToMetadataEvents! Very bad!") + } } From cf50d9ae59b0169366d8ca364296a2d247c10ff2 Mon Sep 17 00:00:00 2001 From: mcovarr Date: Thu, 9 Mar 2017 10:21:22 -0500 Subject: [PATCH 003/170] temporary directory should be world writeable (#2053) --- .../scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index e6e257e2b..3bd42b2fb 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -177,6 +177,7 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta s"""|#!/bin/bash |tmpDir=$$(mktemp -d $cwd/tmp.XXXXXX) + |chmod 777 $$tmpDir |export _JAVA_OPTIONS=-Djava.io.tmpdir=$$tmpDir |export TMPDIR=$$tmpDir |$commandScriptPreamble From 7dd945e2d16d61c6791fa7b8f9056879e6e6bcec Mon Sep 17 00:00:00 2001 From: Thib Date: Mon, 13 Mar 2017 15:37:58 -0400 Subject: [PATCH 004/170] I/O Actor (#2056) * I/O Actor --- CHANGELOG.md | 12 + README.md | 14 ++ .../backend/BackendCacheHitCopyingActor.scala | 27 -- 
.../backend/BackendLifecycleActorFactory.scala | 53 ++-- .../backend/callcaching/CacheHitDuplicating.scala | 103 -------- .../backend/callcaching/FileHashingActor.scala | 42 ---- .../standard/StandardAsyncExecutionActor.scala | 90 +++---- .../standard/StandardCacheHitCopyingActor.scala | 45 ---- .../standard/StandardCachingActorHelper.scala | 2 +- .../standard/StandardInitializationActor.scala | 1 + .../standard/StandardJobExecutionActorParams.scala | 3 + .../standard/StandardLifecycleActorFactory.scala | 70 ++++-- .../standard/StandardSyncExecutionActor.scala | 2 + .../callcaching/JobCachingActorHelper.scala | 2 +- .../callcaching/StandardCacheHitCopyingActor.scala | 214 ++++++++++++++++ .../callcaching/StandardFileHashingActor.scala | 97 +++++++ core/src/main/resources/reference.conf | 7 + .../cromwell/core/actor/RobustClientHelper.scala | 74 ++++++ .../cromwell/core/actor/StreamActorHelper.scala | 94 +++++++ .../cromwell/core/actor/StreamIntegration.scala | 15 ++ .../core/callcaching/HashResultMessage.scala | 3 +- .../core/callcaching/docker/DockerHashActor.scala | 3 +- .../registryv2/flows/gcr/GcrAbstractFlow.scala | 15 +- core/src/main/scala/cromwell/core/io/AsyncIo.scala | 78 ++++++ core/src/main/scala/cromwell/core/io/IoAck.scala | 27 ++ .../scala/cromwell/core/io/IoClientHelper.scala | 29 +++ .../main/scala/cromwell/core/io/IoCommand.scala | 65 +++++ .../scala/cromwell/core/io/IoCommandBuilder.scala | 22 ++ .../main/scala/cromwell/core/io/IoTimeout.scala | 5 + .../src/main/scala/cromwell/core/io/Throttle.scala | 5 + .../cromwell/core/path/proxy/FileSystemProxy.scala | 25 -- .../scala/cromwell/core/path/proxy/PathProxy.scala | 44 ---- .../proxy/RetryableFileSystemProviderProxy.scala | 59 ----- .../src/test/scala/cromwell/core/FailIoActor.scala | 19 ++ .../src/test/scala/cromwell/core/MockIoActor.scala | 25 ++ .../test/scala/cromwell/core/SimpleIoActor.scala | 100 ++++++++ .../test/scala/cromwell/core/TestKitSuite.scala | 2 + 
.../core/actor/RobustClientHelperSpec.scala | 187 ++++++++++++++ .../core/actor/StreamActorHelperSpec.scala | 70 ++++++ .../test/scala/cromwell/core/io/AsyncIoSpec.scala | 116 +++++++++ .../cromwell/core/io/IoClientHelperSpec.scala | 106 ++++++++ .../path/proxy/RetryableFileSystemProxySpec.scala | 280 --------------------- .../scala/cromwell/engine/EngineFilesystems.scala | 31 +-- .../main/scala/cromwell/engine/io/IoActor.scala | 148 +++++++++++ .../engine/io/gcs/GcsBatchCommandContext.scala | 95 +++++++ .../cromwell/engine/io/gcs/GcsBatchFlow.scala | 152 +++++++++++ .../scala/cromwell/engine/io/gcs/GcsResponse.scala | 14 ++ .../engine/io/gcs/ParallelGcsBatchFlow.scala | 29 +++ .../scala/cromwell/engine/io/nio/NioFlow.scala | 80 ++++++ .../cromwell/engine/workflow/WorkflowActor.scala | 11 +- .../engine/workflow/WorkflowManagerActor.scala | 6 +- .../workflow/lifecycle/CopyWorkflowLogsActor.scala | 59 +++-- .../lifecycle/CopyWorkflowOutputsActor.scala | 56 +++-- .../lifecycle/WorkflowFinalizationActor.scala | 12 +- .../lifecycle/WorkflowInitializationActor.scala | 6 +- .../execution/EngineJobExecutionActor.scala | 116 +++------ .../execution/SubWorkflowExecutionActor.scala | 4 + .../execution/WorkflowExecutionActor.scala | 10 +- .../callcaching/EngineJobHashingActor.scala | 39 ++- .../preparation/JobPreparationActor.scala | 5 +- .../scala/cromwell/server/CromwellRootActor.scala | 30 ++- .../test/scala/cromwell/CromwellTestKitSpec.scala | 2 + .../scala/cromwell/SimpleWorkflowActorSpec.scala | 3 +- .../mock/DefaultBackendJobExecutionActor.scala | 2 + .../RetryableBackendLifecycleActorFactory.scala | 2 + .../cromwell/engine/io/IoActorGcsBatchSpec.scala | 103 ++++++++ .../scala/cromwell/engine/io/IoActorSpec.scala | 135 ++++++++++ .../scala/cromwell/engine/io/nio/NioFlowSpec.scala | 189 ++++++++++++++ .../workflow/SingleWorkflowRunnerActorSpec.scala | 6 +- .../engine/workflow/WorkflowActorSpec.scala | 4 +- .../execution/SubWorkflowExecutionActorSpec.scala | 2 + 
.../execution/WorkflowExecutionActorSpec.scala | 10 +- .../callcaching/EngineJobHashingActorSpec.scala | 53 +++- .../lifecycle/execution/ejea/PerTestHelper.scala | 28 ++- .../preparation/JobPreparationTestHelper.scala | 3 + .../cromwell/filesystems/gcs/GcsPathBuilder.scala | 80 +++--- .../filesystems/gcs/GcsPathBuilderFactory.scala | 18 +- .../filesystems/gcs/GoogleConfiguration.scala | 26 ++ .../filesystems/gcs/auth/GoogleAuthMode.scala | 96 ++----- .../gcs/auth/GoogleCredentialBundle.scala | 6 - .../gcs/auth/RefreshableOAuth2Credentials.scala | 31 --- .../gcs/batch/GcsBatchCommandBuilder.scala | 27 ++ .../filesystems/gcs/batch/GcsBatchIoCommand.scala | 92 +++++++ .../filesystems/gcs/GcsPathBuilderSpec.scala | 28 +-- .../filesystems/gcs/auth/GoogleAuthModeSpec.scala | 1 - project/Dependencies.scala | 6 +- .../impl/htcondor/HtCondorBackendFactory.scala | 2 + .../backend/impl/jes/GenomicsFactory.scala | 19 +- .../jes/JesAsyncBackendJobExecutionActor.scala | 29 ++- .../impl/jes/JesBackendInitializationData.scala | 4 +- .../impl/jes/JesBackendLifecycleActorFactory.scala | 20 +- .../backend/impl/jes/JesConfiguration.scala | 22 +- .../backend/impl/jes/JesFinalizationActor.scala | 15 +- .../backend/impl/jes/JesInitializationActor.scala | 38 +-- .../cromwell/backend/impl/jes/JesJobPaths.scala | 1 - .../backend/impl/jes/JesWorkflowPaths.scala | 9 +- .../JesBackendCacheHitCopyingActor.scala | 6 + .../jes/callcaching/JesBackendFileHashing.scala | 21 -- .../callcaching/JesBackendFileHashingActor.scala | 6 + .../jes/JesAsyncBackendJobExecutionActorSpec.scala | 34 ++- .../impl/jes/JesInitializationActorSpec.scala | 3 +- .../impl/jes/JesJobExecutionActorSpec.scala | 6 +- .../impl/sfs/config/ConfigBackendFileHashing.scala | 2 +- .../sfs/config/ConfigBackendFileHashingActor.scala | 26 ++ .../ConfigBackendLifecycleActorFactory.scala | 9 +- .../impl/sfs/config/ConfigHashingStrategy.scala | 2 +- ...redFileSystemBackendLifecycleActorFactory.scala | 1 + 
.../sfs/SharedFileSystemCacheHitCopyingActor.scala | 31 ++- .../sfs/SharedFileSystemInitializationActor.scala | 5 +- .../sfs/config/ConfigHashingStrategySpec.scala | 2 +- .../SharedFileSystemInitializationActorSpec.scala | 2 +- .../sfs/TestLocalAsyncJobExecutionActor.scala | 6 +- .../backend/impl/spark/SparkBackendFactory.scala | 3 +- .../backend/impl/tes/TesBackendFileHashing.scala | 2 +- .../impl/tes/TesBackendLifecycleActorFactory.scala | 5 +- 115 files changed, 3202 insertions(+), 1237 deletions(-) delete mode 100644 backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala delete mode 100644 backend/src/main/scala/cromwell/backend/callcaching/FileHashingActor.scala delete mode 100644 backend/src/main/scala/cromwell/backend/standard/StandardCacheHitCopyingActor.scala rename backend/src/main/scala/cromwell/backend/{ => standard}/callcaching/JobCachingActorHelper.scala (96%) create mode 100644 backend/src/main/scala/cromwell/backend/standard/callcaching/StandardCacheHitCopyingActor.scala create mode 100644 backend/src/main/scala/cromwell/backend/standard/callcaching/StandardFileHashingActor.scala create mode 100644 core/src/main/scala/cromwell/core/actor/RobustClientHelper.scala create mode 100644 core/src/main/scala/cromwell/core/actor/StreamActorHelper.scala create mode 100644 core/src/main/scala/cromwell/core/actor/StreamIntegration.scala create mode 100644 core/src/main/scala/cromwell/core/io/AsyncIo.scala create mode 100644 core/src/main/scala/cromwell/core/io/IoAck.scala create mode 100644 core/src/main/scala/cromwell/core/io/IoClientHelper.scala create mode 100644 core/src/main/scala/cromwell/core/io/IoCommand.scala create mode 100644 core/src/main/scala/cromwell/core/io/IoCommandBuilder.scala create mode 100644 core/src/main/scala/cromwell/core/io/IoTimeout.scala create mode 100644 core/src/main/scala/cromwell/core/io/Throttle.scala delete mode 100644 core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala delete mode 100644 
core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala delete mode 100644 core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala create mode 100644 core/src/test/scala/cromwell/core/FailIoActor.scala create mode 100644 core/src/test/scala/cromwell/core/MockIoActor.scala create mode 100644 core/src/test/scala/cromwell/core/SimpleIoActor.scala create mode 100644 core/src/test/scala/cromwell/core/actor/RobustClientHelperSpec.scala create mode 100644 core/src/test/scala/cromwell/core/actor/StreamActorHelperSpec.scala create mode 100644 core/src/test/scala/cromwell/core/io/AsyncIoSpec.scala create mode 100644 core/src/test/scala/cromwell/core/io/IoClientHelperSpec.scala delete mode 100644 core/src/test/scala/cromwell/core/path/proxy/RetryableFileSystemProxySpec.scala create mode 100644 engine/src/main/scala/cromwell/engine/io/IoActor.scala create mode 100644 engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchCommandContext.scala create mode 100644 engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchFlow.scala create mode 100644 engine/src/main/scala/cromwell/engine/io/gcs/GcsResponse.scala create mode 100644 engine/src/main/scala/cromwell/engine/io/gcs/ParallelGcsBatchFlow.scala create mode 100644 engine/src/main/scala/cromwell/engine/io/nio/NioFlow.scala create mode 100644 engine/src/test/scala/cromwell/engine/io/IoActorGcsBatchSpec.scala create mode 100644 engine/src/test/scala/cromwell/engine/io/IoActorSpec.scala create mode 100644 engine/src/test/scala/cromwell/engine/io/nio/NioFlowSpec.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleCredentialBundle.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala create mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchCommandBuilder.scala create mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchIoCommand.scala 
create mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendCacheHitCopyingActor.scala delete mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala create mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashingActor.scala create mode 100644 supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashingActor.scala diff --git a/CHANGELOG.md b/CHANGELOG.md index f388b4139..d3a38e195 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # Cromwell Change Log +## 26 + +* Added a configuration option under `system.io` to throttle the number of I/O queries that Cromwell makes. + This is mostly useful for the JES backend and should be updated to match the GCS quota available for the project. + +``` +system.io { + number-of-requests = 100000 + per = 100 seconds +} +``` + ## 25 ### External Contributors diff --git a/README.md b/README.md index 1ac9cfd76..6080fa0bf 100644 --- a/README.md +++ b/README.md @@ -451,6 +451,20 @@ It is recommended that one copies `src/main/resources/application.conf`, modify java -Dconfig.file=/path/to/application.conf cromwell.jar ... ``` +## I/O + +Cromwell centralizes as many of its I/O operations as possible through a unique entry point. This allows users to effectively control and throttle the number of requests and resources allocated to those operations throughout the entire system. +It is possible to configure this throttling behavior in the configuration: + +``` +system.io { + number-of-requests = 100000 + per = 100 seconds +} +``` + +This is particularly useful when running Cromwell on a JES backend for example, as Google imposes a quota on the number of GCS queries that can be made. + ## Workflow Submission Cromwell has a configurable cap on the number of workflows running at a time. 
To set this value provide an integer value to the `system.max-concurrent-workflows` config value. diff --git a/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala b/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala index f1bf38386..b832e8400 100644 --- a/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala +++ b/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala @@ -1,34 +1,7 @@ package cromwell.backend -import akka.actor.{Actor, ActorLogging} -import akka.event.LoggingReceive -import cromwell.backend.BackendCacheHitCopyingActor.CopyOutputsCommand -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse, JobFailedNonRetryableResponse} -import cromwell.backend.BackendLifecycleActor._ import cromwell.core.simpleton.WdlValueSimpleton -import scala.concurrent.Future - object BackendCacheHitCopyingActor { final case class CopyOutputsCommand(wdlValueSimpletons: Seq[WdlValueSimpleton], jobDetritusFiles: Map[String, String], returnCode: Option[Int]) } - -trait BackendCacheHitCopyingActor extends Actor with ActorLogging with BackendJobLifecycleActor { - - def copyCachedOutputs(wdlValueSimpletons: Seq[WdlValueSimpleton], jobDetritusFiles: Map[String, String], returnCode: Option[Int]): BackendJobExecutionResponse - - def receive: Receive = LoggingReceive { - case CopyOutputsCommand(simpletons, jobDetritus, returnCode) => - performActionThenRespond(Future(copyCachedOutputs(simpletons, jobDetritus, returnCode)), onFailure = cachingFailed, andThen = context stop self) - case AbortJobCommand => - abort() - context.parent ! 
AbortedResponse(jobDescriptor.key) - context stop self - } - - def abort(): Unit = log.warning("{}: Abort not supported during cache hit copying", jobTag) - - private def cachingFailed(t: Throwable) = { - JobFailedNonRetryableResponse(jobKey = jobDescriptor.key, throwable = t, returnCode = None) - } -} diff --git a/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala b/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala index e6aa055b1..70c4f500a 100644 --- a/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala +++ b/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala @@ -2,8 +2,6 @@ package cromwell.backend import akka.actor.{ActorRef, Props} import com.typesafe.config.Config -import cromwell.backend.callcaching.FileHashingActor -import cromwell.backend.callcaching.FileHashingActor.FileHashingFunction import cromwell.backend.io.WorkflowPathsWithDocker import cromwell.core.CallOutputs import cromwell.core.JobExecutionToken.JobExecutionTokenType @@ -13,34 +11,62 @@ import wdl4s.expression.{PureStandardLibraryFunctions, WdlStandardLibraryFunctio trait BackendLifecycleActorFactory { + + /* ****************************** */ + /* Workflow Initialization */ + /* ****************************** */ + def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = None + /* ****************************** */ + /* Job Execution */ + /* ****************************** */ + def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], serviceRegistryActor: ActorRef, + ioActor: ActorRef, backendSingletonActor: Option[ActorRef]): Props + def jobExecutionTokenType: JobExecutionTokenType = JobExecutionTokenType("Default", None) + + /* ****************************** */ + /* Workflow Finalization */ + /* 
****************************** */ + + def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, + calls: Set[TaskCall], + jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, + initializationData: Option[BackendInitializationData]): Option[Props] = None + + /* ****************************** */ + /* Call Caching */ + /* ****************************** */ + + def fileHashingActorProps: Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef, ActorRef) => Props] = None + /** * Providing this method to generate Props for a cache hit copying actor is optional. * To implement it, add a function: - * def cacheHitCopyingActorInner(jobDescriptor: BackendJobDescriptor,initializationData: Option[BackendInitializationData], serviceRegistryActor: ActorRef): Props + * def cacheHitCopyingActorInner(jobDescriptor: BackendJobDescriptor,initializationData: Option[BackendInitializationData], serviceRegistryActor: ActorRef, ioActor: ActorRef): Props * And then override this method to point to it: * override def cacheHitCopyingActorProps = Option(cacheHitCopyingActorInner _) * * Simples! */ - def cacheHitCopyingActorProps: Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef) => Props] = None + def cacheHitCopyingActorProps: Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef, ActorRef) => Props] = None + /* ****************************** */ + /* Misc. 
*/ + /* ****************************** */ + def backendSingletonActorProps: Option[Props] = None - def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Set[TaskCall], - jobExecutionMap: JobExecutionMap, - workflowOutputs: CallOutputs, - initializationData: Option[BackendInitializationData]): Option[Props] = None - def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = PureStandardLibraryFunctions @@ -55,16 +81,9 @@ trait BackendLifecycleActorFactory { def runtimeAttributeDefinitions(initializationDataOption: Option[BackendInitializationData]): Set[RuntimeAttributeDefinition] = Set.empty - lazy val fileHashingFunction: Option[FileHashingFunction] = None - lazy val fileHashingActorCount: Int = 50 - - def fileHashingActorProps: Props = FileHashingActor.props(fileHashingFunction) - /* * Returns credentials that can be used to authenticate to a docker registry server * in order to obtain a docker hash. 
*/ def dockerHashCredentials(initializationDataOption: Option[BackendInitializationData]): List[Any] = List.empty - - def jobExecutionTokenType: JobExecutionTokenType = JobExecutionTokenType("Default", None) } diff --git a/backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala b/backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala deleted file mode 100644 index 979166f08..000000000 --- a/backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala +++ /dev/null @@ -1,103 +0,0 @@ -package cromwell.backend.callcaching - -import akka.actor.ActorRef -import cromwell.backend.BackendCacheHitCopyingActor -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobSucceededResponse} -import cromwell.backend.io.JobPaths -import cromwell.core.path.{Path, PathCopier} -import cromwell.core.simpleton.{WdlValueBuilder, WdlValueSimpleton} -import wdl4s.values.WdlFile - -import scala.language.postfixOps -import scala.util.Try - -/** - * Mixin implementing common functionality for a BackendCacheHitCopyingActor. - * - * Implements copyCachedOutputs, with abstract methods for converting a string to a path, duplicating a path, returning - * a reference to the service registry actor, and obtaining various metadata/outputs for the job. - */ -trait CacheHitDuplicating { - this: BackendCacheHitCopyingActor => - - /** - * Duplicates two paths returned by getPath. - * - * @param source Source path. - * @param destination Destination path. - */ - protected def duplicate(source: Path, destination: Path): Unit - - /** - * Returns an absolute path to the file. - * - * NOTE: If necessary for separate credentialing, we may split this method into getSourcePath and getDestinationPath. - * - * @param file the string version of the path - * @return an absolute path to the file with potential credentials embedded within. 
- */ - protected def getPath(file: String): Try[Path] - - protected def destinationCallRootPath: Path - - protected def serviceRegistryActor: ActorRef - - protected def destinationJobDetritusPaths: Map[String, Path] - - // Usually implemented by a subclass of JobCachingActorHelper - protected def startMetadataKeyValues: Map[String, Any] - - private def lookupSourceCallRootPath(sourceJobDetritusFiles: Map[String, String]): Path = { - sourceJobDetritusFiles.get(JobPaths.CallRootPathKey).map(getPath).get recover { - case failure => - throw new RuntimeException(s"${JobPaths.CallRootPathKey} wasn't found for call ${jobDescriptor.call.fullyQualifiedName}", failure) - } get - } - - /** - * After copying files, return the simpletons substituting the destination file paths. - */ - private def copySimpletons(wdlValueSimpletons: Seq[WdlValueSimpleton], - sourceCallRootPath: Path): Seq[WdlValueSimpleton] = { - wdlValueSimpletons map { - case WdlValueSimpleton(key, wdlFile: WdlFile) => - val sourcePath = getPath(wdlFile.value).get - val destinationPath = PathCopier.getDestinationFilePath(sourceCallRootPath, sourcePath, destinationCallRootPath) - duplicate(sourcePath, destinationPath) - WdlValueSimpleton(key, WdlFile(destinationPath.pathAsString)) - case wdlValueSimpleton => wdlValueSimpleton - } - } - - private def copyDetritus(sourceJobDetritusFiles: Map[String, String]): Map[String, Path] = { - val sourceKeys = sourceJobDetritusFiles.keySet - val destinationKeys = destinationJobDetritusPaths.keySet - val fileKeys = sourceKeys.intersect(destinationKeys).filterNot(_ == JobPaths.CallRootPathKey) - - val destinationJobDetritusFiles = fileKeys map { fileKey => - val sourcePath = getPath(sourceJobDetritusFiles(fileKey)).get - val destinationPath = destinationJobDetritusPaths(fileKey) - duplicate(sourcePath, destinationPath) - (fileKey, destinationPath) - } - - destinationJobDetritusFiles.toMap + (JobPaths.CallRootPathKey -> destinationCallRootPath) - } - - override def 
copyCachedOutputs(wdlValueSimpletons: Seq[WdlValueSimpleton], - sourceJobDetritusFiles: Map[String, String], - returnCodeOption: Option[Int]): BackendJobExecutionResponse = { - val sourceCallRootPath = lookupSourceCallRootPath(sourceJobDetritusFiles) - - val destinationSimpletons = copySimpletons(wdlValueSimpletons, sourceCallRootPath) - val destinationJobDetritusFiles = copyDetritus(sourceJobDetritusFiles) - - val destinationJobOutputs = WdlValueBuilder.toJobOutputs(jobDescriptor.call.task.outputs, destinationSimpletons) - - import cromwell.services.metadata.MetadataService.implicits.MetadataAutoPutter - serviceRegistryActor.putMetadata( - jobDescriptor.workflowDescriptor.id, Option(jobDescriptor.key), startMetadataKeyValues) - - JobSucceededResponse(jobDescriptor.key, returnCodeOption, destinationJobOutputs, Option(destinationJobDetritusFiles), Seq.empty) - } -} diff --git a/backend/src/main/scala/cromwell/backend/callcaching/FileHashingActor.scala b/backend/src/main/scala/cromwell/backend/callcaching/FileHashingActor.scala deleted file mode 100644 index ee1ecea4c..000000000 --- a/backend/src/main/scala/cromwell/backend/callcaching/FileHashingActor.scala +++ /dev/null @@ -1,42 +0,0 @@ -package cromwell.backend.callcaching - -import akka.actor.{Actor, ActorLogging, Props} -import akka.event.LoggingAdapter -import cromwell.backend.BackendInitializationData -import cromwell.core.Dispatcher.BackendDispatcher -import cromwell.core.JobKey -import cromwell.core.callcaching._ -import wdl4s.values.WdlFile -import FileHashingActor._ - -import scala.util.{Failure, Success, Try} - -/** - * Blocking worker. Warning! 
If this actor dies then its mailbox of hash requests will be lost - */ -class FileHashingActor(workerFunction: Option[FileHashingFunction]) extends Actor with ActorLogging { - override def receive = { - case x: SingleFileHashRequest => - - // Create the path with the filesystem in the initialization data: - workerFunction.map(_.work(x, log)) match { - case Some(Success(hashSuccess)) => sender ! FileHashResponse(HashResult(x.hashKey, HashValue(hashSuccess))) - case Some(Failure(t)) => sender ! HashingFailedMessage(x.hashKey, t) - case None => sender ! HashingFailedMessage(x.hashKey, new NotImplementedError("Backend has no file hashing function")) - } - case x => log.error(s"Unexpected message to ${self.path.name}: $x") - } -} - -object FileHashingActor { - def props(workerFunction: Option[FileHashingFunction]): Props = Props(new FileHashingActor(workerFunction)).withDispatcher(BackendDispatcher) - - case class FileHashingFunction(work: (SingleFileHashRequest, LoggingAdapter) => Try[String]) - - sealed trait BackendSpecificHasherCommand { def jobKey: JobKey } - case class SingleFileHashRequest(jobKey: JobKey, hashKey: HashKey, file: WdlFile, initializationData: Option[BackendInitializationData]) extends BackendSpecificHasherCommand - case class HashesNoLongerRequired(jobKey: JobKey) extends BackendSpecificHasherCommand - - sealed trait BackendSpecificHasherResponse extends SuccessfulHashResultMessage - case class FileHashResponse(hashResult: HashResult) extends BackendSpecificHasherResponse { override def hashes = Set(hashResult) } -} diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 3bd42b2fb..2c5d1c832 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -11,6 +11,7 @@ import 
cromwell.backend.async.{AbortedExecutionHandle, AsyncBackendJobExecutionA import cromwell.backend.validation._ import cromwell.backend.wdl.{Command, OutputEvaluator, WdlFileMapper} import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor, BackendJobLifecycleActor} +import cromwell.core.io.{AsyncIo, DefaultIoCommandBuilder} import cromwell.core.path.Path import cromwell.core.{CallOutputs, CromwellAggregatedException, CromwellFatalExceptionMarker, ExecutionEvent} import cromwell.services.keyvalue.KeyValueServiceActor._ @@ -31,6 +32,7 @@ case class DefaultStandardAsyncExecutionActorParams ( override val jobIdKey: String, override val serviceRegistryActor: ActorRef, + override val ioActor: ActorRef, override val jobDescriptor: BackendJobDescriptor, override val configurationDescriptor: BackendConfigurationDescriptor, override val backendInitializationDataOption: Option[BackendInitializationData], @@ -48,7 +50,7 @@ case class DefaultStandardAsyncExecutionActorParams * NOTE: Unlike the parent trait `AsyncBackendJobExecutionActor`, this trait is subject to even more frequent updates * as the common behavior among the backends adjusts in unison. */ -trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with StandardCachingActorHelper { +trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with StandardCachingActorHelper with AsyncIo with DefaultIoCommandBuilder { this: Actor with ActorLogging with BackendJobLifecycleActor => val SIGTERM = 143 @@ -70,6 +72,8 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta override lazy val configurationDescriptor: BackendConfigurationDescriptor = standardParams.configurationDescriptor override lazy val completionPromise: Promise[BackendJobExecutionResponse] = standardParams.completionPromise + + override lazy val ioActor = standardParams.ioActor /** Backend initialization data created by the a factory initializer. 
*/ override lazy val backendInitializationDataOption: Option[BackendInitializationData] = @@ -482,7 +486,7 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta private var missedAbort = false private case class CheckMissedAbort(jobId: StandardAsyncJob) - context.become(standardReceiveBehavior(None) orElse receive) + context.become(ioReceive orElse standardReceiveBehavior(None) orElse receive) def standardReceiveBehavior(jobIdOption: Option[StandardAsyncJob]): Receive = LoggingReceive { case AbortJobCommand => @@ -496,7 +500,7 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta } postAbort() case CheckMissedAbort(jobId: StandardAsyncJob) => - context.become(standardReceiveBehavior(Option(jobId)) orElse receive) + context.become(ioReceive orElse standardReceiveBehavior(Option(jobId)) orElse receive) if (missedAbort) self ! AbortJobCommand case KvPutSuccess(_) => // expected after the KvPut for the operation ID @@ -547,7 +551,7 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta StandardAsyncJob@unchecked, StandardAsyncRunInfo@unchecked, StandardAsyncRunStatus@unchecked] => jobLogger.debug(s"$tag Polling Job ${handle.pendingJob}") - pollStatusAsync(handle) map { + pollStatusAsync(handle) flatMap { backendRunStatus => handlePollSuccess(handle, backendRunStatus) } recover { @@ -568,7 +572,7 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta * @return The updated execution handle. 
*/ def handlePollSuccess(oldHandle: StandardAsyncPendingExecutionHandle, - status: StandardAsyncRunStatus): ExecutionHandle = { + status: StandardAsyncRunStatus): Future[ExecutionHandle] = { val previousStatus = oldHandle.previousStatus if (!(previousStatus contains status)) { /* @@ -585,7 +589,7 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta val metadata = getTerminalMetadata(status) tellMetadata(metadata) handleExecutionResult(status, oldHandle) - case s => oldHandle.copy(previousStatus = Option(s)) // Copy the current handle with updated previous status. + case s => Future.successful(oldHandle.copy(previousStatus = Option(s))) // Copy the current handle with updated previous status. } } @@ -627,52 +631,38 @@ trait StandardAsyncExecutionActor extends AsyncBackendJobExecutionActor with Sta * @return The updated execution handle. */ def handleExecutionResult(status: StandardAsyncRunStatus, - oldHandle: StandardAsyncPendingExecutionHandle): ExecutionHandle = { - try { - - lazy val returnCodeAsString: Try[String] = Try(jobPaths.returnCode.contentAsString) - lazy val returnCodeAsInt: Try[Int] = returnCodeAsString.map(_.trim.toInt) + oldHandle: StandardAsyncPendingExecutionHandle): Future[ExecutionHandle] = { lazy val stderrAsOption: Option[Path] = Option(jobPaths.stderr) - - if (isSuccess(status)) { - lazy val stderrLength: Try[Long] = Try(jobPaths.stderr.size) - (stderrLength, returnCodeAsString, returnCodeAsInt) match { - // Failed to get stderr size -> Retry - case (Failure(exception), _, _) => - jobLogger.warn(s"could not get stderr file size, retrying", exception) - oldHandle - // Failed to get return code content -> Retry - case (_, Failure(exception), _) => - jobLogger.warn(s"could not download return code file, retrying", exception) - oldHandle - // Failed to convert return code content to Int -> Fail - case (_, _, Failure(_)) => - FailedNonRetryableExecutionHandle(ReturnCodeIsNotAnInt(jobDescriptor.key.tag, 
returnCodeAsString.get, stderrAsOption)) - // Stderr is not empty and failOnStdErr is true -> Fail - case (Success(length), _, _) if failOnStdErr && length.intValue > 0 => - FailedNonRetryableExecutionHandle(StderrNonEmpty(jobDescriptor.key.tag, length, stderrAsOption), returnCodeAsInt.toOption) - // Return code is abort code -> Abort - case (_, _, Success(rc)) if isAbort(rc) => - AbortedExecutionHandle - // Return code is not valid -> Fail - case (_, _, Success(rc)) if !continueOnReturnCode.continueFor(rc) => - FailedNonRetryableExecutionHandle(WrongReturnCode(jobDescriptor.key.tag, returnCodeAsInt.get, stderrAsOption), returnCodeAsInt.toOption) - // Otherwise -> Succeed - case (_, _, Success(rc)) => - handleExecutionSuccess(status, oldHandle, rc) - } - } else { - handleExecutionFailure(status, oldHandle, returnCodeAsInt.toOption) + + val stderrSizeAndReturnCode = for { + returnCodeAsString <- contentAsStringAsync(jobPaths.returnCode) + stderrSize <- sizeAsync(jobPaths.stderr) + } yield (stderrSize, returnCodeAsString) + + stderrSizeAndReturnCode map { + case (stderrSize, returnCodeAsString) => + val tryReturnCodeAsInt = Try(returnCodeAsString.trim.toInt) + + if (isSuccess(status)) { + tryReturnCodeAsInt match { + case Success(returnCodeAsInt) if failOnStdErr && stderrSize.intValue > 0 => + FailedNonRetryableExecutionHandle(StderrNonEmpty(jobDescriptor.key.tag, stderrSize, stderrAsOption), Option(returnCodeAsInt)) + case Success(returnCodeAsInt) if isAbort(returnCodeAsInt) => + AbortedExecutionHandle + case Success(returnCodeAsInt) if !continueOnReturnCode.continueFor(returnCodeAsInt) => + FailedNonRetryableExecutionHandle(WrongReturnCode(jobDescriptor.key.tag, returnCodeAsInt, stderrAsOption), Option(returnCodeAsInt)) + case Success(returnCodeAsInt) => + handleExecutionSuccess(status, oldHandle, returnCodeAsInt) + case Failure(_) => FailedNonRetryableExecutionHandle(ReturnCodeIsNotAnInt(jobDescriptor.key.tag, returnCodeAsString, stderrAsOption)) + } + } else { + 
handleExecutionFailure(status, oldHandle, tryReturnCodeAsInt.toOption) + } + } recoverWith { + case exception => + if (isSuccess(status)) Future.successful(FailedNonRetryableExecutionHandle(exception)) + else Future.successful(handleExecutionFailure(status, oldHandle, None)) } - } catch { - case exception: Exception if isFatal(exception) => - jobLogger.warn("Caught fatal exception processing job result", exception) - FailedNonRetryableExecutionHandle(exception) - case exception: Exception => - jobLogger.warn("Caught exception processing job result, retrying", exception) - // Return the original handle to try again. - oldHandle - } } /** diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardCacheHitCopyingActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardCacheHitCopyingActor.scala deleted file mode 100644 index 5f0ee8982..000000000 --- a/backend/src/main/scala/cromwell/backend/standard/StandardCacheHitCopyingActor.scala +++ /dev/null @@ -1,45 +0,0 @@ -package cromwell.backend.standard - -import akka.actor.ActorRef -import cromwell.backend.callcaching.CacheHitDuplicating -import cromwell.backend.{BackendCacheHitCopyingActor, BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor} -import cromwell.core.path.{Path, PathCopier} - -/** - * Trait of parameters passed to a StandardCacheHitCopyingActor. - */ -trait StandardCacheHitCopyingActorParams { - def jobDescriptor: BackendJobDescriptor - - def backendInitializationDataOption: Option[BackendInitializationData] - - def serviceRegistryActor: ActorRef - - def configurationDescriptor: BackendConfigurationDescriptor -} - -/** A default implementation of the cache hit copying params. 
*/ -case class DefaultStandardCacheHitCopyingActorParams -( - override val jobDescriptor: BackendJobDescriptor, - override val backendInitializationDataOption: Option[BackendInitializationData], - override val serviceRegistryActor: ActorRef, - override val configurationDescriptor: BackendConfigurationDescriptor -) extends StandardCacheHitCopyingActorParams - -/** - * Standard implementation of a BackendCacheHitCopyingActor. - */ -class StandardCacheHitCopyingActor(val standardParams: StandardCacheHitCopyingActorParams) - extends BackendCacheHitCopyingActor with CacheHitDuplicating with StandardCachingActorHelper { - - override protected def duplicate(source: Path, destination: Path): Unit = PathCopier.copy(source, destination).get - - override lazy val jobDescriptor: BackendJobDescriptor = standardParams.jobDescriptor - override lazy val backendInitializationDataOption: Option[BackendInitializationData] = - standardParams.backendInitializationDataOption - override lazy val serviceRegistryActor: ActorRef = standardParams.serviceRegistryActor - override lazy val configurationDescriptor: BackendConfigurationDescriptor = standardParams.configurationDescriptor - override lazy val destinationCallRootPath: Path = jobPaths.callRoot - override lazy val destinationJobDetritusPaths: Map[String, Path] = jobPaths.detritusPaths -} diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardCachingActorHelper.scala b/backend/src/main/scala/cromwell/backend/standard/StandardCachingActorHelper.scala index 9dc801512..cb76541a1 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardCachingActorHelper.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardCachingActorHelper.scala @@ -2,7 +2,7 @@ package cromwell.backend.standard import akka.actor.{Actor, ActorRef} import cromwell.backend._ -import cromwell.backend.callcaching.JobCachingActorHelper +import cromwell.backend.standard.callcaching.JobCachingActorHelper import 
cromwell.backend.io.{JobPaths, WorkflowPaths} import cromwell.backend.validation.{RuntimeAttributesValidation, ValidatedRuntimeAttributes} import cromwell.core.logging.JobLogging diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala index d958cdefc..d3389bcc7 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardInitializationActor.scala @@ -26,6 +26,7 @@ trait StandardInitializationActorParams { case class DefaultInitializationActorParams ( workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef, configurationDescriptor: BackendConfigurationDescriptor diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardJobExecutionActorParams.scala b/backend/src/main/scala/cromwell/backend/standard/StandardJobExecutionActorParams.scala index 83e29a14e..9a21c59c3 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardJobExecutionActorParams.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardJobExecutionActorParams.scala @@ -9,6 +9,9 @@ import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationDa trait StandardJobExecutionActorParams { /** The service registry actor for key/value and metadata. */ def serviceRegistryActor: ActorRef + + /** Actor able to handle IO requests asynchronously */ + def ioActor: ActorRef /** The descriptor of this job. 
*/ def jobDescriptor: BackendJobDescriptor diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardLifecycleActorFactory.scala b/backend/src/main/scala/cromwell/backend/standard/StandardLifecycleActorFactory.scala index ab11ccd82..3c14def64 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardLifecycleActorFactory.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardLifecycleActorFactory.scala @@ -3,6 +3,7 @@ package cromwell.backend.standard import akka.actor.{ActorRef, Props} import com.typesafe.config.Config import cromwell.backend._ +import cromwell.backend.standard.callcaching._ import cromwell.core.Dispatcher.BackendDispatcher import cromwell.core.path.Path import cromwell.core.{CallOutputs, Dispatcher} @@ -56,48 +57,79 @@ trait StandardLifecycleActorFactory extends BackendLifecycleActorFactory { * * @return the cache hit copying class. */ - lazy val cacheHitCopyingActorClassOption: Option[Class[_ <: StandardCacheHitCopyingActor]] = - Option(classOf[StandardCacheHitCopyingActor]) + lazy val cacheHitCopyingActorClassOption: Option[Class[_ <: StandardCacheHitCopyingActor]] = Option(classOf[DefaultStandardCacheHitCopyingActor]) + + /** + * Returns the cache hit copying class. + * + * @return the cache hit copying class. + */ + lazy val fileHashingActorClassOption: Option[Class[_ <: StandardFileHashingActor]] = Option(classOf[DefaultStandardFileHashingActor]) /** * Returns the finalization class. * * @return the finalization class. 
*/ - lazy val finalizationActorClassOption: Option[Class[_ <: StandardFinalizationActor]] = - Option(classOf[StandardFinalizationActor]) + lazy val finalizationActorClassOption: Option[Class[_ <: StandardFinalizationActor]] = Option(classOf[StandardFinalizationActor]) - override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], + override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = { - val params = workflowInitializationActorParams(workflowDescriptor, calls, serviceRegistryActor) + val params = workflowInitializationActorParams(workflowDescriptor, ioActor, calls, serviceRegistryActor) val props = Props(initializationActorClass, params).withDispatcher(Dispatcher.BackendDispatcher) Option(props) } - def workflowInitializationActorParams(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], + def workflowInitializationActorParams(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef): StandardInitializationActorParams = { - DefaultInitializationActorParams(workflowDescriptor, calls, serviceRegistryActor, configurationDescriptor) + DefaultInitializationActorParams(workflowDescriptor, ioActor, calls, serviceRegistryActor, configurationDescriptor) } override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationDataOption: Option[BackendInitializationData], serviceRegistryActor: ActorRef, + ioActor: ActorRef, backendSingletonActorOption: Option[ActorRef]): Props = { val params = jobExecutionActorParams(jobDescriptor, initializationDataOption, serviceRegistryActor, - backendSingletonActorOption) + ioActor, backendSingletonActorOption) Props(new StandardSyncExecutionActor(params)).withDispatcher(Dispatcher.BackendDispatcher) } def jobExecutionActorParams(jobDescriptor: 
BackendJobDescriptor, initializationDataOption: Option[BackendInitializationData], serviceRegistryActor: ActorRef, + ioActor: ActorRef, backendSingletonActorOption: Option[ActorRef]): StandardSyncExecutionActorParams = { - DefaultStandardSyncExecutionActorParams(jobIdKey, serviceRegistryActor, jobDescriptor, configurationDescriptor, + DefaultStandardSyncExecutionActorParams(jobIdKey, serviceRegistryActor, ioActor, jobDescriptor, configurationDescriptor, initializationDataOption, backendSingletonActorOption, asyncExecutionActorClass) } + override def fileHashingActorProps: + Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef, ActorRef) => Props] = { + fileHashingActorClassOption map { + standardFileHashingActor => fileHashingActorInner(standardFileHashingActor) _ + } + } + + def fileHashingActorInner(standardFileHashingActor: Class[_ <: StandardFileHashingActor]) + (jobDescriptor: BackendJobDescriptor, + initializationDataOption: Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + ioActor: ActorRef): Props = { + val params = fileHashingActorParams(jobDescriptor, initializationDataOption, serviceRegistryActor, ioActor) + Props(standardFileHashingActor, params).withDispatcher(BackendDispatcher) + } + + def fileHashingActorParams(jobDescriptor: BackendJobDescriptor, + initializationDataOption: Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + ioActor: ActorRef): StandardFileHashingActorParams = { + DefaultStandardFileHashingActorParams( + jobDescriptor, initializationDataOption, serviceRegistryActor, ioActor, configurationDescriptor) + } + override def cacheHitCopyingActorProps: - Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef) => Props] = { + Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef, ActorRef) => Props] = { cacheHitCopyingActorClassOption map { standardCacheHitCopyingActor => cacheHitCopyingActorInner(standardCacheHitCopyingActor) _ } @@ -106,29 
+138,31 @@ trait StandardLifecycleActorFactory extends BackendLifecycleActorFactory { def cacheHitCopyingActorInner(standardCacheHitCopyingActor: Class[_ <: StandardCacheHitCopyingActor]) (jobDescriptor: BackendJobDescriptor, initializationDataOption: Option[BackendInitializationData], - serviceRegistryActor: ActorRef): Props = { - val params = cacheHitCopyingActorParams(jobDescriptor, initializationDataOption, serviceRegistryActor) + serviceRegistryActor: ActorRef, + ioActor: ActorRef): Props = { + val params = cacheHitCopyingActorParams(jobDescriptor, initializationDataOption, serviceRegistryActor, ioActor) Props(standardCacheHitCopyingActor, params).withDispatcher(BackendDispatcher) } def cacheHitCopyingActorParams(jobDescriptor: BackendJobDescriptor, initializationDataOption: Option[BackendInitializationData], - serviceRegistryActor: ActorRef): StandardCacheHitCopyingActorParams = { + serviceRegistryActor: ActorRef, + ioActor: ActorRef): StandardCacheHitCopyingActorParams = { DefaultStandardCacheHitCopyingActorParams( - jobDescriptor, initializationDataOption, serviceRegistryActor, configurationDescriptor) + jobDescriptor, initializationDataOption, serviceRegistryActor, ioActor, configurationDescriptor) } - override def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], + override def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, calls: Set[TaskCall], jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: Option[BackendInitializationData]): Option[Props] = { finalizationActorClassOption map { finalizationActorClass => - val params = workflowFinalizationActorParams(workflowDescriptor, calls, jobExecutionMap, workflowOutputs, + val params = workflowFinalizationActorParams(workflowDescriptor, ioActor, calls, jobExecutionMap, workflowOutputs, initializationData) Props(finalizationActorClass, params).withDispatcher(BackendDispatcher) } } - 
def workflowFinalizationActorParams(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], + def workflowFinalizationActorParams(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, calls: Set[TaskCall], jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationDataOption: Option[BackendInitializationData]): StandardFinalizationActorParams = { diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardSyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardSyncExecutionActor.scala index 11e487a0b..3f44f0ba8 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardSyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardSyncExecutionActor.scala @@ -21,6 +21,7 @@ case class DefaultStandardSyncExecutionActorParams ( override val jobIdKey: String, override val serviceRegistryActor: ActorRef, + override val ioActor: ActorRef, override val jobDescriptor: BackendJobDescriptor, override val configurationDescriptor: BackendConfigurationDescriptor, override val backendInitializationDataOption: Option[BackendInitializationData], @@ -111,6 +112,7 @@ class StandardSyncExecutionActor(val standardParams: StandardSyncExecutionActorP DefaultStandardAsyncExecutionActorParams( standardParams.jobIdKey, standardParams.serviceRegistryActor, + standardParams.ioActor, standardParams.jobDescriptor, standardParams.configurationDescriptor, standardParams.backendInitializationDataOption, diff --git a/backend/src/main/scala/cromwell/backend/callcaching/JobCachingActorHelper.scala b/backend/src/main/scala/cromwell/backend/standard/callcaching/JobCachingActorHelper.scala similarity index 96% rename from backend/src/main/scala/cromwell/backend/callcaching/JobCachingActorHelper.scala rename to backend/src/main/scala/cromwell/backend/standard/callcaching/JobCachingActorHelper.scala index 8395f5264..89ce5739e 100644 --- 
a/backend/src/main/scala/cromwell/backend/callcaching/JobCachingActorHelper.scala +++ b/backend/src/main/scala/cromwell/backend/standard/callcaching/JobCachingActorHelper.scala @@ -1,4 +1,4 @@ -package cromwell.backend.callcaching +package cromwell.backend.standard.callcaching import akka.actor.Actor import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor} diff --git a/backend/src/main/scala/cromwell/backend/standard/callcaching/StandardCacheHitCopyingActor.scala b/backend/src/main/scala/cromwell/backend/standard/callcaching/StandardCacheHitCopyingActor.scala new file mode 100644 index 000000000..baaa9a78d --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/standard/callcaching/StandardCacheHitCopyingActor.scala @@ -0,0 +1,214 @@ +package cromwell.backend.standard.callcaching + +import java.util.concurrent.TimeoutException + +import akka.actor.{ActorRef, FSM} +import cats.instances.list._ +import cats.instances.set._ +import cats.instances.tuple._ +import cats.syntax.foldable._ +import cromwell.backend.BackendCacheHitCopyingActor.CopyOutputsCommand +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, JobFailedNonRetryableResponse, JobSucceededResponse} +import cromwell.backend.BackendLifecycleActor.AbortJobCommand +import cromwell.backend.io.JobPaths +import cromwell.backend.standard.StandardCachingActorHelper +import cromwell.backend.standard.callcaching.StandardCacheHitCopyingActor._ +import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor} +import cromwell.core._ +import cromwell.core.io._ +import cromwell.core.logging.JobLogging +import cromwell.core.path.{Path, PathCopier} +import cromwell.core.simpleton.{WdlValueBuilder, WdlValueSimpleton} +import wdl4s.values.WdlFile + +import scala.language.postfixOps +import scala.util.{Failure, Success, Try} + +/** + * Trait of parameters passed to a StandardCacheHitCopyingActor. 
+ */ +trait StandardCacheHitCopyingActorParams { + def jobDescriptor: BackendJobDescriptor + + def backendInitializationDataOption: Option[BackendInitializationData] + + def serviceRegistryActor: ActorRef + + def ioActor: ActorRef + + def configurationDescriptor: BackendConfigurationDescriptor +} + +/** A default implementation of the cache hit copying params. */ +case class DefaultStandardCacheHitCopyingActorParams +( + override val jobDescriptor: BackendJobDescriptor, + override val backendInitializationDataOption: Option[BackendInitializationData], + override val serviceRegistryActor: ActorRef, + override val ioActor: ActorRef, + override val configurationDescriptor: BackendConfigurationDescriptor +) extends StandardCacheHitCopyingActorParams + +object StandardCacheHitCopyingActor { + type DetritusMap = Map[String, Path] + type PathPair = (Path, Path) + + sealed trait StandardCacheHitCopyingActorState + case object Idle extends StandardCacheHitCopyingActorState + case object WaitingForCopyResponses extends StandardCacheHitCopyingActorState + + case class StandardCacheHitCopyingActorData(copyCommandsToWaitFor: Set[IoCopyCommand], + copiedJobOutputs: CallOutputs, + copiedDetritus: DetritusMap, + returnCode: Option[Int] + ) { + def remove(copyCommand: IoCopyCommand) = copy(copyCommandsToWaitFor = copyCommandsToWaitFor filterNot { _ == copyCommand }) + } +} + +class DefaultStandardCacheHitCopyingActor(standardParams: StandardCacheHitCopyingActorParams) extends StandardCacheHitCopyingActor(standardParams) with DefaultIoCommandBuilder + +/** + * Standard implementation of a BackendCacheHitCopyingActor. 
+ */ +abstract class StandardCacheHitCopyingActor(val standardParams: StandardCacheHitCopyingActorParams) + extends FSM[StandardCacheHitCopyingActorState, Option[StandardCacheHitCopyingActorData]] with JobLogging with StandardCachingActorHelper with IoClientHelper { this: IoCommandBuilder => + + override lazy val jobDescriptor: BackendJobDescriptor = standardParams.jobDescriptor + override lazy val backendInitializationDataOption: Option[BackendInitializationData] = standardParams.backendInitializationDataOption + override lazy val serviceRegistryActor: ActorRef = standardParams.serviceRegistryActor + override lazy val configurationDescriptor: BackendConfigurationDescriptor = standardParams.configurationDescriptor + + lazy val destinationCallRootPath: Path = jobPaths.callRoot + lazy val destinationJobDetritusPaths: Map[String, Path] = jobPaths.detritusPaths + lazy val ioActor = standardParams.ioActor + + startWith(Idle, None) + + context.become(ioReceive orElse receive) + + /** Override this method if you want to provide an alternative way to duplicate files than copying them. 
*/ + protected def duplicate(copyPairs: Set[PathPair]): Option[Try[Unit]] = None + + when(Idle) { + case Event(CopyOutputsCommand(simpletons, jobDetritus, returnCode), None) => + val sourceCallRootPath = lookupSourceCallRootPath(jobDetritus) + + val processed = for { + (callOutputs, simpletonCopyPairs) <- processSimpletons(simpletons, sourceCallRootPath) + (destinationDetritus, detritusCopyPairs) <- processDetritus(jobDetritus) + } yield (callOutputs, destinationDetritus, simpletonCopyPairs ++ detritusCopyPairs) + + processed match { + case Success((callOutputs, destinationDetritus, allCopyPairs)) => + duplicate(allCopyPairs) match { + case Some(Success(_)) => succeedAndStop(returnCode, callOutputs, destinationDetritus) + case Some(Failure(failure)) => failAndStop(failure) + case None => + val allCopyCommands = allCopyPairs map { case (source, destination) => copyCommand(source, destination, overwrite = true) } + + allCopyCommands foreach { sendIoCommand(_) } + + goto(WaitingForCopyResponses) using Option(StandardCacheHitCopyingActorData(allCopyCommands, callOutputs, destinationDetritus, returnCode)) + } + + case Failure(failure) => failAndStop(failure) + } + } + + when(WaitingForCopyResponses) { + case Event(IoSuccess(copyCommand: IoCopyCommand, _), Some(data)) => + val newData = data.remove(copyCommand) + if (newData.copyCommandsToWaitFor.isEmpty) succeedAndStop(data.returnCode, data.copiedJobOutputs, data.copiedDetritus) + else stay() using Option(newData) + case Event(IoFailure(copyCommand: IoCopyCommand, failure), _) => + failAndStop(failure) + } + + whenUnhandled { + case Event(AbortJobCommand, _) => + abort() + case Event(unexpected, _) => + log.warning(s"Backend cache hit copying actor received an unexpected message: $unexpected in state $stateName") + stay() + } + + def succeedAndStop(returnCode: Option[Int], copiedJobOutputs: CallOutputs, detritusMap: DetritusMap) = { + import cromwell.services.metadata.MetadataService.implicits.MetadataAutoPutter + 
serviceRegistryActor.putMetadata(jobDescriptor.workflowDescriptor.id, Option(jobDescriptor.key), startMetadataKeyValues) + context.parent ! JobSucceededResponse(jobDescriptor.key, returnCode, copiedJobOutputs, Option(detritusMap), Seq.empty) + context stop self + stay() + } + + def failAndStop(failure: Throwable) = { + context.parent ! JobFailedNonRetryableResponse(jobDescriptor.key, failure, None) + context stop self + stay() + } + + def abort() = { + log.warning("{}: Abort not supported during cache hit copying", jobTag) + context.parent ! AbortedResponse(jobDescriptor.key) + context stop self + stay() + } + + private def lookupSourceCallRootPath(sourceJobDetritusFiles: Map[String, String]): Path = { + sourceJobDetritusFiles.get(JobPaths.CallRootPathKey).map(getPath).get recover { + case failure => + throw new RuntimeException(s"${JobPaths.CallRootPathKey} wasn't found for call ${jobDescriptor.call.fullyQualifiedName}", failure) + } get + } + + /** + * Returns a pair of the list of simpletons with copied paths, and copy commands necessary to perform those copies. 
+ */ + private def processSimpletons(wdlValueSimpletons: Seq[WdlValueSimpleton], sourceCallRootPath: Path): Try[(CallOutputs, Set[PathPair])] = Try { + val (destinationSimpletons, ioCommands): (List[WdlValueSimpleton], Set[PathPair]) = wdlValueSimpletons.toList.foldMap({ + case WdlValueSimpleton(key, wdlFile: WdlFile) => + val sourcePath = getPath(wdlFile.value).get + val destinationPath = PathCopier.getDestinationFilePath(sourceCallRootPath, sourcePath, destinationCallRootPath) + + val destinationSimpleton = WdlValueSimpleton(key, WdlFile(destinationPath.pathAsString)) + + List(destinationSimpleton) -> Set(sourcePath -> destinationPath) + case nonFileSimpleton => (List(nonFileSimpleton), Set.empty[PathPair]) + }) + + (WdlValueBuilder.toJobOutputs(jobDescriptor.call.task.outputs, destinationSimpletons), ioCommands) + } + + /** + * Returns a pair of the detritus with copied paths, and copy commands necessary to perform those copies. + */ + private def processDetritus(sourceJobDetritusFiles: Map[String, String]): Try[(Map[String, Path], Set[PathPair])] = Try { + val sourceKeys = sourceJobDetritusFiles.keySet + val destinationKeys = destinationJobDetritusPaths.keySet + val fileKeys = sourceKeys.intersect(destinationKeys).filterNot(_ == JobPaths.CallRootPathKey) + + val zero = (Map.empty[String, Path], Set.empty[PathPair]) + + val (destinationDetritus, ioCommands) = fileKeys.foldLeft(zero)({ + case ((detrituses, commands), detritus) => + val sourcePath = getPath(sourceJobDetritusFiles(detritus)).get + val destinationPath = destinationJobDetritusPaths(detritus) + + val newDetrituses = detrituses + (detritus -> destinationPath) + + (newDetrituses, commands + ((sourcePath, destinationPath))) + }) + + (destinationDetritus + (JobPaths.CallRootPathKey -> destinationCallRootPath), ioCommands) + } + + override protected def onTimeout(message: Any, to: ActorRef): Unit = { + val exceptionMessage = message match { + case copyCommand: IoCopyCommand => s"The Cache hit copying actor 
timed out waiting for a response to copy ${copyCommand.source.pathAsString} to ${copyCommand.destination.pathAsString}" + case other => s"The Cache hit copying actor timed out waiting for an unknown I/O operation: $other" + } + + failAndStop(new TimeoutException(exceptionMessage)) + () + } +} diff --git a/backend/src/main/scala/cromwell/backend/standard/callcaching/StandardFileHashingActor.scala b/backend/src/main/scala/cromwell/backend/standard/callcaching/StandardFileHashingActor.scala new file mode 100644 index 000000000..417cb1b74 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/standard/callcaching/StandardFileHashingActor.scala @@ -0,0 +1,97 @@ +package cromwell.backend.standard.callcaching + +import akka.actor.{Actor, ActorLogging, ActorRef} +import akka.event.LoggingAdapter +import cromwell.backend.standard.StandardCachingActorHelper +import cromwell.backend.standard.callcaching.StandardFileHashingActor.{FileHashResponse, SingleFileHashRequest} +import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor} +import cromwell.core.JobKey +import cromwell.core.callcaching._ +import cromwell.core.io._ +import cromwell.core.logging.JobLogging +import wdl4s.values.WdlFile + +import scala.util.{Failure, Success, Try} + +/** + * Trait of parameters passed to a StandardCacheHitCopyingActor. + */ +trait StandardFileHashingActorParams { + def jobDescriptor: BackendJobDescriptor + + def backendInitializationDataOption: Option[BackendInitializationData] + + def serviceRegistryActor: ActorRef + + def ioActor: ActorRef + + def configurationDescriptor: BackendConfigurationDescriptor +} + +/** A default implementation of the cache hit copying params. 
*/ +case class DefaultStandardFileHashingActorParams +( + override val jobDescriptor: BackendJobDescriptor, + override val backendInitializationDataOption: Option[BackendInitializationData], + override val serviceRegistryActor: ActorRef, + override val ioActor: ActorRef, + override val configurationDescriptor: BackendConfigurationDescriptor +) extends StandardFileHashingActorParams + +class DefaultStandardFileHashingActor(standardParams: StandardFileHashingActorParams) extends StandardFileHashingActor(standardParams) with DefaultIoCommandBuilder + +object StandardFileHashingActor { + case class FileHashingFunction(work: (SingleFileHashRequest, LoggingAdapter) => Try[String]) + + sealed trait BackendSpecificHasherCommand { def jobKey: JobKey } + case class SingleFileHashRequest(jobKey: JobKey, hashKey: HashKey, file: WdlFile, initializationData: Option[BackendInitializationData]) extends BackendSpecificHasherCommand + case class HashesNoLongerRequired(jobKey: JobKey) extends BackendSpecificHasherCommand + + sealed trait BackendSpecificHasherResponse extends SuccessfulHashResultMessage + case class FileHashResponse(hashResult: HashResult) extends BackendSpecificHasherResponse { override def hashes = Set(hashResult) } +} + +abstract class StandardFileHashingActor(standardParams: StandardFileHashingActorParams) extends Actor with ActorLogging with JobLogging with IoClientHelper with StandardCachingActorHelper { + this: IoCommandBuilder => + override val ioActor = standardParams.ioActor + override lazy val jobDescriptor: BackendJobDescriptor = standardParams.jobDescriptor + override lazy val backendInitializationDataOption: Option[BackendInitializationData] = standardParams.backendInitializationDataOption + override lazy val serviceRegistryActor: ActorRef = standardParams.serviceRegistryActor + override lazy val configurationDescriptor: BackendConfigurationDescriptor = standardParams.configurationDescriptor + + def customHashStrategy(fileRequest: SingleFileHashRequest): 
Option[Try[String]] = None + + def fileHashingReceive: Receive = { + // Hash Request + case fileRequest: SingleFileHashRequest => + val replyTo = sender() + + customHashStrategy(fileRequest) match { + case Some(Success(result)) => context.parent ! FileHashResponse(HashResult(fileRequest.hashKey, HashValue(result))) + case Some(Failure(failure)) => context.parent ! HashingFailedMessage(fileRequest.hashKey, failure) + case None => asyncHashing(fileRequest, replyTo) + } + + // Hash Success + case (fileHashRequest: SingleFileHashRequest, response @ IoSuccess(_, result: String)) => + context.parent ! FileHashResponse(HashResult(fileHashRequest.hashKey, HashValue(result))) + + // Hash Failure + case (fileHashRequest: SingleFileHashRequest, response @ IoFailure(_, failure: Throwable)) => + context.parent ! HashingFailedMessage(fileHashRequest.hashKey, failure) + + case other => + log.warning(s"Async File hashing actor received unexpected message: $other") + } + + def asyncHashing(fileRequest: SingleFileHashRequest, replyTo: ActorRef) = getPath(fileRequest.file.value) match { + case Success(gcsPath) => sendIoCommandWithContext(hashCommand(gcsPath), fileRequest) + case Failure(failure) => replyTo ! HashingFailedMessage(fileRequest.hashKey, failure) + } + + override def receive: Receive = ioReceive orElse fileHashingReceive + + protected def onTimeout(message: Any, to: ActorRef): Unit = { + context.parent ! 
HashingServiceUnvailable + } +} diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index 6405857af..da2a1ea84 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -85,6 +85,13 @@ system { # Default number of cache read workers number-of-cache-read-workers = 25 + + io { + # Global Throttling - This is mostly useful for GCS and can be adjusted to match + # the quota availble on the GCS API + number-of-requests = 100000 + per = 100 seconds + } } workflow-options { diff --git a/core/src/main/scala/cromwell/core/actor/RobustClientHelper.scala b/core/src/main/scala/cromwell/core/actor/RobustClientHelper.scala new file mode 100644 index 000000000..9a9391a80 --- /dev/null +++ b/core/src/main/scala/cromwell/core/actor/RobustClientHelper.scala @@ -0,0 +1,74 @@ +package cromwell.core.actor + +import akka.actor.{Actor, ActorLogging, ActorRef, Cancellable} +import cromwell.core.actor.RobustClientHelper._ +import cromwell.core.actor.StreamIntegration._ + +import scala.concurrent.duration.{FiniteDuration, _} +import scala.language.postfixOps +import scala.util.Random + +object RobustClientHelper { + case class RequestTimeout(msg: Any, to: ActorRef) + val DefaultRequestLostTimeout = 5 minutes +} + +trait RobustClientHelper { this: Actor with ActorLogging => + private [actor] implicit val robustActorHelperEc = context.dispatcher + + private final val random = new Random() + + // package private for testing + private [core] var timeouts = Map.empty[Any, (Cancellable, FiniteDuration)] + + protected def backpressureTimeout: FiniteDuration = 10 seconds + protected def backpressureRandomizerFactor: Double = 0.5D + + private [core] def robustReceive: Receive = { + case Backpressure(request) => + val snd = sender() + newTimer(request, snd, generateBackpressureTime) + resetTimeout(request, snd) + () + case RequestTimeout(request, to) => onTimeout(request, to) + } + + private final def newTimer(msg: 
Any, to: ActorRef, in: FiniteDuration) = { + context.system.scheduler.scheduleOnce(in, to, msg)(robustActorHelperEc, self) + } + + private [core] def robustSend(msg: Any, to: ActorRef, timeout: FiniteDuration = DefaultRequestLostTimeout): Unit = { + to ! msg + addTimeout(msg, to, timeout) + } + + private final def addTimeout(command: Any, to: ActorRef, timeout: FiniteDuration) = { + val cancellable = newTimer(RequestTimeout(command, to), self, timeout) + timeouts = timeouts + (command -> (cancellable -> timeout)) + } + + protected final def hasTimeout(command: Any) = timeouts.get(command).isDefined + + protected final def cancelTimeout(command: Any) = { + timeouts.get(command) foreach { case (cancellable, _) => cancellable.cancel() } + timeouts = timeouts - command + } + + private final def resetTimeout(command: Any, to: ActorRef) = { + val timeout = timeouts.get(command) map { _._2 } + cancelTimeout(command) + timeout foreach { addTimeout(command, to, _) } + } + + private [actor] final def generateBackpressureTime = { + val backpressureTimeoutInMillis = backpressureTimeout.toMillis + + val delta = backpressureRandomizerFactor * backpressureTimeoutInMillis + val minInterval = backpressureTimeoutInMillis - delta + val maxInterval = backpressureTimeoutInMillis + delta + val randomValue = (minInterval + (random.nextDouble() * (maxInterval - minInterval + 1))).toInt + randomValue.milliseconds + } + + protected def onTimeout(message: Any, to: ActorRef): Unit +} diff --git a/core/src/main/scala/cromwell/core/actor/StreamActorHelper.scala b/core/src/main/scala/cromwell/core/actor/StreamActorHelper.scala new file mode 100644 index 000000000..2b254589c --- /dev/null +++ b/core/src/main/scala/cromwell/core/actor/StreamActorHelper.scala @@ -0,0 +1,94 @@ +package cromwell.core.actor + +import akka.actor.{Actor, ActorLogging} +import akka.pattern.pipe +import akka.stream.{ActorAttributes, ActorMaterializer, QueueOfferResult, Supervision} +import 
akka.stream.QueueOfferResult.{Dropped, Enqueued, QueueClosed} +import akka.stream.scaladsl.{Sink, Source, SourceQueueWithComplete} +import cromwell.core.actor.StreamActorHelper.{ActorRestartException, StreamCompleted, StreamFailed} +import cromwell.core.actor.StreamIntegration._ + +import scala.concurrent.{ExecutionContext, Future} +import scala.util.{Failure, Success} + +object StreamActorHelper { + private [actor] case class StreamFailed(failure: Throwable) + private [actor] case object StreamCompleted + class ActorRestartException(throwable: Throwable) extends RuntimeException(throwable) +} + +trait StreamActorHelper[T <: StreamContext] { this: Actor with ActorLogging => + + implicit val ec: ExecutionContext = context.system.dispatcher + + implicit def materializer: ActorMaterializer + + private val decider: Supervision.Decider = _ => Supervision.Resume + + private val replySink = Sink.foreach[(Any, T)] { + case (response, commandContext) => + val reply = commandContext.clientContext map { (_, response) } getOrElse response + commandContext.replyTo ! reply + } + + protected def actorReceive: Receive + + protected def streamSource: Source[(Any, T), SourceQueueWithComplete[T]] + + override def receive = streamReceive.orElse(actorReceive) + + private [actor] lazy val stream = { + streamSource + .to(replySink) + .withAttributes(ActorAttributes.supervisionStrategy(decider)) + .run() + } + + override def preStart(): Unit = { + stream.watchCompletion() onComplete { + case Success(_) => + self ! StreamCompleted + case Failure(failure) => + self ! 
StreamFailed(failure) + } + } + + def sendToStream(commandContext: T) = { + val enqueue = stream offer commandContext map { + case Enqueued => EnqueueResponse(Enqueued, commandContext) + case other => EnqueueResponse(other, commandContext) + } recoverWith { + case t => Future.successful(FailedToEnqueue(t, commandContext)) + } + + pipe(enqueue) to self + () + } + + private def backpressure(commandContext: StreamContext) = { + val originalRequest = commandContext.clientContext map { _ -> commandContext.request } getOrElse commandContext.request + commandContext.replyTo ! Backpressure(originalRequest) + } + + private def streamReceive: Receive = { + case EnqueueResponse(Enqueued, commandContext: T @unchecked) => // Good ! + case EnqueueResponse(Dropped, commandContext) => backpressure(commandContext) + + // In any of the cases below, the stream is in a failed state, which will he caught by the watchCompletion hook and the + // actor will be restarted + case EnqueueResponse(QueueClosed, commandContext) => backpressure(commandContext) + case EnqueueResponse(QueueOfferResult.Failure(failure), commandContext) => backpressure(commandContext) + case FailedToEnqueue(throwable, commandContext) => backpressure(commandContext) + + // Those 2 cases should never happen, as long as the strategy is Resume, but in case it does... 
+ case StreamCompleted => restart(new IllegalStateException("Stream was completed unexepectedly")) + case StreamFailed(failure) => restart(failure) + } + + /** Throw the exception to force the actor to restart so it can be back in business + * IMPORTANT: Make sure the supervision strategy for this actor is Restart + */ + private def restart(throwable: Throwable) = { + throw new ActorRestartException(throwable) + } +} diff --git a/core/src/main/scala/cromwell/core/actor/StreamIntegration.scala b/core/src/main/scala/cromwell/core/actor/StreamIntegration.scala new file mode 100644 index 000000000..0d83c6ee5 --- /dev/null +++ b/core/src/main/scala/cromwell/core/actor/StreamIntegration.scala @@ -0,0 +1,15 @@ +package cromwell.core.actor + +import akka.actor.ActorRef +import akka.stream.QueueOfferResult + +object StreamIntegration { + trait StreamContext { + def replyTo: ActorRef + def request: Any + def clientContext: Option[Any] = None + } + case class EnqueueResponse(response: QueueOfferResult, request: StreamContext) + case class Backpressure(request: Any) + case class FailedToEnqueue(failure: Throwable, request: StreamContext) +} diff --git a/core/src/main/scala/cromwell/core/callcaching/HashResultMessage.scala b/core/src/main/scala/cromwell/core/callcaching/HashResultMessage.scala index 581326c16..a604cc9b7 100644 --- a/core/src/main/scala/cromwell/core/callcaching/HashResultMessage.scala +++ b/core/src/main/scala/cromwell/core/callcaching/HashResultMessage.scala @@ -8,4 +8,5 @@ sealed trait HashResultMessage trait SuccessfulHashResultMessage extends HashResultMessage { def hashes: Set[HashResult] } -case class HashingFailedMessage(key: HashKey, reason: Throwable) extends HashResultMessage \ No newline at end of file +case class HashingFailedMessage(key: HashKey, reason: Throwable) extends HashResultMessage +case object HashingServiceUnvailable extends HashResultMessage diff --git a/core/src/main/scala/cromwell/core/callcaching/docker/DockerHashActor.scala 
b/core/src/main/scala/cromwell/core/callcaching/docker/DockerHashActor.scala index 5e816de15..74bb1f993 100644 --- a/core/src/main/scala/cromwell/core/callcaching/docker/DockerHashActor.scala +++ b/core/src/main/scala/cromwell/core/callcaching/docker/DockerHashActor.scala @@ -6,6 +6,7 @@ import akka.stream.QueueOfferResult.{Dropped, Enqueued, QueueClosed} import akka.stream._ import akka.stream.scaladsl.{GraphDSL, Merge, Partition, Sink, Source} import com.google.common.cache.CacheBuilder +import cromwell.core.actor.StreamActorHelper.ActorRestartException import cromwell.core.callcaching.docker.DockerHashActor._ import org.slf4j.LoggerFactory @@ -191,7 +192,7 @@ object DockerHashActor { } case class DockerHashBackPressure(originalRequest: DockerHashRequest) extends DockerHashResponse - case class DockerHashActorException(failure: Throwable) extends RuntimeException(failure) + case class DockerHashActorException(failure: Throwable) extends ActorRestartException(failure) /* Internal ADTs */ case class DockerHashContext(request: DockerHashRequest, replyTo: ActorRef) { diff --git a/core/src/main/scala/cromwell/core/callcaching/docker/registryv2/flows/gcr/GcrAbstractFlow.scala b/core/src/main/scala/cromwell/core/callcaching/docker/registryv2/flows/gcr/GcrAbstractFlow.scala index 09e946412..f72cabf07 100644 --- a/core/src/main/scala/cromwell/core/callcaching/docker/registryv2/flows/gcr/GcrAbstractFlow.scala +++ b/core/src/main/scala/cromwell/core/callcaching/docker/registryv2/flows/gcr/GcrAbstractFlow.scala @@ -2,7 +2,7 @@ package cromwell.core.callcaching.docker.registryv2.flows.gcr import akka.http.scaladsl.model.headers.{Authorization, OAuth2BearerToken} import akka.stream.ActorMaterializer -import com.google.api.client.auth.oauth2.Credential +import com.google.auth.oauth2.OAuth2Credentials import cromwell.core.callcaching.docker.DockerHashActor.DockerHashContext import cromwell.core.callcaching.docker.registryv2.DockerRegistryV2AbstractFlow import 
cromwell.core.callcaching.docker.registryv2.DockerRegistryV2AbstractFlow.HttpDockerFlow @@ -12,7 +12,7 @@ import scala.concurrent.duration._ abstract class GcrAbstractFlow(httpClientFlow: HttpDockerFlow, host: String)(implicit ec: ExecutionContext, materializer: ActorMaterializer) extends DockerRegistryV2AbstractFlow(httpClientFlow)(ec, materializer) { - private val AccessTokenAcceptableTTL = 1.minute.toSeconds + private val AccessTokenAcceptableTTL = 1.minute override val registryHostName = host override val authorizationServerHostName = s"$host/v2" @@ -22,14 +22,15 @@ abstract class GcrAbstractFlow(httpClientFlow: HttpDockerFlow, host: String)(imp */ def buildTokenRequestHeaders(dockerHashContext: DockerHashContext) = { dockerHashContext.credentials collect { - case credential: Credential => Authorization(OAuth2BearerToken(freshAccessToken(credential))) + case credentials: OAuth2Credentials => Authorization(OAuth2BearerToken(freshAccessToken(credentials))) } } - private def freshAccessToken(credential: Credential) = { - if (credential.getExpiresInSeconds < AccessTokenAcceptableTTL) { - credential.refreshToken() + private def freshAccessToken(credential: OAuth2Credentials) = { + val expiresIn = (credential.getAccessToken.getExpirationTime.getTime - System.currentTimeMillis()).millis + if (expiresIn.lt(AccessTokenAcceptableTTL)) { + credential.refresh() } - credential.getAccessToken + credential.getAccessToken.getTokenValue } } diff --git a/core/src/main/scala/cromwell/core/io/AsyncIo.scala b/core/src/main/scala/cromwell/core/io/AsyncIo.scala new file mode 100644 index 000000000..37537bdba --- /dev/null +++ b/core/src/main/scala/cromwell/core/io/AsyncIo.scala @@ -0,0 +1,78 @@ +package cromwell.core.io + +import akka.actor.{Actor, ActorLogging, ActorRef} +import cromwell.core.path.BetterFileMethods.OpenOptions +import cromwell.core.path.Path + +import scala.concurrent.duration._ +import scala.concurrent.{Future, Promise} +import scala.language.postfixOps + + +trait 
AsyncIo extends IoClientHelper { this: Actor with ActorLogging with IoCommandBuilder => + + protected val ioTimeout = 3 minutes + + override private [core] def ioResponseReceive: Receive = { + case (promise: Promise[_], ack: IoAck[Any] @unchecked) => + cancelTimeout(promise -> ack.command) + // This is not typesafe. + // However the sendIoCommand method ensures that the command and the promise have the same generic type + // Which means as long as only the sendIoCommand method is used to send requests, and the ioActor honors his contract + // and send back the right context with the right response, the types are virtually guaranteed to match. + promise.asInstanceOf[Promise[Any]].complete(ack.toTry) + () + } + + /** + * IMPORTANT: This loads the entire content of the file into memory ! + * Only use for small files ! + */ + def contentAsStringAsync(path: Path): Future[String] = { + val promise = Promise[String] + sendIoCommandWithPromise(contentAsStringCommand(path), promise) + promise.future + } + + def writeAsync(path: Path, content: String, options: OpenOptions): Future[Unit] = { + val promise = Promise[Unit] + sendIoCommandWithPromise(writeCommand(path, content, options), promise) + promise.future + } + + def sizeAsync(path: Path): Future[Long] = { + val promise = Promise[Long] + sendIoCommandWithPromise(sizeCommand(path), promise) + promise.future + } + + def hashAsync(path: Path): Future[String] = { + val promise = Promise[String] + sendIoCommandWithPromise(hashCommand(path), promise) + promise.future + } + + def deleteAsync(path: Path, swallowIoExceptions: Boolean = false): Future[Unit] = { + val promise = Promise[Unit] + sendIoCommandWithPromise(deleteCommand(path, swallowIoExceptions), promise) + promise.future + } + + def copyAsync(src: Path, dest: Path, overwrite: Boolean = true): Future[Unit] = { + val promise = Promise[Unit] + // Allow for a much larger timeout for copies, as large files can take a while (even on gcs, if they are in different 
locations...) + sendIoCommandWithPromise(copyCommand(src, dest, overwrite), promise, 1 hour) + promise.future + } + + private def sendIoCommandWithPromise[T](command: IoCommand[T], promise: Promise[T], timeout: FiniteDuration = ioTimeout) = { + sendIoCommandWithContext(command, promise, timeout) + } + + override def onTimeout(message: Any, to: ActorRef): Unit = message match { + case (promise: Promise[_], ioAck: IoAck[_]) => + promise.tryFailure(IoTimeout(ioAck.command)) + () + case _ => + } +} diff --git a/core/src/main/scala/cromwell/core/io/IoAck.scala b/core/src/main/scala/cromwell/core/io/IoAck.scala new file mode 100644 index 000000000..c2b48a031 --- /dev/null +++ b/core/src/main/scala/cromwell/core/io/IoAck.scala @@ -0,0 +1,27 @@ +package cromwell.core.io + +import scala.util.{Failure, Success, Try} + +/** + * Generic trait for values returned after a command is executed. Can be Success or Failure. + * + * @tparam T type of the returned value if success + */ +sealed trait IoAck[T] { + /** + * Original command + */ + def command: IoCommand[T] + def toTry: Try[T] +} + +case class IoSuccess[T](command: IoCommand[T], result: T) extends IoAck[T] { + override def toTry = Success(result) +} +case class IoFailure[T](command: IoCommand[T], failure: Throwable) extends IoAck[T] { + override def toTry = Failure(failure) +} + +case class IoRetry[T](command: IoCommand[T], failure: Throwable) extends IoAck[T] { + override def toTry = Failure(failure) +} diff --git a/core/src/main/scala/cromwell/core/io/IoClientHelper.scala b/core/src/main/scala/cromwell/core/io/IoClientHelper.scala new file mode 100644 index 000000000..01cdabf2a --- /dev/null +++ b/core/src/main/scala/cromwell/core/io/IoClientHelper.scala @@ -0,0 +1,29 @@ +package cromwell.core.io + +import akka.actor.{Actor, ActorLogging, ActorRef} +import cromwell.core.actor.RobustClientHelper + +import scala.concurrent.duration.FiniteDuration + +trait IoClientHelper extends RobustClientHelper { this: Actor with 
ActorLogging with IoCommandBuilder => + def ioActor: ActorRef + + private [core] def ioResponseReceive: Receive = { + case ack: IoAck[_] if hasTimeout(ack.command) => + cancelTimeout(ack.command) + receive.apply(ack) + case (context: Any, ack: IoAck[_]) if hasTimeout(context -> ack.command) => + cancelTimeout(context -> ack.command) + receive.apply(context -> ack) + } + + def ioReceive = robustReceive orElse ioResponseReceive + + def sendIoCommand(ioCommand: IoCommand[_], timeout: FiniteDuration = RobustClientHelper.DefaultRequestLostTimeout) = { + robustSend(ioCommand, ioActor, timeout) + } + + def sendIoCommandWithContext[T](ioCommand: IoCommand[_], context: T, timeout: FiniteDuration = RobustClientHelper.DefaultRequestLostTimeout) = { + robustSend(context -> ioCommand, ioActor, timeout) + } +} diff --git a/core/src/main/scala/cromwell/core/io/IoCommand.scala b/core/src/main/scala/cromwell/core/io/IoCommand.scala new file mode 100644 index 000000000..4447dbfc6 --- /dev/null +++ b/core/src/main/scala/cromwell/core/io/IoCommand.scala @@ -0,0 +1,65 @@ +package cromwell.core.io + +import better.files.File.OpenOptions +import com.google.api.client.util.ExponentialBackOff +import cromwell.core.path.Path +import cromwell.core.retry.SimpleExponentialBackoff + +import scala.concurrent.duration.{FiniteDuration, _} +import scala.language.postfixOps + +object IoCommand { + def defaultGoogleBackoff = new ExponentialBackOff.Builder() + .setInitialIntervalMillis((1 second).toMillis.toInt) + .setMaxIntervalMillis((5 minutes).toMillis.toInt) + .setMultiplier(3L) + .setRandomizationFactor(0.2D) + .setMaxElapsedTimeMillis((10 minutes).toMillis.toInt) + .build() + def defaultBackoff = SimpleExponentialBackoff(defaultGoogleBackoff) + + type RetryCommand[T] = (FiniteDuration, IoCommand[T]) +} + +trait IoCommand[+T] { + /** + * Completes the command successfully + * @return a message to be sent back to the sender, if needed + */ + def success[S >: T](value: S): IoSuccess[S] = 
IoSuccess(this, value) + + /** + * Fail the command with an exception + */ + def fail[S >: T](failure: Throwable): IoFailure[S] = IoFailure(this, failure) +} + +/** + * Copy source -> destination + */ +class IoCopyCommand(val source: Path, val destination: Path, val overwrite: Boolean) extends IoCommand[Unit] + +/** + * Read file as a string (load the entire content in memory) + */ +class IoContentAsStringCommand(val file: Path) extends IoCommand[String] + +/** + * Return the size of file + */ +class IoSizeCommand(val file: Path) extends IoCommand[Long] + +/** + * Write content in file + */ +class IoWriteCommand(val file: Path, val content: String, val openOptions: OpenOptions) extends IoCommand[Unit] + +/** + * Delete file + */ +class IoDeleteCommand(val file: Path, val swallowIOExceptions: Boolean) extends IoCommand[Unit] + +/** + * Get Hash value for file + */ +class IoHashCommand(val file: Path) extends IoCommand[String] diff --git a/core/src/main/scala/cromwell/core/io/IoCommandBuilder.scala b/core/src/main/scala/cromwell/core/io/IoCommandBuilder.scala new file mode 100644 index 000000000..d79e67438 --- /dev/null +++ b/core/src/main/scala/cromwell/core/io/IoCommandBuilder.scala @@ -0,0 +1,22 @@ +package cromwell.core.io + +import cromwell.core.path.BetterFileMethods.OpenOptions +import cromwell.core.path.Path + +trait IoCommandBuilder { + def contentAsStringCommand(path: Path): IoContentAsStringCommand + def writeCommand(path: Path, content: String, options: OpenOptions): IoWriteCommand + def sizeCommand(path: Path): IoSizeCommand + def deleteCommand(path: Path, swallowIoExceptions: Boolean): IoDeleteCommand + def copyCommand(src: Path, dest: Path, overwrite: Boolean): IoCopyCommand + def hashCommand(file: Path): IoHashCommand +} + +trait DefaultIoCommandBuilder extends IoCommandBuilder { + def contentAsStringCommand(path: Path) = new IoContentAsStringCommand(path) + def writeCommand(path: Path, content: String, options: OpenOptions) = new IoWriteCommand(path, 
content, options) + def sizeCommand(path: Path) = new IoSizeCommand(path) + def deleteCommand(path: Path, swallowIoExceptions: Boolean) = new IoDeleteCommand(path, swallowIoExceptions) + def copyCommand(src: Path, dest: Path, overwrite: Boolean) = new IoCopyCommand(src, dest, overwrite) + def hashCommand(file: Path) = new IoHashCommand(file) +} diff --git a/core/src/main/scala/cromwell/core/io/IoTimeout.scala b/core/src/main/scala/cromwell/core/io/IoTimeout.scala new file mode 100644 index 000000000..76716f346 --- /dev/null +++ b/core/src/main/scala/cromwell/core/io/IoTimeout.scala @@ -0,0 +1,5 @@ +package cromwell.core.io + +import java.util.concurrent.TimeoutException + +case class IoTimeout(command: IoCommand[_]) extends TimeoutException(s"The I/O operation $command timed out") diff --git a/core/src/main/scala/cromwell/core/io/Throttle.scala b/core/src/main/scala/cromwell/core/io/Throttle.scala new file mode 100644 index 000000000..f59d13895 --- /dev/null +++ b/core/src/main/scala/cromwell/core/io/Throttle.scala @@ -0,0 +1,5 @@ +package cromwell.core.io + +import scala.concurrent.duration.FiniteDuration + +case class Throttle(elements: Int, per: FiniteDuration, maximumBurst: Int) diff --git a/core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala b/core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala deleted file mode 100644 index f9e9b5817..000000000 --- a/core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala +++ /dev/null @@ -1,25 +0,0 @@ -package cromwell.core.path.proxy - -import java.lang.Iterable -import java.nio.file._ -import java.nio.file.attribute.UserPrincipalLookupService -import java.nio.file.spi.FileSystemProvider -import java.util - -class FileSystemProxy(delegate: FileSystem, injectedProvider: FileSystemProvider) extends FileSystem { - - override def provider(): FileSystemProvider = injectedProvider - - /* delegated */ - override def supportedFileAttributeViews(): util.Set[String] = 
delegate.supportedFileAttributeViews() - override def getSeparator: String = delegate.getSeparator - override def getRootDirectories: Iterable[Path] = delegate.getRootDirectories - override def newWatchService(): WatchService = delegate.newWatchService() - override def getFileStores: Iterable[FileStore] = delegate.getFileStores - override def isReadOnly: Boolean = delegate.isReadOnly - override def getPath(first: String, more: String*): Path = new PathProxy(delegate.getPath(first, more: _*), this) - override def isOpen: Boolean = delegate.isOpen - override def close(): Unit = delegate.close() - override def getPathMatcher(syntaxAndPattern: String): PathMatcher = delegate.getPathMatcher(syntaxAndPattern) - override def getUserPrincipalLookupService: UserPrincipalLookupService = delegate.getUserPrincipalLookupService -} diff --git a/core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala b/core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala deleted file mode 100644 index 28428e0a3..000000000 --- a/core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala +++ /dev/null @@ -1,44 +0,0 @@ -package cromwell.core.path.proxy - -import java.io.File -import java.net.URI -import java.nio.file.WatchEvent.{Kind, Modifier} -import java.nio.file._ -import java.util - -import scala.util.Try - -class PathProxy(delegate: Path, injectedFileSystem: FileSystem) extends Path { - def unbox[T](clazz: Class[T]): Try[T] = Try { - clazz.cast(delegate) - } - - override def getFileSystem: FileSystem = injectedFileSystem - - /* delegated */ - override def subpath(beginIndex: Int, endIndex: Int): Path = delegate.subpath(beginIndex, endIndex) - override def toFile: File = delegate.toFile - override def resolveSibling(other: Path): Path = delegate.resolveSibling(other) - override def resolveSibling(other: String): Path = delegate.resolveSibling(other) - override def isAbsolute: Boolean = delegate.isAbsolute - override def getName(index: Int): Path = delegate.getName(index) - 
override def getParent: Path = delegate.getParent - override def toAbsolutePath: Path = delegate.toAbsolutePath - override def relativize(other: Path): Path = delegate.relativize(other) - override def getNameCount: Int = delegate.getNameCount - override def toUri: URI = delegate.toUri - override def compareTo(other: Path): Int = delegate.compareTo(other) - override def register(watcher: WatchService, events: Array[Kind[_]], modifiers: Modifier*): WatchKey = delegate.register(watcher, events, modifiers: _*) - override def register(watcher: WatchService, events: Kind[_]*): WatchKey = delegate.register(watcher, events: _*) - override def getFileName: Path = delegate.getFileName - override def getRoot: Path = delegate.getRoot - override def iterator(): util.Iterator[Path] = delegate.iterator() - override def normalize(): Path = delegate.normalize() - override def endsWith(other: Path): Boolean = delegate.endsWith(other) - override def endsWith(other: String): Boolean = delegate.endsWith(other) - override def resolve(other: Path): Path = delegate.resolve(other) - override def resolve(other: String): Path = delegate.resolve(other) - override def startsWith(other: Path): Boolean = delegate.startsWith(other) - override def startsWith(other: String): Boolean = delegate.startsWith(other) - override def toRealPath(options: LinkOption*): Path = delegate.toRealPath(options: _*) -} diff --git a/core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala b/core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala deleted file mode 100644 index 4d2a481c3..000000000 --- a/core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala +++ /dev/null @@ -1,59 +0,0 @@ -package cromwell.core.path.proxy - -import java.net.URI -import java.nio.channels.SeekableByteChannel -import java.nio.file.DirectoryStream.Filter -import java.nio.file._ -import java.nio.file.attribute.{BasicFileAttributes, FileAttribute, 
FileAttributeView} -import java.nio.file.spi.FileSystemProvider -import java.util - -import akka.actor.ActorSystem -import cromwell.core.path.CustomRetryParams -import cromwell.core.retry.Retry - -import scala.concurrent.{Await, Future} - -class RetryableFileSystemProviderProxy[T <: FileSystemProvider](delegate: T, retryParams: CustomRetryParams = CustomRetryParams.Default)(implicit actorSystem: ActorSystem) extends FileSystemProvider { - private val iOExecutionContext = actorSystem.dispatchers.lookup("akka.dispatchers.io-dispatcher") - - // the nio interface is synchronous so we need to wait for the result - def withRetry[U](f: () => U): U = Await.result( - Retry.withRetry( - () => Future(f())(iOExecutionContext), - retryParams.maxRetries, - retryParams.backoff, - retryParams.isTransient, - retryParams.isFatal - ), - retryParams.timeout - ) - - override def getPath(uri: URI): Path = { - val path = delegate.getPath(uri) - new PathProxy(path, new FileSystemProxy(path.getFileSystem, this)) - } - override def newFileSystem(uri: URI, env: util.Map[String, _]): FileSystem = { - new FileSystemProxy(delegate.newFileSystem(uri, env), this) - } - override def getScheme: String = delegate.getScheme - override def getFileSystem(uri: URI): FileSystem = { - new FileSystemProxy(delegate.getFileSystem(uri), this) - } - override def getFileStore(path: Path): FileStore = delegate.getFileStore(path) - - /* retried operations */ - override def move(source: Path, target: Path, options: CopyOption*): Unit = withRetry { () => delegate.move(source, target, options: _*) } - override def checkAccess(path: Path, modes: AccessMode*): Unit = withRetry { () => delegate.checkAccess(path, modes: _*) } - override def createDirectory(dir: Path, attrs: FileAttribute[_]*): Unit = withRetry { () => delegate.createDirectory(dir, attrs: _*) } - override def newByteChannel(path: Path, options: util.Set[_ <: OpenOption], attrs: FileAttribute[_]*): SeekableByteChannel = withRetry { () => 
delegate.newByteChannel(path, options, attrs: _*) } - override def isHidden(path: Path): Boolean = withRetry { () => delegate.isHidden(path) } - override def copy(source: Path, target: Path, options: CopyOption*): Unit = withRetry { () => delegate.copy(source, target, options: _*) } - override def delete(path: Path): Unit = withRetry { () => delegate.delete(path) } - override def newDirectoryStream(dir: Path, filter: Filter[_ >: Path]): DirectoryStream[Path] = withRetry { () => delegate.newDirectoryStream(dir, filter) } - override def setAttribute(path: Path, attribute: String, value: scala.Any, options: LinkOption*): Unit = withRetry { () => delegate.setAttribute(path, attribute, value, options: _*) } - override def readAttributes[A <: BasicFileAttributes](path: Path, `type`: Class[A], options: LinkOption*): A = withRetry { () => delegate.readAttributes(path, `type`, options: _*) } - override def readAttributes(path: Path, attributes: String, options: LinkOption*): util.Map[String, AnyRef] = withRetry { () => delegate.readAttributes(path, attributes, options: _*) } - override def isSameFile(path: Path, path2: Path): Boolean = withRetry { () => delegate.isSameFile(path, path2) } - override def getFileAttributeView[V <: FileAttributeView](path: Path, `type`: Class[V], options: LinkOption*): V = withRetry { () => delegate.getFileAttributeView(path, `type`, options: _*) } -} diff --git a/core/src/test/scala/cromwell/core/FailIoActor.scala b/core/src/test/scala/cromwell/core/FailIoActor.scala new file mode 100644 index 000000000..c27e5f3e4 --- /dev/null +++ b/core/src/test/scala/cromwell/core/FailIoActor.scala @@ -0,0 +1,19 @@ +package cromwell.core + +import akka.actor.{Actor, Props} +import cromwell.core.FailIoActor._ +import cromwell.core.io._ + +object FailIoActor { + def props() = Props(new FailIoActor()) + val failure = new Exception("Io failure - part of test flow") +} + +class FailIoActor() extends Actor { + override def receive = { + case command: IoCommand[_] 
=> sender() ! IoFailure(command, failure) + + // With context + case (requestContext: Any, command: IoCommand[_]) => sender() ! (requestContext -> IoFailure(command, failure)) + } +} diff --git a/core/src/test/scala/cromwell/core/MockIoActor.scala b/core/src/test/scala/cromwell/core/MockIoActor.scala new file mode 100644 index 000000000..6d525b5bf --- /dev/null +++ b/core/src/test/scala/cromwell/core/MockIoActor.scala @@ -0,0 +1,25 @@ +package cromwell.core + +import akka.actor.{Actor, Props} +import cromwell.core.io._ + +object MockIoActor { + def props(returnCode: String = "0", stderrSize: Long = 0L) = Props(new MockIoActor(returnCode, stderrSize)) +} + +class MockIoActor(returnCode: String, stderrSize: Long) extends Actor { + override def receive = { + case command: IoCopyCommand => sender() ! IoSuccess(command, ()) + case command: IoWriteCommand => sender() ! IoSuccess(command, ()) + case command: IoDeleteCommand => sender() ! IoSuccess(command, ()) + case command: IoSizeCommand => sender() ! IoSuccess(command, 0L) + case command: IoContentAsStringCommand => sender() ! IoSuccess(command, "0") + + // With context + case (requestContext: Any, command: IoCopyCommand) => sender() ! (requestContext -> IoSuccess(command, ())) + case (requestContext: Any, command: IoWriteCommand) => sender() ! (requestContext -> IoSuccess(command, ())) + case (requestContext: Any, command: IoDeleteCommand) => sender() ! (requestContext -> IoSuccess(command, ())) + case (requestContext: Any, command: IoSizeCommand) => sender() ! (requestContext -> IoSuccess(command, stderrSize)) + case (requestContext: Any, command: IoContentAsStringCommand) => sender() ! 
(requestContext -> IoSuccess(command, returnCode)) + } +} diff --git a/core/src/test/scala/cromwell/core/SimpleIoActor.scala b/core/src/test/scala/cromwell/core/SimpleIoActor.scala new file mode 100644 index 000000000..2daaa2b95 --- /dev/null +++ b/core/src/test/scala/cromwell/core/SimpleIoActor.scala @@ -0,0 +1,100 @@ +package cromwell.core + +import akka.actor.{Actor, Props} +import cromwell.core.io._ + +import scala.io.Codec +import scala.util.{Failure, Success, Try} + +object SimpleIoActor { + def props = Props(new SimpleIoActor) +} + +class SimpleIoActor extends Actor { + + override def receive = { + case command: IoCopyCommand => + + Try(command.source.copyTo(command.destination, command.overwrite)) match { + case Success(_) => sender() ! IoSuccess(command, ()) + case Failure(failure) => sender() ! IoFailure(command, failure) + } + + case command: IoWriteCommand => + + Try(command.file.write(command.content)(command.openOptions, Codec.UTF8)) match { + case Success(_) => sender() ! IoSuccess(command, ()) + case Failure(failure) => sender() ! IoFailure(command, failure) + } + + case command: IoDeleteCommand => + + Try(command.file.delete(command.swallowIOExceptions)) match { + case Success(_) => sender() ! IoSuccess(command, ()) + case Failure(failure) => sender() ! IoFailure(command, failure) + } + + case command: IoSizeCommand => + + Try(command.file.size) match { + case Success(size) => sender() ! IoSuccess(command, size) + case Failure(failure) => sender() ! IoFailure(command, failure) + } + + case command: IoContentAsStringCommand => + + Try(command.file.contentAsString) match { + case Success(content) => sender() ! IoSuccess(command, content) + case Failure(failure) => sender() ! IoFailure(command, failure) + } + + case command: IoHashCommand => + Try(command.file.md5) match { + case Success(hash) => sender() ! IoSuccess(command, hash) + case Failure(failure) => sender() ! 
IoFailure(command, failure) + } + + // With context + case (requestContext: Any, command: IoCopyCommand) => + + Try(command.source.copyTo(command.destination, command.overwrite)) match { + case Success(_) => sender() ! (requestContext -> IoSuccess(command, ())) + case Failure(failure) => sender() ! (requestContext -> IoFailure(command, failure)) + } + + case (requestContext: Any, command: IoWriteCommand) => + + Try(command.file.write(command.content)) match { + case Success(_) => sender() ! (requestContext -> IoSuccess(command, ())) + case Failure(failure) => sender() ! (requestContext -> IoFailure(command, failure)) + } + + case (requestContext: Any, command: IoDeleteCommand) => + + Try(command.file.delete(command.swallowIOExceptions)) match { + case Success(_) => sender() ! (requestContext -> IoSuccess(command, ())) + case Failure(failure) => sender() ! (requestContext -> IoFailure(command, failure)) + } + + case (requestContext: Any, command: IoSizeCommand) => + + Try(command.file.size) match { + case Success(size) => sender() ! (requestContext -> IoSuccess(command, size)) + case Failure(failure) => sender() ! (requestContext -> IoFailure(command, failure)) + } + + case (requestContext: Any, command: IoContentAsStringCommand) => + + Try(command.file.contentAsString) match { + case Success(content) => sender() ! (requestContext -> IoSuccess(command, content)) + case Failure(failure) => sender() ! (requestContext -> IoFailure(command, failure)) + } + + case (requestContext: Any, command: IoHashCommand) => + + Try(command.file.md5) match { + case Success(hash) => sender() ! (requestContext -> IoSuccess(command, hash)) + case Failure(failure) => sender() ! 
(requestContext -> IoFailure(command, failure)) + } + } +} diff --git a/core/src/test/scala/cromwell/core/TestKitSuite.scala b/core/src/test/scala/cromwell/core/TestKitSuite.scala index 751949a7f..c29a0338e 100644 --- a/core/src/test/scala/cromwell/core/TestKitSuite.scala +++ b/core/src/test/scala/cromwell/core/TestKitSuite.scala @@ -22,6 +22,8 @@ abstract class TestKitSuite(actorSystemName: String = TestKitSuite.randomName, } val emptyActor = system.actorOf(Props.empty) + val mockIoActor = system.actorOf(MockIoActor.props()) + val failIoActor = system.actorOf(FailIoActor.props()) } object TestKitSuite { diff --git a/core/src/test/scala/cromwell/core/actor/RobustClientHelperSpec.scala b/core/src/test/scala/cromwell/core/actor/RobustClientHelperSpec.scala new file mode 100644 index 000000000..64d1d6f71 --- /dev/null +++ b/core/src/test/scala/cromwell/core/actor/RobustClientHelperSpec.scala @@ -0,0 +1,187 @@ +package cromwell.core.actor + +import akka.actor.{Actor, ActorLogging, ActorRef} +import akka.testkit.{ImplicitSender, TestActorRef, TestProbe} +import cromwell.core.TestKitSuite +import cromwell.core.actor.StreamIntegration.Backpressure +import org.scalatest.{FlatSpecLike, Matchers} + +import scala.concurrent.duration._ +import scala.language.postfixOps + +class RobustClientHelperSpec extends TestKitSuite with FlatSpecLike with Matchers with ImplicitSender { + behavior of "RobustClientHelper" + + it should "handle Backpressure responses" in { + val remoteActor = TestProbe() + val delegateActor = TestProbe() + + val margin = 1 second + val backpressureTimeout = 1 second + val noResponseTimeout = 10 seconds + val testActor = TestActorRef(new TestActor(delegateActor.ref, backpressureTimeout, noResponseTimeout)) + + val messageToSend = TestActor.TestMessage("hello") + + //send message + testActor.underlyingActor.sendMessage(messageToSend, remoteActor.ref) + + // remote actor receives message + remoteActor.expectMsg(messageToSend) + + // remote actor sends a 
backpressure message + remoteActor.reply(Backpressure(messageToSend)) + + // remote actor expects request again after backpressureTimeout + remoteActor.expectMsg(backpressureTimeout + margin, messageToSend) + + // remote actor replies + remoteActor.reply("world") + + // delegate actor receives response + delegateActor.expectMsg("world") + + // remote actor doesn't receives new messages + remoteActor.expectNoMsg() + + // Wait long enough that to make sure that we won't receive a ServiceUnreachable message, meaning the timeout timer + // has been cancelled. Note that it is the responsibility of the actor to cancel it, the RobustClientHelper does not + // handle that part. + delegateActor.expectNoMsg(8 seconds) + } + + it should "handle a successful response" in { + val remoteActor = TestProbe() + val delegateActor = TestProbe() + + val backpressureTimeout = 1 second + val noResponseTimeout = 20 seconds + val testActor = TestActorRef(new TestActor(delegateActor.ref, backpressureTimeout, noResponseTimeout)) + + val messageToSend = TestActor.TestMessage("hello") + + // send message + testActor.underlyingActor.sendMessage(messageToSend, remoteActor.ref) + + // remote actor receives message + remoteActor.expectMsg(messageToSend) + + // remote actor replies + remoteActor.reply("world") + + // delegate receives response + delegateActor.expectMsg("world") + + // remote actor doesn't receives new messages + remoteActor.expectNoMsg() + delegateActor.expectNoMsg() + } + + it should "timeout if no response" in { + val remoteActor = TestProbe() + val delegateActor = TestProbe() + + val backpressureTimeout = 1 second + val noResponseTimeout = 2 seconds + val testActor = TestActorRef(new TestActor(delegateActor.ref, backpressureTimeout, noResponseTimeout)) + + val messageToSend = TestActor.TestMessage("hello") + + // send message + testActor.underlyingActor.sendMessage(messageToSend, remoteActor.ref) + + // remote actor receives message + remoteActor.expectMsg(messageToSend) + + // 
remote actor does not reply + + // delegate receives ServiceUnreachable message + delegateActor.expectMsg(TestActor.ServiceUnreachable) + + // remote actor doesn't receives new messages + remoteActor.expectNoMsg() + delegateActor.expectNoMsg() + } + + it should "reset timeout when backpressured is received" in { + val remoteActor = TestProbe() + val delegateActor = TestProbe() + + val margin = 500 millis + val backpressureTimeout = 1 second + val noResponseTimeout = 3 seconds + val testActor = TestActorRef(new TestActor(delegateActor.ref, backpressureTimeout, noResponseTimeout)) + + val messageToSend = TestActor.TestMessage("hello") + + // send message + testActor.underlyingActor.sendMessage(messageToSend, remoteActor.ref) + + // remote actor receives message + remoteActor.expectMsg(messageToSend) + + // remote actor sends a backpressure message + remoteActor.reply(Backpressure(messageToSend)) + + // remote actor expects request again after backpressureTimeout + remoteActor.expectMsg(backpressureTimeout + margin, messageToSend) + + // remote actor replies + remoteActor.reply("world") + + // delegate receives ServiceUnreachable message + delegateActor.expectMsg("world") + + // remote actor doesn't receives new messages + remoteActor.expectNoMsg() + // ensure that no ServiceUnreachable message was sent + delegateActor.expectNoMsg(4 seconds) + } + + it should "randomize backpressure timings" in { + val delegateActor = TestProbe() + val backpressureTimeout = 20 seconds + val noResponseTimeout = 3 seconds + val randomizeFactor = 0.2D + + val testActor = TestActorRef(new TestActor(delegateActor.ref, backpressureTimeout, noResponseTimeout, randomizeFactor)).underlyingActor + + val randomBackpressures = 0 until 10 map { _ => + val time = testActor.generateBackpressureTime + time.gt(16.seconds) shouldBe true + time.lt(24.seconds) shouldBe true + time + } + + // They should all be different + randomBackpressures.distinct.size shouldBe 10 + } + + private [actor] object 
TestActor { + case class TestMessage(v: String) + case object ServiceUnreachable + } + private class TestActor(delegateTo: ActorRef, + override val backpressureTimeout: FiniteDuration, + noResponseTimeout: FiniteDuration, + override val backpressureRandomizerFactor: Double = 0.5D) extends Actor with ActorLogging with RobustClientHelper { + + context.become(robustReceive orElse receive) + var messageSent: Any = _ + + override def receive: Receive = { + case message => + cancelTimeout(messageSent) + delegateTo ! message + } + + def sendMessage(message: Any, to: ActorRef) = { + messageSent = message + robustSend(message, to, noResponseTimeout) + } + + override protected def onTimeout(message: Any, to: ActorRef): Unit = { + delegateTo ! TestActor.ServiceUnreachable + } + } +} diff --git a/core/src/test/scala/cromwell/core/actor/StreamActorHelperSpec.scala b/core/src/test/scala/cromwell/core/actor/StreamActorHelperSpec.scala new file mode 100644 index 000000000..8c3390e15 --- /dev/null +++ b/core/src/test/scala/cromwell/core/actor/StreamActorHelperSpec.scala @@ -0,0 +1,70 @@ +package cromwell.core.actor + +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import akka.stream.QueueOfferResult.Dropped +import akka.stream.scaladsl.Source +import akka.stream.{ActorMaterializer, OverflowStrategy} +import akka.testkit.{ImplicitSender, TestActorRef} +import cromwell.core.TestKitSuite +import cromwell.core.actor.StreamIntegration._ +import cromwell.core.actor.TestStreamActor.{TestStreamActorCommand, TestStreamActorContext} +import org.scalatest.{FlatSpecLike, Matchers} + +class StreamActorHelperSpec extends TestKitSuite with FlatSpecLike with Matchers with ImplicitSender { + behavior of "StreamActorHelper" + + implicit val materializer = ActorMaterializer() + + it should "catch EnqueueResponse message" in { + val actor = TestActorRef(Props(new TestStreamActor(1))) + val command = new TestStreamActorCommand + actor ! 
command + expectMsg("hello") + system stop actor + } + + it should "send a backpressure message when messages are dropped by the queue" in { + val actor = TestActorRef(new TestStreamActor(1)) + val command = new TestStreamActorCommand + + actor ! EnqueueResponse(Dropped, TestStreamActorContext(command, self, None)) + + expectMsg(Backpressure(command)) + + system stop actor + } + + it should "send a backpressure message with context when messages are dropped by the queue" in { + val actor = TestActorRef(new TestStreamActor(1)) + val command = new TestStreamActorCommand + + actor ! EnqueueResponse(Dropped, TestStreamActorContext(command, self, Option("context"))) + + expectMsg(Backpressure("context" -> command)) + + system stop actor + } +} + + +private object TestStreamActor { + class TestStreamActorCommand + case class TestStreamActorContext(request: TestStreamActorCommand, replyTo: ActorRef, override val clientContext: Option[Any]) extends StreamContext +} + +private class TestStreamActor(queueSize: Int)(implicit override val materializer: ActorMaterializer) extends Actor with ActorLogging with StreamActorHelper[TestStreamActorContext] { + + override protected def actorReceive: Receive = { + case command: TestStreamActorCommand => + val replyTo = sender() + val commandContext = TestStreamActorContext(command, replyTo, None) + sendToStream(commandContext) + case (userContext: Any, command: TestStreamActorCommand) => + val replyTo = sender() + val commandContext = TestStreamActorContext(command, replyTo, Option(userContext)) + sendToStream(commandContext) + } + + override protected val streamSource = Source.queue[TestStreamActorContext](queueSize, OverflowStrategy.dropNew) + .map{ ("hello", _) } +} diff --git a/core/src/test/scala/cromwell/core/io/AsyncIoSpec.scala b/core/src/test/scala/cromwell/core/io/AsyncIoSpec.scala new file mode 100644 index 000000000..6cb519079 --- /dev/null +++ b/core/src/test/scala/cromwell/core/io/AsyncIoSpec.scala @@ -0,0 +1,116 @@ 
+package cromwell.core.io + +import java.nio.file.{FileAlreadyExistsException, NoSuchFileException} +import java.util.UUID + +import akka.actor.{Actor, ActorLogging, ActorRef} +import akka.testkit.TestActorRef +import cromwell.core.path.DefaultPathBuilder +import cromwell.core.{SimpleIoActor, TestKitSuite} +import org.scalatest.mockito.MockitoSugar +import org.scalatest.{AsyncFlatSpecLike, Matchers} + +class AsyncIoSpec extends TestKitSuite with AsyncFlatSpecLike with Matchers with MockitoSugar { + + behavior of "AsyncIoSpec" + + val simpleIoActor = system.actorOf(SimpleIoActor.props) + + override def afterAll() = { + system stop simpleIoActor + super.afterAll() + } + + it should "write asynchronously" in { + val testActor = TestActorRef(new AsyncIoTestActor(simpleIoActor)) + + val testPath = DefaultPathBuilder.createTempFile() + + testActor.underlyingActor.writeAsync(testPath, "hello", Seq.empty) map { _ => + assert(testPath.contentAsString == "hello") + } + } + + it should "read asynchronously" in { + val testActor = TestActorRef(new AsyncIoTestActor(simpleIoActor)) + + val testPath = DefaultPathBuilder.createTempFile() + testPath.write("hello") + + testActor.underlyingActor.contentAsStringAsync(testPath) map { result => + assert(result == "hello") + } + } + + it should "get size asynchronously" in { + val testActor = TestActorRef(new AsyncIoTestActor(simpleIoActor)) + + val testPath = DefaultPathBuilder.createTempFile() + testPath.write("hello") + + testActor.underlyingActor.sizeAsync(testPath) map { size => + assert(size == 5) + } + } + + it should "get hash asynchronously" in { + val testActor = TestActorRef(new AsyncIoTestActor(simpleIoActor)) + + val testPath = DefaultPathBuilder.createTempFile() + testPath.write("hello") + + testActor.underlyingActor.hashAsync(testPath) map { hash => + assert(hash == "5D41402ABC4B2A76B9719D911017C592") + } + } + + it should "copy asynchronously" in { + val testActor = TestActorRef(new AsyncIoTestActor(simpleIoActor)) + + 
val testPath = DefaultPathBuilder.createTempFile() + val testCopyPath = testPath.sibling(UUID.randomUUID().toString) + + testActor.underlyingActor.copyAsync(testPath, testCopyPath) map { hash => + assert(testCopyPath.exists) + } + + testPath.write("new text") + + // Honor overwrite true + testActor.underlyingActor.copyAsync(testPath, testCopyPath, overwrite = true) map { hash => + assert(testCopyPath.exists) + assert(testCopyPath.contentAsString == "new text") + } + + // Honor overwrite false + recoverToSucceededIf[FileAlreadyExistsException] { testActor.underlyingActor.copyAsync(testPath, testCopyPath, overwrite = false) } + } + + it should "delete asynchronously" in { + val testActor = TestActorRef(new AsyncIoTestActor(simpleIoActor)) + + val testPath = DefaultPathBuilder.createTempFile() + + testActor.underlyingActor.deleteAsync(testPath) map { _ => + assert(!testPath.exists) + } + + // Honor swallow exception true + testActor.underlyingActor.deleteAsync(testPath, swallowIoExceptions = true) map { _ => + assert(!testPath.exists) + } + + // Honor swallow exception false + recoverToSucceededIf[NoSuchFileException] { testActor.underlyingActor.deleteAsync(testPath, swallowIoExceptions = false) } + } + + private class AsyncIoTestActor(override val ioActor: ActorRef) extends Actor with ActorLogging with AsyncIo with DefaultIoCommandBuilder { + + context.become(ioReceive orElse receive) + + override def receive: Receive = { + case _ => + } + } + +} diff --git a/core/src/test/scala/cromwell/core/io/IoClientHelperSpec.scala b/core/src/test/scala/cromwell/core/io/IoClientHelperSpec.scala new file mode 100644 index 000000000..6f92f3003 --- /dev/null +++ b/core/src/test/scala/cromwell/core/io/IoClientHelperSpec.scala @@ -0,0 +1,106 @@ +package cromwell.core.io + +import akka.actor.{Actor, ActorLogging, ActorRef} +import akka.testkit.{TestActorRef, TestProbe} +import cromwell.core.TestKitSuite +import cromwell.core.path.Path +import org.scalatest.mockito.MockitoSugar +import 
org.scalatest.{FlatSpecLike, Matchers} + +import scala.concurrent.duration.{FiniteDuration, _} +import scala.language.postfixOps + +class IoClientHelperSpec extends TestKitSuite with FlatSpecLike with Matchers with MockitoSugar { + + behavior of "IoClientHelperSpec" + + it should "intercept IoAcks and cancel timers" in { + val ioActorProbe = TestProbe() + val delegateProbe = TestProbe() + val backpressureTimeout = 1 second + val noResponseTimeout = 3 seconds + + val testActor = TestActorRef(new IoClientHelperTestActor(ioActorProbe.ref, delegateProbe.ref, backpressureTimeout, noResponseTimeout)) + + val command = new IoSizeCommand(mock[Path]) + val response = IoSuccess(command, 5) + + // Send the command + testActor.underlyingActor.sendMessage(command) + + // Io actor receives the command + ioActorProbe.expectMsg(command) + + // Io actor replies + ioActorProbe.reply(response) + + // delegate should receive the response + delegateProbe.expectMsg(response) + + // And nothing else, meaning the timeout timer has been cancelled + delegateProbe.expectNoMsg() + + // timeouts map should be empty + testActor.underlyingActor.timeouts.isEmpty shouldBe true + } + + it should "intercept IoAcks and cancel timers for a command with context" in { + val ioActorProbe = TestProbe() + val delegateProbe = TestProbe() + val backpressureTimeout = 1 second + val noResponseTimeout = 3 seconds + + val testActor = TestActorRef(new IoClientHelperTestActor(ioActorProbe.ref, delegateProbe.ref, backpressureTimeout, noResponseTimeout)) + + val commandContext = "context" + val command = new IoSizeCommand(mock[Path]) + val response = IoSuccess(command, 5) + + // Send the command + testActor.underlyingActor.sendMessageWithContext(commandContext, command) + + // Io actor receives the command + ioActorProbe.expectMsg(commandContext -> command) + + // Io actor replies + ioActorProbe.reply(commandContext -> response) + + // delegate should receive the response + delegateProbe.expectMsgPF(1 second) { + 
case (contextReceived, responseReceived) if contextReceived == "context" && responseReceived == response => + } + + // And nothing else, meaning the timeout timer has been cancelled + delegateProbe.expectNoMsg() + + // timeouts map should be empty + testActor.underlyingActor.timeouts.isEmpty shouldBe true + } + + private case object ServiceUnreachable + + private class IoClientHelperTestActor(override val ioActor: ActorRef, + delegateTo: ActorRef, + override val backpressureTimeout: FiniteDuration, + noResponseTimeout: FiniteDuration) extends Actor with ActorLogging with IoClientHelper with DefaultIoCommandBuilder { + + context.become(ioReceive orElse receive) + + override def receive: Receive = { + case message => delegateTo ! message + } + + def sendMessage(command: IoCommand[_]) = { + sendIoCommand(command, noResponseTimeout) + } + + def sendMessageWithContext(context: Any, command: IoCommand[_]) = { + sendIoCommandWithContext(command, context, noResponseTimeout) + } + + override protected def onTimeout(message: Any, to: ActorRef): Unit = { + delegateTo ! 
ServiceUnreachable + } + } + +} diff --git a/core/src/test/scala/cromwell/core/path/proxy/RetryableFileSystemProxySpec.scala b/core/src/test/scala/cromwell/core/path/proxy/RetryableFileSystemProxySpec.scala deleted file mode 100644 index 71b0a7c96..000000000 --- a/core/src/test/scala/cromwell/core/path/proxy/RetryableFileSystemProxySpec.scala +++ /dev/null @@ -1,280 +0,0 @@ -package cromwell.core.path.proxy - -import java.io.FileNotFoundException -import java.nio.channels.SeekableByteChannel -import java.nio.file.DirectoryStream.Filter -import java.nio.file.attribute.{BasicFileAttributes, FileAttributeView} -import java.nio.file.spi.FileSystemProvider -import java.nio.file.{DirectoryStream, OpenOption, Path, StandardOpenOption} -import java.util.concurrent.TimeoutException - -import cromwell.core.path.CustomRetryParams -import cromwell.core.retry.Backoff -import cromwell.core.{CromwellFatalException, TestKitSuite} -import org.mockito.Matchers._ -import org.mockito.Mockito._ -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer -import org.scalatest.{FlatSpecLike, Matchers} - -import scala.concurrent.duration._ -import scala.language.postfixOps - -class RetryableFileSystemProxySpec extends TestKitSuite with FlatSpecLike with Matchers { - - behavior of "RetryableFileSystemProxySpec" - - case class ThrowParams(exception: Exception, nbTimes: Int) - - abstract class FileSystemAnswer[T](delay: Option[Duration] = None, - throws: Option[ThrowParams] = None) extends Answer[T] { - - var nbThrows = 0 - - def delayAndOrThrow() = { - delay foreach { d => Thread.sleep(d.toMillis) } - throws foreach { e => - if (nbThrows < e.nbTimes) { - nbThrows = nbThrows + 1 - throw e.exception - } - } - } - } - - def mockFileSystem(delay: Option[Duration] = None, - throws: Option[ThrowParams] = None): FileSystemProvider = { - - val provider = mock(classOf[FileSystemProvider]) - - def answerUnit: Answer[Unit] = new FileSystemAnswer[Unit](delay, throws) { - 
override def answer(invocation: InvocationOnMock): Unit = delayAndOrThrow() - } - - def answerBoolean: Answer[Boolean] = new FileSystemAnswer[Boolean](delay, throws) { - override def answer(invocation: InvocationOnMock): Boolean = { - delayAndOrThrow() - true - } - } - - def answerSeekableByteChannel: Answer[SeekableByteChannel] = new FileSystemAnswer[SeekableByteChannel](delay, throws) { - override def answer(invocation: InvocationOnMock): SeekableByteChannel = { - delayAndOrThrow() - mock(classOf[SeekableByteChannel]) - } - } - - def answerDirectoryStream: Answer[DirectoryStream[Path]] = new FileSystemAnswer[DirectoryStream[Path]](delay, throws) { - override def answer(invocation: InvocationOnMock): DirectoryStream[Path] = { - delayAndOrThrow() - mock(classOf[DirectoryStream[Path]]) - } - } - - def answerBasicFileAttributes: Answer[BasicFileAttributes] = new FileSystemAnswer[BasicFileAttributes](delay, throws) { - override def answer(invocation: InvocationOnMock): BasicFileAttributes = { - delayAndOrThrow() - mock(classOf[BasicFileAttributes]) - } - } - - def answerMap: Answer[java.util.Map[String, AnyRef]] = new FileSystemAnswer[java.util.Map[String, AnyRef]](delay, throws) { - override def answer(invocation: InvocationOnMock): java.util.Map[String, AnyRef] = { - delayAndOrThrow() - new java.util.HashMap[String, AnyRef]() - } - } - - def answerFileAttributeView: Answer[FileAttributeView] = new FileSystemAnswer[FileAttributeView](delay, throws) { - override def answer(invocation: InvocationOnMock): FileAttributeView = { - delayAndOrThrow() - mock(classOf[FileAttributeView]) - } - } - - import java.nio.file.Path - - when(provider.move(any[Path], any[Path])).thenAnswer(answerUnit) - when(provider.checkAccess(any[Path])).thenAnswer(answerUnit) - when(provider.createDirectory(any[Path])).thenAnswer(answerUnit) - when(provider.newByteChannel(any[Path], any[java.util.Set[OpenOption]])).thenAnswer(answerSeekableByteChannel) - 
when(provider.isHidden(any[Path])).thenAnswer(answerBoolean) - when(provider.copy(any[Path], any[Path])).thenAnswer(answerUnit) - when(provider.delete(any[Path])).thenAnswer(answerUnit) - when(provider.newDirectoryStream(any[Path], any[Filter[Path]]())).thenAnswer(answerDirectoryStream) - when(provider.setAttribute(any[Path], any[String], any[Object])).thenAnswer(answerUnit) - when(provider.readAttributes(any[Path], any[String])).thenAnswer(answerMap) - when(provider.readAttributes(any[Path], any[Class[BasicFileAttributes]])).thenAnswer(answerBasicFileAttributes) - when(provider.isSameFile(any[Path], any[Path])).thenAnswer(answerBoolean) - when(provider.getFileAttributeView(any[Path], any[Class[FileAttributeView]])).thenAnswer(answerFileAttributeView) - - provider - } - - val testRetryParams = CustomRetryParams.Default.copy(backoff = new Backoff { - override def next: Backoff = this - override def backoffMillis: Long = 0 - }) - - val pathMock = mock(classOf[Path]) - - it should "timeout if the operation takes too long" ignore { - val retryParams = testRetryParams.copy(timeout = 100 millis) - val mockFs = mockFileSystem(delay = Option(200 millis)) - val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) - - a[TimeoutException] shouldBe thrownBy(retryableFs.move(pathMock, pathMock)) - a[TimeoutException] shouldBe thrownBy(retryableFs.checkAccess(pathMock)) - a[TimeoutException] shouldBe thrownBy(retryableFs.createDirectory(pathMock)) - a[TimeoutException] shouldBe thrownBy(retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]]))) - a[TimeoutException] shouldBe thrownBy(retryableFs.isHidden(pathMock)) - a[TimeoutException] shouldBe thrownBy(retryableFs.copy(pathMock, pathMock)) - a[TimeoutException] shouldBe thrownBy(retryableFs.delete(pathMock)) - a[TimeoutException] shouldBe thrownBy(retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]]))) - a[TimeoutException] shouldBe 
thrownBy(retryableFs.setAttribute(pathMock, "", "")) - a[TimeoutException] shouldBe thrownBy(retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes])) - a[TimeoutException] shouldBe thrownBy(retryableFs.readAttributes(pathMock, "")) - a[TimeoutException] shouldBe thrownBy(retryableFs.isSameFile(pathMock, pathMock)) - a[TimeoutException] shouldBe thrownBy(retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView])) - } - - it should "retry on failure and finally succeed if under retry max" in { - val retryParams = testRetryParams.copy(maxRetries = Option(4)) - val mockFs = mockFileSystem(throws = Option(ThrowParams(new Exception(), nbTimes = 2))) - val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) - - retryableFs.move(pathMock, pathMock) - retryableFs.checkAccess(pathMock) - retryableFs.createDirectory(pathMock) - retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]])) - retryableFs.isHidden(pathMock) - retryableFs.copy(pathMock, pathMock) - retryableFs.delete(pathMock) - retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]])) - retryableFs.setAttribute(pathMock, "", "") - retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes]) - retryableFs.readAttributes(pathMock, "") - retryableFs.isSameFile(pathMock, pathMock) - retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView]) - - verify(mockFs, times(3)).move(any[Path], any[Path]) - verify(mockFs, times(3)).checkAccess(any[Path]) - verify(mockFs, times(3)).createDirectory(any[Path]) - verify(mockFs, times(3)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) - verify(mockFs, times(3)).isHidden(any[Path]) - verify(mockFs, times(3)).copy(any[Path], any[Path]) - verify(mockFs, times(3)).delete(any[Path]) - verify(mockFs, times(3)).newDirectoryStream(any[Path], any[Filter[Path]]()) - verify(mockFs, times(3)).setAttribute(any[Path], any[String], any[Object]) - verify(mockFs, 
times(3)).readAttributes(any[Path], any[String]) - verify(mockFs, times(3)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) - verify(mockFs, times(3)).isSameFile(any[Path], any[Path]) - verify(mockFs, times(3)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) - } - - it should "retry on failure and fail if over retry max" in { - val retryParams = testRetryParams.copy(maxRetries = Option(2)) - val mockFs = mockFileSystem(throws = Option(ThrowParams(new IllegalArgumentException(), nbTimes = 3))) - val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) - - (the [CromwellFatalException] thrownBy retryableFs.move(pathMock, pathMock)).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] thrownBy retryableFs.checkAccess(pathMock)).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] thrownBy retryableFs.createDirectory(pathMock)).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] thrownBy retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]]))).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] thrownBy retryableFs.isHidden(pathMock)).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] thrownBy retryableFs.copy(pathMock, pathMock)).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] thrownBy retryableFs.delete(pathMock)).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] thrownBy retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]]))).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] thrownBy retryableFs.setAttribute(pathMock, "", "")).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] thrownBy retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes])).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] 
thrownBy retryableFs.readAttributes(pathMock, "")).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] thrownBy retryableFs.isSameFile(pathMock, pathMock)).getCause shouldBe a[IllegalArgumentException] - (the [CromwellFatalException] thrownBy retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView])).getCause shouldBe a[IllegalArgumentException] - - verify(mockFs, times(3)).move(any[Path], any[Path]) - verify(mockFs, times(3)).checkAccess(any[Path]) - verify(mockFs, times(3)).createDirectory(any[Path]) - verify(mockFs, times(3)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) - verify(mockFs, times(3)).isHidden(any[Path]) - verify(mockFs, times(3)).copy(any[Path], any[Path]) - verify(mockFs, times(3)).delete(any[Path]) - verify(mockFs, times(3)).newDirectoryStream(any[Path], any[Filter[Path]]()) - verify(mockFs, times(3)).setAttribute(any[Path], any[String], any[Object]) - verify(mockFs, times(3)).readAttributes(any[Path], any[String]) - verify(mockFs, times(3)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) - verify(mockFs, times(3)).isSameFile(any[Path], any[Path]) - verify(mockFs, times(3)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) - } - - it should "ignore transient exceptions" in { - def isTransient(t: Throwable) = t.isInstanceOf[FileNotFoundException] - val retryParams = testRetryParams.copy(maxRetries = Option(1), isTransient = isTransient) - val mockFs = mockFileSystem(throws = Option(ThrowParams(new FileNotFoundException(), nbTimes = 2))) - val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) - - retryableFs.move(pathMock, pathMock) - retryableFs.checkAccess(pathMock) - retryableFs.createDirectory(pathMock) - retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]])) - retryableFs.isHidden(pathMock) - retryableFs.copy(pathMock, pathMock) - retryableFs.delete(pathMock) - retryableFs.newDirectoryStream(pathMock, 
mock(classOf[Filter[Path]])) - retryableFs.setAttribute(pathMock, "", "") - retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes]) - retryableFs.readAttributes(pathMock, "") - retryableFs.isSameFile(pathMock, pathMock) - retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView]) - - verify(mockFs, times(3)).move(any[Path], any[Path]) - verify(mockFs, times(3)).checkAccess(any[Path]) - verify(mockFs, times(3)).createDirectory(any[Path]) - verify(mockFs, times(3)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) - verify(mockFs, times(3)).isHidden(any[Path]) - verify(mockFs, times(3)).copy(any[Path], any[Path]) - verify(mockFs, times(3)).delete(any[Path]) - verify(mockFs, times(3)).newDirectoryStream(any[Path], any[Filter[Path]]()) - verify(mockFs, times(3)).setAttribute(any[Path], any[String], any[Object]) - verify(mockFs, times(3)).readAttributes(any[Path], any[String]) - verify(mockFs, times(3)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) - verify(mockFs, times(3)).isSameFile(any[Path], any[Path]) - verify(mockFs, times(3)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) - } - - it should "fail immediately on fatal exceptions" in { - def isFatal(t: Throwable) = t.isInstanceOf[FileNotFoundException] - val retryParams = testRetryParams.copy(maxRetries = Option(5), isFatal = isFatal) - val mockFs = mockFileSystem(throws = Option(ThrowParams(new FileNotFoundException(), nbTimes = 3))) - val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) - - (the [CromwellFatalException] thrownBy retryableFs.move(pathMock, pathMock)).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.checkAccess(pathMock)).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.createDirectory(pathMock)).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.newByteChannel(pathMock, 
mock(classOf[java.util.Set[StandardOpenOption]]))).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.isHidden(pathMock)).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.copy(pathMock, pathMock)).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.delete(pathMock)).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]]))).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.setAttribute(pathMock, "", "")).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes])).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.readAttributes(pathMock, "")).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.isSameFile(pathMock, pathMock)).getCause shouldBe a[FileNotFoundException] - (the [CromwellFatalException] thrownBy retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView])).getCause shouldBe a[FileNotFoundException] - - verify(mockFs, times(1)).move(any[Path], any[Path]) - verify(mockFs, times(1)).checkAccess(any[Path]) - verify(mockFs, times(1)).createDirectory(any[Path]) - verify(mockFs, times(1)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) - verify(mockFs, times(1)).isHidden(any[Path]) - verify(mockFs, times(1)).copy(any[Path], any[Path]) - verify(mockFs, times(1)).delete(any[Path]) - verify(mockFs, times(1)).newDirectoryStream(any[Path], any[Filter[Path]]()) - verify(mockFs, times(1)).setAttribute(any[Path], any[String], any[Object]) - verify(mockFs, times(1)).readAttributes(any[Path], any[String]) - verify(mockFs, times(1)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) - 
verify(mockFs, times(1)).isSameFile(any[Path], any[Path]) - verify(mockFs, times(1)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) - } - -} diff --git a/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala b/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala index a738984dd..28009a541 100644 --- a/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala +++ b/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala @@ -2,40 +2,15 @@ package cromwell.engine import akka.actor.ActorSystem import cats.data.Validated.{Invalid, Valid} -import com.google.api.client.http.HttpResponseException import com.typesafe.config.ConfigFactory import cromwell.core.WorkflowOptions -import cromwell.core.path.{CustomRetryParams, DefaultPathBuilder, PathBuilder} -import cromwell.core.retry.SimpleExponentialBackoff -import cromwell.filesystems.gcs.{GoogleConfiguration, RetryableGcsPathBuilderFactory} +import cromwell.core.path.{DefaultPathBuilder, PathBuilder} +import cromwell.filesystems.gcs.{GcsPathBuilderFactory, GoogleConfiguration} import lenthall.exception.MessageAggregation import net.ceedubs.ficus.Ficus._ -import scala.concurrent.duration._ -import scala.language.postfixOps - case class EngineFilesystems(actorSystem: ActorSystem) { - private def isFatalGcsException(t: Throwable): Boolean = t match { - case e: HttpResponseException if e.getStatusCode == 403 => true - case e: HttpResponseException if e.getStatusCode == 400 && e.getContent.contains("INVALID_ARGUMENT") => true - case _ => false - } - - private def isTransientGcsException(t: Throwable): Boolean = t match { - // Quota exceeded - case e: HttpResponseException if e.getStatusCode == 429 => true - case _ => false - } - - private val GcsRetryParams = CustomRetryParams( - timeout = Duration.Inf, - maxRetries = Option(3), - backoff = SimpleExponentialBackoff(1 seconds, 3 seconds, 1.5D), - isTransient = isTransientGcsException, - isFatal = isFatalGcsException - ) - private val 
config = ConfigFactory.load private val googleConf: GoogleConfiguration = GoogleConfiguration(config) private val googleAuthMode = config.as[Option[String]]("engine.filesystems.gcs.auth") map { confMode => @@ -49,7 +24,7 @@ case class EngineFilesystems(actorSystem: ActorSystem) { } private val gcsPathBuilderFactory = googleAuthMode map { mode => - RetryableGcsPathBuilderFactory(mode, customRetryParams = GcsRetryParams) + GcsPathBuilderFactory(mode, googleConf.applicationName) } def pathBuildersForWorkflow(workflowOptions: WorkflowOptions): List[PathBuilder] = { diff --git a/engine/src/main/scala/cromwell/engine/io/IoActor.scala b/engine/src/main/scala/cromwell/engine/io/IoActor.scala new file mode 100644 index 000000000..8c92ec89e --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/io/IoActor.scala @@ -0,0 +1,148 @@ +package cromwell.engine.io + +import java.net.{SocketException, SocketTimeoutException} + +import akka.NotUsed +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import akka.stream._ +import akka.stream.scaladsl.{Flow, GraphDSL, Merge, Partition, Source} +import com.google.cloud.storage.StorageException +import cromwell.core.actor.StreamActorHelper +import cromwell.core.actor.StreamIntegration.StreamContext +import cromwell.core.io.{IoAck, IoCommand, Throttle} +import cromwell.engine.io.IoActor._ +import cromwell.engine.io.gcs.GcsBatchFlow.BatchFailedException +import cromwell.engine.io.gcs.{GcsBatchCommandContext, ParallelGcsBatchFlow} +import cromwell.engine.io.nio.NioFlow +import cromwell.filesystems.gcs.batch.GcsBatchIoCommand + +/** + * Actor that performs IO operations asynchronously using akka streams + * + * @param queueSize size of the queue + * @param throttle optional throttler to control the throughput of requests. 
+ * Applied to ALL incoming requests + * @param materializer actor materializer to run the stream + */ +final class IoActor(queueSize: Int, throttle: Option[Throttle])(implicit val materializer: ActorMaterializer) extends Actor with ActorLogging with StreamActorHelper[IoCommandContext[_]] { + + implicit private val system = context.system + + private [io] lazy val defaultFlow = new NioFlow(parallelism = 100, context.system.scheduler).flow + private [io] lazy val gcsBatchFlow = new ParallelGcsBatchFlow(parallelism = 10, batchSize = 100, context.system.scheduler).flow + + protected val source = Source.queue[IoCommandContext[_]](queueSize, OverflowStrategy.dropNew) + + protected val flow = GraphDSL.create() { implicit builder => + import GraphDSL.Implicits._ + + val input = builder.add(Flow[IoCommandContext[_]]) + + // Partitions requests between gcs batch, and single nio requests + val batchPartitioner = builder.add(Partition[IoCommandContext[_]](2, { + case gcsBatch: GcsBatchCommandContext[_, _] => 0 + case other => 1 + })) + + // Sub flow for batched gcs requests + val batches = batchPartitioner.out(0) collect { case batch: GcsBatchCommandContext[_, _] => batch } + + // Sub flow for single nio requests + val defaults = batchPartitioner.out(1) collect { case default: DefaultCommandContext[_] => default } + + // Merge results from both flows back together + val merger = builder.add(Merge[IoResult](2)) + + // Flow processing nio requests + val defaultFlowPorts = builder.add(defaultFlow) + + // Flow processing gcs batch requests + val batchFlowPorts = builder.add(gcsBatchFlow) + + input ~> batchPartitioner + defaults.outlet ~> defaultFlowPorts ~> merger + batches.outlet ~> batchFlowPorts ~> merger + + FlowShape[IoCommandContext[_], IoResult](input.in, merger.out) + } + + protected val throttledFlow = throttle map { t => + Flow[IoCommandContext[_]] + .throttle(t.elements, t.per, t.maximumBurst, ThrottleMode.Shaping) + .via(flow) + } getOrElse flow + + override protected 
lazy val streamSource = source.via(throttledFlow) + + override def actorReceive: Receive = { + /* GCS Batch command with context */ + case (clientContext: Any, gcsBatchCommand: GcsBatchIoCommand[_, _]) => + val replyTo = sender() + val commandContext= GcsBatchCommandContext(gcsBatchCommand, replyTo, Option(clientContext)) + sendToStream(commandContext) + + /* GCS Batch command without context */ + case gcsBatchCommand: GcsBatchIoCommand[_, _] => + val replyTo = sender() + val commandContext= GcsBatchCommandContext(gcsBatchCommand, replyTo) + sendToStream(commandContext) + + /* Default command with context */ + case (clientContext: Any, command: IoCommand[_]) => + val replyTo = sender() + val commandContext= DefaultCommandContext(command, replyTo, Option(clientContext)) + sendToStream(commandContext) + + /* Default command without context */ + case command: IoCommand[_] => + val replyTo = sender() + val commandContext= DefaultCommandContext(command, replyTo) + sendToStream(commandContext) + } +} + +trait IoCommandContext[T] extends StreamContext { + def request: IoCommand[T] + def replyTo: ActorRef + def fail(failure: Throwable): IoResult = (request.fail(failure), this) + def success(value: T): IoResult = (request.success(value), this) +} + +object IoActor { + /** Flow that can consume an IoCommandContext and produce an IoResult */ + type IoFlow = Flow[IoCommandContext[_], IoResult, NotUsed] + + /** Result type of an IoFlow, contains the original command context and the final IoAck response. */ + type IoResult = (IoAck[_], IoCommandContext[_]) + + /** Maximum number of times a command will be attempted: First attempt + 3 retries */ + val MaxAttemptsNumber = 1 + 3 + + case class DefaultCommandContext[T](request: IoCommand[T], replyTo: ActorRef, override val clientContext: Option[Any] = None) extends IoCommandContext[T] + + /** + * ATTENTION: Transient failures are retried *forever* + * Be careful when adding error codes to this method. 
+ * Currently only 429 (= quota exceeded are considered truly transient) + */ + def isTransient(failure: Throwable): Boolean = failure match { + case gcs: StorageException => gcs.getCode == 429 + case _ => false + } + + /** + * Failures that are considered retryable. + * Retrying them should increase the "retry counter" + */ + def isRetryable(failure: Throwable): Boolean = failure match { + case gcs: StorageException => gcs.isRetryable + case _: BatchFailedException => true + case _: SocketException => true + case _: SocketTimeoutException => true + case other => isTransient(other) + } + + def isFatal(failure: Throwable) = !isRetryable(failure) + + def props(queueSize: Int, throttle: Option[Throttle])(implicit materializer: ActorMaterializer) = Props(new IoActor(queueSize, throttle)) +} diff --git a/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchCommandContext.scala b/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchCommandContext.scala new file mode 100644 index 000000000..097649fc1 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchCommandContext.scala @@ -0,0 +1,95 @@ +package cromwell.engine.io.gcs + +import akka.actor.ActorRef +import com.google.api.client.googleapis.batch.BatchRequest +import com.google.api.client.googleapis.batch.json.JsonBatchCallback +import com.google.api.client.googleapis.json.GoogleJsonError +import com.google.api.client.http.HttpHeaders +import com.google.api.client.util.ExponentialBackOff +import com.google.cloud.storage.StorageException +import cromwell.core.retry.{Backoff, SimpleExponentialBackoff} +import cromwell.engine.io.IoActor.IoResult +import cromwell.engine.io.gcs.GcsBatchCommandContext.BatchResponse +import cromwell.engine.io.{IoActor, IoCommandContext} +import cromwell.filesystems.gcs.batch.GcsBatchIoCommand + +import scala.concurrent.Promise +import scala.concurrent.duration._ +import scala.language.postfixOps + +object GcsBatchCommandContext { + def defaultBackoff = 
SimpleExponentialBackoff( + new ExponentialBackOff.Builder() + .setInitialIntervalMillis(1.second.toMillis.toInt) + .setMultiplier(4) + .setMaxIntervalMillis(30.seconds.toMillis.toInt) + .setRandomizationFactor(0.2D) + .setMaxElapsedTimeMillis(30.minutes.toMillis.toInt) + .build() + ) + type BatchResponse = Either[IoResult, GcsBatchCommandContext[_, _]] +} + +final case class GcsBatchCommandContext[T, U](request: GcsBatchIoCommand[T, U], + replyTo: ActorRef, + override val clientContext: Option[Any] = None, + backoff: Backoff = GcsBatchCommandContext.defaultBackoff, + currentAttempt: Int = 1, + promise: Promise[BatchResponse] = Promise[BatchResponse] + ) extends IoCommandContext[T] { + + /** + * None if no retry should be attempted, Some(timeToWaitBeforeNextAttempt) otherwise + */ + lazy val retryIn = if (currentAttempt >= IoActor.MaxAttemptsNumber) None else Option(backoff.backoffMillis milliseconds) + + /** + * Json batch call back for a batched request + */ + lazy val callback: JsonBatchCallback[U] = new JsonBatchCallback[U]() { + def onSuccess(response: U, httpHeaders: HttpHeaders) = onSuccessCallback(response, httpHeaders) + def onFailure(googleJsonError: GoogleJsonError, httpHeaders: HttpHeaders) = onFailureCallback(googleJsonError, httpHeaders) + } + + /** + * Increment backoff time and attempt count + */ + lazy val next: GcsBatchCommandContext[T, U] = { + this.copy(backoff = backoff.next, currentAttempt = currentAttempt + 1, promise = Promise[BatchResponse]) + } + + /** + * Only increment backoff. To be used for failure thas should be retried infinitely + */ + lazy val nextTransient: GcsBatchCommandContext[T, U] = { + this.copy(backoff = backoff.next, promise = Promise[BatchResponse]) + } + + /** + * Queue the request for batching + */ + def queue(batchRequest: BatchRequest) = request.operation.queue(batchRequest, callback) + + /** + * On success callback. 
Transform the request response to a stream-ready response that can complete the promise + */ + private def onSuccessCallback(response: U, httpHeaders: HttpHeaders) = { + val promiseResponse: BatchResponse = request.onSuccess(response, httpHeaders) match { + // Left means the command is complete, so just create the corresponding IoSuccess with the value + case Left(responseValue) => Left(success(responseValue)) + // Right means there is a subsequent request to be executed, clone this context with the new request and a new promise + case Right(nextCommand) => Right(this.copy(request = nextCommand, promise = Promise[BatchResponse])) + } + + promise.trySuccess(promiseResponse) + () + } + + /** + * On failure callback. Fail the promise with a StorageException + */ + private def onFailureCallback(googleJsonError: GoogleJsonError, httpHeaders: HttpHeaders) = { + promise.tryFailure(new StorageException(googleJsonError)) + () + } +} diff --git a/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchFlow.scala b/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchFlow.scala new file mode 100644 index 000000000..646a79799 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/io/gcs/GcsBatchFlow.scala @@ -0,0 +1,152 @@ +package cromwell.engine.io.gcs + +import java.io.IOException + +import akka.actor.Scheduler +import akka.stream._ +import akka.stream.scaladsl.{Flow, GraphDSL, MergePreferred, Partition} +import com.google.api.client.googleapis.batch.BatchRequest +import com.google.api.client.http.{HttpRequest, HttpRequestInitializer} +import cromwell.engine.io.IoActor +import cromwell.engine.io.IoActor.IoResult +import cromwell.engine.io.gcs.GcsBatchFlow.BatchFailedException +import cromwell.filesystems.gcs.{GcsPathBuilder, GoogleConfiguration} + +import scala.concurrent.duration._ +import scala.concurrent.{ExecutionContext, Future} +import scala.language.postfixOps +import scala.util.{Failure, Try} + +object GcsBatchFlow { + + /** + * Exception used to fail the request 
promises when the batch request itself fails. + * Is considered retryable. + */ + case class BatchFailedException(failure: Throwable) extends IOException(failure) +} + +class GcsBatchFlow(batchSize: Int, scheduler: Scheduler)(implicit ec: ExecutionContext) { + + // Does not carry any authentication, assumes all underlying requests are properly authenticated + private val httpRequestInitializer = new HttpRequestInitializer { + override def initialize(request: HttpRequest): Unit = { + request.setConnectTimeout(GoogleConfiguration.DefaultConnectionTimeout.toMillis.toInt) + request.setReadTimeout(GoogleConfiguration.DefaultReadTimeout.toMillis.toInt) + () + } + } + + private val batch: BatchRequest = new BatchRequest(GcsPathBuilder.HttpTransport, httpRequestInitializer) + + val flow = GraphDSL.create() { implicit builder => + import GraphDSL.Implicits._ + + // Source where batch commands are coming from. This is the input port of this flow + val source = builder.add(Flow[GcsBatchCommandContext[_, _]]) + + // Merge commands from source (above), and commands that need to be retried (see retries below) + val sourceMerger = builder.add(MergePreferred[GcsBatchCommandContext[_, _]](1)) + + // Process a batch and spit atomic GcsBatchResponses out for each internal request + val batchProcessor = builder.add( + Flow[GcsBatchCommandContext[_, _]] + // Group commands together in batches so they can be processed as such + .groupedWithin(batchSize, 5 seconds) + // execute the batch and outputs each sub-response individually, as a Future + .mapConcat[Future[GcsBatchResponse[_]]](executeBatch) + // Wait for each Future to complete + .mapAsyncUnordered[GcsBatchResponse[_]](batchSize) { identity } + ) + + // Partitions the responses: Terminal responses exit the flow, others go back to the sourceMerger + val responseHandler = builder.add(responseHandlerFlow) + + // Buffer commands to be retried to avoid backpressuring too rapidly + val nextRequestBuffer = 
builder.add(Flow[GcsBatchCommandContext[_, _]].buffer(batchSize, OverflowStrategy.backpressure)) + + source ~> sourceMerger ~> batchProcessor ~> responseHandler.in + sourceMerger.preferred <~ nextRequestBuffer <~ responseHandler.out1 + + FlowShape[GcsBatchCommandContext[_, _], IoResult](source.in, responseHandler.out0) + } + + /** + * Fan out shape splitting GcsBatchResponse into 2: + * First port emits terminal result that can exit the GcsBatch flow + * Second port emits request to be re-injected to be executed in a later batch + */ + private lazy val responseHandlerFlow = GraphDSL.create() { implicit builder => + import GraphDSL.Implicits._ + + val source = builder.add(Partition[GcsBatchResponse[_]](2, { + case _: GcsBatchTerminal[_] => 0 + case _ => 1 + })) + + // Terminal responses: output of this flow + val terminals = source.out(0) collect { case terminal: GcsBatchTerminal[_] => terminal.ioResult } + + // Next command context, can be a retry or another request needed by the command + val nextRequest = source.out(1).collect { + case retry: GcsBatchRetry[_] => retry.context + case nextRequest: GcsBatchNextRequest[_] => nextRequest.context + } + + new FanOutShape2[GcsBatchResponse[_], IoResult, GcsBatchCommandContext[_, _]](source.in, terminals.outlet, nextRequest.outlet) + } + + private def executeBatch(contexts: Seq[GcsBatchCommandContext[_, _]]): List[Future[GcsBatchResponse[_]]] = { + def failAllPromisesWith(failure: Throwable) = contexts foreach { context => + context.promise.tryFailure(failure) + () + } + + // Add all requests to the batch + contexts foreach { _.queue(batch) } + + // Try to execute the batch request. 
+ // If it fails with an IO Exception, fail all the underlying promises with a retryable BatchFailedException + // Otherwise fail with the original exception + Try(batch.execute()) match { + case Failure(failure: IOException) => failAllPromisesWith(BatchFailedException(failure)) + case Failure(failure) => failAllPromisesWith(failure) + case _ => + } + + // Map all promise responses to a GcsBatchResponse to be either sent back as a response or retried in the next batch + contexts.toList map { context => + context.promise.future map { + case Left(response) => GcsBatchTerminal(response) + case Right(nextRequest) => GcsBatchNextRequest(nextRequest) + } recoverWith recoverCommand(context) + } + } + + /** + * Handles a failed future. + * If the failure is retryable, and the command hasn't reached its max attempts: + * schedule the command to be retried in a later batch after waiting for the appropriate amount of time + * Otherwise create a GcsBatchTerminal response with the IoFailure + * In both cases, returns a successful Future to avoid failing the stream or dropping elements + */ + private def recoverCommand(context: GcsBatchCommandContext[_, _]): PartialFunction[Throwable, Future[GcsBatchResponse[_]]] = { + // If the failure is retryable - recover with a GcsBatchRetry so it can be retried in the next batch + case failure if IoActor.isRetryable(failure) => + context.retryIn match { + case Some(waitTime) if IoActor.isTransient(failure) => + akka.pattern.after(waitTime, scheduler)(Future.successful(GcsBatchRetry(context.nextTransient, failure))) + case Some(waitTime) => + akka.pattern.after(waitTime, scheduler)(Future.successful(GcsBatchRetry(context.next, failure))) + case None => fail(context, failure) + } + + // Otherwise just fail the command + case failure => fail(context, failure) + } + + /** + * Fail a command context with a failure. 
+ */ + private def fail(context: GcsBatchCommandContext[_, _], failure: Throwable) = Future.successful(GcsBatchTerminal(context.fail(failure))) +} diff --git a/engine/src/main/scala/cromwell/engine/io/gcs/GcsResponse.scala b/engine/src/main/scala/cromwell/engine/io/gcs/GcsResponse.scala new file mode 100644 index 000000000..ca0a7e83e --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/io/gcs/GcsResponse.scala @@ -0,0 +1,14 @@ +package cromwell.engine.io.gcs + +import cromwell.engine.io.IoActor._ + +import scala.language.existentials + +/** + * ADT used only inside the batch stream + * @tparam T final type of the result of the Command + */ +private [gcs] sealed trait GcsBatchResponse[T] +private [gcs] case class GcsBatchTerminal[T](ioResult: IoResult) extends GcsBatchResponse[T] +private [gcs] case class GcsBatchRetry[T](context: GcsBatchCommandContext[T, _], failure: Throwable) extends GcsBatchResponse[T] +private [gcs] case class GcsBatchNextRequest[T](context: GcsBatchCommandContext[T, _]) extends GcsBatchResponse[T] diff --git a/engine/src/main/scala/cromwell/engine/io/gcs/ParallelGcsBatchFlow.scala b/engine/src/main/scala/cromwell/engine/io/gcs/ParallelGcsBatchFlow.scala new file mode 100644 index 000000000..633e270d8 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/io/gcs/ParallelGcsBatchFlow.scala @@ -0,0 +1,29 @@ +package cromwell.engine.io.gcs + +import akka.actor.Scheduler +import akka.stream.FlowShape +import akka.stream.scaladsl.{Balance, GraphDSL, Merge} +import cromwell.engine.io.IoActor.IoResult + +import scala.concurrent.ExecutionContext + +/** + * Balancer that distributes requests to multiple batch flows in parallel + */ +class ParallelGcsBatchFlow(parallelism: Int, batchSize: Int, scheduler: Scheduler)(implicit ec: ExecutionContext) { + + val flow = GraphDSL.create() { implicit builder => + import GraphDSL.Implicits._ + val balancer = builder.add(Balance[GcsBatchCommandContext[_, _]](parallelism, waitForAllDownstreams = false)) + 
val merge = builder.add(Merge[IoResult](parallelism)) + + for (_ <- 1 to parallelism) { + val workerFlow = new GcsBatchFlow(batchSize, scheduler).flow + // for each worker, add an edge from the balancer to the worker, then wire + // it to the merge element + balancer ~> workerFlow.async ~> merge + } + + FlowShape(balancer.in, merge.out) + } +} diff --git a/engine/src/main/scala/cromwell/engine/io/nio/NioFlow.scala b/engine/src/main/scala/cromwell/engine/io/nio/NioFlow.scala new file mode 100644 index 000000000..e90b3a7f4 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/io/nio/NioFlow.scala @@ -0,0 +1,80 @@ +package cromwell.engine.io.nio + +import akka.actor.{ActorSystem, Scheduler} +import akka.stream.scaladsl.Flow +import cromwell.core.io._ +import cromwell.core.retry.Retry +import cromwell.engine.io.IoActor +import cromwell.engine.io.IoActor.{DefaultCommandContext, IoResult} +import cromwell.filesystems.gcs.GcsPath +import cromwell.util.TryWithResource._ + +import scala.concurrent.{ExecutionContext, Future} +import scala.io.Codec + +/** + * Flow that executes IO operations by calling java.nio.Path methods + */ +class NioFlow(parallelism: Int, scheduler: Scheduler)(implicit ec: ExecutionContext, actorSystem: ActorSystem) { + private val processCommand: DefaultCommandContext[_] => Future[IoResult] = commandContext => { + val operationResult = Retry.withRetry( + () => handleSingleCommand(commandContext.request), + maxRetries = Option(3), + backoff = IoCommand.defaultBackoff, + isTransient = IoActor.isTransient, + isFatal = IoActor.isFatal + ) + + operationResult map { (_, commandContext) } recoverWith { + case failure => Future.successful(commandContext.fail(failure)) + } + } + + private def handleSingleCommand(ioSingleCommand: IoCommand[_]) = { + ioSingleCommand match { + case copyCommand: IoCopyCommand => copy(copyCommand) map copyCommand.success + case writeCommand: IoWriteCommand => write(writeCommand) map writeCommand.success + case deleteCommand: 
IoDeleteCommand => delete(deleteCommand) map deleteCommand.success + case sizeCommand: IoSizeCommand => size(sizeCommand) map sizeCommand.success + case readAsStringCommand: IoContentAsStringCommand => readAsString(readAsStringCommand) map readAsStringCommand.success + case hashCommand: IoHashCommand => hash(hashCommand) map hashCommand.success + case _ => Future.failed(new NotImplementedError("Method not implemented")) + } + } + + val flow = Flow[DefaultCommandContext[_]].mapAsyncUnordered[IoResult](parallelism)(processCommand) + + private def copy(copy: IoCopyCommand) = Future { + copy.source.copyTo(copy.destination, copy.overwrite) + () + } + + private def write(write: IoWriteCommand) = Future { + write.file.write(write.content)(write.openOptions, Codec.UTF8) + () + } + + private def delete(delete: IoDeleteCommand) = Future { + delete.file.delete(delete.swallowIOExceptions) + () + } + + private def readAsString(read: IoContentAsStringCommand) = Future { + read.file.contentAsString + } + + private def size(size: IoSizeCommand) = Future { + size.file.size + } + + private def hash(hash: IoHashCommand) = { + hash.file match { + case gcsPath: GcsPath => Future { gcsPath.cloudStorage.get(gcsPath.blob).getCrc32c } + case path => Future.fromTry( + tryWithResource(() => path.newInputStream) { inputStream => + org.apache.commons.codec.digest.DigestUtils.md5Hex(inputStream) + } + ) + } + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index ad97eb0f5..035213b0a 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -16,8 +16,8 @@ import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{Ma import cromwell.engine.workflow.lifecycle.WorkflowFinalizationActor.{StartFinalizationCommand, WorkflowFinalizationFailedResponse, 
WorkflowFinalizationSucceededResponse} import cromwell.engine.workflow.lifecycle.WorkflowInitializationActor.{StartInitializationCommand, WorkflowInitializationFailedResponse, WorkflowInitializationSucceededResponse} import cromwell.engine.workflow.lifecycle._ -import cromwell.engine.workflow.lifecycle.execution.{WorkflowExecutionActor, WorkflowMetadataHelper} import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.{WorkflowExecutionActor, WorkflowMetadataHelper} import cromwell.subworkflowstore.SubWorkflowStoreActor.WorkflowComplete import cromwell.webservice.EngineStatsActor import wdl4s.{LocallyQualifiedName => _} @@ -136,6 +136,7 @@ object WorkflowActor { startMode: StartMode, wdlSource: WorkflowSourceFilesCollection, conf: Config, + ioActor: ActorRef, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, @@ -145,7 +146,7 @@ object WorkflowActor { jobTokenDispenserActor: ActorRef, backendSingletonCollection: BackendSingletonCollection, serverMode: Boolean): Props = { - Props(new WorkflowActor(workflowId, startMode, wdlSource, conf, serviceRegistryActor, workflowLogCopyRouter, + Props(new WorkflowActor(workflowId, startMode, wdlSource, conf, ioActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, dockerHashActor, jobTokenDispenserActor, backendSingletonCollection, serverMode)).withDispatcher(EngineDispatcher) } } @@ -157,6 +158,7 @@ class WorkflowActor(val workflowId: WorkflowId, startMode: StartMode, workflowSources: WorkflowSourceFilesCollection, conf: Config, + ioActor: ActorRef, override val serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, @@ -189,7 +191,7 @@ class WorkflowActor(val workflowId: WorkflowId, when(MaterializingWorkflowDescriptorState) { case Event(MaterializeWorkflowDescriptorSuccessResponse(workflowDescriptor), data) => - val 
initializerActor = context.actorOf(WorkflowInitializationActor.props(workflowId, workflowDescriptor, serviceRegistryActor), + val initializerActor = context.actorOf(WorkflowInitializationActor.props(workflowId, workflowDescriptor, ioActor, serviceRegistryActor), name = s"WorkflowInitializationActor-$workflowId") initializerActor ! StartInitializationCommand goto(InitializingWorkflowState) using data.copy(currentLifecycleStateActor = Option(initializerActor), workflowDescriptor = Option(workflowDescriptor)) @@ -209,6 +211,7 @@ class WorkflowActor(val workflowId: WorkflowId, val executionActor = context.actorOf(WorkflowExecutionActor.props( workflowDescriptor, + ioActor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, @@ -335,7 +338,7 @@ class WorkflowActor(val workflowId: WorkflowId, } private[workflow] def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs) = { - context.actorOf(WorkflowFinalizationActor.props(workflowId, workflowDescriptor, jobExecutionMap, workflowOutputs, stateData.initializationData), name = s"WorkflowFinalizationActor") + context.actorOf(WorkflowFinalizationActor.props(workflowId, workflowDescriptor, ioActor, jobExecutionMap, workflowOutputs, stateData.initializationData), name = s"WorkflowFinalizationActor") } /** * Run finalization actor and transition to FinalizingWorkflowState. 
diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala index fb87406a1..715e089c9 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala @@ -41,6 +41,7 @@ object WorkflowManagerActor { case object EngineStatsCommand extends WorkflowManagerActorCommand def props(workflowStore: ActorRef, + ioActor: ActorRef, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, @@ -51,7 +52,7 @@ object WorkflowManagerActor { backendSingletonCollection: BackendSingletonCollection, abortJobsOnTerminate: Boolean, serverMode: Boolean): Props = { - val params = WorkflowManagerActorParams(ConfigFactory.load, workflowStore, serviceRegistryActor, + val params = WorkflowManagerActorParams(ConfigFactory.load, workflowStore, ioActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, dockerHashActor, jobTokenDispenserActor, backendSingletonCollection, abortJobsOnTerminate, serverMode) Props(new WorkflowManagerActor(params)).withDispatcher(EngineDispatcher) @@ -87,6 +88,7 @@ object WorkflowManagerActor { case class WorkflowManagerActorParams(config: Config, workflowStore: ActorRef, + ioActor: ActorRef, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, @@ -284,7 +286,7 @@ class WorkflowManagerActor(params: WorkflowManagerActorParams) StartNewWorkflow } - val wfProps = WorkflowActor.props(workflowId, startMode, workflow.sources, config, params.serviceRegistryActor, + val wfProps = WorkflowActor.props(workflowId, startMode, workflow.sources, config, params.ioActor, params.serviceRegistryActor, params.workflowLogCopyRouter, params.jobStoreActor, params.subWorkflowStoreActor, params.callCacheReadActor, params.dockerHashActor, params.jobTokenDispenserActor, 
params.backendSingletonCollection, params.serverMode) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala index 6963280ff..f735bb04f 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala @@ -6,8 +6,11 @@ import akka.actor.SupervisorStrategy.Restart import akka.actor.{Actor, ActorLogging, ActorRef, OneForOneStrategy, Props} import cromwell.core.Dispatcher.IoDispatcher import cromwell.core._ +import cromwell.core.io._ import cromwell.core.logging.WorkflowLogger import cromwell.core.path.Path +import cromwell.engine.workflow.lifecycle.execution.WorkflowMetadataHelper +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder import cromwell.services.metadata.MetadataService.PutMetadataAction import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} @@ -19,25 +22,29 @@ object CopyWorkflowLogsActor { case _: IOException => Restart } - def props(serviceRegistryActor: ActorRef) = Props(new CopyWorkflowLogsActor(serviceRegistryActor)).withDispatcher(IoDispatcher) + def props(serviceRegistryActor: ActorRef, ioActor: ActorRef) = Props(new CopyWorkflowLogsActor(serviceRegistryActor, ioActor)).withDispatcher(IoDispatcher) } // This could potentially be turned into a more generic "Copy/Move something from A to B" // Which could be used for other copying work (outputs, call logs..) 
-class CopyWorkflowLogsActor(serviceRegistryActor: ActorRef) - extends Actor - with ActorLogging { +class CopyWorkflowLogsActor(override val serviceRegistryActor: ActorRef, override val ioActor: ActorRef) extends Actor with ActorLogging with GcsBatchCommandBuilder with IoClientHelper with WorkflowMetadataHelper { - def copyAndClean(src: Path, dest: Path) = { + def copyLog(src: Path, dest: Path, workflowId: WorkflowId) = { dest.parent.createPermissionedDirectories() + // Send the workflowId as context along with the copy so we can update metadata when the response comes back + sendIoCommandWithContext(copyCommand(src, dest, overwrite = true), workflowId) + } - src.copyTo(dest, overwrite = true) - if (WorkflowLogger.isTemporary) { - src.delete() - } + def deleteLog(src: Path) = if (WorkflowLogger.isTemporary) { + sendIoCommand(deleteCommand(src)) + } + + def updateLogsPathInMetadata(workflowId: WorkflowId, path: Path) = { + val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.WorkflowLog), MetadataValue(path.pathAsString)) + serviceRegistryActor ! PutMetadataAction(metadataEventMsg) } - override def receive = { + def copyLogsReceive: Receive = { case CopyWorkflowLogsActor.Copy(workflowId, destinationDir) => val workflowLogger = new WorkflowLogger(self.path.name, workflowId, Option(log)) @@ -46,15 +53,39 @@ class CopyWorkflowLogsActor(serviceRegistryActor: ActorRef) val destPath = destinationDir.resolve(src.name) workflowLogger.info(s"Copying workflow logs from $src to $destPath") - copyAndClean(src, destPath) - - val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.WorkflowLog), MetadataValue(destPath)) - serviceRegistryActor ! 
PutMetadataAction(metadataEventMsg) + copyLog(src, destPath, workflowId) } } + + case (workflowId: WorkflowId, IoSuccess(copy: IoCopyCommand, _)) => + updateLogsPathInMetadata(workflowId, copy.destination) + deleteLog(copy.source) + + case (workflowId: WorkflowId, IoFailure(copy: IoCopyCommand, failure)) => + pushWorkflowFailures(workflowId, List(new IOException("Could not copy workflow logs", failure))) + log.error(failure, s"Failed to copy workflow logs from ${copy.source.pathAsString} to ${copy.destination.pathAsString}") + deleteLog(copy.source) + + case IoSuccess(_: IoDeleteCommand, _) => // Good ! + + case IoFailure(delete: IoDeleteCommand, failure) => + log.error(failure, s"Failed to delete workflow logs from ${delete.file.pathAsString}") + + case other => log.warning(s"CopyWorkflowLogsActor received an unexpected message: $other") } + + override def receive = ioReceive orElse copyLogsReceive override def preRestart(t: Throwable, message: Option[Any]) = { message foreach self.forward } + + override protected def onTimeout(message: Any, to: ActorRef): Unit = message match { + case copy: IoCopyCommand => + log.error(s"Failed to copy workflow logs from ${copy.source.pathAsString} to ${copy.destination.pathAsString}: Timeout") + deleteLog(copy.source) + case delete: IoDeleteCommand => + log.error(s"Failed to delete workflow logs from ${delete.file.pathAsString}: Timeout") + case _ => + } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala index 55229ee1d..9575fb472 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala @@ -1,41 +1,48 @@ package cromwell.engine.workflow.lifecycle -import akka.actor.Props +import akka.actor.{ActorRef, Props} import 
cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationResponse, FinalizationSuccess} import cromwell.backend.{AllBackendInitializationData, BackendConfigurationDescriptor, BackendInitializationData, BackendLifecycleActorFactory} import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.WorkflowOptions._ import cromwell.core._ +import cromwell.core.io.AsyncIo import cromwell.core.path.{Path, PathCopier, PathFactory} import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.backend.{BackendConfiguration, CromwellBackends} +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder import wdl4s.values.{WdlArray, WdlMap, WdlSingleFile, WdlValue} import scala.concurrent.{ExecutionContext, Future} object CopyWorkflowOutputsActor { - def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, + def props(workflowId: WorkflowId, ioActor: ActorRef, workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData) = Props( - new CopyWorkflowOutputsActor(workflowId, workflowDescriptor, workflowOutputs, initializationData) + new CopyWorkflowOutputsActor(workflowId, ioActor, workflowDescriptor, workflowOutputs, initializationData) ).withDispatcher(IoDispatcher) } -class CopyWorkflowOutputsActor(workflowId: WorkflowId, val workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, +class CopyWorkflowOutputsActor(workflowId: WorkflowId, override val ioActor: ActorRef, val workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData) - extends EngineWorkflowFinalizationActor with PathFactory { + extends EngineWorkflowFinalizationActor with PathFactory with AsyncIo with GcsBatchCommandBuilder { + implicit val ec = context.dispatcher override val pathBuilders = workflowDescriptor.pathBuilders - private def copyWorkflowOutputs(workflowOutputsFilePath: String): Unit 
= { + override def receive = ioReceive orElse super.receive + + private def copyWorkflowOutputs(workflowOutputsFilePath: String): Future[Seq[Unit]] = { val workflowOutputsPath = buildPath(workflowOutputsFilePath) - val outputFilePaths = getOutputFilePaths + val outputFilePaths = getOutputFilePaths(workflowOutputsPath) - outputFilePaths foreach { - case (workflowRootPath, srcPath) => - // WARNING: PathCopier does not do atomic copies. The files may be partially written. - PathCopier.copy(workflowRootPath, srcPath, workflowOutputsPath) + val copies = outputFilePaths map { + case (srcPath, dstPath) => + dstPath.createDirectories() + copyAsync(srcPath, dstPath, overwrite = true) } + + Future.sequence(copies) } private def findFiles(values: Seq[WdlValue]): Seq[WdlSingleFile] = { @@ -47,16 +54,23 @@ class CopyWorkflowOutputsActor(workflowId: WorkflowId, val workflowDescriptor: E } } - private def getOutputFilePaths: Seq[(Path, Path)] = { - for { + private def getOutputFilePaths(workflowOutputsPath: Path): List[(Path, Path)] = { + val rootAndFiles = for { // NOTE: Without .toSeq, outputs in arrays only yield the last output backend <- workflowDescriptor.backendAssignments.values.toSeq config <- BackendConfiguration.backendConfigurationDescriptor(backend).toOption.toSeq rootPath <- getBackendRootPath(backend, config).toSeq - outputFiles = findFiles(workflowOutputs.values.map(_.wdlValue).toSeq) - wdlFile <- outputFiles - wdlPath = PathFactory.buildPath(wdlFile.value, pathBuilders) - } yield (rootPath, wdlPath) + outputFiles = findFiles(workflowOutputs.values.map(_.wdlValue).toSeq).map(_.value) + } yield (rootPath, outputFiles) + + val outputFileDestinations = rootAndFiles flatMap { + case (workflowRoot, outputs) => + outputs map { output => + val outputPath = PathFactory.buildPath(output, pathBuilders) + outputPath -> PathCopier.getDestinationFilePath(workflowRoot, outputPath, workflowOutputsPath) + } + } + outputFileDestinations.distinct.toList } private def 
getBackendRootPath(backend: String, config: BackendConfigurationDescriptor): Option[Path] = { @@ -72,8 +86,10 @@ class CopyWorkflowOutputsActor(workflowId: WorkflowId, val workflowDescriptor: E backendFactory.getExecutionRootPath(workflowDescriptor.backendDescriptor, config.backendConfig, initializationData) } - final override def afterAll()(implicit ec: ExecutionContext): Future[FinalizationResponse] = Future { - workflowDescriptor.getWorkflowOption(FinalWorkflowOutputsDir) foreach copyWorkflowOutputs - FinalizationSuccess + final override def afterAll()(implicit ec: ExecutionContext): Future[FinalizationResponse] = { + workflowDescriptor.getWorkflowOption(FinalWorkflowOutputsDir) match { + case Some(outputs) => copyWorkflowOutputs(outputs) map { _ => FinalizationSuccess } + case None => Future.successful(FinalizationSuccess) + } } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala index 5c4b777a5..bc62fe8ba 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala @@ -1,6 +1,6 @@ package cromwell.engine.workflow.lifecycle -import akka.actor.{FSM, Props} +import akka.actor.{ActorRef, FSM, Props} import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationFailed, FinalizationSuccess, Finalize} import cromwell.backend._ import cromwell.core.Dispatcher.EngineDispatcher @@ -38,13 +38,13 @@ object WorkflowFinalizationActor { case object WorkflowFinalizationSucceededResponse extends WorkflowLifecycleSuccessResponse final case class WorkflowFinalizationFailedResponse(reasons: Seq[Throwable]) extends WorkflowLifecycleFailureResponse - def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, jobExecutionMap: JobExecutionMap, + def props(workflowId: WorkflowId, 
workflowDescriptor: EngineWorkflowDescriptor, ioActor: ActorRef, jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData): Props = { - Props(new WorkflowFinalizationActor(workflowId, workflowDescriptor, jobExecutionMap, workflowOutputs, initializationData)).withDispatcher(EngineDispatcher) + Props(new WorkflowFinalizationActor(workflowId, workflowDescriptor, ioActor, jobExecutionMap, workflowOutputs, initializationData)).withDispatcher(EngineDispatcher) } } -case class WorkflowFinalizationActor(workflowIdForLogging: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, +case class WorkflowFinalizationActor(workflowIdForLogging: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, ioActor: ActorRef, jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData) extends WorkflowLifecycleActor[WorkflowFinalizationActorState] { @@ -65,14 +65,14 @@ case class WorkflowFinalizationActor(workflowIdForLogging: WorkflowId, workflowD for { (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keySet) props <- CromwellBackends.backendLifecycleFactoryActorByName(backend).map( - _.workflowFinalizationActorProps(workflowDescriptor.backendDescriptor, calls, filterJobExecutionsForBackend(calls), workflowOutputs, initializationData.get(backend)) + _.workflowFinalizationActorProps(workflowDescriptor.backendDescriptor, ioActor, calls, filterJobExecutionsForBackend(calls), workflowOutputs, initializationData.get(backend)) ).get actor = context.actorOf(props, backend) } yield actor } val engineFinalizationActor = Try { - context.actorOf(CopyWorkflowOutputsActor.props(workflowIdForLogging, workflowDescriptor, workflowOutputs, initializationData), + context.actorOf(CopyWorkflowOutputsActor.props(workflowIdForLogging, ioActor, workflowDescriptor, workflowOutputs, initializationData), "CopyWorkflowOutputsActor") } diff --git 
a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala index 14e8a31a4..c764af75e 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala @@ -43,8 +43,9 @@ object WorkflowInitializationActor { def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, + ioActor: ActorRef, serviceRegistryActor: ActorRef): Props = { - Props(new WorkflowInitializationActor(workflowId, workflowDescriptor, serviceRegistryActor)).withDispatcher(EngineDispatcher) + Props(new WorkflowInitializationActor(workflowId, workflowDescriptor, ioActor, serviceRegistryActor)).withDispatcher(EngineDispatcher) } case class BackendActorAndBackend(actor: ActorRef, backend: String) @@ -52,6 +53,7 @@ object WorkflowInitializationActor { case class WorkflowInitializationActor(workflowIdForLogging: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, + ioActor: ActorRef, serviceRegistryActor: ActorRef) extends AbortableWorkflowLifecycleActor[WorkflowInitializationActorState] { @@ -80,7 +82,7 @@ case class WorkflowInitializationActor(workflowIdForLogging: WorkflowId, for { (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keySet) props <- CromwellBackends.backendLifecycleFactoryActorByName(backend).map(factory => - factory.workflowInitializationActorProps(workflowDescriptor.backendDescriptor, calls, serviceRegistryActor) + factory.workflowInitializationActorProps(workflowDescriptor.backendDescriptor, ioActor, calls, serviceRegistryActor) ).get actor = context.actorOf(props, backend) } yield BackendActorAndBackend(actor, backend) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala 
b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala index a9a111df8..4dbecf268 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala @@ -1,7 +1,6 @@ package cromwell.engine.workflow.lifecycle.execution -import akka.actor.{ActorRef, ActorRefFactory, LoggingFSM, Props} -import akka.routing.RoundRobinPool +import akka.actor.{ActorRef, LoggingFSM, Props} import cats.data.NonEmptyList import cromwell.backend.BackendCacheHitCopyingActor.CopyOutputsCommand import cromwell.backend.BackendJobExecutionActor._ @@ -36,6 +35,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, initializationData: Option[BackendInitializationData], restarting: Boolean, val serviceRegistryActor: ActorRef, + ioActor: ActorRef, jobStoreActor: ActorRef, callCacheReadActor: ActorRef, dockerHashActor: ActorRef, @@ -52,7 +52,12 @@ class EngineJobExecutionActor(replyTo: ActorRef, // There's no need to check for a cache hit again if we got preempted, or if there's no result copying actor defined // NB: this can also change (e.g. 
if we have a HashError we just force this to CallCachingOff) - private var effectiveCallCachingMode = if (factory.cacheHitCopyingActorProps.isEmpty || jobDescriptorKey.attempt > 1) callCachingMode.withoutRead else callCachingMode + private var effectiveCallCachingMode = { + if (factory.fileHashingActorProps.isEmpty) CallCachingOff + else if (factory.cacheHitCopyingActorProps.isEmpty || jobDescriptorKey.attempt > 1) { + callCachingMode.withoutRead + } else callCachingMode + } // For tests: private[execution] def checkEffectiveCallCachingMode = effectiveCallCachingMode @@ -270,8 +275,10 @@ class EngineJobExecutionActor(replyTo: ActorRef, jobDescriptor.callCachingEligibility match { // If the job is eligible, initialize job hashing and go to CheckingCallCache state case CallCachingEligible => - initializeJobHashing(jobDescriptor, activity) - goto(CheckingCallCache) using updatedData + initializeJobHashing(jobDescriptor, activity) match { + case Success(_) => goto(CheckingCallCache) using updatedData + case Failure(failure) => respondAndStop(JobFailedNonRetryableResponse(jobDescriptorKey.jobKey, failure, None)) + } case ineligible: CallCachingIneligible => // If the job is ineligible, turn call caching off writeToMetadata(Map(callCachingReadResultMetadataKey -> s"Cache Miss: ${ineligible.message}")) @@ -283,7 +290,10 @@ class EngineJobExecutionActor(replyTo: ActorRef, private def handleReadFromCacheOff(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity, updatedData: ResponsePendingData) = { jobDescriptor.callCachingEligibility match { // If the job is eligible, initialize job hashing so it can be written to the cache - case CallCachingEligible => initializeJobHashing(jobDescriptor, activity) + case CallCachingEligible => initializeJobHashing(jobDescriptor, activity) match { + case Failure(failure) => log.error(failure, "Failed to initialize job hashing. 
The job will not be written to the cache") + case _ => + } // Don't even initialize hashing to write to the cache if the job is ineligible case ineligible: CallCachingIneligible => disableCallCaching() } @@ -338,23 +348,31 @@ class EngineJobExecutionActor(replyTo: ActorRef, def createJobPreparationActor(jobPrepProps: Props, name: String): ActorRef = context.actorOf(jobPrepProps, name) def prepareJob() = { val jobPreparationActorName = s"BackendPreparationActor_for_$jobTag" - val jobPrepProps = JobPreparationActor.props(executionData, jobDescriptorKey, factory, dockerHashActor, initializationData, serviceRegistryActor, backendSingletonActor) + val jobPrepProps = JobPreparationActor.props(executionData, jobDescriptorKey, factory, dockerHashActor, initializationData, serviceRegistryActor, ioActor, backendSingletonActor) val jobPreparationActor = createJobPreparationActor(jobPrepProps, jobPreparationActorName) jobPreparationActor ! CallPreparation.Start goto(PreparingJob) } - def initializeJobHashing(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity): Unit = { - val props = EngineJobHashingActor.props( - self, - jobDescriptor, - initializationData, - // Use context.system instead of context as the factory. Otherwise when we die, so will the child actors. 
- factoryFileHashingRouter(backendName, factory, context.system), - callCacheReadActor, - factory.runtimeAttributeDefinitions(initializationData), backendName, activity) - context.actorOf(props, s"ejha_for_$jobDescriptor") - () + def initializeJobHashing(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity): Try[Unit] = { + val maybeFileHashingActorProps = factory.fileHashingActorProps map { + _.apply(jobDescriptor, initializationData, serviceRegistryActor, ioActor) + } + + maybeFileHashingActorProps match { + case Some(fileHashingActorProps) => + val props = EngineJobHashingActor.props( + self, + jobDescriptor, + initializationData, + fileHashingActorProps, + callCacheReadActor, + factory.runtimeAttributeDefinitions(initializationData), backendName, activity) + context.actorOf(props, s"ejha_for_$jobDescriptor") + + Success(()) + case None => Failure(new IllegalStateException("Tried to initialize job hashing without a file hashing actor !")) + } } def makeFetchCachedResultsActor(callCachingEntryId: CallCachingEntryId, taskOutputs: Seq[TaskOutput]): Unit = { @@ -370,7 +388,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, private def makeBackendCopyCacheHit(wdlValueSimpletons: Seq[WdlValueSimpleton], jobDetritusFiles: Map[String,String], returnCode: Option[Int], data: ResponsePendingData, cacheResultId: CallCachingEntryId) = { factory.cacheHitCopyingActorProps match { case Some(propsMaker) => - val backendCacheHitCopyingActorProps = propsMaker(data.jobDescriptor, initializationData, serviceRegistryActor) + val backendCacheHitCopyingActorProps = propsMaker(data.jobDescriptor, initializationData, serviceRegistryActor, ioActor) val cacheHitCopyActor = context.actorOf(backendCacheHitCopyingActorProps, buildCacheHitCopyingActorName(data.jobDescriptor, cacheResultId)) cacheHitCopyActor ! CopyOutputsCommand(wdlValueSimpletons, jobDetritusFiles, returnCode) replyTo ! 
JobRunning(data.jobDescriptor.key, data.jobDescriptor.inputDeclarations, None) @@ -525,6 +543,7 @@ object EngineJobExecutionActor { initializationData: Option[BackendInitializationData], restarting: Boolean, serviceRegistryActor: ActorRef, + ioActor: ActorRef, jobStoreActor: ActorRef, callCacheReadActor: ActorRef, dockerHashActor: ActorRef, @@ -540,6 +559,7 @@ object EngineJobExecutionActor { initializationData = initializationData, restarting = restarting, serviceRegistryActor = serviceRegistryActor, + ioActor = ioActor, jobStoreActor = jobStoreActor, callCacheReadActor = callCacheReadActor, dockerHashActor = dockerHashActor, @@ -587,62 +607,4 @@ object EngineJobExecutionActor { private[execution] case class NotSucceededResponseData(response: BackendJobExecutionResponse, hashes: Option[Try[CallCacheHashes]] = None) extends ResponseData - - /** - * Deliberately a singleton (well, a singleton router), so we can globally rate limit hash lookups per backend. - * - * More refinement may appear via #1377. - */ - private var factoryFileHashingRouters = Map[BackendLifecycleActorFactory, ActorRef]() - - /** - * Returns a RoundRobinPool of actors based on the backend factory. - * - * @param backendName Name of the backend. - * @param backendLifecycleActorFactory A backend factory. - * @param actorRefFactory An actor factory. - * @return a RoundRobinPool of actors based on backend factory. 
- */ - private def factoryFileHashingRouter(backendName: String, - backendLifecycleActorFactory: BackendLifecycleActorFactory, - actorRefFactory: ActorRefFactory): ActorRef = { - synchronized { - val (originalOrUpdated, result) = getOrElseUpdated( - factoryFileHashingRouters, backendLifecycleActorFactory, { - val numberOfInstances = backendLifecycleActorFactory.fileHashingActorCount - val props = backendLifecycleActorFactory.fileHashingActorProps - actorRefFactory.actorOf(RoundRobinPool(numberOfInstances).props(props), s"FileHashingActor-$backendName") - } - ) - factoryFileHashingRouters = originalOrUpdated - result - } - } - - /** - * Immutable version of mutable.Map.getOrElseUpdate based on: - * http://stackoverflow.com/questions/4385976/idiomatic-get-or-else-update-for-immutable-map#answer-5840119 - * - * If given key is already in this map, returns associated value in the copy of the Map. - * - * Otherwise, computes value from given expression `op`, stores with key - * in map and returns that value in a copy of the Map. - * - * @param map the immutable map - * @param key the key to test - * @param op the computation yielding the value to associate with `key`, if - * `key` is previously unbound. - * @tparam K type of the key - * @tparam V type of the value - * @return the value associated with key (either previously or as a result - * of executing the method). 
- */ - def getOrElseUpdated[K, V](map: Map[K, V], key: K, op: => V): (Map[K, V], V) = { - map.get(key) match { - case Some(value) => (map, value) - case None => - val value = op - (map.updated(key, value), value) - } - } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala index 007326908..8df32d65e 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala @@ -21,6 +21,7 @@ import wdl4s.EvaluatedTaskInputs class SubWorkflowExecutionActor(key: SubWorkflowKey, data: WorkflowExecutionActorData, factories: Map[String, BackendLifecycleActorFactory], + ioActor: ActorRef, override val serviceRegistryActor: ActorRef, jobStoreActor: ActorRef, subWorkflowStoreActor: ActorRef, @@ -148,6 +149,7 @@ class SubWorkflowExecutionActor(key: SubWorkflowKey, context.actorOf( WorkflowExecutionActor.props( subWorkflowEngineDescriptor, + ioActor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, @@ -251,6 +253,7 @@ object SubWorkflowExecutionActor { def props(key: SubWorkflowKey, data: WorkflowExecutionActorData, factories: Map[String, BackendLifecycleActorFactory], + ioActor: ActorRef, serviceRegistryActor: ActorRef, jobStoreActor: ActorRef, subWorkflowStoreActor: ActorRef, @@ -264,6 +267,7 @@ object SubWorkflowExecutionActor { key, data, factories, + ioActor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala index 9135696cf..70aa2773f 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala +++ 
b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala @@ -21,8 +21,8 @@ import cromwell.webservice.EngineStatsActor import lenthall.exception.ThrowableAggregation import lenthall.util.TryUtil import net.ceedubs.ficus.Ficus._ -import wdl4s.values.{WdlArray, WdlBoolean, WdlOptionalValue, WdlValue, WdlString} import org.apache.commons.lang3.StringUtils +import wdl4s.values.{WdlArray, WdlBoolean, WdlOptionalValue, WdlString, WdlValue} import wdl4s.{Scope, _} import scala.annotation.tailrec @@ -30,6 +30,7 @@ import scala.language.postfixOps import scala.util.{Failure, Success, Try} case class WorkflowExecutionActor(workflowDescriptor: EngineWorkflowDescriptor, + ioActor: ActorRef, serviceRegistryActor: ActorRef, jobStoreActor: ActorRef, subWorkflowStoreActor: ActorRef, @@ -481,7 +482,7 @@ case class WorkflowExecutionActor(workflowDescriptor: EngineWorkflowDescriptor, val ejeaName = s"${workflowDescriptor.id}-EngineJobExecutionActor-${jobKey.tag}" val backendSingleton = backendSingletonCollection.backendSingletonActors(backendName) val ejeaProps = EngineJobExecutionActor.props( - self, jobKey, data, factory, initializationData.get(backendName), restarting, serviceRegistryActor, + self, jobKey, data, factory, initializationData.get(backendName), restarting, serviceRegistryActor, ioActor, jobStoreActor, callCacheReadActor, dockerHashActor, jobTokenDispenserActor, backendSingleton, backendName, workflowDescriptor.callCachingMode) val ejeaRef = context.actorOf(ejeaProps, ejeaName) context watch ejeaRef @@ -498,7 +499,7 @@ case class WorkflowExecutionActor(workflowDescriptor: EngineWorkflowDescriptor, private def processRunnableSubWorkflow(key: SubWorkflowKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { val sweaRef = context.actorOf( - SubWorkflowExecutionActor.props(key, data, backendFactories, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, + SubWorkflowExecutionActor.props(key, data, 
backendFactories, ioActor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, dockerHashActor, jobTokenDispenserActor, backendSingletonCollection, initializationData, restarting), s"SubWorkflowExecutionActor-${key.tag}" ) @@ -767,6 +768,7 @@ object WorkflowExecutionActor { private lazy val DefaultMaxRetriesFallbackValue = 10 def props(workflowDescriptor: EngineWorkflowDescriptor, + ioActor: ActorRef, serviceRegistryActor: ActorRef, jobStoreActor: ActorRef, subWorkflowStoreActor: ActorRef, @@ -776,7 +778,7 @@ object WorkflowExecutionActor { backendSingletonCollection: BackendSingletonCollection, initializationData: AllBackendInitializationData, restarting: Boolean): Props = { - Props(WorkflowExecutionActor(workflowDescriptor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, + Props(WorkflowExecutionActor(workflowDescriptor, ioActor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, dockerHashActor, jobTokenDispenserActor, backendSingletonCollection, initializationData, restarting)).withDispatcher(EngineDispatcher) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala index 4830951c0..cd3b80589 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala @@ -2,10 +2,10 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching import akka.actor.{ActorLogging, ActorRef, LoggingFSM, Props} import cats.data.NonEmptyList -import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest +import cromwell.backend.standard.callcaching.StandardFileHashingActor.SingleFileHashRequest import cromwell.backend.{BackendInitializationData, 
BackendJobDescriptor, RuntimeAttributeDefinition} -import cromwell.core.callcaching._ import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.core.callcaching._ import cromwell.core.simpleton.WdlValueSimpleton import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheReadActor.{CacheLookupRequest, CacheResultLookupFailure, CacheResultMatchesForHashes} import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor._ @@ -16,15 +16,17 @@ import wdl4s.values.WdlFile * * (if read enabled): Either a CacheHit(id) or CacheMiss message * * (if write enabled): A CallCacheHashes(hashes) message */ -case class EngineJobHashingActor(receiver: ActorRef, +class EngineJobHashingActor(receiver: ActorRef, jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], - fileHashingActor: ActorRef, + fileHashingActorProps: Props, callCacheReadActor: ActorRef, runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition], backendName: String, activity: CallCachingActivity) extends LoggingFSM[EJHAState, EJHAData] with ActorLogging { + private val fileHashingActor = makeFileHashingActor() + initializeEJHA() when(DeterminingHitOrMiss) { @@ -46,12 +48,13 @@ case class EngineJobHashingActor(receiver: ActorRef, whenUnhandled { case Event(CacheResultLookupFailure(reason), _) => receiver ! HashError(new Exception(s"Failure looking up call cache results: ${reason.getMessage}")) - context.stop(self) - stay + stopAndStay() case Event(HashingFailedMessage(hashKey, reason), _) => receiver ! HashError(new Exception(s"Unable to generate ${hashKey.key} hash. Caused by ${reason.getMessage}", reason)) - context.stop(self) - stay + stopAndStay() + case Event(HashingServiceUnvailable, _) => + receiver ! 
HashError(new Exception(s"File hashing service is unavailable.")) + stopAndStay() case Event(other, _) => log.error(s"Bad message in $stateName with $stateData: $other") stay @@ -61,6 +64,17 @@ case class EngineJobHashingActor(receiver: ActorRef, case fromState -> toState => log.debug("Transitioning from {}({}) to {}({})", fromState, stateData, toState, nextStateData) } + + private def stopAndStay() = { + context.stop(fileHashingActor) + context.stop(self) + stay + } + + private [callcaching] def makeFileHashingActor() = { + val jobPreparationActorName = s"FileHashingActor_for_${jobDescriptor.key.tag}" + context.actorOf(fileHashingActorProps, jobPreparationActorName) + } private def initializeEJHA() = { @@ -132,8 +146,7 @@ case class EngineJobHashingActor(receiver: ActorRef, receiver ! hitOrMissResponse if (!activity.writeToCache) { - context.stop(self) - stay + stopAndStay() } else { checkWhetherAllHashesAreKnownAndTransition(newData) } @@ -142,7 +155,7 @@ case class EngineJobHashingActor(receiver: ActorRef, private def checkWhetherAllHashesAreKnownAndTransition(newData: EJHAData) = { if (newData.allHashesKnown) { receiver ! 
CallCacheHashes(newData.hashesKnown) - context.stop(self) + stopAndStay() } goto(GeneratingAllHashes) using newData } @@ -182,7 +195,7 @@ object EngineJobHashingActor { def props(receiver: ActorRef, jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], - fileHashingActor: ActorRef, + fileHashingActorProps: Props, callCacheReadActor: ActorRef, runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition], backendName: String, @@ -190,7 +203,7 @@ object EngineJobHashingActor { receiver = receiver, jobDescriptor = jobDescriptor, initializationData = initializationData, - fileHashingActor = fileHashingActor, + fileHashingActorProps = fileHashingActorProps, callCacheReadActor = callCacheReadActor, runtimeAttributeDefinitions = runtimeAttributeDefinitions, backendName = backendName, diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationActor.scala index 5524ff640..4e1df3836 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationActor.scala @@ -24,6 +24,7 @@ case class JobPreparationActor(executionData: WorkflowExecutionActorData, dockerHashingActor: ActorRef, initializationData: Option[BackendInitializationData], serviceRegistryActor: ActorRef, + ioActor: ActorRef, backendSingletonActor: Option[ActorRef]) extends FSM[JobPreparationActorState, Option[JobPreparationActorData]] with WorkflowLogging { @@ -149,7 +150,7 @@ case class JobPreparationActor(executionData: WorkflowExecutionActorData, runtimeAttributes: Map[LocallyQualifiedName, WdlValue], callCachingEligibility: CallCachingEligibility) = { val jobDescriptor = BackendJobDescriptor(workflowDescriptor.backendDescriptor, jobKey, runtimeAttributes, inputEvaluation, 
callCachingEligibility) - BackendJobPreparationSucceeded(jobDescriptor, factory.jobExecutionActorProps(jobDescriptor, initializationData, serviceRegistryActor, backendSingletonActor)) + BackendJobPreparationSucceeded(jobDescriptor, factory.jobExecutionActorProps(jobDescriptor, initializationData, serviceRegistryActor, ioActor, backendSingletonActor)) } private [preparation] def prepareRuntimeAttributes(inputEvaluation: Map[Declaration, WdlValue]): Try[Map[LocallyQualifiedName, WdlValue]] = { @@ -201,6 +202,7 @@ object JobPreparationActor { dockerHashingActor: ActorRef, initializationData: Option[BackendInitializationData], serviceRegistryActor: ActorRef, + ioActor: ActorRef, backendSingletonActor: Option[ActorRef]) = { // Note that JobPreparationActor doesn't run on the engine dispatcher as it mostly executes backend-side code // (WDL expression evaluation using Backend's expressionLanguageFunctions) @@ -210,6 +212,7 @@ object JobPreparationActor { dockerHashingActor, initializationData, serviceRegistryActor, + ioActor, backendSingletonActor)).withDispatcher(EngineDispatcher) } } diff --git a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala index 0a6b708b2..9944465b2 100644 --- a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala +++ b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala @@ -9,11 +9,14 @@ import akka.routing.RoundRobinPool import akka.stream.ActorMaterializer import com.typesafe.config.ConfigFactory import cromwell.core.Dispatcher +import cromwell.core.actor.StreamActorHelper.ActorRestartException import cromwell.core.callcaching.docker.DockerHashActor -import cromwell.core.callcaching.docker.DockerHashActor.{DockerHashActorException, DockerHashContext} +import cromwell.core.callcaching.docker.DockerHashActor.DockerHashContext import cromwell.core.callcaching.docker.registryv2.flows.dockerhub.DockerHubFlow import 
cromwell.core.callcaching.docker.registryv2.flows.gcr.GoogleFlow +import cromwell.core.io.Throttle import cromwell.engine.backend.{BackendSingletonCollection, CromwellBackends} +import cromwell.engine.io.IoActor import cromwell.engine.workflow.WorkflowManagerActor import cromwell.engine.workflow.lifecycle.CopyWorkflowLogsActor import cromwell.engine.workflow.lifecycle.execution.callcaching.{CallCache, CallCacheReadActor} @@ -47,13 +50,9 @@ import scala.language.postfixOps val serverMode: Boolean + lazy val systemConfig = config.getConfig("system") lazy val serviceRegistryActor: ActorRef = context.actorOf(ServiceRegistryActor.props(config), "ServiceRegistryActor") - lazy val numberOfWorkflowLogCopyWorkers = config.getConfig("system").as[Option[Int]]("number-of-workflow-log-copy-workers").getOrElse(DefaultNumberOfWorkflowLogCopyWorkers) - - lazy val workflowLogCopyRouter: ActorRef = context.actorOf(RoundRobinPool(numberOfWorkflowLogCopyWorkers) - .withSupervisorStrategy(CopyWorkflowLogsActor.strategy) - .props(CopyWorkflowLogsActor.props(serviceRegistryActor)), - "WorkflowLogCopyRouter") + lazy val numberOfWorkflowLogCopyWorkers = systemConfig.as[Option[Int]]("number-of-workflow-log-copy-workers").getOrElse(DefaultNumberOfWorkflowLogCopyWorkers) lazy val workflowStore: WorkflowStore = SqlWorkflowStore(SingletonServicesStore.databaseInterface) lazy val workflowStoreActor = context.actorOf(WorkflowStoreActor.props(workflowStore, serviceRegistryActor), "WorkflowStoreActor") @@ -63,6 +62,17 @@ import scala.language.postfixOps lazy val subWorkflowStore = new SqlSubWorkflowStore(SingletonServicesStore.databaseInterface) lazy val subWorkflowStoreActor = context.actorOf(SubWorkflowStoreActor.props(subWorkflowStore), "SubWorkflowStoreActor") + + // Io Actor + lazy val throttleElements = systemConfig.as[Option[Int]]("io.number-of-requests").getOrElse(100000) + lazy val throttlePer = systemConfig.as[Option[FiniteDuration]]("io.per").getOrElse(100 seconds) + lazy val ioThrottle 
= Throttle(throttleElements, throttlePer, throttleElements) + lazy val ioActor = context.actorOf(IoActor.props(1000, Option(ioThrottle)).withDispatcher(Dispatcher.IoDispatcher)) + + lazy val workflowLogCopyRouter: ActorRef = context.actorOf(RoundRobinPool(numberOfWorkflowLogCopyWorkers) + .withSupervisorStrategy(CopyWorkflowLogsActor.strategy) + .props(CopyWorkflowLogsActor.props(serviceRegistryActor, ioActor)), + "WorkflowLogCopyRouter") lazy val callCache: CallCache = new CallCache(SingletonServicesStore.databaseInterface) @@ -96,8 +106,8 @@ import scala.language.postfixOps lazy val workflowManagerActor = context.actorOf( WorkflowManagerActor.props( - workflowStoreActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, - dockerHashActor, jobExecutionTokenDispenserActor, backendSingletonCollection, abortJobsOnTerminate, serverMode), + workflowStoreActor, ioActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, + dockerHashActor, jobExecutionTokenDispenserActor, backendSingletonCollection, abortJobsOnTerminate, serverMode), "WorkflowManagerActor") override def receive = { @@ -110,7 +120,7 @@ import scala.language.postfixOps */ override val supervisorStrategy = OneForOneStrategy() { case actorInitializationException: ActorInitializationException => Escalate - case dockerHash: DockerHashActorException => Restart + case restart: ActorRestartException => Restart case t => super.supervisorStrategy.decider.applyOrElse(t, (_: Any) => Escalate) } } diff --git a/engine/src/test/scala/cromwell/CromwellTestKitSpec.scala b/engine/src/test/scala/cromwell/CromwellTestKitSpec.scala index b28a61a3f..556eaf34f 100644 --- a/engine/src/test/scala/cromwell/CromwellTestKitSpec.scala +++ b/engine/src/test/scala/cromwell/CromwellTestKitSpec.scala @@ -48,12 +48,14 @@ import scala.util.matching.Regex case class TestBackendLifecycleActorFactory(configurationDescriptor: 
BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = None override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], serviceRegistryActor: ActorRef, + ioActor: ActorRef, backendSingletonActor: Option[ActorRef]): Props = { throw new NotImplementedError("this is not implemented") } diff --git a/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala b/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala index 511cbaa27..30b3a06d5 100644 --- a/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala +++ b/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala @@ -7,7 +7,7 @@ import akka.testkit._ import com.typesafe.config.ConfigFactory import cromwell.MetadataWatchActor.{FailureMatcher, Matcher} import cromwell.SimpleWorkflowActorSpec._ -import cromwell.core.{WorkflowId, WorkflowSourceFilesWithoutImports} +import cromwell.core.{SimpleIoActor, WorkflowId, WorkflowSourceFilesWithoutImports} import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.WorkflowActor import cromwell.engine.workflow.WorkflowActor._ @@ -40,6 +40,7 @@ class SimpleWorkflowActorSpec extends CromwellTestKitSpec with BeforeAndAfter { val supervisor = TestProbe() val workflowActor = TestFSMRef( factory = new WorkflowActor(workflowId, StartNewWorkflow, workflowSources, ConfigFactory.load(), + ioActor = system.actorOf(SimpleIoActor.props), serviceRegistryActor = watchActor, workflowLogCopyRouter = system.actorOf(Props.empty, s"workflow-copy-log-router-$workflowId-${UUID.randomUUID()}"), jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props), diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala 
b/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala index 0763a1c30..6c2020a29 100644 --- a/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala @@ -25,12 +25,14 @@ case class DefaultBackendJobExecutionActor(override val jobDescriptor: BackendJo class DefaultBackendLifecycleActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = None override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], serviceRegistryActor: ActorRef, + ioActor: ActorRef, backendSingletonActor: Option[ActorRef]): Props = { DefaultBackendJobExecutionActor.props(jobDescriptor, configurationDescriptor) } diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala index c88481610..556e75c56 100644 --- a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala @@ -8,12 +8,14 @@ import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} class RetryableBackendLifecycleActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = None override def jobExecutionActorProps(jobDescriptor: 
BackendJobDescriptor, initializationData: Option[BackendInitializationData], serviceRegistryActor: ActorRef, + ioActor: ActorRef, backendSingletonActor: Option[ActorRef]): Props = { RetryableBackendJobExecutionActor.props(jobDescriptor, configurationDescriptor) } diff --git a/engine/src/test/scala/cromwell/engine/io/IoActorGcsBatchSpec.scala b/engine/src/test/scala/cromwell/engine/io/IoActorGcsBatchSpec.scala new file mode 100644 index 000000000..e3f2dd3a1 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/io/IoActorGcsBatchSpec.scala @@ -0,0 +1,103 @@ +package cromwell.engine.io + +import java.util.UUID + +import akka.stream.ActorMaterializer +import akka.testkit.{ImplicitSender, TestActorRef} +import cromwell.core.Tags.IntegrationTest +import cromwell.core.io._ +import cromwell.core.{TestKitSuite, WorkflowOptions} +import cromwell.filesystems.gcs.auth.ApplicationDefaultMode +import cromwell.filesystems.gcs.batch.{GcsBatchCopyCommand, GcsBatchCrc32Command, GcsBatchDeleteCommand, GcsBatchSizeCommand} +import cromwell.filesystems.gcs.{GcsPathBuilder, GcsPathBuilderFactory} +import org.scalatest.concurrent.Eventually +import org.scalatest.{FlatSpecLike, Matchers} + +import scala.concurrent.ExecutionContext +import scala.concurrent.duration._ +import scala.language.postfixOps + +class IoActorGcsBatchSpec extends TestKitSuite with FlatSpecLike with Matchers with ImplicitSender with Eventually { + behavior of "IoActor [GCS Batch]" + + implicit val actorSystem = system + implicit val ec: ExecutionContext = system.dispatcher + implicit val materializer = ActorMaterializer() + + override def afterAll() = { + materializer.shutdown() + src.delete(swallowIOExceptions = true) + dst.delete(swallowIOExceptions = true) + srcRegional.delete(swallowIOExceptions = true) + dstMultiRegional.delete(swallowIOExceptions = true) + super.afterAll() + } + + lazy val gcsPathBuilder = GcsPathBuilderFactory(ApplicationDefaultMode("default"), "cromwell-test") + lazy val pathBuilder: 
GcsPathBuilder = gcsPathBuilder.withOptions(WorkflowOptions.empty) + + lazy val randomUUID = UUID.randomUUID().toString + + lazy val src = pathBuilder.build(s"gs://cloud-cromwell-dev/unit-test/$randomUUID/testFile.txt").get + lazy val dst = pathBuilder.build(s"gs://cloud-cromwell-dev/unit-test/$randomUUID/testFile-copy.txt").get + lazy val srcRegional = pathBuilder.build(s"gs://cloud-cromwell-dev-regional/unit-test/$randomUUID/testRegional.txt").get + lazy val dstMultiRegional = pathBuilder.build(s"gs://cloud-cromwell-dev/unit-test/$randomUUID/testFileRegional-copy.txt").get + + override def beforeAll() = { + // Write commands can't be batched, so for the sake of this test, just create a file in GCS synchronously here + src.write("hello") + srcRegional.write("hello") + super.beforeAll() + } + + it should "batch queries" taggedAs IntegrationTest in { + val testActor = TestActorRef(new IoActor(10, None)) + + val copyCommand = GcsBatchCopyCommand(src, dst, overwrite = false) + val sizeCommand = GcsBatchSizeCommand(src) + val hashCommand = GcsBatchCrc32Command(src) + + val deleteSrcCommand = GcsBatchDeleteCommand(src, swallowIOExceptions = false) + val deleteDstCommand = GcsBatchDeleteCommand(dst, swallowIOExceptions = false) + + testActor ! copyCommand + testActor ! sizeCommand + testActor ! hashCommand + + val received1 = receiveN(3, 10 seconds) + + received1.size shouldBe 3 + received1 forall { _.isInstanceOf[IoSuccess[_]] } shouldBe true + + received1 collect { + case IoSuccess(_: GcsBatchSizeCommand, fileSize: Long) => fileSize shouldBe 5 + } + + received1 collect { + case IoSuccess(_: GcsBatchCrc32Command, hash: String) => hash shouldBe "mnG7TA==" + } + + testActor ! deleteSrcCommand + testActor ! 
deleteDstCommand + + val received2 = receiveN(2, 10 seconds) + + received2.size shouldBe 2 + received2 forall { _.isInstanceOf[IoSuccess[_]] } shouldBe true + + src.exists shouldBe false + dst.exists shouldBe false + } + + it should "copy files across GCS storage classes" taggedAs IntegrationTest in { + val testActor = TestActorRef(new IoActor(10, None)) + + val copyCommand = GcsBatchCopyCommand(srcRegional, dstMultiRegional, overwrite = false) + + testActor ! copyCommand + + expectMsgClass(30 seconds, classOf[IoSuccess[_]]) + + dstMultiRegional.exists shouldBe true + } +} diff --git a/engine/src/test/scala/cromwell/engine/io/IoActorSpec.scala b/engine/src/test/scala/cromwell/engine/io/IoActorSpec.scala new file mode 100644 index 000000000..1af4a2fb8 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/io/IoActorSpec.scala @@ -0,0 +1,135 @@ +package cromwell.engine.io + +import akka.stream.ActorMaterializer +import akka.testkit.{ImplicitSender, TestActorRef} +import better.files.File.OpenOptions +import cromwell.core.TestKitSuite +import cromwell.core.io._ +import cromwell.core.path.{DefaultPathBuilder, Path} +import org.scalatest.{FlatSpecLike, Matchers} + +import scala.concurrent.ExecutionContext +import scala.concurrent.duration._ +import scala.language.postfixOps + +class IoActorSpec extends TestKitSuite with FlatSpecLike with Matchers with ImplicitSender { + behavior of "IoActor" + + implicit val actorSystem = system + implicit val ec: ExecutionContext = system.dispatcher + implicit val materializer = ActorMaterializer() + + override def afterAll() = { + materializer.shutdown() + super.afterAll() + } + + it should "copy a file" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + val dst: Path = src.parent.resolve(src.name + "-dst") + + val copyCommand = new IoCopyCommand(src, dst, overwrite = true) + + testActor ! 
copyCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => response.command.isInstanceOf[IoCopyCommand] shouldBe true + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + dst.toFile should exist + src.delete() + dst.delete() + } + + it should "write to a file" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + + val writeCommand = new IoWriteCommand(src, "hello", OpenOptions.default) + + testActor ! writeCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => response.command.isInstanceOf[IoWriteCommand] shouldBe true + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + src.contentAsString shouldBe "hello" + src.delete() + } + + it should "delete a file" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + + val deleteCommand = new IoDeleteCommand(src, swallowIOExceptions = false) + + testActor ! deleteCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => response.command.isInstanceOf[IoDeleteCommand] shouldBe true + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + src.toFile shouldNot exist + } + + it should "read a file" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + src.write("hello") + + val readCommand = new IoContentAsStringCommand(src) + + testActor ! 
readCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => + response.command.isInstanceOf[IoContentAsStringCommand] shouldBe true + response.result.asInstanceOf[String] shouldBe "hello" + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + src.delete() + } + + it should "return a file size" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + src.write("hello") + + val sizeCommand = new IoSizeCommand(src) + + testActor ! sizeCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => + response.command.isInstanceOf[IoSizeCommand] shouldBe true + response.result.asInstanceOf[Long] shouldBe 5 + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + src.delete() + } + + it should "return a file md5 hash (local)" in { + val testActor = TestActorRef(new IoActor(1, None)) + + val src = DefaultPathBuilder.createTempFile() + src.write("hello") + + val hashCommand = new IoHashCommand(src) + + testActor ! 
hashCommand + expectMsgPF(5 seconds) { + case response: IoSuccess[_] => + response.command.isInstanceOf[IoHashCommand] shouldBe true + response.result.asInstanceOf[String] shouldBe "5d41402abc4b2a76b9719d911017c592" + case response: IoFailure[_] => fail("Expected an IoSuccess", response.failure) + } + + src.delete() + } +} diff --git a/engine/src/test/scala/cromwell/engine/io/nio/NioFlowSpec.scala b/engine/src/test/scala/cromwell/engine/io/nio/NioFlowSpec.scala new file mode 100644 index 000000000..aec4484f1 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/io/nio/NioFlowSpec.scala @@ -0,0 +1,189 @@ +package cromwell.engine.io.nio + +import java.nio.file.{FileAlreadyExistsException, NoSuchFileException} +import java.util.UUID + +import akka.actor.ActorRef +import akka.stream.ActorMaterializer +import akka.stream.scaladsl.{Keep, Sink, Source} +import cromwell.core.{CromwellFatalException, TestKitSuite} +import cromwell.core.io.{DefaultIoCommandBuilder, IoAck, IoFailure, IoSuccess} +import cromwell.core.path.DefaultPathBuilder +import cromwell.engine.io.IoActor.DefaultCommandContext +import cromwell.engine.io.IoCommandContext +import org.scalatest.mockito.MockitoSugar +import org.scalatest.{AsyncFlatSpecLike, Matchers} + +class NioFlowSpec extends TestKitSuite with AsyncFlatSpecLike with Matchers with MockitoSugar with DefaultIoCommandBuilder { + + behavior of "NioFlowSpec" + + val flow = new NioFlow(1, system.scheduler)(system.dispatcher, system).flow + + implicit val materializer = ActorMaterializer() + val replyTo = mock[ActorRef] + val readSink = Sink.head[(IoAck[_], IoCommandContext[_])] + + override def afterAll() = { + materializer.shutdown() + super.afterAll() + } + + it should "write to a Nio Path" in { + val testPath = DefaultPathBuilder.createTempFile() + val context = DefaultCommandContext(writeCommand(testPath, "hello", Seq.empty), replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) 
+ + stream.run() map { _ => + assert(testPath.contentAsString == "hello") + } + } + + it should "read from a Nio Path" in { + val testPath = DefaultPathBuilder.createTempFile() + testPath.write("hello") + + val context = DefaultCommandContext(contentAsStringCommand(testPath), replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => assert(success.result.asInstanceOf[String] == "hello") + case _ => fail("read returned an unexpected message") + } + } + + it should "get size from a Nio Path" in { + val testPath = DefaultPathBuilder.createTempFile() + testPath.write("hello") + + val context = DefaultCommandContext(sizeCommand(testPath), replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => assert(success.result.asInstanceOf[Long] == 5) + case _ => fail("size returned an unexpected message") + } + } + + it should "get hash from a Nio Path" in { + val testPath = DefaultPathBuilder.createTempFile() + testPath.write("hello") + + val context = DefaultCommandContext(hashCommand(testPath), replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => assert(success.result.asInstanceOf[String] == "5d41402abc4b2a76b9719d911017c592") + case _ => fail("hash returned an unexpected message") + } + } + + it should "copy Nio paths" in { + val testPath = DefaultPathBuilder.createTempFile() + val testCopyPath = testPath.sibling(UUID.randomUUID().toString) + + val context = DefaultCommandContext(copyCommand(testPath, testCopyPath, overwrite = false), replyTo) + + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => 
assert(testCopyPath.exists) + case _ => fail("copy returned an unexpected message") + } + } + + it should "copy Nio paths with overwrite true" in { + val testPath = DefaultPathBuilder.createTempFile() + testPath.write("goodbye") + + val testCopyPath = DefaultPathBuilder.createTempFile() + testCopyPath.write("hello") + + val context = DefaultCommandContext(copyCommand(testPath, testCopyPath, overwrite = true), replyTo) + + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => + assert(testCopyPath.exists) + assert(testCopyPath.contentAsString == "goodbye") + case _ => fail("copy returned an unexpected message") + } + } + + it should "copy Nio paths with overwrite false" in { + val testPath = DefaultPathBuilder.createTempFile() + val testCopyPath = DefaultPathBuilder.createTempFile() + + val context = DefaultCommandContext(copyCommand(testPath, testCopyPath, overwrite = false), replyTo) + + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (failure: IoFailure[_], _) => + assert(failure.failure.isInstanceOf[CromwellFatalException]) + assert(failure.failure.getCause.isInstanceOf[FileAlreadyExistsException]) + case _ => fail("copy returned an unexpected message") + } + } + + it should "delete a Nio path" in { + val testPath = DefaultPathBuilder.createTempFile() + val context = DefaultCommandContext(deleteCommand(testPath, swallowIoExceptions = false), replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => assert(!testPath.exists) + case _ => fail("delete returned an unexpected message") + } + } + + it should "delete a Nio path with swallowIoExceptions true" in { + val testPath = DefaultPathBuilder.build("/this/does/not/exist").get + + val context = 
DefaultCommandContext(deleteCommand(testPath, swallowIoExceptions = true), replyTo) + + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => assert(!testPath.exists) + case _ => fail("delete returned an unexpected message") + } + } + + it should "delete a Nio path with swallowIoExceptions false" in { + val testPath = DefaultPathBuilder.build("/this/does/not/exist").get + + val context = DefaultCommandContext(deleteCommand(testPath, swallowIoExceptions = false), replyTo) + + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (failure: IoFailure[_], _) => + assert(failure.failure.isInstanceOf[CromwellFatalException]) + assert(failure.failure.getCause.isInstanceOf[NoSuchFileException]) + case other => fail(s"delete returned an unexpected message") + } + } + +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala index ca4723ce0..7893a4d1b 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala @@ -8,8 +8,9 @@ import akka.testkit.TestKit import akka.util.Timeout import com.typesafe.config.ConfigFactory import cromwell.CromwellTestKitSpec._ -import cromwell.core.WorkflowSourceFilesCollection +import cromwell._ import cromwell.core.path.{DefaultPathBuilder, Path} +import cromwell.core.{SimpleIoActor, WorkflowSourceFilesCollection} import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.SingleWorkflowRunnerActor.RunWorkflow import cromwell.engine.workflow.SingleWorkflowRunnerActorSpec._ @@ -17,7 +18,6 @@ import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import 
cromwell.engine.workflow.workflowstore.{InMemoryWorkflowStore, WorkflowStoreActor} import cromwell.util.SampleWdl import cromwell.util.SampleWdl.{ExpressionsInInputs, GoodbyeWorld, ThreeStep} -import cromwell._ import org.scalatest.prop.{TableDrivenPropertyChecks, TableFor3} import spray.json._ @@ -55,6 +55,7 @@ object SingleWorkflowRunnerActorSpec { abstract class SingleWorkflowRunnerActorSpec extends CromwellTestKitSpec { private val workflowStore = system.actorOf(WorkflowStoreActor.props(new InMemoryWorkflowStore, dummyServiceRegistryActor)) private val jobStore = system.actorOf(AlwaysHappyJobStoreActor.props) + private val ioActor = system.actorOf(SimpleIoActor.props) private val subWorkflowStore = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) private val callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props) private val dockerHashActor = system.actorOf(EmptyDockerHashActor.props) @@ -64,6 +65,7 @@ abstract class SingleWorkflowRunnerActorSpec extends CromwellTestKitSpec { def workflowManagerActor(): ActorRef = { val params = WorkflowManagerActorParams(ConfigFactory.load(), workflowStore, + ioActor = ioActor, dummyServiceRegistryActor, dummyLogCopyRouter, jobStore, diff --git a/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala index 35bcb2ca1..e92093797 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala @@ -55,6 +55,7 @@ class WorkflowActorSpec extends CromwellTestKitSpec with WorkflowDescriptorBuild startMode = StartNewWorkflow, workflowSources = wdlSources, conf = ConfigFactory.load, + ioActor = system.actorOf(SimpleIoActor.props), serviceRegistryActor = mockServiceRegistryActor, workflowLogCopyRouter = copyWorkflowLogsProbe.ref, jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props), @@ -169,13 +170,14 @@ class 
MockWorkflowActor(val finalizationProbe: TestProbe, startMode: StartMode, workflowSources: WorkflowSourceFilesCollection, conf: Config, + ioActor: ActorRef, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, dockerHashActor: ActorRef, - jobTokenDispenserActor: ActorRef) extends WorkflowActor(workflowId, startMode, workflowSources, conf, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, dockerHashActor, jobTokenDispenserActor, BackendSingletonCollection(Map.empty), serverMode = true) { + jobTokenDispenserActor: ActorRef) extends WorkflowActor(workflowId, startMode, workflowSources, conf, ioActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, dockerHashActor, jobTokenDispenserActor, BackendSingletonCollection(Map.empty), serverMode = true) { override def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, jobExecutionMap: JobExecutionMap, worfklowOutputs: CallOutputs) = finalizationProbe.ref } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala index 6cae26763..6dfe38d73 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala @@ -33,6 +33,7 @@ class SubWorkflowExecutionActorSpec extends TestKitSuite with FlatSpecLike with val subWorkflowStoreProbe = TestProbe() val callCacheReadActorProbe = TestProbe() val dockerHashActorProbe = TestProbe() + val ioActorProbe = TestProbe() val jobTokenDispenserProbe = TestProbe() val preparationActor = TestProbe() val subWorkflowActor = TestProbe() @@ -64,6 +65,7 @@ class 
SubWorkflowExecutionActorSpec extends TestKitSuite with FlatSpecLike with subKey, WorkflowExecutionActorData.empty(parentWorkflowDescriptor), Map.empty, + ioActorProbe.ref, serviceRegistryProbe.ref, jobStoreProbe.ref, subWorkflowStoreProbe.ref, diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala index f3106bc21..21e20eacd 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala @@ -3,8 +3,9 @@ package cromwell.engine.workflow.lifecycle.execution import akka.actor.{Actor, Props} import akka.testkit.{EventFilter, TestActorRef, TestDuration, TestProbe} import com.typesafe.config.ConfigFactory +import cromwell._ import cromwell.backend.AllBackendInitializationData -import cromwell.core.WorkflowId +import cromwell.core.{SimpleIoActor, WorkflowId} import cromwell.engine.backend.{BackendConfigurationEntry, BackendSingletonCollection, CromwellBackends} import cromwell.engine.workflow.WorkflowDescriptorBuilder import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.ExecuteWorkflowCommand @@ -12,7 +13,6 @@ import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import cromwell.services.ServiceRegistryActor import cromwell.services.metadata.MetadataService import cromwell.util.SampleWdl -import cromwell._ import org.scalatest.BeforeAndAfter import scala.concurrent.duration._ @@ -53,6 +53,7 @@ class WorkflowExecutionActorSpec extends CromwellTestKitSpec with BeforeAndAfter val metadataWatcherProps = Props(MetadataWatchActor(metadataSuccessPromise, requiredMetadataMatchers: _*)) val serviceRegistryActor = system.actorOf(ServiceRegistryActor.props(ConfigFactory.load(), overrides = Map(MetadataService.MetadataServiceName -> 
metadataWatcherProps))) val jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props) + val ioActor = system.actorOf(SimpleIoActor.props) val subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) val jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props) val MockBackendConfigEntry = BackendConfigurationEntry( @@ -68,7 +69,7 @@ class WorkflowExecutionActorSpec extends CromwellTestKitSpec with BeforeAndAfter val dockerHashActor = TestProbe() val workflowExecutionActor = system.actorOf( - WorkflowExecutionActor.props(engineWorkflowDescriptor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, + WorkflowExecutionActor.props(engineWorkflowDescriptor, ioActor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, callCacheReadActor.ref, dockerHashActor.ref, jobTokenDispenserActor, MockBackendSingletonCollection, AllBackendInitializationData.empty, restarting = false), "WorkflowExecutionActor") @@ -91,6 +92,7 @@ class WorkflowExecutionActorSpec extends CromwellTestKitSpec with BeforeAndAfter val subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) val callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props) val dockerHashActor = system.actorOf(EmptyDockerHashActor.props) + val ioActor = system.actorOf(SimpleIoActor.props) val jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props) val MockBackendConfigEntry = BackendConfigurationEntry( @@ -103,7 +105,7 @@ class WorkflowExecutionActorSpec extends CromwellTestKitSpec with BeforeAndAfter val workflowId = WorkflowId.randomId() val engineWorkflowDescriptor = createMaterializedEngineWorkflowDescriptor(workflowId, SampleWdl.SimpleScatterWdl.asWorkflowSources(runtime = runtimeSection)) val workflowExecutionActor = system.actorOf( - WorkflowExecutionActor.props(engineWorkflowDescriptor, serviceRegistry, jobStore, subWorkflowStoreActor, + WorkflowExecutionActor.props(engineWorkflowDescriptor, ioActor, 
serviceRegistry, jobStore, subWorkflowStoreActor, callCacheReadActor, dockerHashActor, jobTokenDispenserActor, MockBackendSingletonCollection, AllBackendInitializationData.empty, restarting = false), "WorkflowExecutionActor") diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala index 9c718c5d6..17b012117 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala @@ -5,7 +5,7 @@ import akka.testkit.{ImplicitSender, TestProbe} import cats.data.NonEmptyList import cromwell.CromwellTestKitSpec import cromwell.backend._ -import cromwell.backend.callcaching.FileHashingActor.{FileHashResponse, SingleFileHashRequest} +import cromwell.backend.standard.callcaching.StandardFileHashingActor.{FileHashResponse, SingleFileHashRequest} import cromwell.core.callcaching._ import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CacheMiss, CallCacheHashes} import org.scalatest.mockito.MockitoSugar @@ -180,15 +180,58 @@ object EngineJobHashingActorSpec extends BackendSpec { )(implicit system: ActorSystem) = { val callCacheReadActor = system.actorOf(Props(new PredictableCallCacheReadActor(cacheLookupResponses))) - system.actorOf(EngineJobHashingActor.props( - receiver = replyTo, + system.actorOf(EngineJobHashingActorTest.props( + replyTo = replyTo, + activity = activity, jobDescriptor = jobDescriptor, initializationData = initializationData, fileHashingActor = fileHashingActor.getOrElse(emptyActor), callCacheReadActor = callCacheReadActor, runtimeAttributeDefinitions = runtimeAttributeDefinitions, - backendName = backendName, - activity = activity)) + backendName = backendName + )) + } + 
+ object EngineJobHashingActorTest { + def props(replyTo: ActorRef, + activity: CallCachingActivity, + jobDescriptor: BackendJobDescriptor, + initializationData: Option[BackendInitializationData], + fileHashingActor: ActorRef, + callCacheReadActor: ActorRef, + runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition], + backendName: String + ) = Props(new EngineJobHashingActorTest( + replyTo, + activity, + jobDescriptor, + initializationData, + fileHashingActor, + callCacheReadActor, + runtimeAttributeDefinitions, + backendName + )) + } + + class EngineJobHashingActorTest(replyTo: ActorRef, + activity: CallCachingActivity, + jobDescriptor: BackendJobDescriptor, + initializationData: Option[BackendInitializationData], + fileHashingActor: ActorRef, + callCacheReadActor: ActorRef, + runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition], + backendName: String + ) extends EngineJobHashingActor( + replyTo, + jobDescriptor, + initializationData, + Props.empty, + callCacheReadActor, + runtimeAttributeDefinitions, + backendName, + activity + ) { + override def makeFileHashingActor(): ActorRef = fileHashingActor } def emptyActor(implicit actorSystem: ActorSystem) = actorSystem.actorOf(Props.empty) diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala index 1ceef04c4..5d67e3884 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala @@ -6,6 +6,7 @@ import akka.actor.{ActorRef, ActorSystem, Props} import akka.testkit.{TestFSMRef, TestProbe} import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse import cromwell.backend._ +import cromwell.backend.standard.callcaching._ import cromwell.core.JobExecutionToken.JobExecutionTokenType import 
cromwell.core.callcaching.{CallCachingActivity, CallCachingEligible, CallCachingMode, CallCachingOff} import cromwell.core.{CallOutputs, JobExecutionToken, WorkflowId} @@ -23,6 +24,8 @@ import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} import wdl4s.parser.WdlParser.Ast import wdl4s.types.{WdlIntegerType, WdlStringType} +import scala.util.Success + private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mockito with TaskMock with WdlExpressionMock with DeclarationMock { @@ -66,6 +69,7 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mock val replyToProbe = TestProbe() val parentProbe = TestProbe() val serviceRegistryProbe = TestProbe() + val ioActorProbe = TestProbe() val jobStoreProbe = TestProbe() val callCacheReadActorProbe = TestProbe() val dockerHashActorProbe = TestProbe() @@ -78,21 +82,37 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mock override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], serviceRegistryActor: ActorRef, + ioActor: ActorRef, backendSingletonActor: Option[ActorRef]): Props = bjeaProps - override def cacheHitCopyingActorProps: Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef) => Props] = Option((_, _, _) => callCacheHitCopyingProbe.props) + override def cacheHitCopyingActorProps: Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef, ActorRef) => Props] = Option((_, _, _, _) => callCacheHitCopyingProbe.props) override def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = { NoFunctions } + override def fileHashingActorProps: + Option[(BackendJobDescriptor, Option[BackendInitializationData], ActorRef, ActorRef) => Props] = { + 
Option(fileHashingActorInner(classOf[DefaultStandardFileHashingActor])) + } + + def fileHashingActorInner(standardFileHashingActor: Class[_ <: StandardFileHashingActor]) + (jobDescriptor: BackendJobDescriptor, + initializationDataOption: Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + ioActor: ActorRef): Props = { + Props.empty + } + // These two factory methods should never be called from EJEA or any of its descendants: override def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, calls: Set[TaskCall], jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: Option[BackendInitializationData]): Option[Props] = throw new UnsupportedOperationException("Unexpected finalization actor creation!") override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = throw new UnsupportedOperationException("Unexpected finalization actor creation!") } @@ -114,6 +134,7 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mock initializationData = None, restarting = restarting, serviceRegistryActor = serviceRegistryProbe.ref, + ioActor = ioActorProbe.ref, jobStoreActor = jobStoreProbe.ref, callCacheReadActor = callCacheReadActorProbe.ref, dockerHashActor = dockerHashActorProbe.ref, @@ -136,15 +157,16 @@ private[ejea] class MockEjea(helper: PerTestHelper, initializationData: Option[BackendInitializationData], restarting: Boolean, serviceRegistryActor: ActorRef, + ioActor: ActorRef, jobStoreActor: ActorRef, callCacheReadActor: ActorRef, dockerHashActor: ActorRef, jobTokenDispenserActor: ActorRef, backendName: String, - callCachingMode: CallCachingMode) extends EngineJobExecutionActor(replyTo, jobDescriptorKey, executionData, factory, initializationData, restarting, serviceRegistryActor, jobStoreActor, callCacheReadActor, dockerHashActor, 
jobTokenDispenserActor, None, backendName, callCachingMode) { + callCachingMode: CallCachingMode) extends EngineJobExecutionActor(replyTo, jobDescriptorKey, executionData, factory, initializationData, restarting, serviceRegistryActor, ioActor, jobStoreActor, callCacheReadActor, dockerHashActor, jobTokenDispenserActor, None, backendName, callCachingMode) { override def makeFetchCachedResultsActor(cacheId: CallCachingEntryId, taskOutputs: Seq[TaskOutput]) = helper.fetchCachedResultsActorCreations = helper.fetchCachedResultsActorCreations.foundOne((cacheId, taskOutputs)) - override def initializeJobHashing(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity) = helper.jobHashingInitializations = helper.jobHashingInitializations.foundOne((jobDescriptor, activity)) + override def initializeJobHashing(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity) = Success(helper.jobHashingInitializations = helper.jobHashingInitializations.foundOne((jobDescriptor, activity))) override def createSaveCacheResultsActor(hashes: CallCacheHashes, success: JobSucceededResponse) = helper.callCacheWriteActorCreations = helper.callCacheWriteActorCreations.foundOne((hashes, success)) override def invalidateCacheHit(cacheId: CallCachingEntryId): Unit = { helper.invalidateCacheActorCreations = helper.invalidateCacheActorCreations.foundOne(cacheId) } override def createJobPreparationActor(jobPrepProps: Props, name: String) = jobPreparationProbe.ref diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationTestHelper.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationTestHelper.scala index 6c985a2c0..cee384576 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationTestHelper.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/preparation/JobPreparationTestHelper.scala @@ -21,6 +21,7 @@ class 
JobPreparationTestHelper(implicit val system: ActorSystem) extends Mockito executionData.workflowDescriptor returns workflowDescriptor val jobKey = mock[BackendJobDescriptorKey] val serviceRegistryProbe = TestProbe() + val ioActor = TestProbe() def buildJobPreparationMock( backpressureTimeout: FiniteDuration, @@ -35,6 +36,7 @@ class JobPreparationTestHelper(implicit val system: ActorSystem) extends Mockito any[BackendJobDescriptor], any[Option[BackendInitializationData]], any[ActorRef], + any[ActorRef], any[Option[ActorRef]] )).thenReturn(Props.empty) @@ -44,6 +46,7 @@ class JobPreparationTestHelper(implicit val system: ActorSystem) extends Mockito factory, dockerHashingActor, None, + ioActor.ref, serviceRegistryProbe.ref, None ) { diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala index ae6c1f77b..150af9eb5 100644 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala @@ -3,21 +3,20 @@ package cromwell.filesystems.gcs import java.net.URI import java.nio.file.spi.FileSystemProvider -import akka.actor.ActorSystem import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport import com.google.api.client.json.jackson2.JacksonFactory import com.google.cloud.RetryParams -import com.google.cloud.storage.StorageOptions import com.google.cloud.storage.contrib.nio.{CloudStorageConfiguration, CloudStorageFileSystem, CloudStoragePath} +import com.google.cloud.storage.{BlobId, StorageOptions} import com.google.common.base.Preconditions._ import com.google.common.net.UrlEscapers import cromwell.core.WorkflowOptions -import cromwell.core.path.proxy.{PathProxy, RetryableFileSystemProviderProxy} -import cromwell.core.path.{CustomRetryParams, NioPath, Path, PathBuilder} +import cromwell.core.path.{NioPath, Path, PathBuilder} import 
cromwell.filesystems.gcs.GcsPathBuilder._ import cromwell.filesystems.gcs.auth.GoogleAuthMode -import scala.util.{Failure, Try} +import scala.concurrent.duration._ +import scala.util.Try object GcsPathBuilder { @@ -47,21 +46,38 @@ object GcsPathBuilder { } class GcsPathBuilder(authMode: GoogleAuthMode, + applicationName: String, retryParams: RetryParams, cloudStorageConfiguration: CloudStorageConfiguration, options: WorkflowOptions) extends PathBuilder { authMode.validate(options) - protected val storageOptionsBuilder = StorageOptions.builder() - .authCredentials(authMode.authCredentials(options)) - .retryParams(retryParams) + protected val storageOptionsBuilder = StorageOptions.newBuilder() + .setReadTimeout(3.minutes.toMillis.toInt) + .setCredentials(authMode.credential(options)) + .setRetryParams(retryParams) // Grab the google project from Workflow Options if specified and set // that to be the project used by the StorageOptions Builder - options.get("google_project") map storageOptionsBuilder.projectId + options.get("google_project") map storageOptionsBuilder.setProjectId protected val storageOptions = storageOptionsBuilder.build() + + // Create a com.google.api.services.storage.Storage + // This is the underlying api used by com.google.cloud.storage + // By bypassing com.google.cloud.storage, we can create low level requests that can be batched + val apiStorage: com.google.api.services.storage.Storage = { + new com.google.api.services.storage.Storage + .Builder(HttpTransport, JsonFactory, GoogleConfiguration.withCustomTimeouts(storageOptions.getHttpRequestInitializer)) + .setApplicationName(applicationName) + .build() + } + + // Create a com.google.api.services.storage.Storage + // This is the underlying api used by com.google.cloud.storage + // By bypassing com.google.cloud.storage, we can create low level requests that can be batched + val cloudStorage: com.google.cloud.storage.Storage = storageOptions.getService // The CloudStorageFileSystemProvider 
constructor is not public. Currently the only way to obtain one is through a CloudStorageFileSystem // Moreover at this point we can use the same provider for all operations as we have usable credentials @@ -69,52 +85,27 @@ class GcsPathBuilder(authMode: GoogleAuthMode, protected val _provider = CloudStorageFileSystem.forBucket("dummy", cloudStorageConfiguration, storageOptions).provider() protected def provider: FileSystemProvider = _provider - /* - * The StorageService already contains a StorageRpc object that contains a com.google.api.services.storage.Storage object - * However it is not accessible from StorageService. - * com.google.cloud.storage.Storage has some batching capabilities but not for copying. - * In order to support batch copy, we need a com.google.api.services.storage.Storage. - */ - def getHash(builtPath: Path): Try[String] = { - builtPath match { - case GcsPath(path) => path match { - case gcsPath: CloudStoragePath => Try(storageOptions.service().get(gcsPath.bucket(), gcsPath.toRealPath().toString).crc32c()) - case proxy: PathProxy => - val gcsPath = proxy.unbox(classOf[CloudStoragePath]).get - Try(storageOptions.service().get(gcsPath.bucket(), gcsPath.toRealPath().toString).crc32c()) - case other => Failure(new IllegalArgumentException(s"$other is not a CloudStoragePath")) - } - case other => Failure(new IllegalArgumentException(s"$other is not a GcsPath")) - } - } - def getProjectId = storageOptions.projectId() + def getProjectId = storageOptions.getProjectId def build(string: String): Try[GcsPath] = { Try { val uri = getUri(string) GcsPathBuilder.checkValid(uri) - GcsPath(provider.getPath(uri)) + GcsPath(provider.getPath(uri), apiStorage, cloudStorage) } } override def name: String = "Gcs" } -class RetryableGcsPathBuilder(authMode: GoogleAuthMode, - googleRetryParams: RetryParams, - customRetryParams: CustomRetryParams, - cloudStorageConfiguration: CloudStorageConfiguration, - options: WorkflowOptions)(implicit actorSystem: ActorSystem) - 
extends GcsPathBuilder(authMode, googleRetryParams, cloudStorageConfiguration, options) { - - override protected def provider = new RetryableFileSystemProviderProxy(_provider, customRetryParams) - - override def getHash(path: Path) = provider.withRetry(() => super.getHash(path)) -} - -case class GcsPath private[gcs](nioPath: NioPath) extends Path { - override protected def newPath(nioPath: NioPath): GcsPath = GcsPath(nioPath) +case class GcsPath private[gcs](nioPath: NioPath, + apiStorage: com.google.api.services.storage.Storage, + cloudStorage: com.google.cloud.storage.Storage + ) extends Path { + lazy val blob = BlobId.of(cloudStoragePath.bucket, cloudStoragePath.toRealPath().toString) + + override protected def newPath(nioPath: NioPath): GcsPath = GcsPath(nioPath, apiStorage, cloudStorage) override def pathAsString: String = java.net.URLDecoder.decode(nioPath.toUri.toString, "UTF-8") @@ -123,9 +114,8 @@ case class GcsPath private[gcs](nioPath: NioPath) extends Path { gcsPath.bucket + gcsPath.toAbsolutePath.toString } - private def cloudStoragePath: CloudStoragePath = nioPath match { + def cloudStoragePath: CloudStoragePath = nioPath match { case gcsPath: CloudStoragePath => gcsPath - case pathProxy: PathProxy => pathProxy.unbox(classOf[CloudStoragePath]).get case _ => throw new RuntimeException(s"Internal path was not a cloud storage path: $nioPath") } } diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala index 83aad3ce8..92277f12f 100644 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala @@ -6,7 +6,7 @@ import com.google.cloud.RetryParams import com.google.cloud.storage.contrib.nio.CloudStorageConfiguration import com.typesafe.config.ConfigFactory import cromwell.core.WorkflowOptions -import 
cromwell.core.path.{CustomRetryParams, PathBuilderFactory} +import cromwell.core.path.PathBuilderFactory import cromwell.filesystems.gcs.auth.GoogleAuthMode import net.ceedubs.ficus.Ficus._ @@ -16,7 +16,7 @@ object GcsPathBuilderFactory { ConfigFactory.load().as[Option[Int]]("google.upload-buffer-bytes").getOrElse(MediaHttpUploader.MINIMUM_CHUNK_SIZE) } - val DefaultRetryParams = RetryParams.defaultInstance() + val DefaultRetryParams = RetryParams.getDefaultInstance val DefaultCloudStorageConfiguration = { CloudStorageConfiguration.builder() .blockSize(UploadBufferBytes) @@ -28,21 +28,11 @@ object GcsPathBuilderFactory { } case class GcsPathBuilderFactory(authMode: GoogleAuthMode, + applicationName: String, retryParams: RetryParams = GcsPathBuilderFactory.DefaultRetryParams, cloudStorageConfiguration: CloudStorageConfiguration = GcsPathBuilderFactory.DefaultCloudStorageConfiguration) extends PathBuilderFactory { - def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem) = new GcsPathBuilder(authMode, retryParams, cloudStorageConfiguration, options) -} - -case class RetryableGcsPathBuilderFactory(authMode: GoogleAuthMode, - googleRetryParams: RetryParams = GcsPathBuilderFactory.DefaultRetryParams, - customRetryParams: CustomRetryParams = CustomRetryParams.Default, - cloudStorageConfiguration: CloudStorageConfiguration = GcsPathBuilderFactory.DefaultCloudStorageConfiguration) - - - extends PathBuilderFactory { - - def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem) = new RetryableGcsPathBuilder(authMode, googleRetryParams, customRetryParams, cloudStorageConfiguration, options) + def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem) = new GcsPathBuilder(authMode, applicationName, retryParams, cloudStorageConfiguration, options) } diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala 
b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala index 142485c95..4d6bdc4ad 100644 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala @@ -1,10 +1,14 @@ package cromwell.filesystems.gcs +import java.io.IOException + import cats.data.Validated._ import cats.instances.list._ import cats.syntax.cartesian._ import cats.syntax.traverse._ import cats.syntax.validated._ +import com.google.api.client.googleapis.auth.oauth2.GoogleCredential +import com.google.api.client.http.{HttpRequest, HttpRequestInitializer} import com.google.api.services.storage.StorageScopes import com.typesafe.config.Config import cromwell.filesystems.gcs.auth._ @@ -28,6 +32,28 @@ final case class GoogleConfiguration private (applicationName: String, authsByNa object GoogleConfiguration { import scala.collection.JavaConverters._ + import scala.concurrent.duration._ + import scala.language.postfixOps + + lazy val DefaultConnectionTimeout = 3 minutes + lazy val DefaultReadTimeout = 3 minutes + + lazy val DefaultRequestInitializer = GoogleConfiguration.withCustomTimeouts(new GoogleCredential.Builder().build()) + + def withCustomTimeouts(httpRequestInitializer: HttpRequestInitializer, + connectionTimeout: FiniteDuration = DefaultConnectionTimeout, + readTimeout: FiniteDuration = DefaultReadTimeout) = { + new HttpRequestInitializer() { + @throws[IOException] + override def initialize(httpRequest: HttpRequest) = { + httpRequestInitializer.initialize(httpRequest) + httpRequest.setConnectTimeout(connectionTimeout.toMillis.toInt) + httpRequest.setReadTimeout(readTimeout.toMillis.toInt) + () + } + } + } + private val log = LoggerFactory.getLogger("GoogleConfiguration") case class GoogleConfigurationException(errorMessages: List[String]) extends MessageAggregation { diff --git 
a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala index 67f4c2be7..32d725c7e 100644 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala @@ -1,20 +1,15 @@ package cromwell.filesystems.gcs.auth -import java.io.{FileNotFoundException, InputStreamReader} -import java.nio.file.Paths +import java.io.FileNotFoundException import better.files._ -import com.google.api.client.auth.oauth2.Credential -import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp -import com.google.api.client.googleapis.auth.oauth2.{GoogleAuthorizationCodeFlow, GoogleClientSecrets, GoogleCredential} -import com.google.api.client.googleapis.extensions.java6.auth.oauth2.GooglePromptReceiver import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport -import com.google.api.client.googleapis.testing.auth.oauth2.MockGoogleCredential import com.google.api.client.json.jackson2.JacksonFactory -import com.google.api.client.util.store.FileDataStoreFactory import com.google.api.services.storage.StorageScopes -import com.google.auth.oauth2.{ClientId, ServiceAccountCredentials} -import com.google.cloud.AuthCredentials +import com.google.auth.Credentials +import com.google.auth.http.HttpTransportFactory +import com.google.auth.oauth2.{GoogleCredentials, ServiceAccountCredentials, UserCredentials} +import com.google.cloud.NoCredentials import cromwell.core.WorkflowOptions import cromwell.filesystems.gcs.auth.GoogleAuthMode._ import org.slf4j.LoggerFactory @@ -26,6 +21,11 @@ object GoogleAuthMode { lazy val jsonFactory = JacksonFactory.getDefaultInstance lazy val httpTransport = GoogleNetHttpTransport.newTrustedTransport + lazy val HttpTransportFactory = new HttpTransportFactory { + override def create() = { + httpTransport + } + } val 
RefreshTokenOptionKey = "refresh_token" val GcsScopes = List( @@ -37,11 +37,9 @@ object GoogleAuthMode { if (!file.isReadable) throw new FileNotFoundException(s"File $file does not exist or is not readable") } - case object NoAuthMode extends GoogleAuthMode { + case object MockAuthMode extends GoogleAuthMode { override def name = "no_auth" - - override def authCredentials(options: WorkflowOptions): AuthCredentials = AuthCredentials.noAuth() - override def credential(options: WorkflowOptions): Credential = new MockGoogleCredential.Builder().build() + override def credential(options: WorkflowOptions): Credentials = NoCredentials.getInstance() } } @@ -55,20 +53,12 @@ sealed trait GoogleAuthMode { def validate(options: WorkflowOptions): Unit = {()} def name: String - // Create an AuthCredentials object from the google-cloud library (https://github.com/GoogleCloudPlatform/google-cloud-java using https://github.com/google/google-auth-library-java under the hood) - def authCredentials(options: WorkflowOptions): AuthCredentials // Create a Credential object from the google.api.client.auth library (https://github.com/google/google-api-java-client) - def credential(options: WorkflowOptions): Credential - - def credentialBundle(options: WorkflowOptions) = { - GoogleCredentialBundle(credential(options), authCredentials(options)) - } + def credential(options: WorkflowOptions): Credentials def requiresAuthFile: Boolean = false - protected def validateAuthCredentials(authCredentials: AuthCredentials, scopes: java.util.Collection[String]): AuthCredentials = validate(authCredentials, authCredentials.credentials().createScoped(scopes).refresh) - - protected def validateCredential(credential: Credential) = validate(credential, credential.refreshToken) + protected def validateCredential(credential: Credentials) = validate(credential, () => credential.refresh()) private def validate[T](credential: T, validation: () => Any): T = { Try(validation()) match { @@ -85,65 +75,39 @@ final case 
class ServiceAccountMode(override val name: String, private val pemFile = File(pemPath) checkReadable(pemFile) - private lazy val _authCredentials: AuthCredentials = { - val saCredentials = ServiceAccountCredentials.fromPkcs8(accountId, accountId, pemFile.contentAsString, null, scopes) - validateAuthCredentials(AuthCredentials.createFor(saCredentials.getClientId, saCredentials.getPrivateKey), scopes) - } - - private lazy val _credential: Credential = { + private lazy val _credential: Credentials = { validateCredential( - new GoogleCredential.Builder().setTransport(httpTransport) - .setJsonFactory(jsonFactory) - .setServiceAccountId(accountId) - .setServiceAccountScopes(scopes) - .setServiceAccountPrivateKeyFromPemFile(pemFile.toJava) - .build() + ServiceAccountCredentials.fromPkcs8(accountId, accountId, pemFile.contentAsString, null, scopes) ) } - override def authCredentials(options: WorkflowOptions) = _authCredentials - - override def credential(options: WorkflowOptions): Credential = _credential + override def credential(options: WorkflowOptions): Credentials = _credential } final case class UserMode(override val name: String, user: String, - val secretsPath: String, + secretsPath: String, datastoreDir: String, scopes: java.util.List[String]) extends GoogleAuthMode { - private lazy val secrets = { + private lazy val secretsStream = { val secretsFile = File(secretsPath) checkReadable(secretsFile) - - val secretStream = new InputStreamReader(secretsFile.newInputStream) - - GoogleClientSecrets.load(jsonFactory, secretStream) - } - - private lazy val _credential: Credential = { - val dataStore = Paths.get(datastoreDir).toAbsolutePath - val dataStoreFactory = new FileDataStoreFactory(dataStore.toFile) - val flow = new GoogleAuthorizationCodeFlow.Builder(httpTransport, jsonFactory, secrets, scopes).setDataStoreFactory(dataStoreFactory).build - validateCredential(new AuthorizationCodeInstalledApp(flow, new GooglePromptReceiver).authorize(user)) + 
secretsFile.newInputStream } - private lazy val _authCredentials: AuthCredentials = { - new RefreshableOAuth2Credentials(_credential.getRefreshToken, new ClientId(secrets.getDetails.getClientId, secrets.getDetails.getClientSecret)) + private lazy val _credential: Credentials = { + validateCredential(UserCredentials.fromStream(secretsStream)) } override def credential(options: WorkflowOptions) = _credential - - override def authCredentials(options: WorkflowOptions) = _authCredentials } private object ApplicationDefault { - private [auth] lazy val _AuthCredentials = AuthCredentials.createApplicationDefaults() - private [auth] lazy val _Credential: Credential = GoogleCredential.getApplicationDefault() + private [auth] lazy val _Credential: Credentials = GoogleCredentials.getApplicationDefault } final case class ApplicationDefaultMode(name: String) extends GoogleAuthMode { - override def authCredentials(options: WorkflowOptions) = ApplicationDefault._AuthCredentials override def credential(options: WorkflowOptions) = ApplicationDefault._Credential } @@ -162,23 +126,13 @@ final case class RefreshTokenMode(name: String, override def validate(options: WorkflowOptions) = { extractRefreshToken(options) - () } - override def authCredentials(options: WorkflowOptions): AuthCredentials = { - val refreshToken = extractRefreshToken(options) - validateAuthCredentials(new RefreshableOAuth2Credentials(refreshToken, new ClientId(clientId, clientSecret)), scopes) - } - - override def credential(options: WorkflowOptions): Credential = { + override def credential(options: WorkflowOptions): Credentials = { val refreshToken = extractRefreshToken(options) validateCredential( - new GoogleCredential.Builder().setTransport(httpTransport) - .setJsonFactory(jsonFactory) - .setClientSecrets(clientId, clientSecret) - .build() - .setRefreshToken(refreshToken) + new UserCredentials(clientId, clientSecret, refreshToken, null, GoogleAuthMode.HttpTransportFactory, null) ) } } diff --git 
a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleCredentialBundle.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleCredentialBundle.scala deleted file mode 100644 index 83487e63d..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleCredentialBundle.scala +++ /dev/null @@ -1,6 +0,0 @@ -package cromwell.filesystems.gcs.auth - -import com.google.api.client.auth.oauth2.Credential -import com.google.cloud.AuthCredentials - -case class GoogleCredentialBundle(credential: Credential, authCredential: AuthCredentials) diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala deleted file mode 100644 index ae1e32ef5..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala +++ /dev/null @@ -1,31 +0,0 @@ -package cromwell.filesystems.gcs.auth - -import java.io.Serializable -import java.util.Objects - -import com.google.auth.oauth2.{ClientId, GoogleCredentials, UserCredentials} -import com.google.cloud.{AuthCredentials, RestorableState} - -class RefreshableOAuth2Credentials(refreshToken: String, clientId: ClientId) extends AuthCredentials { - private val _credentials: GoogleCredentials = new UserCredentials(clientId.getClientId, clientId.getClientSecret, refreshToken) - - private class RefreshableOAuth2CredentialsState(val refreshToken: String, val clientId: ClientId) extends RestorableState[AuthCredentials] with Serializable { - - override def restore: AuthCredentials = new RefreshableOAuth2Credentials(refreshToken, clientId) - - override def hashCode: Int = Objects.hash(refreshToken, clientId.getClientId, clientId.getClientSecret) - - override def equals(obj: Any): Boolean = { - obj.isInstanceOf[RefreshableOAuth2CredentialsState] && { - val other = obj.asInstanceOf[RefreshableOAuth2CredentialsState] 
- Objects.equals(refreshToken, other.refreshToken) && - Objects.equals(clientId.getClientId, other.clientId.getClientId) && - Objects.equals(clientId.getClientSecret, other.clientId.getClientSecret) - } - } - } - - override def credentials: GoogleCredentials = _credentials - - def capture: RestorableState[AuthCredentials] = new RefreshableOAuth2CredentialsState(refreshToken, clientId) -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchCommandBuilder.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchCommandBuilder.scala new file mode 100644 index 000000000..a7d84d614 --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchCommandBuilder.scala @@ -0,0 +1,27 @@ +package cromwell.filesystems.gcs.batch + +import cromwell.core.io._ +import cromwell.core.path.Path +import cromwell.filesystems.gcs.GcsPath + +trait GcsBatchCommandBuilder extends DefaultIoCommandBuilder { + override def sizeCommand(path: Path) = path match { + case gcsPath: GcsPath => GcsBatchSizeCommand(gcsPath) + case _ => super.sizeCommand(path) + } + + override def deleteCommand(path: Path, swallowIoExceptions: Boolean = false) = path match { + case gcsPath: GcsPath => GcsBatchDeleteCommand(gcsPath, swallowIoExceptions) + case _ => super.deleteCommand(path, swallowIoExceptions) + } + + override def copyCommand(src: Path, dest: Path, overwrite: Boolean = true) = (src, dest) match { + case (gcsSrc: GcsPath, gcsDest: GcsPath) => GcsBatchCopyCommand(gcsSrc, gcsDest, overwrite) + case _ => super.copyCommand(src, dest, overwrite) + } + + override def hashCommand(path: Path) = path match { + case gcsPath: GcsPath => GcsBatchCrc32Command(gcsPath) + case _ => super.hashCommand(path) + } +} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchIoCommand.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchIoCommand.scala new file mode 100644 index 
000000000..7666c9831 --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/batch/GcsBatchIoCommand.scala @@ -0,0 +1,92 @@ +package cromwell.filesystems.gcs.batch + +import com.google.api.client.http.HttpHeaders +import com.google.api.services.storage.StorageRequest +import com.google.api.services.storage.model.{RewriteResponse, StorageObject} +import cromwell.core.io._ +import cromwell.filesystems.gcs._ + +/** + * Io commands with GCS paths and some logic enabling batching of request. + * @tparam T Return type of the IoCommand + * @tparam U Return type of the Google response + */ +sealed trait GcsBatchIoCommand[T, U] extends IoCommand[T] { + /** + * StorageRequest operation to be executed by this command + */ + def operation: StorageRequest[U] + + /** + * Maps the google response of type U to the Cromwell Io response of type T + */ + protected def mapGoogleResponse(response: U): T + + /** + * Method called in the success callback of a batched request to decide what to do next. + * Returns an Either[T, GcsBatchIoCommand[T, U]] + * Left(value) means the command is complete, and the result can be sent back to the sender. + * Right(newCommand) means the command is not complete and needs another request to be executed. + * Most commands will reply with Left(value). 
+ */ + def onSuccess(response: U, httpHeaders: HttpHeaders): Either[T, GcsBatchIoCommand[T, U]] = { + Left(mapGoogleResponse(response)) + } +} + +case class GcsBatchCopyCommand( + override val source: GcsPath, + override val destination: GcsPath, + override val overwrite: Boolean, + rewriteToken: Option[String] = None + ) extends IoCopyCommand(source, destination, overwrite) with GcsBatchIoCommand[Unit, RewriteResponse] { + val sourceBlob = source.blob + val destinationBlob = destination.blob + + override def operation: StorageRequest[RewriteResponse] = { + val rewriteOperation = source.apiStorage.objects().rewrite(sourceBlob.getBucket, sourceBlob.getName, destinationBlob.getBucket, destinationBlob.getName, null) + // Set the rewrite token if present + rewriteToken foreach rewriteOperation.setRewriteToken + rewriteOperation + } + + /** + * Clone this command with the give rewrite token + */ + def withRewriteToken(rewriteToken: String) = copy(rewriteToken = Option(rewriteToken)) + + override def onSuccess(response: RewriteResponse, httpHeaders: HttpHeaders) = { + if (response.getDone) super.onSuccess(response, httpHeaders) + else { + Right(withRewriteToken(response.getRewriteToken)) + } + } + + override def mapGoogleResponse(response: RewriteResponse): Unit = () +} + +case class GcsBatchDeleteCommand( + override val file: GcsPath, + override val swallowIOExceptions: Boolean + ) extends IoDeleteCommand(file, swallowIOExceptions) with GcsBatchIoCommand[Unit, Void] { + private val blob = file.blob + def operation = file.apiStorage.objects().delete(blob.getBucket, blob.getName) + override protected def mapGoogleResponse(response: Void): Unit = () +} + +/** + * Base trait for commands that use the objects.get() operation. (e.g: size, crc32, ...) 
+ */ +sealed trait GcsBatchGetCommand[T] extends GcsBatchIoCommand[T, StorageObject] { + def file: GcsPath + private val blob = file.blob + override def operation: StorageRequest[StorageObject] = file.apiStorage.objects().get(blob.getBucket, blob.getName) +} + +case class GcsBatchSizeCommand(override val file: GcsPath) extends IoSizeCommand(file) with GcsBatchGetCommand[Long] { + override def mapGoogleResponse(response: StorageObject): Long = response.getSize.longValue() +} + +case class GcsBatchCrc32Command(override val file: GcsPath) extends IoHashCommand(file) with GcsBatchGetCommand[String] { + override def mapGoogleResponse(response: StorageObject): String = response.getCrc32c +} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala index 697313b41..52fda8196 100644 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala +++ b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala @@ -3,7 +3,6 @@ package cromwell.filesystems.gcs import com.google.cloud.RetryParams import com.google.cloud.storage.contrib.nio.CloudStorageConfiguration import cromwell.core.path._ -import cromwell.core.path.proxy.RetryableFileSystemProviderProxy import cromwell.core.{TestKitSuite, WorkflowOptions} import cromwell.filesystems.gcs.auth.{GoogleAuthMode, GoogleAuthModeSpec} import org.scalatest.prop.Tables.Table @@ -13,20 +12,15 @@ class GcsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers wi behavior of "GcsPathBuilder" - it should "create a path with a retryable provider" in { - val path = retryablePathBuilder.build("gs://bucket/object") - path.isSuccess shouldBe true - path.get.nioPath.getFileSystem.provider() shouldBe a[RetryableFileSystemProviderProxy[_]] - } - it should "use google project credentials when provided in the workflow options" in { 
GoogleAuthModeSpec.assumeHasApplicationDefaultCredentials() val wfOptionsWithProject = WorkflowOptions.fromMap(Map("google_project" -> "my_project")).get val gcsPathBuilderWithProjectInfo = new GcsPathBuilder( - GoogleAuthMode.NoAuthMode, - RetryParams.defaultInstance(), + GoogleAuthMode.MockAuthMode, + "cromwell-test", + RetryParams.getDefaultInstance, CloudStorageConfiguration.DEFAULT, wfOptionsWithProject ) @@ -34,14 +28,14 @@ class GcsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers wi gcsPathBuilderWithProjectInfo.getProjectId shouldBe "my_project" } - it should behave like truncateCommonRoots(retryablePathBuilder, pathsToTruncate) + it should behave like truncateCommonRoots(pathBuilder, pathsToTruncate) goodPaths foreach { goodPath => - it should behave like buildGoodPath(retryablePathBuilder, goodPath) + it should behave like buildGoodPath(pathBuilder, goodPath) } badPaths foreach { badPath => - it should behave like buildBadPath(retryablePathBuilder, badPath) + it should behave like buildBadPath(pathBuilder, badPath) } private def pathsToTruncate = Table( @@ -361,13 +355,13 @@ class GcsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers wi BadPath("an absolute file path", "/hello/world", "/hello/world does not have a gcs scheme") ) - private lazy val retryablePathBuilder = { + private lazy val pathBuilder = { GoogleAuthModeSpec.assumeHasApplicationDefaultCredentials() - new RetryableGcsPathBuilder( - GoogleAuthMode.NoAuthMode, - RetryParams.defaultInstance(), - CustomRetryParams.Default, + new GcsPathBuilder( + GoogleAuthMode.MockAuthMode, + "cromwell-test", + RetryParams.getDefaultInstance, CloudStorageConfiguration.DEFAULT, WorkflowOptions.empty ) diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/auth/GoogleAuthModeSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/auth/GoogleAuthModeSpec.scala index 4f3dcd56f..cda1c12e2 100644 --- 
a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/auth/GoogleAuthModeSpec.scala +++ b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/auth/GoogleAuthModeSpec.scala @@ -17,7 +17,6 @@ object GoogleAuthModeSpec { private lazy val tryApplicationDefaultCredentials: Try[Unit] = Try { val authMode = ApplicationDefaultMode("application-default") val workflowOptions = WorkflowOptions.empty - authMode.authCredentials(workflowOptions) authMode.credential(workflowOptions) () } diff --git a/project/Dependencies.scala b/project/Dependencies.scala index f1e3b83e2..172e1492f 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -13,7 +13,7 @@ object Dependencies { */ lazy val sprayJsonV = "1.3.2" lazy val akkaV = "2.4.16" - lazy val akkaHttpV = "2.4.8" + lazy val akkaHttpV = "2.4.11" lazy val slickV = "3.1.1" lazy val googleClientApiV = "1.22.0" lazy val googleGenomicsServicesApiV = "1.22.0" @@ -82,7 +82,7 @@ object Dependencies { private val googleCloudDependencies = List( "com.google.apis" % "google-api-services-genomics" % ("v1alpha2-rev64-" + googleGenomicsServicesApiV), - "com.google.cloud" % "google-cloud-nio" % "0.3.0" + "com.google.cloud" % "google-cloud-nio" % "0.9.4-alpha" exclude("com.google.api.grpc", "grpc-google-common-protos") exclude("com.google.cloud.datastore", "datastore-v1-protos") exclude("org.apache.httpcomponents", "httpclient"), @@ -120,8 +120,10 @@ object Dependencies { "com.typesafe.akka" %% "akka-slf4j" % akkaV, "com.typesafe.akka" %% "akka-testkit" % akkaV % Test, "com.google.guava" % "guava" % "20.0", + "com.google.auth" % "google-auth-library-oauth2-http" % "0.6.0", "com.typesafe.akka" %% "akka-http-core" % akkaHttpV, "com.typesafe.akka" %% "akka-stream-testkit" % akkaHttpV, + "com.chuusai" %% "shapeless" % "2.3.2", "com.typesafe.akka" %% "akka-http-spray-json-experimental" % akkaHttpV ) ++ baseDependencies ++ googleApiClientDependencies ++ // TODO: We're not using the "F" in slf4j. 
Core only supports logback, specifically the WorkflowLogger. diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala index 3c8e48fd4..c295a2306 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala @@ -17,6 +17,7 @@ case class HtCondorBackendFactory(name: String, configurationDescriptor: Backend extends BackendLifecycleActorFactory with StrictLogging { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = { Option(HtCondorInitializationActor.props(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) @@ -25,6 +26,7 @@ case class HtCondorBackendFactory(name: String, configurationDescriptor: Backend override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], serviceRegistryActor: ActorRef, + ioActor: ActorRef, backendSingletonActor: Option[ActorRef]): Props = { HtCondorJobExecutionActor.props(jobDescriptor, configurationDescriptor, serviceRegistryActor, resolveCacheProviderProps(jobDescriptor.workflowDescriptor.workflowOptions)) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala index 574e58153..2c3a097e6 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala @@ -2,7 +2,9 @@ package cromwell.backend.impl.jes import java.net.URL +import 
com.google.api.client.http.{HttpRequest, HttpRequestInitializer} import com.google.api.services.genomics.Genomics +import com.google.auth.http.HttpCredentialsAdapter import cromwell.core.WorkflowOptions import cromwell.filesystems.gcs.auth.GoogleAuthMode @@ -10,12 +12,21 @@ import cromwell.filesystems.gcs.auth.GoogleAuthMode case class GenomicsFactory(applicationName: String, authMode: GoogleAuthMode, endpointUrl: URL) { def withOptions(options: WorkflowOptions) = { - val credential = authMode.credential(options) + val scopedCredentials = authMode.credential(options) + + val httpRequestInitializer = { + val delegate = new HttpCredentialsAdapter(scopedCredentials) + new HttpRequestInitializer() { + def initialize(httpRequest: HttpRequest) = { + delegate.initialize(httpRequest) + } + } + } new Genomics.Builder( - credential.getTransport, - credential.getJsonFactory, - credential) + GoogleAuthMode.httpTransport, + GoogleAuthMode.jsonFactory, + httpRequestInitializer) .setApplicationName(applicationName) .setRootUrl(endpointUrl.toString) .build diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala index 65050a67c..e9efadcfd 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala @@ -5,6 +5,7 @@ import java.net.SocketTimeoutException import akka.actor.ActorRef import com.google.api.client.googleapis.json.GoogleJsonResponseException import com.google.api.services.genomics.model.RunPipelineRequest +import com.google.cloud.storage.contrib.nio.CloudStorageOptions import cromwell.backend._ import cromwell.backend.async.{AbortedExecutionHandle, ExecutionHandle, FailedNonRetryableExecutionHandle, FailedRetryableExecutionHandle, PendingExecutionHandle} 
import cromwell.backend.impl.jes.RunStatus.TerminalRunStatus @@ -16,16 +17,17 @@ import cromwell.core.logging.JobLogger import cromwell.core.path.{DefaultPathBuilder, Path} import cromwell.core.retry.SimpleExponentialBackoff import cromwell.filesystems.gcs.GcsPath +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder import org.slf4j.LoggerFactory import wdl4s._ import wdl4s.expression.NoFunctions import wdl4s.values._ +import scala.collection.JavaConverters._ import scala.concurrent.duration._ import scala.concurrent.{ExecutionContext, Future} import scala.language.postfixOps import scala.util.{Success, Try} -import scala.collection.JavaConverters._ object JesAsyncBackendJobExecutionActor { val JesOperationIdKey = "__jes_operation_id" @@ -45,7 +47,7 @@ object JesAsyncBackendJobExecutionActor { class JesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyncExecutionActorParams) extends BackendJobLifecycleActor with StandardAsyncExecutionActor with JesJobCachingActorHelper - with JesStatusRequestClient with JesRunCreationClient { + with JesStatusRequestClient with JesRunCreationClient with GcsBatchCommandBuilder { import JesAsyncBackendJobExecutionActor._ @@ -82,7 +84,7 @@ class JesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyn Run(job, initializationData.genomics).abort() } - override def receive: Receive = pollingActorClientReceive orElse runCreationClientReceive orElse super.receive + override def receive: Receive = pollingActorClientReceive orElse runCreationClientReceive orElse ioReceive orElse super.receive private def gcsAuthParameter: Option[JesInput] = { if (jesAttributes.auths.gcs.requiresAuthFile || dockerConfiguration.isDefined) @@ -257,17 +259,21 @@ class JesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyn } private def runWithJes(jobForResumption: Option[StandardAsyncJob]): Future[ExecutionHandle] = { - // Want to force runtimeAttributes to evaluate so we can fail quickly now if 
we need to: - def makeRpr: Try[RunPipelineRequest] = Try(runtimeAttributes) flatMap { _ => Try { + def evaluateRuntimeAttributes = Future.fromTry(Try(runtimeAttributes)) + + def generateJesParameters = Future.fromTry(Try { val jesInputs: Set[JesInput] = generateJesInputs(jobDescriptor) ++ monitoringScript + cmdInput val jesOutputs: Set[JesFileOutput] = generateJesOutputs(jobDescriptor) ++ monitoringOutput - val jesParameters = standardParameters ++ gcsAuthParameter ++ jesInputs ++ jesOutputs - - jobPaths.script.writeAsText(commandScriptContents) + standardParameters ++ gcsAuthParameter ++ jesInputs ++ jesOutputs + }) + + def uploadScriptFile = writeAsync(jobPaths.script, commandScriptContents, Seq(CloudStorageOptions.withMimeType("text/plain"))) + + def makeRpr(jesParameters: Seq[JesParameter]) = Future.fromTry(Try { createJesRunPipelineRequest(jesParameters) - }} + }) jobForResumption match { case Some(job) => @@ -275,7 +281,10 @@ class JesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyn Future.successful(PendingExecutionHandle(jobDescriptor, job, Option(run), previousStatus = None)) case None => for { - rpr <- Future.fromTry(makeRpr) + _ <- evaluateRuntimeAttributes + jesParameters <- generateJesParameters + _ <- uploadScriptFile + rpr <- makeRpr(jesParameters) runId <- runPipeline(initializationData.genomics, rpr) run = Run(runId, initializationData.genomics) } yield PendingExecutionHandle(jobDescriptor, runId, Option(run), previousStatus = None) diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendInitializationData.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendInitializationData.scala index de4295a49..8aeebe639 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendInitializationData.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendInitializationData.scala @@ -1,14 +1,14 @@ package 
cromwell.backend.impl.jes import com.google.api.services.genomics.Genomics +import com.google.auth.Credentials import cromwell.backend.standard.{StandardInitializationData, StandardValidatedRuntimeAttributesBuilder} -import cromwell.filesystems.gcs.auth.GoogleCredentialBundle case class JesBackendInitializationData ( override val workflowPaths: JesWorkflowPaths, override val runtimeAttributesBuilder: StandardValidatedRuntimeAttributesBuilder, jesConfiguration: JesConfiguration, - gcsCredentials: GoogleCredentialBundle, + gcsCredentials: Credentials, genomics: Genomics ) extends StandardInitializationData(workflowPaths, runtimeAttributesBuilder, classOf[JesExpressionFunctions]) diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala index 82d11e271..8a0df28a2 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala @@ -2,9 +2,9 @@ package cromwell.backend.impl.jes import akka.actor.ActorRef import cromwell.backend._ -import cromwell.backend.callcaching.FileHashingActor.FileHashingFunction -import cromwell.backend.impl.jes.callcaching.JesBackendFileHashing +import cromwell.backend.impl.jes.callcaching.{JesBackendCacheHitCopyingActor, JesBackendFileHashingActor} import cromwell.backend.standard._ +import cromwell.backend.standard.callcaching.{StandardCacheHitCopyingActor, StandardFileHashingActor} import cromwell.core.CallOutputs import wdl4s.TaskCall @@ -25,12 +25,12 @@ case class JesBackendLifecycleActorFactory(name: String, configurationDescriptor val jesConfiguration = new JesConfiguration(configurationDescriptor) - override def workflowInitializationActorParams(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], + override def 
workflowInitializationActorParams(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef): StandardInitializationActorParams = { - JesInitializationActorParams(workflowDescriptor, calls, jesConfiguration, serviceRegistryActor) + JesInitializationActorParams(workflowDescriptor, ioActor, calls, jesConfiguration, serviceRegistryActor) } - override def workflowFinalizationActorParams(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], + override def workflowFinalizationActorParams(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, calls: Set[TaskCall], jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationDataOption: Option[BackendInitializationData]): StandardFinalizationActorParams = { @@ -38,19 +38,23 @@ case class JesBackendLifecycleActorFactory(name: String, configurationDescriptor // invocation. HOWEVER, the finalization actor is created regardless of whether workflow initialization was successful // or not. So the finalization actor must be able to handle an empty `JesBackendInitializationData` option, and there is no // `.get` on the initialization data as there is with the execution or cache hit copying actor methods. 
- JesFinalizationActorParams(workflowDescriptor, calls, jesConfiguration, jobExecutionMap, workflowOutputs, + JesFinalizationActorParams(workflowDescriptor, ioActor, calls, jesConfiguration, jobExecutionMap, workflowOutputs, initializationDataOption) } + override lazy val cacheHitCopyingActorClassOption: Option[Class[_ <: StandardCacheHitCopyingActor]] = { + Option(classOf[JesBackendCacheHitCopyingActor]) + } + override def backendSingletonActorProps = Option(JesBackendSingletonActor.props(jesConfiguration.qps)) - override lazy val fileHashingFunction: Option[FileHashingFunction] = Option(FileHashingFunction(JesBackendFileHashing.getCrc32c)) + override lazy val fileHashingActorClassOption: Option[Class[_ <: StandardFileHashingActor]] = Option(classOf[JesBackendFileHashingActor]) override def dockerHashCredentials(initializationData: Option[BackendInitializationData]) = { Try(BackendInitializationData.as[JesBackendInitializationData](initializationData)) match { case Success(jesData) => val maybeDockerHubCredentials = jesData.jesConfiguration.dockerCredentials - val googleCredentials = Option(jesData.gcsCredentials.credential) + val googleCredentials = Option(jesData.gcsCredentials) List(maybeDockerHubCredentials, googleCredentials).flatten case _ => List.empty[Any] } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala index 2c1e82f92..a5a3fa01c 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala @@ -2,34 +2,18 @@ package cromwell.backend.impl.jes import cromwell.backend.BackendConfigurationDescriptor import cromwell.backend.impl.jes.authentication.JesDockerCredentials -import cromwell.backend.impl.jes.io._ import cromwell.core.DockerConfiguration -import cromwell.core.path.CustomRetryParams 
-import cromwell.core.retry.SimpleExponentialBackoff -import cromwell.filesystems.gcs.{GoogleConfiguration, RetryableGcsPathBuilderFactory} - -import scala.concurrent.duration._ -import scala.language.postfixOps - -object JesConfiguration { - val GcsRetryParams = CustomRetryParams( - timeout = Duration.Inf, - maxRetries = Option(3), - backoff = SimpleExponentialBackoff(1 seconds, 3 seconds, 1.5D), - isTransient = isTransientJesException, - isFatal = isFatalJesException - ) -} +import cromwell.filesystems.gcs.{GcsPathBuilderFactory, GoogleConfiguration} class JesConfiguration(val configurationDescriptor: BackendConfigurationDescriptor) { - private val googleConfig = GoogleConfiguration(configurationDescriptor.globalConfig) + val googleConfig = GoogleConfiguration(configurationDescriptor.globalConfig) val root = configurationDescriptor.backendConfig.getString("root") val jesAttributes = JesAttributes(googleConfig, configurationDescriptor.backendConfig) val jesAuths = jesAttributes.auths val jesComputeServiceAccount = jesAttributes.computeServiceAccount - val gcsPathBuilderFactory = RetryableGcsPathBuilderFactory(jesAuths.gcs, customRetryParams = JesConfiguration.GcsRetryParams) + val gcsPathBuilderFactory = GcsPathBuilderFactory(jesAuths.gcs, googleConfig.applicationName) val genomicsFactory = GenomicsFactory(googleConfig.applicationName, jesAuths.genomics, jesAttributes.endpointUrl) val dockerCredentials = DockerConfiguration.build(configurationDescriptor.backendConfig).dockerCredentials map JesDockerCredentials.apply val needAuthFileUpload = jesAuths.gcs.requiresAuthFile || dockerCredentials.isDefined diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala index 131cea544..4ffcc486e 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala +++ 
b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala @@ -1,18 +1,19 @@ package cromwell.backend.impl.jes -import cats.instances.future._ -import cats.syntax.functor._ +import akka.actor.ActorRef import cromwell.backend._ import cromwell.backend.standard.{StandardFinalizationActor, StandardFinalizationActorParams} import cromwell.core.CallOutputs +import cromwell.core.io.AsyncIo +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder import wdl4s.TaskCall import scala.concurrent.Future -import scala.language.postfixOps case class JesFinalizationActorParams ( workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, calls: Set[TaskCall], jesConfiguration: JesConfiguration, jobExecutionMap: JobExecutionMap, @@ -23,9 +24,11 @@ case class JesFinalizationActorParams } class JesFinalizationActor(val jesParams: JesFinalizationActorParams) - extends StandardFinalizationActor(jesParams) { + extends StandardFinalizationActor(jesParams) with AsyncIo with GcsBatchCommandBuilder { lazy val jesConfiguration: JesConfiguration = jesParams.jesConfiguration + + override def receive = ioReceive orElse super.receive override def afterAll(): Future[Unit] = { for { @@ -37,8 +40,10 @@ class JesFinalizationActor(val jesParams: JesFinalizationActorParams) private def deleteAuthenticationFile(): Future[Unit] = { (jesConfiguration.needAuthFileUpload, workflowPaths) match { - case (true, Some(paths: JesWorkflowPaths)) => Future { paths.gcsAuthFilePath.delete() } void + case (true, Some(paths: JesWorkflowPaths)) => deleteAsync(paths.gcsAuthFilePath) case _ => Future.successful(()) } } + + override def ioActor: ActorRef = jesParams.ioActor } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala index 3a23a9832..4619f6e3c 100644 --- 
a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala @@ -3,20 +3,22 @@ package cromwell.backend.impl.jes import java.io.IOException import akka.actor.ActorRef +import com.google.cloud.storage.contrib.nio.CloudStorageOptions import cromwell.backend.impl.jes.authentication.{GcsLocalizing, JesAuthInformation} -import cromwell.backend.impl.jes.io._ import cromwell.backend.standard.{StandardInitializationActor, StandardInitializationActorParams, StandardValidatedRuntimeAttributesBuilder} import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor} +import cromwell.core.io.AsyncIo import cromwell.filesystems.gcs.auth.{ClientSecrets, GoogleAuthMode} +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder import spray.json.JsObject import wdl4s.TaskCall import scala.concurrent.Future -import scala.util.Try case class JesInitializationActorParams ( workflowDescriptor: BackendWorkflowDescriptor, + ioActor: ActorRef, calls: Set[TaskCall], jesConfiguration: JesConfiguration, serviceRegistryActor: ActorRef @@ -25,9 +27,12 @@ case class JesInitializationActorParams } class JesInitializationActor(jesParams: JesInitializationActorParams) - extends StandardInitializationActor(jesParams) { + extends StandardInitializationActor(jesParams) with AsyncIo with GcsBatchCommandBuilder { + override lazy val ioActor = jesParams.ioActor private val jesConfiguration = jesParams.jesConfiguration + + context.become(ioReceive orElse receive) override lazy val runtimeAttributesBuilder: StandardValidatedRuntimeAttributesBuilder = JesRuntimeAttributes.runtimeAttributesBuilder(jesConfiguration) @@ -43,7 +48,7 @@ class JesInitializationActor(jesParams: JesInitializationActorParams) // FIXME: workflow paths indirectly re create part of those credentials via the GcsPathBuilder // This is unnecessary duplication 
of credentials. They are needed here so they can be added to the initialization data // and used to retrieve docker hashes - private lazy val gcsCredentials = jesConfiguration.jesAuths.gcs.credentialBundle(workflowDescriptor.workflowOptions) + private lazy val gcsCredentials = jesConfiguration.jesAuths.gcs.credential(workflowDescriptor.workflowOptions) override lazy val workflowPaths: JesWorkflowPaths = new JesWorkflowPaths(workflowDescriptor, jesConfiguration)(context.system) @@ -51,22 +56,23 @@ class JesInitializationActor(jesParams: JesInitializationActorParams) override lazy val initializationData: JesBackendInitializationData = JesBackendInitializationData(workflowPaths, runtimeAttributesBuilder, jesConfiguration, gcsCredentials, genomics) - override def beforeAll(): Future[Option[BackendInitializationData]] = Future.fromTry(Try { - if (jesConfiguration.needAuthFileUpload) writeAuthenticationFile(workflowPaths) + override def beforeAll(): Future[Option[BackendInitializationData]] = { publishWorkflowRoot(workflowPaths.workflowRoot.pathAsString) - Option(initializationData) - }) + if (jesConfiguration.needAuthFileUpload) { + writeAuthenticationFile(workflowPaths) map { _ => Option(initializationData) } recoverWith { + case failure => Future.failed(new IOException("Failed to upload authentication file", failure)) + } + } else { + Future.successful(Option(initializationData)) + } + } - private def writeAuthenticationFile(workflowPath: JesWorkflowPaths): Unit = { - generateAuthJson(jesConfiguration.dockerCredentials, refreshTokenAuth) foreach { content => + private def writeAuthenticationFile(workflowPath: JesWorkflowPaths): Future[Unit] = { + generateAuthJson(jesConfiguration.dockerCredentials, refreshTokenAuth) map { content => val path = workflowPath.gcsAuthFilePath workflowLogger.info(s"Creating authentication file for workflow ${workflowDescriptor.id} at \n $path") - try { - path.writeAsJson(content) - } catch { - case exception: Exception => throw new 
IOException("Failed to upload authentication file", exception) - } - } + writeAsync(path, content, Seq(CloudStorageOptions.withMimeType("application/json"))) + } getOrElse Future.successful(()) } def generateAuthJson(authInformation: Option[JesAuthInformation]*): Option[String] = { diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala index bd4090619..3dbf11b91 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala @@ -50,7 +50,6 @@ class JesJobPaths(val jobKey: BackendJobDescriptorKey, workflowDescriptor: Backe ) override lazy val customDetritusPaths: Map[String, Path] = Map( - JesJobPaths.GcsExecPathKey -> script, JesJobPaths.JesLogPathKey -> jesLogPath ) diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala index 8b06f117b..b0d0eb3ed 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala @@ -7,10 +7,9 @@ import cromwell.backend.io.WorkflowPaths import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} import cromwell.core.WorkflowOptions import cromwell.core.path.{Path, PathBuilder} -import cromwell.filesystems.gcs.{GcsPathBuilderFactory, RetryableGcsPathBuilder} +import cromwell.filesystems.gcs.{GcsPathBuilder, GcsPathBuilderFactory} import scala.language.postfixOps -import scala.util.Try object JesWorkflowPaths { private val GcsRootOptionKey = "jes_gcs_root" @@ -28,9 +27,7 @@ class JesWorkflowPaths(val workflowDescriptor: BackendWorkflowDescriptor, override lazy val executionRootString: String = 
workflowDescriptor.workflowOptions.getOrElse(JesWorkflowPaths.GcsRootOptionKey, jesConfiguration.root) private val workflowOptions: WorkflowOptions = workflowDescriptor.workflowOptions - val gcsPathBuilder: RetryableGcsPathBuilder = jesConfiguration.gcsPathBuilderFactory.withOptions(workflowOptions) - - def getHash(gcsUrl: Path): Try[String] = gcsPathBuilder.getHash(gcsUrl) + val gcsPathBuilder: GcsPathBuilder = jesConfiguration.gcsPathBuilderFactory.withOptions(workflowOptions) val gcsAuthFilePath: Path = { /* @@ -43,7 +40,7 @@ class JesWorkflowPaths(val workflowDescriptor: BackendWorkflowDescriptor, val defaultBucket = executionRoot.resolve(workflowDescriptor.rootWorkflow.unqualifiedName).resolve(workflowDescriptor.rootWorkflowId.toString) val bucket = workflowDescriptor.workflowOptions.get(JesWorkflowPaths.AuthFilePathOptionKey) getOrElse defaultBucket.pathAsString - val authBucket = GcsPathBuilderFactory(genomicsCredentials).withOptions(workflowOptions).build(bucket) recover { + val authBucket = GcsPathBuilderFactory(genomicsCredentials, jesConfiguration.googleConfig.applicationName).withOptions(workflowOptions).build(bucket) recover { case ex => throw new Exception(s"Invalid gcs auth_bucket path $bucket", ex) } get diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendCacheHitCopyingActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendCacheHitCopyingActor.scala new file mode 100644 index 000000000..0317c2b91 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendCacheHitCopyingActor.scala @@ -0,0 +1,6 @@ +package cromwell.backend.impl.jes.callcaching + +import cromwell.backend.standard.callcaching.{StandardCacheHitCopyingActor, StandardCacheHitCopyingActorParams} +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder + +class JesBackendCacheHitCopyingActor(standardParams: StandardCacheHitCopyingActorParams) extends 
StandardCacheHitCopyingActor(standardParams) with GcsBatchCommandBuilder diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala deleted file mode 100644 index 42cf8f7be..000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala +++ /dev/null @@ -1,21 +0,0 @@ -package cromwell.backend.impl.jes.callcaching - -import akka.event.LoggingAdapter -import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest -import cromwell.backend.impl.jes.JesBackendInitializationData - -import scala.util.{Failure, Try} - -private[jes] object JesBackendFileHashing { - def getCrc32c(singleFileHashRequest: SingleFileHashRequest, log: LoggingAdapter): Try[String] = { - def usingJesInitData(jesInitData: JesBackendInitializationData) = for { - path <- jesInitData.workflowPaths.getPath(singleFileHashRequest.file.valueString) - crc32c <- jesInitData.workflowPaths.getHash(path) - } yield crc32c - - singleFileHashRequest.initializationData match { - case Some(jesInitData: JesBackendInitializationData) => usingJesInitData(jesInitData) - case _ => Failure(new IllegalArgumentException("Need JesBackendInitializationData to generate a GCS CRC32C hash")) - } - } -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashingActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashingActor.scala new file mode 100644 index 000000000..af65a07d8 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashingActor.scala @@ -0,0 +1,6 @@ +package cromwell.backend.impl.jes.callcaching + +import cromwell.backend.standard.callcaching.{StandardFileHashingActor, StandardFileHashingActorParams} +import 
cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder + +class JesBackendFileHashingActor(standardParams: StandardFileHashingActorParams) extends StandardFileHashingActor(standardParams) with GcsBatchCommandBuilder diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala index eb5272cbd..1b6a406b5 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala @@ -19,7 +19,7 @@ import cromwell.core.callcaching.CallCachingEligible import cromwell.core.labels.Labels import cromwell.core.logging.JobLogger import cromwell.core.path.{DefaultPathBuilder, PathBuilder} -import cromwell.filesystems.gcs.auth.GoogleAuthMode.NoAuthMode +import cromwell.filesystems.gcs.auth.GoogleAuthMode.MockAuthMode import cromwell.filesystems.gcs.{GcsPath, GcsPathBuilder, GcsPathBuilderFactory} import cromwell.util.SampleWdl import org.scalatest._ @@ -33,12 +33,13 @@ import wdl4s.values.{WdlArray, WdlFile, WdlMap, WdlString, WdlValue} import scala.concurrent.duration._ import scala.concurrent.{Await, ExecutionContext, Future, Promise} +import scala.language.postfixOps import scala.util.{Success, Try} class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackendJobExecutionActorSpec") with FlatSpecLike with Matchers with ImplicitSender with Mockito with BackendSpec { - val mockPathBuilder: GcsPathBuilder = GcsPathBuilderFactory(NoAuthMode).withOptions(WorkflowOptions.empty) + val mockPathBuilder: GcsPathBuilder = GcsPathBuilderFactory(MockAuthMode, "cromwell-test").withOptions(WorkflowOptions.empty) import JesTestConfig._ @@ -93,11 +94,13 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend promise: 
Promise[BackendJobExecutionResponse], jesConfiguration: JesConfiguration, functions: JesExpressionFunctions = TestableJesExpressionFunctions, - jesSingletonActor: ActorRef = emptyActor) = { + jesSingletonActor: ActorRef = emptyActor, + ioActor: ActorRef = mockIoActor) = { this( DefaultStandardAsyncExecutionActorParams( JesAsyncBackendJobExecutionActor.JesOperationIdKey, emptyActor, + ioActor, jobDescriptor, jesConfiguration.configurationDescriptor, Option(buildInitializationData(jobDescriptor, jesConfiguration)), @@ -177,11 +180,17 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend def buildPreemptibleTestActorRef(attempt: Int, preemptible: Int): TestActorRef[TestableJesJobExecutionActor] = { val jobDescriptor = buildPreemptibleJobDescriptor(attempt, preemptible) - val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) + val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), + jesConfiguration, + TestableJesExpressionFunctions, + emptyActor, + failIoActor)) TestActorRef(props, s"TestableJesJobExecutionActor-${jobDescriptor.workflowDescriptor.id}") } behavior of "JesAsyncBackendJobExecutionActor" + + val timeout = 5 seconds { // Set of "handle call failures appropriately with respect to preemption" tests val expectations = Table( @@ -225,8 +234,9 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val failedStatus = Failed(10, Option("14: VM XXX shut down unexpectedly."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) val executionResult = jesBackend.handleExecutionResult(failedStatus, handle) - executionResult.isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true - val failedHandle = executionResult.asInstanceOf[FailedNonRetryableExecutionHandle] + val result = Await.result(executionResult, timeout) + result.isInstanceOf[FailedNonRetryableExecutionHandle] shouldBe true + val failedHandle = 
result.asInstanceOf[FailedNonRetryableExecutionHandle] failedHandle.returnCode shouldBe None } @@ -238,8 +248,9 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val failedStatus = Failed(10, Option("14: VM XXX shut down unexpectedly."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) val executionResult = jesBackend.handleExecutionResult(failedStatus, handle) - executionResult.isInstanceOf[FailedRetryableExecutionHandle] shouldBe true - val retryableHandle = executionResult.asInstanceOf[FailedRetryableExecutionHandle] + val result = Await.result(executionResult, timeout) + result.isInstanceOf[FailedRetryableExecutionHandle] shouldBe true + val retryableHandle = result.asInstanceOf[FailedRetryableExecutionHandle] retryableHandle.throwable.isInstanceOf[PreemptedException] shouldBe true retryableHandle.returnCode shouldBe None val preemptedException = retryableHandle.throwable.asInstanceOf[PreemptedException] @@ -254,8 +265,9 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val failedStatus = Failed(10, Option("14: VM XXX shut down unexpectedly."), Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) val executionResult = jesBackend.handleExecutionResult(failedStatus, handle) - executionResult.isInstanceOf[FailedRetryableExecutionHandle] shouldBe true - val retryableHandle = executionResult.asInstanceOf[FailedRetryableExecutionHandle] + val result = Await.result(executionResult, timeout) + result.isInstanceOf[FailedRetryableExecutionHandle] shouldBe true + val retryableHandle = result.asInstanceOf[FailedRetryableExecutionHandle] retryableHandle.throwable.isInstanceOf[PreemptedException] shouldBe true retryableHandle.returnCode shouldBe None val preemptedException2 = retryableHandle.throwable.asInstanceOf[PreemptedException] @@ -271,7 +283,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend def 
checkFailedResult(errorCode: Int, errorMessage: Option[String]): ExecutionHandle = { val failed = Failed(errorCode, errorMessage, Seq.empty, Option("fakeMachine"), Option("fakeZone"), Option("fakeInstance")) - jesBackend.handleExecutionResult(failed, handle) + Await.result(jesBackend.handleExecutionResult(failed, handle), timeout) } checkFailedResult(10, Option("15: Other type of error.")) diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala index 2fa9f6bd2..f702c3860 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala @@ -144,7 +144,8 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe private def getJesBackendProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], jesConfiguration: JesConfiguration): Props = { - val params = JesInitializationActorParams(workflowDescriptor, calls, jesConfiguration, emptyActor) + val ioActor = mockIoActor + val params = JesInitializationActorParams(workflowDescriptor, ioActor, calls, jesConfiguration, emptyActor) Props(new JesInitializationActor(params)).withDispatcher(BackendDispatcher) } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala index 5b72db08b..c8debfa0d 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala @@ -26,13 +26,14 @@ class JesJobExecutionActorSpec extends TestKitSuite("JesJobExecutionActorSpec") val jesWorkflowInfo = mock[JesConfiguration] val 
initializationData = mock[JesBackendInitializationData] val serviceRegistryActor = system.actorOf(Props.empty) + val ioActor = system.actorOf(Props.empty) val jesBackendSingletonActor = Option(system.actorOf(Props.empty)) initializationData.jesConfiguration returns jesWorkflowInfo val parent = TestProbe() val deathwatch = TestProbe() - val params = DefaultStandardSyncExecutionActorParams(JesAsyncBackendJobExecutionActor.JesOperationIdKey, serviceRegistryActor, + val params = DefaultStandardSyncExecutionActorParams(JesAsyncBackendJobExecutionActor.JesOperationIdKey, serviceRegistryActor, ioActor, jobDescriptor, null, Option(initializationData), jesBackendSingletonActor, classOf[JesAsyncBackendJobExecutionActor]) val testJJEA = TestActorRef[TestJesJobExecutionActor]( @@ -57,6 +58,7 @@ class JesJobExecutionActorSpec extends TestKitSuite("JesJobExecutionActorSpec") val jesWorkflowInfo = mock[JesConfiguration] val initializationData = mock[JesBackendInitializationData] val serviceRegistryActor = system.actorOf(Props.empty) + val ioActor = system.actorOf(Props.empty) val jesBackendSingletonActor = Option(system.actorOf(Props.empty)) initializationData.jesConfiguration returns jesWorkflowInfo @@ -64,7 +66,7 @@ class JesJobExecutionActorSpec extends TestKitSuite("JesJobExecutionActorSpec") val parent = TestProbe() val deathwatch = TestProbe() val jabjeaConstructionPromise = Promise[ActorRef]() - val params = DefaultStandardSyncExecutionActorParams(JesAsyncBackendJobExecutionActor.JesOperationIdKey, serviceRegistryActor, + val params = DefaultStandardSyncExecutionActorParams(JesAsyncBackendJobExecutionActor.JesOperationIdKey, serviceRegistryActor, ioActor, jobDescriptor, null, Option(initializationData), jesBackendSingletonActor, classOf[JesAsyncBackendJobExecutionActor]) val testJJEA = TestActorRef[TestJesJobExecutionActor]( diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala 
b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala index afe8011c3..867647988 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala @@ -1,7 +1,7 @@ package cromwell.backend.impl.sfs.config import akka.event.LoggingAdapter -import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest +import cromwell.backend.standard.callcaching.StandardFileHashingActor.SingleFileHashRequest import cromwell.core.path.DefaultPathBuilder import cromwell.util.TryWithResource._ diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashingActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashingActor.scala new file mode 100644 index 000000000..270a577be --- /dev/null +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashingActor.scala @@ -0,0 +1,26 @@ +package cromwell.backend.impl.sfs.config + +import akka.actor.Props +import com.typesafe.config.Config +import cromwell.backend.standard.callcaching.StandardFileHashingActor.SingleFileHashRequest +import cromwell.backend.standard.callcaching.{StandardFileHashingActor, StandardFileHashingActorParams} +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder +import net.ceedubs.ficus.Ficus._ + +import scala.util.Try + +object ConfigBackendFileHashingActor { + def props(standardParams: StandardFileHashingActorParams) = Props(new ConfigBackendFileHashingActor(standardParams)) +} + +class ConfigBackendFileHashingActor(standardParams: StandardFileHashingActorParams) extends StandardFileHashingActor(standardParams) with GcsBatchCommandBuilder { + + lazy val hashingStrategy: ConfigHashingStrategy = { + 
configurationDescriptor.backendConfig.as[Option[Config]]("filesystems.local.caching") map ConfigHashingStrategy.apply getOrElse ConfigHashingStrategy.defaultStrategy + } + + override def customHashStrategy(fileRequest: SingleFileHashRequest): Option[Try[String]] = { + log.debug(hashingStrategy.toString) + Option(hashingStrategy.getHash(fileRequest, log)) + } +} diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendLifecycleActorFactory.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendLifecycleActorFactory.scala index ca17199d6..c30fb1d6e 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendLifecycleActorFactory.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendLifecycleActorFactory.scala @@ -2,9 +2,9 @@ package cromwell.backend.impl.sfs.config import com.typesafe.config.Config import cromwell.backend.BackendConfigurationDescriptor -import cromwell.backend.callcaching.FileHashingActor.FileHashingFunction import cromwell.backend.impl.sfs.config.ConfigConstants._ import cromwell.backend.sfs._ +import cromwell.backend.standard.callcaching.StandardFileHashingActor import cromwell.core.JobExecutionToken.JobExecutionTokenType import net.ceedubs.ficus.Ficus._ import org.slf4j.{Logger, LoggerFactory} @@ -32,12 +32,7 @@ class ConfigBackendLifecycleActorFactory(name: String, val configurationDescript classOf[DispatchedConfigAsyncJobExecutionActor] } - override lazy val fileHashingFunction: Option[FileHashingFunction] = { - logger.debug(hashingStrategy.toString) - Option(FileHashingFunction(hashingStrategy.getHash)) - } - - override lazy val fileHashingActorCount: Int = 5 + override lazy val fileHashingActorClassOption: Option[Class[_ <: StandardFileHashingActor]] = Option(classOf[ConfigBackendFileHashingActor]) override val jobExecutionTokenType: JobExecutionTokenType = { val concurrentJobLimit = 
configurationDescriptor.backendConfig.as[Option[Int]]("concurrent-job-limit") diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala index 47136bae0..a497ee98b 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala @@ -4,8 +4,8 @@ import java.io.FileNotFoundException import akka.event.LoggingAdapter import com.typesafe.config.Config -import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest import cromwell.backend.standard.StandardInitializationData +import cromwell.backend.standard.callcaching.StandardFileHashingActor.SingleFileHashRequest import cromwell.core.path.{Path, PathFactory} import cromwell.util.TryWithResource._ import net.ceedubs.ficus.Ficus._ diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala index 85bd0f1d9..7e991a6ec 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala @@ -1,6 +1,7 @@ package cromwell.backend.sfs import cromwell.backend.standard._ +import cromwell.backend.standard.callcaching.StandardCacheHitCopyingActor /** * A factory that can be extended for any shared file system implementation. 
diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala index 396a33849..b29816600 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala @@ -1,11 +1,32 @@ package cromwell.backend.sfs -import cromwell.backend.standard.{StandardCacheHitCopyingActor, StandardCacheHitCopyingActorParams} -import cromwell.core.path.Path +import cromwell.backend.standard.callcaching.StandardCacheHitCopyingActor.PathPair +import cromwell.backend.standard.callcaching.{StandardCacheHitCopyingActor, StandardCacheHitCopyingActorParams} +import cromwell.filesystems.gcs.batch.GcsBatchCommandBuilder +import lenthall.util.TryUtil +import cats.instances.try_._ +import cats.syntax.functor._ + +import scala.util.{Failure, Try} class SharedFileSystemCacheHitCopyingActor(standardParams: StandardCacheHitCopyingActorParams) - extends StandardCacheHitCopyingActor(standardParams) with SharedFileSystemJobCachingActorHelper { - override protected def duplicate(source: Path, destination: Path): Unit = { - sharedFileSystem.cacheCopy(source, destination).get + extends StandardCacheHitCopyingActor(standardParams) with SharedFileSystemJobCachingActorHelper with GcsBatchCommandBuilder { + override protected def duplicate(copyPairs: Set[PathPair]): Option[Try[Unit]] = Option { + val copies = copyPairs map { + case (source, destination) => + sharedFileSystem.cacheCopy(source, destination) + } + + TryUtil.sequence(copies.toList).void recoverWith { + case failure => + // If one or more of the copies failed, we want to delete all the files that were successfully copied + // before that. 
Especially if they've been symlinked, leaving them could lead to rewriting the original + // files when the job gets re-run + // TODO: this could be done more generally in the StandardCacheHitCopyingActor + copyPairs foreach { + case (_, dst) => dst.delete(swallowIOExceptions = true) + } + Failure(failure) + } } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala index 6ca29cafe..937ee317f 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala @@ -21,8 +21,9 @@ class SharedFileSystemInitializationActor(standardParams: StandardInitialization */ lazy val gcsPathBuilderFactory: Option[GcsPathBuilderFactory] = { configurationDescriptor.backendConfig.as[Option[String]]("filesystems.gcs.auth") map { configAuth => - GoogleConfiguration(configurationDescriptor.globalConfig).auth(configAuth) match { - case Valid(auth) => GcsPathBuilderFactory(auth) + val googleConfiguration = GoogleConfiguration(configurationDescriptor.globalConfig) + googleConfiguration.auth(configAuth) match { + case Valid(auth) => GcsPathBuilderFactory(auth, googleConfiguration.applicationName) case Invalid(error) => throw new MessageAggregation { override def exceptionContext: String = "Failed to parse gcs auth configuration" diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala index 588d1a5c4..9c3629c80 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala +++ 
b/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala @@ -4,9 +4,9 @@ import java.util.UUID import akka.event.LoggingAdapter import com.typesafe.config.{ConfigFactory, ConfigValueFactory} -import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest import cromwell.backend.io.WorkflowPaths import cromwell.backend.standard.StandardInitializationData +import cromwell.backend.standard.callcaching.StandardFileHashingActor.SingleFileHashRequest import cromwell.core.path.{DefaultPathBuilder, Path} import org.apache.commons.codec.digest.DigestUtils import org.scalatest.prop.TableDrivenPropertyChecks diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala index 085ad173d..ffc623361 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala @@ -38,7 +38,7 @@ class SharedFileSystemInitializationActorSpec extends TestKitSuite("SharedFileSy private def getActorRef(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], conf: BackendConfigurationDescriptor) = { - val params = DefaultInitializationActorParams(workflowDescriptor, calls, emptyActor, conf) + val params = DefaultInitializationActorParams(workflowDescriptor, emptyActor, calls, emptyActor, conf) val props = Props(new SharedFileSystemInitializationActor(params)) system.actorOf(props, "SharedFileSystemInitializationActor") } diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala index 3d9f63e54..d39b8b671 100644 --- 
a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala @@ -2,10 +2,11 @@ package cromwell.backend.sfs import akka.actor.{ActorSystem, Props} import akka.testkit.TestActorRef -import cromwell.backend.standard._ import cromwell.backend.io.WorkflowPathsWithDocker +import cromwell.backend.standard._ import cromwell.backend.validation.{DockerValidation, RuntimeAttributesValidation} import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor} +import cromwell.core.SimpleIoActor class TestLocalAsyncJobExecutionActor(override val standardParams: StandardAsyncExecutionActorParams) extends BackgroundAsyncJobExecutionActor { @@ -32,13 +33,14 @@ object TestLocalAsyncJobExecutionActor { def createBackendRef(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor) (implicit system: ActorSystem): TestActorRef[StandardSyncExecutionActor] = { val emptyActor = system.actorOf(Props.empty) + val ioActor = system.actorOf(SimpleIoActor.props) val workflowPaths = new WorkflowPathsWithDocker(jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig) val initializationData = new StandardInitializationData(workflowPaths, StandardValidatedRuntimeAttributesBuilder.default.withValidation(DockerValidation.optional), classOf[SharedFileSystemExpressionFunctions]) val asyncClass = classOf[TestLocalAsyncJobExecutionActor] - val params = DefaultStandardSyncExecutionActorParams(SharedFileSystemAsyncJobExecutionActor.JobIdKey, emptyActor, + val params = DefaultStandardSyncExecutionActorParams(SharedFileSystemAsyncJobExecutionActor.JobIdKey, emptyActor, ioActor, jobDescriptor, configurationDescriptor, Option(initializationData), None, asyncClass) TestActorRef(new StandardSyncExecutionActor(params)) diff --git 
a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala index 6fd75323e..40d625953 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala @@ -9,13 +9,14 @@ import wdl4s.TaskCall import wdl4s.expression.WdlStandardLibraryFunctions case class SparkBackendFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor, actorSystem: ActorSystem) extends BackendLifecycleActorFactory { - override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = { + override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = { Option(SparkInitializationActor.props(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) } override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], serviceRegistryActor: ActorRef, + ioActor: ActorRef, backendSingletonActor: Option[ActorRef]): Props = { SparkJobExecutionActor.props(jobDescriptor, configurationDescriptor) } diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendFileHashing.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendFileHashing.scala index d6f8d3454..8fcb1f055 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendFileHashing.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendFileHashing.scala @@ -1,7 +1,7 @@ package cromwell.backend.impl.tes import akka.event.LoggingAdapter -import 
cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest +import cromwell.backend.standard.callcaching.StandardFileHashingActor.SingleFileHashRequest import cromwell.core.path.DefaultPathBuilder import cromwell.util.TryWithResource._ diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendLifecycleActorFactory.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendLifecycleActorFactory.scala index 41c03f083..7b2bf5f1a 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendLifecycleActorFactory.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesBackendLifecycleActorFactory.scala @@ -2,7 +2,6 @@ package cromwell.backend.impl.tes import akka.actor.ActorRef import cromwell.backend._ -import cromwell.backend.callcaching.FileHashingActor.FileHashingFunction import cromwell.backend.standard._ import cromwell.core.JobExecutionToken.JobExecutionTokenType import net.ceedubs.ficus.Ficus._ @@ -25,10 +24,8 @@ case class TesBackendLifecycleActorFactory(name: String, configurationDescriptor JobExecutionTokenType(name, concurrentJobLimit) } - override def workflowInitializationActorParams(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], + override def workflowInitializationActorParams(workflowDescriptor: BackendWorkflowDescriptor, ioActor: ActorRef, calls: Set[TaskCall], serviceRegistryActor: ActorRef): StandardInitializationActorParams = { TesInitializationActorParams(workflowDescriptor, calls, tesConfiguration, serviceRegistryActor) } - - override lazy val fileHashingFunction: Option[FileHashingFunction] = Option(FileHashingFunction(TesBackendFileHashing.getMd5Result)) } From 69280434c91ff502d60f0052a339356baf8ccf16 Mon Sep 17 00:00:00 2001 From: Chris Llanwarne Date: Thu, 9 Mar 2017 13:34:03 -0500 Subject: [PATCH 005/170] Workflow name length limit --- .../MaterializeWorkflowDescriptorActor.scala | 14 ++++++++++-- 
.../services/metadata/MetadataService.scala | 25 ++++++++++++++++------ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala index 278edc2d7..29434965c 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala @@ -362,7 +362,7 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val results = WdlNamespaceWithWorkflow.load(w.wdlSource, importResolvers) importsDir.delete(swallowIOExceptions = true) results match { - case Success(ns) => ns.validNel + case Success(ns) => validateWorkflowNameLengths(ns) case Failure(f) => f.getMessage.invalidNel } } @@ -370,6 +370,16 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, validateImportsDirectory(w.importsZip) flatMap importsAsNamespace } + private def validateWorkflowNameLengths(namespace: WdlNamespaceWithWorkflow): ErrorOr[WdlNamespaceWithWorkflow] = { + def allWorkflowNames(n: WdlNamespace): Seq[String] = n.workflows.map(_.unqualifiedName) ++ n.namespaces.flatMap(allWorkflowNames) + val tooLong = allWorkflowNames(namespace).filter(_.length >= 100) + if (tooLong.nonEmpty) { + ("Workflow names must be shorter than 100 characters: " + tooLong.mkString(" ")).invalidNel + } else { + namespace.validNel + } + } + private def validateNamespace(source: WorkflowSourceFilesCollection): ErrorOr[WdlNamespaceWithWorkflow] = { try { source match { @@ -381,7 +391,7 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, List.empty } // This .get is ok because we're already in a try/catch. 
- WdlNamespaceWithWorkflow.load(w.wdlSource, importResolvers).get.validNel + validateWorkflowNameLengths(WdlNamespaceWithWorkflow.load(w.wdlSource, importResolvers).get) } } catch { case e: Exception => s"Unable to load namespace from workflow: ${e.getMessage}".invalidNel diff --git a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala index 2aec11781..efa5d8d95 100644 --- a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala +++ b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala @@ -6,7 +6,7 @@ import akka.actor.ActorRef import cats.data.NonEmptyList import cromwell.core.{FullyQualifiedName, JobKey, WorkflowId, WorkflowState} import cromwell.services.ServiceRegistryActor.ServiceRegistryMessage -import lenthall.exception.ThrowableAggregation +import lenthall.exception.{MessageAggregation, ThrowableAggregation} import wdl4s.values._ import scala.util.Random @@ -130,24 +130,37 @@ object MetadataService { def throwableToMetadataEvents(metadataKey: MetadataKey, t: Throwable, failureIndex: Int = Random.nextInt(Int.MaxValue)): List[MetadataEvent] = { val emptyCauseList = List(MetadataEvent.empty(metadataKey.copy(key = metadataKey.key + s"[$failureIndex]:causedBy[]"))) + val metadataKeyAndFailureIndex = s"${metadataKey.key}[$failureIndex]" t match { case aggregation: ThrowableAggregation => - val message = List(MetadataEvent(metadataKey.copy(key = s"${metadataKey.key}[$failureIndex]:message"), MetadataValue(aggregation.exceptionContext))) + val message = List(MetadataEvent(metadataKey.copy(key = s"$metadataKeyAndFailureIndex:message"), MetadataValue(aggregation.exceptionContext))) val indexedCauses = aggregation.throwables.toList.zipWithIndex val indexedCauseEvents = if (indexedCauses.nonEmpty) { indexedCauses flatMap { case (cause, index) => - val causeKey = metadataKey.copy(key = metadataKey.key + s"[$failureIndex]:causedBy") + val 
causeKey = metadataKey.copy(key = s"$metadataKeyAndFailureIndex:causedBy") throwableToMetadataEvents(causeKey, cause, index) } } else { emptyCauseList } - + message ++ indexedCauseEvents + case aggregation: MessageAggregation => + val message = List(MetadataEvent(metadataKey.copy(key = s"$metadataKeyAndFailureIndex:message"), MetadataValue(aggregation.exceptionContext))) + val indexedCauses = aggregation.errorMessages.toList.zipWithIndex + val indexedCauseEvents = if (indexedCauses.nonEmpty) { + indexedCauses flatMap { case (cause, index) => + val causeMessageKey = metadataKey.copy(key = s"$metadataKeyAndFailureIndex:causedBy[$index]:message") + val causeCausedByKey = metadataKey.copy(key = s"$metadataKeyAndFailureIndex:causedBy[$index]:causedBy[]") + List(MetadataEvent(causeMessageKey, MetadataValue(cause)), MetadataEvent.empty(causeCausedByKey)) + } + } else { + emptyCauseList + } message ++ indexedCauseEvents case other => - val message = List(MetadataEvent(metadataKey.copy(key = s"${metadataKey.key}[$failureIndex]:message"), MetadataValue(t.getMessage))) - val causeKey = metadataKey.copy(key = metadataKey.key + s"[$failureIndex]:causedBy") + val message = List(MetadataEvent(metadataKey.copy(key = s"$metadataKeyAndFailureIndex:message"), MetadataValue(t.getMessage))) + val causeKey = metadataKey.copy(key = s"$metadataKeyAndFailureIndex:causedBy") val cause = Option(t.getCause) map { cause => throwableToMetadataEvents(causeKey, cause, 0) } getOrElse emptyCauseList message ++ cause } From b1039f76eafe962fa3a392c468efd89f4c7b7af1 Mon Sep 17 00:00:00 2001 From: Jeff Gentry Date: Tue, 14 Mar 2017 14:45:24 -0400 Subject: [PATCH 006/170] Have our config instructions match what we tell people (#2058) --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6080fa0bf..b56c07287 100644 --- a/README.md +++ b/README.md @@ -417,7 +417,7 @@ For many examples on how to use WDL see [the WDL 
site](https://github.com/broadi # Configuring Cromwell -Cromwell's default configuration file is located at `src/main/resources/application.conf`. +Cromwell's default configuration file is located at `core/src/main/resources/reference.conf`. The configuration file is in [Hocon](https://github.com/typesafehub/config/blob/master/HOCON.md#hocon-human-optimized-config-object-notation) which means the configuration file can specify configuration as JSON-like stanzas like: @@ -445,10 +445,11 @@ This allows any value to be overridden on the command line: java -Dwebservice.port=8080 cromwell.jar ... ``` -It is recommended that one copies `src/main/resources/application.conf`, modify it, then link to it via: + +To customize configuration it is recommended that one copies relevant stanzas from `core/src/main/resources/reference.conf` into a new file, modify it as appropriate, then pass it to Cromwell via: ``` -java -Dconfig.file=/path/to/application.conf cromwell.jar ... +java -Dconfig.file=/path/to/yourOverrides.conf cromwell.jar ... 
``` ## I/O From ca857336f85b7f1cea028417bd7fd403080ea2e7 Mon Sep 17 00:00:00 2001 From: Adam Struck Date: Fri, 17 Mar 2017 13:37:57 -0700 Subject: [PATCH 007/170] stage required config changes for funnel PR #15 (#2070) --- src/bin/travis/resources/tes.conf | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/bin/travis/resources/tes.conf b/src/bin/travis/resources/tes.conf index c32460328..f1a0ff157 100644 --- a/src/bin/travis/resources/tes.conf +++ b/src/bin/travis/resources/tes.conf @@ -7,9 +7,13 @@ Storage: - /cromwell-executions - /tmp/ DBPath: /tmp/tes_task.db -Schedulers: - Local: - NumWorkers: 4 +Scheduler: local Worker: LogLevel: info - Timeout: 1 + Timeout: -1 + # Funnel (TES implementation) respects resource reqs + # Defaults 1 cpu 2 GB ram would make centaur take too long to run + Resources: + cpus: 100 + ram: 200 + disk: 1000 From 8f2b036accb311853edbf184168ee2575cd7cd85 Mon Sep 17 00:00:00 2001 From: Himanshu Jain Date: Tue, 21 Mar 2017 08:18:12 -0700 Subject: [PATCH 008/170] Update README.md (#2043) * Update README.md for richer doc on spark backend --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b56c07287..05cd3a267 100644 --- a/README.md +++ b/README.md @@ -1352,7 +1352,13 @@ It supports the following Spark deploy modes: ### Configuring Spark Project -When using Spark backend uncomment the following Spark configuration in the application.conf file +Cromwell's default configuration file is located at `core/src/main/resources/reference.conf` + +To customize configuration it is recommended that one copies relevant stanzas from `core/src/main/resources/reference.conf` into a new file, modify it as appropriate, then pass it to Cromwell via: + +java -Dconfig.file=/path/to/yourOverrides.conf cromwell.jar + +Spark configuration stanza is as follows: ```conf Spark { From bfe0911d5e43efc42f74ea7e42added942750ca8 Mon Sep 17 00:00:00 2001 From: Thibault Jeandet 
Date: Tue, 21 Mar 2017 11:53:20 -0400 Subject: [PATCH 009/170] send token file --- src/bin/travis/resources/centaur.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/bin/travis/resources/centaur.wdl b/src/bin/travis/resources/centaur.wdl index 8795c756b..a45333e5b 100644 --- a/src/bin/travis/resources/centaur.wdl +++ b/src/bin/travis/resources/centaur.wdl @@ -4,7 +4,6 @@ task centaur { File pem File cromwell_jar File token - String secret = read_string(token) command<<< mkdir -p /cromwell_root/tmp/ivy2 @@ -13,7 +12,7 @@ task centaur { cd centaur git checkout ${centaur_branch} cd .. - centaur/test_cromwell.sh -j${cromwell_jar} -c${conf} -r/cromwell_root -t${secret} -elocaldockertest -p100 + centaur/test_cromwell.sh -j${cromwell_jar} -c${conf} -r/cromwell_root -t${token} -elocaldockertest -p100 >>> output { From 3e577223845ee8d20cab38590579b65fa73fe64e Mon Sep 17 00:00:00 2001 From: Thib Date: Tue, 21 Mar 2017 18:22:07 -0400 Subject: [PATCH 010/170] WDL and inputs to release cromwell (#2065) * WDL and inputs to release cromwell --- release/release_inputs.json | 4 + release/release_workflow.wdl | 318 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 322 insertions(+) create mode 100644 release/release_inputs.json create mode 100644 release/release_workflow.wdl diff --git a/release/release_inputs.json b/release/release_inputs.json new file mode 100644 index 000000000..dfe1ba11f --- /dev/null +++ b/release/release_inputs.json @@ -0,0 +1,4 @@ +{ + "release_cromwell.organization": "broadinstitute", + "release_cromwell.do_release.dryRun": false +} diff --git a/release/release_workflow.wdl b/release/release_workflow.wdl new file mode 100644 index 000000000..b701bdcf2 --- /dev/null +++ b/release/release_workflow.wdl @@ -0,0 +1,318 @@ +task do_release { + # Repo to release + String repo + # Current version being released + String releaseV + # Next version + String nextV + # Command that will update the appropriate file for the current 
release + String updateVersionCommand + + # Commands that will update previously released/published dependencies in this repo + Array[String] dependencyCommands = [] + + # When true, nothing will be pushed to github, allows for some level of testing + Boolean dryRun = false + + # Can be swapped out to try this on a fork + String organization + + command { + set -e + set -x + + # Clone repo and checkout develop + git clone https://github.com/${organization}/${repo}.git + cd ${repo} + git checkout develop + git pull --rebase + + # Expect the version number on develop to be the version TO BE RELEASED + echo "Releasing ${organization}/${repo} ${releaseV}${true=" - This a dry run, push commands won't be executed" false = "" dryRun}" + + echo "Updating dependencies" + ${sep='\n' dependencyCommands} + + # Make sure tests pass + sbt update + JAVA_OPTS=-XX:MaxMetaspaceSize=512m sbt test + + git add . + # If there is nothing to commit, git commit will return 1 which will fail the script. + # This ensures we only commit if build.sbt was effectively updated + git diff-index --quiet HEAD || git commit -m "Update ${repo} version to ${releaseV}" + + # wdl4s needs a scala docs update + if [ ${repo} == "wdl4s" ]; then + + # Generate new scaladoc + sbt 'set scalacOptions in (Compile, doc) := List("-skip-packages", "better")' doc + git checkout gh-pages + mv target/scala-2.11/api ${releaseV} + git add ${releaseV} + + # Update latest pointer + git rm --ignore-unmatch latest + ln -s ${releaseV} latest + git add latest + + git diff-index --quiet HEAD || git commit -m "Update Scaladoc" + git push ${true="--dry-run" false="" dryRun} origin gh-pages + + # Update badges on README + git checkout develop + curl -o scaladoc.png https://img.shields.io/badge/scaladoc-${releaseV}-blue.png + curl -o version.png https://img.shields.io/badge/version-${releaseV}-blue.png + + git add scaladoc.png + git add version.png + + git diff-index --quiet HEAD || git commit -m "Update README badges" + git push 
${true="--dry-run" false="" dryRun} origin develop + fi + + # Merge develop into master + git checkout master + git pull --rebase + git merge develop + + # Pin centaur for cromwell + if [ ${repo} == "cromwell" ]; then + centaurDevelopHEAD=$(git ls-remote git://github.com/${organization}/centaur.git | grep refs/heads/develop | cut -f 1) + sed -i '' s/CENTAUR_BRANCH=.*/CENTAUR_BRANCH="$centaurDevelopHEAD"/g .travis.yml + git add .travis.yml + git commit -m "Pin release to centaur branch" + fi + + # Tag the release + git tag ${releaseV} + + # Push master and push the tags + git push ${true="--dry-run" false="" dryRun} origin master + git push ${true="--dry-run" false="" dryRun} --tags + + # Create and push the hotfix branch + git checkout -b ${releaseV}_hotfix + git push origin ${releaseV}_hotfix + + # Assemble jar for cromwell + if [ ${repo} == "cromwell" ]; then + sbt -Dproject.version=${releaseV} -Dproject.isSnapshot=false assembly + fi + + # Update develop to point to next release version + git checkout develop + ${updateVersionCommand} + git add . + git diff-index --quiet HEAD || git commit -m "Update ${repo} version from ${releaseV} to ${nextV}" + git push ${true="--dry-run" false="" dryRun} origin develop + + pwd > executionDir.txt + } + + output { + String version = releaseV + String executionDir = read_string(repo + "/executionDir.txt") + } +} + +task wait_for_artifactory { + String repo + String version + + command <<< + checkIfPresent() { + isPresent=$(curl -s --head https://artifactory.broadinstitute.org/artifactory/simple/libs-release-local/org/broadinstitute/${repo}/${version}/ | head -n 1 | grep -q "HTTP/1.[01] [23]..") + } + + elapsedTime=0 + checkIfPresent + + # Allow 20 minutes for the file to appear in artifactory + while [ $? -ne 0 ] && [ $elapsedTime -lt 1200 ]; do + sleep 10; + let "elapsedTime+=10" + checkIfPresent + done + + exit $? 
+ >>> + + output { + String publishedVersion = version + } +} + +# Emits (via stdout) a sed command that pins dependencyName to newVersion inside dependencyFilePath. +task create_update_dependency_command { + String dependencyName + String newVersion + String dependencyFilePath = "build.sbt" + + command { + echo "sed -i '' \"s/${dependencyName}[[:space:]]=.*/${dependencyName} = \\\"${newVersion}\\\"/g\" ${dependencyFilePath}" + } + + output { + String updateCommand = read_string(stdout()) + } +} + +# Fetches <file> from the repo's develop branch, extracts the current version via <regexPrefix>, +# and derives the next version plus the concrete command to bump it (from updateCommandTemplate). +task versionPrep { + String organization + String repo + String file + String regexPrefix + String updateCommandTemplate + + # "$${bash_rematch}" in the command renders as ${BASH_REMATCH[1]} in the shell, + # dodging WDL's own ${} interpolation inside the <<< >>> block. + String bash_rematch = "{BASH_REMATCH[1]}" + command <<< + curl -o versionFile https://raw.githubusercontent.com/${organization}/${repo}/develop/${file} + regex="${regexPrefix}\"(([0-9]+\.)?([0-9]+))\"" + + if [[ $(cat versionFile) =~ $regex ]] + then + version="$${bash_rematch}" + echo $version > version + echo $version | perl -ne 'if (/^([0-9]+\.)?([0-9]+)$/) { $incr = $2 + 1; print "$1$incr\n" }' > nextVersion + else + exit 1 + fi + >>> + + output { + String version = read_string("version") + String nextVersion = read_string("nextVersion") + String updateCommand = sub(updateCommandTemplate, "<>", nextVersion) + } +} + +workflow release_cromwell { + String organization + + Pair[String, String] lenthallAsDependency = ("lenthallV", waitForLenthall.publishedVersion) + Pair[String, String] wdl4sAsDependency = ("wdl4sV", waitForWdl4s.publishedVersion) + + Array[Pair[String, String]] wdl4sDependencies = [lenthallAsDependency] + Array[Pair[String, String]] wdltoolDependencies = [wdl4sAsDependency] + Array[Pair[String, String]] cromwellDependencies = [lenthallAsDependency, wdl4sAsDependency] + + # Regex to find the line setting the current version + String dependencyRegexPrefix = "git\\.baseVersion[[:space:]]:=[[:space:]]" + # Template command to update the version + String dependencyTemplate = "sed -i '' \"s/git\\.baseVersion[[:space:]]:=.*/git.baseVersion := \\\"<>\\\",/g\" build.sbt" + + String cromwellTemplate = "sed -i ''
\"s/cromwellVersion[[:space:]]=.*/cromwellVersion = \\\"<>\\\"/g\" project/Version.scala" + String cromwellRegexPrefix = "cromwellVersion[[:space:]]=[[:space:]]" + + # Prepare releases by finding out the current version, next version, and update version command + call versionPrep as lenthallPrep { input: + organization = organization, + repo = "lenthall", + file = "build.sbt", + regexPrefix = dependencyRegexPrefix, + updateCommandTemplate = dependencyTemplate + } + + call versionPrep as wdl4sPrep { input: + organization = organization, + repo = "wdl4s", + file = "build.sbt", + regexPrefix = dependencyRegexPrefix, + updateCommandTemplate = dependencyTemplate + } + + call versionPrep as wdltoolPrep { input: + organization = organization, + repo = "wdltool", + file = "build.sbt", + regexPrefix = dependencyRegexPrefix, + updateCommandTemplate = dependencyTemplate + } + + call versionPrep as cromwellPrep { input: + organization = organization, + repo = "cromwell", + file = "project/Version.scala", + regexPrefix = cromwellRegexPrefix, + updateCommandTemplate = cromwellTemplate + } + + # Release calls + call do_release as release_lenthall { input: + organization = organization, + repo = "lenthall", + releaseV = lenthallPrep.version, + nextV = lenthallPrep.nextVersion, + updateVersionCommand = lenthallPrep.updateCommand + } + + call do_release as release_wdl4s { input: + organization = organization, + repo = "wdl4s", + releaseV = wdl4sPrep.version, + nextV = wdl4sPrep.nextVersion, + updateVersionCommand = wdl4sPrep.updateCommand, + dependencyCommands = wdl4sDependencyCommands.updateCommand + } + + call do_release as release_wdltool { input: + organization = organization, + repo = "wdltool", + releaseV = wdltoolPrep.version, + nextV = wdltoolPrep.nextVersion, + updateVersionCommand = wdltoolPrep.updateCommand, + dependencyCommands = wdltoolDependencyCommands.updateCommand + } + + call do_release as release_cromwell { input: + organization = organization, + repo = "cromwell", + releaseV
= cromwellPrep.version, + nextV = cromwellPrep.nextVersion, + updateVersionCommand = cromwellPrep.updateCommand, + dependencyCommands = cromwellDependencyCommands.updateCommand + } + + # NOTE(review): WDL calls are order-independent (Cromwell evaluates the dependency graph), + # so these waits and the scatters below legitimately feed the release calls declared above. + call wait_for_artifactory as waitForLenthall { input: repo = "lenthall_2.11", version = release_lenthall.version } + call wait_for_artifactory as waitForWdl4s { input: repo = "wdl4s_2.11", version = release_wdl4s.version } + + # Generates commands to update wdl4s dependencies + scatter(wdl4sDependency in wdl4sDependencies) { + String depName = wdl4sDependency.left + String versionName = wdl4sDependency.right + + call create_update_dependency_command as wdl4sDependencyCommands { input: + dependencyName = depName, + newVersion = versionName, + dependencyFilePath = "build.sbt" + } + } + + # Generates commands to update wdltool dependencies + scatter(wdltoolDependency in wdltoolDependencies) { + String depName = wdltoolDependency.left + String versionName = wdltoolDependency.right + + call create_update_dependency_command as wdltoolDependencyCommands { input: + dependencyName = depName, + newVersion = versionName, + dependencyFilePath = "build.sbt" + } + } + + # Generates commands to update cromwell dependencies + scatter(cromwellDependency in cromwellDependencies) { + String depName = cromwellDependency.left + String versionName = cromwellDependency.right + + call create_update_dependency_command as cromwellDependencyCommands { input: + dependencyName = depName, + newVersion = versionName, + dependencyFilePath = "project/Dependencies.scala" + } + } + + output { + String cromwellJar = release_cromwell.executionDir + "/target/scala-2.11/cromwell-" + cromwellPrep.version + ".jar" + } +} From 15b34cfd9954e85057f370b6d7a6e1272ab14f23 Mon Sep 17 00:00:00 2001 From: Ruchi Date: Wed, 22 Mar 2017 11:23:46 -0400 Subject: [PATCH 011/170] replace empty cells in custom_labels column with {} (#2076) --- .../migration/src/main/resources/changelog.xml | 1 +
.../workflow_store/replace_empty_custom_labels.sql | 3 ++ .../changesets/replace_empty_custom_labels.xml | 17 ++++++++ .../slick/tables/WorkflowStoreEntryComponent.scala | 2 +- .../cromwell/database/sql/SqlConverters.scala | 29 +++++++++---- .../database/sql/tables/WorkflowStoreEntry.scala | 2 +- .../execution/callcaching/CallCache.scala | 4 +- .../workflow/workflowstore/SqlWorkflowStore.scala | 15 ++++--- .../main/scala/cromwell/jobstore/SqlJobStore.scala | 4 +- project/Dependencies.scala | 12 +++--- .../metadata/impl/MetadataDatabaseAccess.scala | 2 +- .../cromwell/services/ServicesStoreSpec.scala | 47 +++++++++------------- 12 files changed, 85 insertions(+), 53 deletions(-) create mode 100644 database/migration/src/main/resources/changesets/migration/workflow_store/replace_empty_custom_labels.sql create mode 100644 database/migration/src/main/resources/changesets/replace_empty_custom_labels.xml diff --git a/database/migration/src/main/resources/changelog.xml b/database/migration/src/main/resources/changelog.xml index dc182d106..969496cdd 100644 --- a/database/migration/src/main/resources/changelog.xml +++ b/database/migration/src/main/resources/changelog.xml @@ -59,6 +59,7 @@ +